Merge pull request #3001 from chengzhichao-xydt/codex/workspace-guard-schemeless-url

fix(tools): allow scheme-less URLs in workspace guard
This commit is contained in:
Mauro
2026-06-05 08:54:59 +02:00
committed by GitHub
2 changed files with 129 additions and 0 deletions
+84
View File
@@ -1177,6 +1177,27 @@ func (t *ExecTool) guardCommand(command, cwd string) string {
}
}
// Skip scheme-less URL paths like "wttr.in/Beijing".
// When a /path is immediately preceded by a token that looks
// like a domain name and that token does NOT exist as a local
// filesystem entry, treat the path as part of a URL and skip
// workspace sandbox validation.
//
// The local-path-exists guard prevents symlink bypass: if
// "foo.bar" exists as a local symlink or directory, the path
// still undergoes full workspace validation (see #2965).
if loc[0] > 0 && raw[0] == '/' {
// Find the token immediately before the "/".
j := loc[0] - 1
for j >= 0 && !isShellTokenBoundary(cmd[j]) {
j--
}
token := cmd[j+1 : loc[0]]
if looksLikeDomain(token) && !localPathExists(cwd, token) {
continue
}
}
p, err := filepath.Abs(raw)
if err != nil {
continue
@@ -1219,6 +1240,69 @@ func (t *ExecTool) guardCommand(command, cwd string) string {
return ""
}
// isShellTokenBoundary reports whether b is one of the bytes the command
// guard treats as the edge of a shell token: space, tab, newline, carriage
// return, ':', ';', '|', '&', '<', '>', single quote, double quote or
// backtick. Every other byte is considered part of a token.
func isShellTokenBoundary(b byte) bool {
	// All boundary bytes, scanned linearly; the set is tiny and fixed.
	const boundaries = " \t\n\r:;|&<>'\"`"
	for i := 0; i < len(boundaries); i++ {
		if boundaries[i] == b {
			return true
		}
	}
	return false
}
// looksLikeDomain reports whether s has the shape of a DNS host name.
//
// s qualifies only when all of the following hold:
//   - it is at least three bytes long and contains a dot;
//   - its first byte is an ASCII letter or digit;
//   - every later byte is an ASCII letter, digit, '-', '_', '.', or a
//     non-ASCII byte (so internationalized names are still accepted);
//   - the text after the last dot is not a common file extension, which
//     rejects names such as "script.py", "main.go" or "app.exe".
//
// The character restriction is a security measure. guardCommand pairs this
// check with a filesystem lookup of the literal token, but a shell rewrites
// tokens containing expansion, globbing or quoting characters ('$', '*',
// '?', '\', braces, ...) before it resolves them. A token such as "a$X.b"
// or "a*.b" does not exist under its literal spelling yet can resolve to a
// real local entry, so it must never be classified as a domain.
func looksLikeDomain(s string) bool {
	if len(s) < 3 || !strings.ContainsRune(s, '.') {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
			// ASCII letters and digits are valid in every position.
		case i > 0 && (c == '-' || c == '_' || c == '.' || c >= 0x80):
			// Separators and non-ASCII bytes may not lead the name.
		default:
			return false
		}
	}
	// The dot is known to be present, so the slice below is always valid.
	// Compare the final label case-insensitively against file extensions.
	ext := strings.ToLower(s[strings.LastIndexByte(s, '.')+1:])
	return !commonFileExtension(ext)
}
// commonFileExtension reports whether ext (given without the leading dot)
// is on the fixed list of extensions that point to a local file rather
// than a domain TLD. The comparison is exact and case-sensitive, so callers
// should lower-case ext first.
//
// A few entries (for example "sh", "rs" and "md") are also valid top-level
// domains; looksLikeDomain does not recognise hosts under them as domains.
func commonFileExtension(ext string) bool {
	switch ext {
	// Source code.
	case "py", "js", "ts", "tsx", "jsx", "go", "rs", "rb", "php",
		"java", "c", "cpp", "h", "hpp", "cs", "swift", "kt", "scala":
	// Shell and batch scripts.
	case "sh", "bash", "zsh", "fish", "ps1", "bat", "cmd":
	// Text, markup and configuration.
	case "txt", "md", "rst", "log", "json", "yaml", "yml", "toml",
		"xml", "html", "css", "scss", "ini", "cfg", "conf", "env":
	// Executables, libraries and object files.
	case "exe", "dll", "so", "dylib", "lib", "a", "o", "obj":
	// Archives.
	case "zip", "tar", "gz", "bz2", "xz", "7z", "rar":
	// Images.
	case "png", "jpg", "jpeg", "gif", "svg", "ico", "bmp", "webp":
	// Audio and video.
	case "mp3", "mp4", "wav", "avi", "mov", "mkv", "flac":
	// Office documents.
	case "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx":
	// Keys and certificates.
	case "pub", "pem", "key", "crt", "cer", "p12", "pfx":
	// Backup, temporary and lock files.
	case "bak", "tmp", "swp", "lock":
	// Fonts.
	case "ttf", "otf", "woff", "woff2", "eot":
	// Installer packages.
	case "deb", "rpm", "apk", "msi", "dmg":
	// Databases.
	case "sql", "sqlite", "db":
	default:
		return false
	}
	return true
}
// localPathExists reports whether token, joined onto cwd, may name an
// existing filesystem entry.
//
// The lookup uses os.Lstat, so a symlink counts as existing even when its
// target is missing. The function returns false only when the lookup fails
// with a "does not exist" error. Any other failure (permission denied, name
// too long, invalid name, ...) leaves existence undetermined and is reported
// as true, so that callers using this as a security gate fail closed and
// keep validating the path instead of skipping the check.
func localPathExists(cwd, token string) bool {
	_, err := os.Lstat(filepath.Join(cwd, token))
	if err == nil {
		return true
	}
	// Only a definite "not found" proves absence.
	return !os.IsNotExist(err)
}
// SetTimeout replaces the tool's stored timeout with the given duration.
// NOTE(review): presumably this bounds how long an executed command may
// run — confirm against the code that reads t.timeout.
func (t *ExecTool) SetTimeout(timeout time.Duration) {
t.timeout = timeout
}
+45
View File
@@ -1807,3 +1807,48 @@ func TestEncodeKeyTokenWithPtyKeyMode(t *testing.T) {
})
}
}
// TestShellTool_SchemelessURLDetection checks that URL-shaped arguments do
// not trigger the workspace guard's "path outside working dir" rejection.
//
// Three groups of commands are covered:
//   - a single URL with an explicit scheme (https, http, ftp, ftps, sftp,
//     ssh, git);
//   - several scheme-prefixed URLs joined by "&&" or ";" in one command;
//   - scheme-less URLs such as "wttr.in/Beijing", where the text before the
//     slash looks like a domain and no entry of that name exists in the
//     (empty) temporary workspace.
//
// Only the guard verdict is asserted; any other failure of the command is
// ignored. Every command uses echo so that running it needs no network
// access and cannot stall the test.
func TestShellTool_SchemelessURLDetection(t *testing.T) {
	tmpDir := t.TempDir()
	tool, err := NewExecTool(tmpDir, true)
	if err != nil {
		t.Fatalf("unable to configure exec tool: %s", err)
	}

	groups := []struct {
		failure  string // prefix of the message reported when a command is blocked
		commands []string
	}{
		{
			failure: "command with recognized web scheme should not be blocked",
			commands: []string{
				"echo https://github.com",
				"echo http://example.com",
				"echo ftp://ftp.example.com",
				"echo ftps://secure.example.com",
				"echo sftp://sftp.example.com",
				"echo ssh://git@github.com",
				"echo git://github.com",
			},
		},
		{
			failure: "command with multiple web URLs should not be blocked",
			commands: []string{
				"echo https://github.com && echo http://example.com",
				"echo ftp://a.com; echo https://b.com",
			},
		},
		{
			failure: "command with scheme-less URL should not be blocked",
			commands: []string{
				"echo wttr.in/Beijing",
				"echo wttr.in/Beijing && echo example.com/docs",
			},
		},
	}

	for _, group := range groups {
		for _, cmd := range group.commands {
			result := tool.Execute(context.Background(), map[string]any{"action": "run", "command": cmd})
			if result.IsError && strings.Contains(result.ForLLM, "path outside working dir") {
				t.Errorf("%s: %s\n error: %s", group.failure, cmd, result.ForLLM)
			}
		}
	}
}