fix(tools): allow scheme-less URLs in workspace guard

The workspace guard's absolutePathPattern regex matches /Beijing?T in commands like 'curl wttr.in/Beijing'. Since 'wttr.in' is not a recognized web scheme, the path was routed through workspace sandbox validation, which could block legitimate scheme-less URL usage (curl allows bare domains without http://).

Add detection for domain-like tokens preceding /path matches:
- looksLikeDomain: checks for dot-separated tokens that don't end with common file extensions (.py, .go, .exe, etc.)
- localPathExists: verifies the token does not exist as a local filesystem entry

This dual guard prevents the symlink bypass identified in PR #2965 review: if 'foo.bar' exists as a local symlink or directory, the path still undergoes full workspace validation.

Fixes #1042.
This commit is contained in:
程智超0668000959
2026-06-04 18:05:46 +08:00
parent 6e9b5071b0
commit b86ab71836
+84
View File
@@ -1156,6 +1156,27 @@ func (t *ExecTool) guardCommand(command, cwd string) string {
}
}
// Skip scheme-less URL paths like "wttr.in/Beijing".
// When a /path is immediately preceded by a token that looks
// like a domain name and that token does NOT exist as a local
// filesystem entry, treat the path as part of a URL and skip
// workspace sandbox validation.
//
// The local-path-exists guard prevents symlink bypass: if
// "foo.bar" exists as a local symlink or directory, the path
// still undergoes full workspace validation (see #2965).
if loc[0] > 0 && raw[0] == '/' {
// Find the token immediately before the "/".
j := loc[0] - 1
for j >= 0 && !isShellTokenBoundary(cmd[j]) {
j--
}
token := cmd[j+1 : loc[0]]
if looksLikeDomain(token) && !localPathExists(cwd, token) {
continue
}
}
p, err := filepath.Abs(raw)
if err != nil {
continue
@@ -1198,6 +1219,69 @@ func (t *ExecTool) guardCommand(command, cwd string) string {
return ""
}
// isShellTokenBoundary returns true when b is a byte that separates
// tokens in a shell command (space, tab, colon, semicolon, pipe, etc.).
func isShellTokenBoundary(b byte) bool {
switch b {
case ' ', '\t', ':', ';', '|', '&', '<', '>', '\'', '"', '`', '\n', '\r':
return true
}
return false
}
// looksLikeDomain returns true when s looks like a DNS domain name:
// it contains at least one dot, starts with an alphanumeric character,
// and does not end with a common file extension.
func looksLikeDomain(s string) bool {
if len(s) < 3 || !strings.ContainsRune(s, '.') {
return false
}
first := s[0]
if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || (first >= '0' && first <= '9')) {
return false
}
// Exclude tokens ending with common file/programming extensions,
// e.g. "script.py", "main.go", "app.exe".
if idx := strings.LastIndexByte(s, '.'); idx >= 0 {
ext := strings.ToLower(s[idx+1:])
if commonFileExtension(ext) {
return false
}
}
return true
}
// commonFileExtension returns true when ext is a file extension that
// strongly indicates a local file rather than a domain TLD.
func commonFileExtension(ext string) bool {
switch ext {
case "py", "js", "ts", "tsx", "jsx", "go", "rs", "rb", "php",
"java", "c", "cpp", "h", "hpp", "cs", "swift", "kt", "scala",
"sh", "bash", "zsh", "fish", "ps1", "bat", "cmd",
"txt", "md", "rst", "log", "json", "yaml", "yml", "toml",
"xml", "html", "css", "scss", "ini", "cfg", "conf", "env",
"exe", "dll", "so", "dylib", "lib", "a", "o", "obj",
"zip", "tar", "gz", "bz2", "xz", "7z", "rar",
"png", "jpg", "jpeg", "gif", "svg", "ico", "bmp", "webp",
"mp3", "mp4", "wav", "avi", "mov", "mkv", "flac",
"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
"pub", "pem", "key", "crt", "cer", "p12", "pfx",
"bak", "tmp", "swp", "lock",
"ttf", "otf", "woff", "woff2", "eot",
"deb", "rpm", "apk", "msi", "dmg",
"sql", "sqlite", "db":
return true
}
return false
}
// localPathExists returns true when the given token resolves to an
// existing filesystem entry relative to cwd.
func localPathExists(cwd, token string) bool {
info, err := os.Lstat(filepath.Join(cwd, token))
return err == nil && info != nil
}
func (t *ExecTool) SetTimeout(timeout time.Duration) {
t.timeout = timeout
}