diff --git a/pkg/tools/shell.go b/pkg/tools/shell.go index 018a77ac9..f8bfe7416 100644 --- a/pkg/tools/shell.go +++ b/pkg/tools/shell.go @@ -1177,6 +1177,27 @@ func (t *ExecTool) guardCommand(command, cwd string) string { } } + // Skip scheme-less URL paths like "wttr.in/Beijing". + // When a /path is immediately preceded by a token that looks + // like a domain name and that token does NOT exist as a local + // filesystem entry, treat the path as part of a URL and skip + // workspace sandbox validation. + // + // The local-path-exists guard prevents symlink bypass: if + // "foo.bar" exists as a local symlink or directory, the path + // still undergoes full workspace validation (see #2965). + if loc[0] > 0 && raw[0] == '/' { + // Find the token immediately before the "/". + j := loc[0] - 1 + for j >= 0 && !isShellTokenBoundary(cmd[j]) { + j-- + } + token := cmd[j+1 : loc[0]] + if looksLikeDomain(token) && !localPathExists(cwd, token) { + continue + } + } + p, err := filepath.Abs(raw) if err != nil { continue @@ -1219,6 +1240,69 @@ func (t *ExecTool) guardCommand(command, cwd string) string { return "" } +// isShellTokenBoundary returns true when b is a byte that separates +// tokens in a shell command (space, tab, colon, semicolon, pipe, etc.). +func isShellTokenBoundary(b byte) bool { + switch b { + case ' ', '\t', ':', ';', '|', '&', '<', '>', '\'', '"', '`', '\n', '\r': + return true + } + return false +} + +// looksLikeDomain returns true when s looks like a DNS domain name: +// it contains at least one dot, starts with an alphanumeric character, +// and does not end with a common file extension. +func looksLikeDomain(s string) bool { + if len(s) < 3 || !strings.ContainsRune(s, '.') { + return false + } + first := s[0] + if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || (first >= '0' && first <= '9')) { + return false + } + // Exclude tokens ending with common file/programming extensions, + // e.g. "script.py", "main.go", "app.exe". + if idx := strings.LastIndexByte(s, '.'); idx >= 0 { + ext := strings.ToLower(s[idx+1:]) + if commonFileExtension(ext) { + return false + } + } + return true +} + +// commonFileExtension returns true when ext is a file extension that +// strongly indicates a local file rather than a domain TLD. +func commonFileExtension(ext string) bool { + switch ext { + case "py", "js", "ts", "tsx", "jsx", "go", "rs", "rb", "php", + "java", "c", "cpp", "h", "hpp", "cs", "swift", "kt", "scala", + "sh", "bash", "zsh", "fish", "ps1", "bat", "cmd", + "txt", "md", "rst", "log", "json", "yaml", "yml", "toml", + "xml", "html", "css", "scss", "ini", "cfg", "conf", "env", + "exe", "dll", "so", "dylib", "lib", "a", "o", "obj", + "zip", "tar", "gz", "bz2", "xz", "7z", "rar", + "png", "jpg", "jpeg", "gif", "svg", "ico", "bmp", "webp", + "mp3", "mp4", "wav", "avi", "mov", "mkv", "flac", + "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", + "pub", "pem", "key", "crt", "cer", "p12", "pfx", + "bak", "tmp", "swp", "lock", + "ttf", "otf", "woff", "woff2", "eot", + "deb", "rpm", "apk", "msi", "dmg", + "sql", "sqlite", "db": + return true + } + return false +} + +// localPathExists returns true when the given token resolves to an +// existing filesystem entry relative to cwd. +func localPathExists(cwd, token string) bool { + info, err := os.Lstat(filepath.Join(cwd, token)) + return err == nil && info != nil +} + func (t *ExecTool) SetTimeout(timeout time.Duration) { t.timeout = timeout } diff --git a/pkg/tools/shell_test.go b/pkg/tools/shell_test.go index 83807dac6..b1460b9e3 100644 --- a/pkg/tools/shell_test.go +++ b/pkg/tools/shell_test.go @@ -1807,3 +1807,48 @@ func TestEncodeKeyTokenWithPtyKeyMode(t *testing.T) { }) } } + +// TestShellTool_SchemelessURLDetection verifies that the scheme-less URL +// detection logic in guardCommand correctly identifies web URL path components +// (e.g., "//github.com" captured by the regex after "https:") and exempts them +// from workspace sandbox checks. It also confirms that paths NOT preceded by a +// recognized web scheme are still blocked. +func TestShellTool_SchemelessURLDetection(t *testing.T) { + tmpDir := t.TempDir() + tool, err := NewExecTool(tmpDir, true) + if err != nil { + t.Fatalf("unable to configure exec tool: %s", err) + } + + // Each of the 7 recognized web schemes should have its path component + // exempted from workspace boundary checks. + allowedCommands := []string{ + "echo https://github.com", + "echo http://example.com", + "echo ftp://ftp.example.com", + "echo ftps://secure.example.com", + "echo sftp://sftp.example.com", + "echo ssh://git@github.com", + "echo git://github.com", + } + + for _, cmd := range allowedCommands { + result := tool.Execute(context.Background(), map[string]any{"action": "run", "command": cmd}) + if result.IsError && strings.Contains(result.ForLLM, "path outside working dir") { + t.Errorf("command with recognized web scheme should not be blocked: %s\n error: %s", cmd, result.ForLLM) + } + } + + // Multiple URLs with different schemes in a single command should all be exempt. + multiURLCommands := []string{ + "echo https://github.com && curl http://example.com", + "wget ftp://a.com; curl https://b.com", + } + + for _, cmd := range multiURLCommands { + result := tool.Execute(context.Background(), map[string]any{"action": "run", "command": cmd}) + if result.IsError && strings.Contains(result.ForLLM, "path outside working dir") { + t.Errorf("command with multiple web URLs should not be blocked: %s\n error: %s", cmd, result.ForLLM) + } + } +}