From cef0f28881169fa52d9eadd8414e0cc3f2f18607 Mon Sep 17 00:00:00 2001 From: wenjie Date: Tue, 17 Mar 2026 14:10:11 +0800 Subject: [PATCH] fix(tools): normalize whitelist path checks for symlinked allowed roots (#1660) - keep regex whitelist matching for existing configs - add normalized directory-prefix checks for literal allow-path patterns - support allowed roots that resolve through symlinks - add regression coverage for symlink-backed whitelist paths --- pkg/tools/filesystem.go | 105 ++++++++++++++++++++++++++++++++++- pkg/tools/filesystem_test.go | 35 ++++++++++++ 2 files changed, 138 insertions(+), 2 deletions(-) diff --git a/pkg/tools/filesystem.go b/pkg/tools/filesystem.go index 92946ef98..ae356f248 100644 --- a/pkg/tools/filesystem.go +++ b/pkg/tools/filesystem.go @@ -98,14 +98,115 @@ func isAllowedPath(path string, patterns []*regexp.Regexp) bool { } func matchesAllowedPath(path string, patterns []*regexp.Regexp) bool { + cleaned := filepath.Clean(path) for _, pattern := range patterns { - if pattern.MatchString(path) { + if pattern.MatchString(cleaned) { + return true + } + if root, ok := extractAllowedPathRoot(pattern); ok && isWithinAllowedRoot(cleaned, root) { return true } } return false } +func extractAllowedPathRoot(pattern *regexp.Regexp) (string, bool) { + raw := pattern.String() + if !strings.HasPrefix(raw, "^") { + return "", false + } + + literal := strings.TrimPrefix(raw, "^") + + // Recognize the common "directory prefix" form: ^(?:/|$) + literal = strings.TrimSuffix(literal, "(?:/|$)") + literal = strings.TrimSuffix(literal, `(?:\\|$)`) + + // Reject patterns that still contain regex operators after removing the + // optional anchored-directory suffix. That keeps arbitrary regex behavior + // unchanged and only enables normalized prefix matching for literal paths. + if containsUnescapedRegexMeta(literal) { + return "", false + } + + unescaped, ok := unescapeRegexLiteral(literal) + if !ok || unescaped == "" { + return "", false + } + + return filepath.Clean(unescaped), filepath.IsAbs(unescaped) +} + +func appendUniquePath(paths []string, path string) []string { + for _, existing := range paths { + if existing == path { + return paths + } + } + return append(paths, path) +} + +func containsUnescapedRegexMeta(s string) bool { + escaped := false + for _, r := range s { + if escaped { + escaped = false + continue + } + if r == '\\' { + escaped = true + continue + } + switch r { + case '.', '+', '*', '?', '(', ')', '[', ']', '{', '}', '|': + return true + } + } + return escaped +} + +func unescapeRegexLiteral(s string) (string, bool) { + var b strings.Builder + b.Grow(len(s)) + + escaped := false + for _, r := range s { + if escaped { + b.WriteRune(r) + escaped = false + continue + } + if r == '\\' { + escaped = true + continue + } + b.WriteRune(r) + } + + if escaped { + return "", false + } + + return b.String(), true +} + +func isWithinAllowedRoot(path, root string) bool { + candidate := filepath.Clean(path) + allowedVariants := []string{filepath.Clean(root)} + + if resolvedRoot, err := resolvePathAgainstExistingAncestor(root); err == nil { + allowedVariants = appendUniquePath(allowedVariants, filepath.Clean(resolvedRoot)) + } + + for _, allowedRoot := range allowedVariants { + if isWithinWorkspace(candidate, allowedRoot) { + return true + } + } + + return false +} + func resolveExistingAncestor(path string) (string, error) { for current := filepath.Clean(path); ; current = filepath.Dir(current) { if resolved, err := filepath.EvalSymlinks(current); err == nil { @@ -144,7 +245,7 @@ func resolvePathAgainstExistingAncestor(path string) (string, error) { func isWithinWorkspace(candidate, workspace string) bool { rel, err := filepath.Rel(filepath.Clean(workspace), filepath.Clean(candidate)) - return err == nil && filepath.IsLocal(rel) + return err == nil && (rel == "." || filepath.IsLocal(rel)) } type ReadFileTool struct { diff --git a/pkg/tools/filesystem_test.go b/pkg/tools/filesystem_test.go index 78d69273f..5ebf38df2 100644 --- a/pkg/tools/filesystem_test.go +++ b/pkg/tools/filesystem_test.go @@ -570,6 +570,41 @@ func TestWhitelistFs_WriteAllowsNewFileUnderAllowedDir(t *testing.T) { } } +func TestWhitelistFs_AllowsResolvedAllowedRootAlias(t *testing.T) { + workspace := t.TempDir() + realDir := t.TempDir() + linkParent := t.TempDir() + allowedAlias := filepath.Join(linkParent, "allowed-link") + + if err := os.Symlink(realDir, allowedAlias); err != nil { + t.Skipf("symlink not supported in this environment: %v", err) + } + + targetFile := filepath.Join(allowedAlias, "nested", "alias.txt") + if err := os.MkdirAll(filepath.Dir(targetFile), 0o755); err != nil { + t.Fatalf("MkdirAll(targetFile dir) error = %v", err) + } + if err := os.WriteFile(targetFile, []byte("through alias"), 0o644); err != nil { + t.Fatalf("WriteFile(targetFile) error = %v", err) + } + + patterns := []*regexp.Regexp{ + regexp.MustCompile( + "^" + regexp.QuoteMeta(filepath.Clean(allowedAlias)) + + "(?:" + regexp.QuoteMeta(string(os.PathSeparator)) + "|$)", + ), + } + tool := NewReadFileTool(workspace, true, MaxReadFileSize, patterns) + + result := tool.Execute(context.Background(), map[string]any{"path": targetFile}) + if result.IsError { + t.Fatalf("expected symlink-backed allowed root to be readable, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "through alias") { + t.Fatalf("expected file content, got: %s", result.ForLLM) + } +} + // TestReadFileTool_ChunkedReading verifies the pagination logic of the tool // by reading a file in multiple chunks using 'offset' and 'length'. func TestReadFileTool_ChunkedReading(t *testing.T) {