package tools

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"io/fs"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	"github.com/sipeed/picoclaw/pkg/fileutil"
)

// validatePath resolves path to an absolute path and, when restrict is true,
// ensures it stays inside workspace both lexically and after symlink resolution.
//
// Relative paths are interpreted relative to the workspace. When the target
// does not exist yet, its nearest existing ancestor is resolved instead so that
// a new file cannot be created through a symlinked directory that leaves the
// workspace.
//
// An empty workspace is an error; in that one case the input path is returned
// unchanged alongside the error. Every other failure returns "".
func validatePath(path, workspace string, restrict bool) (string, error) {
	if workspace == "" {
		return path, fmt.Errorf("workspace is not defined")
	}

	absWorkspace, err := filepath.Abs(workspace)
	if err != nil {
		return "", fmt.Errorf("failed to resolve workspace path: %w", err)
	}

	var absPath string
	if filepath.IsAbs(path) {
		absPath = filepath.Clean(path)
	} else {
		absPath, err = filepath.Abs(filepath.Join(absWorkspace, path))
		if err != nil {
			return "", fmt.Errorf("failed to resolve file path: %w", err)
		}
	}

	if !restrict {
		return absPath, nil
	}

	// Cheap lexical check first, before touching the filesystem.
	if !isWithinWorkspace(absPath, absWorkspace) {
		return "", fmt.Errorf("access denied: path is outside the workspace")
	}

	// Compare resolved paths against the resolved workspace; fall back to the
	// lexical workspace path when it cannot be resolved.
	workspaceReal := absWorkspace
	if resolvedWorkspace, wsErr := filepath.EvalSymlinks(absWorkspace); wsErr == nil {
		workspaceReal = resolvedWorkspace
	}

	resolved, err := filepath.EvalSymlinks(absPath)
	switch {
	case err == nil:
		if !isWithinWorkspace(resolved, workspaceReal) {
			return "", fmt.Errorf("access denied: symlink resolves outside workspace")
		}
	case os.IsNotExist(err):
		// The target does not exist yet: validate its closest existing ancestor.
		parentResolved, parentErr := resolveExistingAncestor(filepath.Dir(absPath))
		switch {
		case parentErr == nil:
			if !isWithinWorkspace(parentResolved, workspaceReal) {
				return "", fmt.Errorf("access denied: symlink resolves outside workspace")
			}
		case !os.IsNotExist(parentErr):
			return "", fmt.Errorf("failed to resolve path: %w", parentErr)
		}
		// No ancestor exists at all: nothing further to resolve.
	default:
		return "", fmt.Errorf("failed to resolve path: %w", err)
	}

	return absPath, nil
}

// resolveExistingAncestor walks up from path until filepath.EvalSymlinks
// succeeds and returns that resolved location. It returns os.ErrNotExist when
// the walk reaches a path that is its own parent without finding an existing
// entry, and any non-"not exist" error immediately.
func resolveExistingAncestor(path string) (string, error) {
	current := filepath.Clean(path)
	for {
		resolved, err := filepath.EvalSymlinks(current)
		if err == nil {
			return resolved, nil
		}
		if !os.IsNotExist(err) {
			return "", err
		}
		parent := filepath.Dir(current)
		if parent == current {
			return "", os.ErrNotExist
		}
		current = parent
	}
}

// isWithinWorkspace reports whether candidate is lexically located at or below
// workspace, i.e. the relative path from workspace to candidate is local.
func isWithinWorkspace(candidate, workspace string) bool {
	rel, err := filepath.Rel(filepath.Clean(workspace), filepath.Clean(candidate))
	return err == nil && filepath.IsLocal(rel)
}

// ReadFileTool is a tool that returns the textual contents of a file.
type ReadFileTool struct {
	fs fileSystem
}

// NewReadFileTool creates a ReadFileTool backed by the filesystem selected by
// buildFs. Only the first allowPaths argument, if any, is used as the list of
// whitelist patterns.
func NewReadFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ReadFileTool {
	var patterns []*regexp.Regexp
	if len(allowPaths) > 0 {
		patterns = allowPaths[0]
	}
	return &ReadFileTool{fs: buildFs(workspace, restrict, patterns)}
}

// Name returns the tool identifier.
func (t *ReadFileTool) Name() string {
	return "read_file"
}

// Description returns a short human-readable summary of the tool.
func (t *ReadFileTool) Description() string {
	return "Read the contents of a file"
}

// Parameters returns the JSON-schema-like description of the tool arguments.
func (t *ReadFileTool) Parameters() map[string]any {
	return map[string]any{
		"type": "object",
		"properties": map[string]any{
			"path": map[string]any{
				"type":        "string",
				"description": "Path to the file to read",
			},
		},
		"required": []string{"path"},
	}
}

// Execute reads the file named by the "path" argument and returns its content.
// Files whose first bytes look binary (see isBinaryFile) are rejected before
// the remainder is loaded.
func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	path, ok := args["path"].(string)
	if !ok {
		return ErrorResult("path is required")
	}

	// Open the file instead of loading it all into memory up front.
	file, err := t.fs.Open(path)
	if err != nil {
		return ErrorResult(err.Error())
	}
	defer file.Close()

	// Read an initial chunk for sniffing (512 bytes is the standard for MIME
	// sniffing). io.ReadFull keeps reading until the buffer is full or the file
	// ends, so a short Read cannot truncate the sample; EOF and unexpected EOF
	// simply mean the file is shorter than the buffer.
	header := make([]byte, 512)
	n, err := io.ReadFull(file, header)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return ErrorResult(fmt.Sprintf("failed to read file header: %v", err))
	}
	header = header[:n]

	// Reject binaries now, before using more RAM.
	if isBinaryFile(header) {
		return ErrorResult(
			fmt.Sprintf(
				"cannot read file %q: appears to be a binary file (e.g., PDF, image, executable)",
				filepath.Base(path),
			),
		)
	}

	// It is text: read whatever follows the header.
	rest, err := io.ReadAll(file)
	if err != nil {
		return ErrorResult(fmt.Sprintf("failed to read file content: %v", err))
	}

	// Recompose header + rest directly into the result string.
	var content strings.Builder
	content.Grow(len(header) + len(rest))
	content.Write(header)
	content.Write(rest)
	return NewToolResult(content.String())
}

// WriteFileTool is a tool that writes text content to a file.
type WriteFileTool struct {
	fs fileSystem
}

// NewWriteFileTool creates a WriteFileTool backed by the filesystem selected by
// buildFs. Only the first allowPaths argument, if any, is used as the list of
// whitelist patterns.
func NewWriteFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *WriteFileTool {
	var patterns []*regexp.Regexp
	if len(allowPaths) > 0 {
		patterns = allowPaths[0]
	}
	return &WriteFileTool{fs: buildFs(workspace, restrict, patterns)}
}

// Name returns the tool identifier.
func (t *WriteFileTool) Name() string {
	return "write_file"
}

// Description returns a short human-readable summary of the tool.
func (t *WriteFileTool) Description() string {
	return "Write content to a file"
}

// Parameters returns the JSON-schema-like description of the tool arguments.
func (t *WriteFileTool) Parameters() map[string]any {
	return map[string]any{
		"type": "object",
		"properties": map[string]any{
			"path": map[string]any{
				"type":        "string",
				"description": "Path to the file to write",
			},
			"content": map[string]any{
				"type":        "string",
				"description": "Content to write to the file",
			},
		},
		"required": []string{"path", "content"},
	}
}

// Execute writes the "content" argument to the file named by the "path"
// argument. Both arguments must be strings.
func (t *WriteFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	path, ok := args["path"].(string)
	if !ok {
		return ErrorResult("path is required")
	}

	content, ok := args["content"].(string)
	if !ok {
		return ErrorResult("content is required")
	}

	if err := t.fs.WriteFile(path, []byte(content)); err != nil {
		return ErrorResult(err.Error())
	}

	return SilentResult(fmt.Sprintf("File written: %s", path))
}

// ListDirTool is a tool that lists the entries of a directory.
type ListDirTool struct {
	fs fileSystem
}

// NewListDirTool creates a ListDirTool backed by the filesystem selected by
// buildFs. Only the first allowPaths argument, if any, is used as the list of
// whitelist patterns.
func NewListDirTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ListDirTool {
	var patterns []*regexp.Regexp
	if len(allowPaths) > 0 {
		patterns = allowPaths[0]
	}
	return &ListDirTool{fs: buildFs(workspace, restrict, patterns)}
}

// Name returns the tool identifier.
func (t *ListDirTool) Name() string {
	return "list_dir"
}

// Description returns a short human-readable summary of the tool.
func (t *ListDirTool) Description() string {
	return "List files and directories in a path"
}

// Parameters returns the JSON-schema-like description of the tool arguments.
func (t *ListDirTool) Parameters() map[string]any {
	return map[string]any{
		"type": "object",
		"properties": map[string]any{
			"path": map[string]any{
				"type":        "string",
				"description": "Path to list",
			},
		},
		"required": []string{"path"},
	}
}

// Execute lists the directory named by the "path" argument. When the argument
// is missing or not a string, "." is listed.
func (t *ListDirTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	path, ok := args["path"].(string)
	if !ok {
		path = "."
	}

	entries, err := t.fs.ReadDir(path)
	if err != nil {
		return ErrorResult(fmt.Sprintf("failed to read directory: %v", err))
	}
	return formatDirEntries(entries)
}

// formatDirEntries renders one line per entry, prefixed with "DIR: " for
// directories and "FILE: " for everything else.
func formatDirEntries(entries []os.DirEntry) *ToolResult {
	var result strings.Builder
	for _, entry := range entries {
		if entry.IsDir() {
			result.WriteString("DIR:  " + entry.Name() + "\n")
		} else {
			result.WriteString("FILE: " + entry.Name() + "\n")
		}
	}
	return NewToolResult(result.String())
}

// fileSystem abstracts reading, writing, and listing files, allowing both
// unrestricted (host filesystem) and sandbox (os.Root) implementations to share
// the same polymorphic interface.
type fileSystem interface {
	ReadFile(path string) ([]byte, error)
	WriteFile(path string, data []byte) error
	ReadDir(path string) ([]os.DirEntry, error)
	Open(path string) (fs.File, error)
}

// hostFs is an unrestricted fileSystem that operates directly on the host
// filesystem.
type hostFs struct{}

// ReadFile returns the whole content of path, classifying "not found" and
// "permission" failures in the error message.
func (h *hostFs) ReadFile(path string) ([]byte, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("failed to read file: file not found: %w", err)
		}
		if os.IsPermission(err) {
			return nil, fmt.Errorf("failed to read file: access denied: %w", err)
		}
		return nil, fmt.Errorf("failed to read file: %w", err)
	}
	return content, nil
}

// ReadDir lists the entries of the directory at path.
func (h *hostFs) ReadDir(path string) ([]os.DirEntry, error) {
	return os.ReadDir(path)
}

// WriteFile writes data to path through fileutil.WriteFileAtomic.
func (h *hostFs) WriteFile(path string, data []byte) error {
	// Use unified atomic write utility with explicit sync for flash storage reliability.
	// Using 0o600 (owner read/write only) for secure default permissions.
	return fileutil.WriteFileAtomic(path, data, 0o600)
}

// Open opens path for reading, classifying "not found" and "permission"
// failures in the error message.
func (h *hostFs) Open(path string) (fs.File, error) {
	f, err := os.Open(path)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("failed to open file: file not found: %w", err)
		}
		if os.IsPermission(err) {
			return nil, fmt.Errorf("failed to open file: access denied: %w", err)
		}
		return nil, fmt.Errorf("failed to open file: %w", err)
	}
	return f, nil
}

// sandboxFs is a sandboxed fileSystem that operates within a strictly defined
// workspace using os.Root.
type sandboxFs struct {
	workspace string
}

// isSandboxAccessDenied reports whether err, returned by an os.Root operation,
// should be surfaced to the caller as "access denied". Besides permission
// errors this covers the "escapes from parent" error os.Root returns for paths
// outside the root, which can only be recognised by its message here.
func isSandboxAccessDenied(err error) bool {
	if os.IsPermission(err) {
		return true
	}
	msg := err.Error()
	return strings.Contains(msg, "escapes from parent") || strings.Contains(msg, "permission denied")
}

// execute opens the workspace as an os.Root, converts path into a safe
// workspace-relative path via getSafeRelPath, and runs fn with both. The root
// is closed when execute returns.
func (r *sandboxFs) execute(path string, fn func(root *os.Root, relPath string) error) error {
	if r.workspace == "" {
		return fmt.Errorf("workspace is not defined")
	}

	root, err := os.OpenRoot(r.workspace)
	if err != nil {
		return fmt.Errorf("failed to open workspace: %w", err)
	}
	defer root.Close()

	relPath, err := getSafeRelPath(r.workspace, path)
	if err != nil {
		return err
	}

	return fn(root, relPath)
}

// ReadFile returns the whole content of path inside the workspace.
func (r *sandboxFs) ReadFile(path string) ([]byte, error) {
	var content []byte
	err := r.execute(path, func(root *os.Root, relPath string) error {
		fileContent, err := root.ReadFile(relPath)
		if err != nil {
			if os.IsNotExist(err) {
				return fmt.Errorf("failed to read file: file not found: %w", err)
			}
			if isSandboxAccessDenied(err) {
				return fmt.Errorf("failed to read file: access denied: %w", err)
			}
			return fmt.Errorf("failed to read file: %w", err)
		}
		content = fileContent
		return nil
	})
	return content, err
}

// WriteFile atomically replaces path inside the workspace with data, creating
// missing parent directories. The data is written to a temporary file in the
// target's own directory, synced, and then renamed over the target.
func (r *sandboxFs) WriteFile(path string, data []byte) error {
	return r.execute(path, func(root *os.Root, relPath string) error {
		dir := filepath.Dir(relPath)
		if dir != "." && dir != "/" {
			if err := root.MkdirAll(dir, 0o755); err != nil {
				return fmt.Errorf("failed to create parent directories: %w", err)
			}
		}

		// Use atomic write pattern with explicit sync for flash storage reliability.
		// Using 0o600 (owner read/write only) for secure default permissions.
		// The temp file lives next to the target so the rename never has to
		// move data across directories (or mount points).
		tmpName := fmt.Sprintf(".tmp-%d-%d", os.Getpid(), time.Now().UnixNano())
		tmpRelPath := filepath.Join(dir, tmpName)

		tmpFile, err := root.OpenFile(tmpRelPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600)
		if err != nil {
			// O_EXCL means a failed open did not create the file, so there is
			// nothing of ours to remove here.
			return fmt.Errorf("failed to open temp file: %w", err)
		}

		// From here on the temp file is ours; remove it unless the rename succeeds.
		committed := false
		defer func() {
			if !committed {
				_ = root.Remove(tmpRelPath) // best-effort cleanup
			}
		}()

		if _, err := tmpFile.Write(data); err != nil {
			tmpFile.Close()
			return fmt.Errorf("failed to write temp file: %w", err)
		}

		// CRITICAL: Force sync to storage medium before rename.
		// This ensures data is physically written to disk, not just cached.
		if err := tmpFile.Sync(); err != nil {
			tmpFile.Close()
			return fmt.Errorf("failed to sync temp file: %w", err)
		}

		if err := tmpFile.Close(); err != nil {
			return fmt.Errorf("failed to close temp file: %w", err)
		}

		if err := root.Rename(tmpRelPath, relPath); err != nil {
			return fmt.Errorf("failed to rename temp file over target: %w", err)
		}
		committed = true

		// Sync the directory that received the rename so the new entry is
		// durable. This is best-effort: failures are deliberately ignored.
		if dirFile, err := root.Open(dir); err == nil {
			_ = dirFile.Sync()
			dirFile.Close()
		}
		return nil
	})
}

// ReadDir lists the entries of the directory at path inside the workspace.
func (r *sandboxFs) ReadDir(path string) ([]os.DirEntry, error) {
	var entries []os.DirEntry
	err := r.execute(path, func(root *os.Root, relPath string) error {
		dirEntries, err := fs.ReadDir(root.FS(), relPath)
		if err != nil {
			return err
		}
		entries = dirEntries
		return nil
	})
	return entries, err
}

// Open opens path inside the workspace for reading.
//
// NOTE(review): the returned file outlives the os.Root that execute closes on
// return; this relies on files opened through a Root remaining usable after
// Root.Close — confirm against the os.Root documentation.
func (r *sandboxFs) Open(path string) (fs.File, error) {
	var f fs.File
	err := r.execute(path, func(root *os.Root, relPath string) error {
		file, err := root.Open(relPath)
		if err != nil {
			if os.IsNotExist(err) {
				return fmt.Errorf("failed to open file: file not found: %w", err)
			}
			if isSandboxAccessDenied(err) {
				return fmt.Errorf("failed to open file: access denied: %w", err)
			}
			return fmt.Errorf("failed to open file: %w", err)
		}
		f = file
		return nil
	})
	return f, err
}

// whitelistFs wraps a sandboxFs and allows access to specific paths outside
// the workspace when they match any of the provided patterns.
type whitelistFs struct {
	sandbox  *sandboxFs
	host     hostFs
	patterns []*regexp.Regexp
}

// hostPath decides whether path is whitelisted. Patterns are matched against
// the lexically cleaned path, never the raw input, so ".." segments cannot be
// used to satisfy a pattern and then climb out of the whitelisted location
// (e.g. "/allowed/../../etc/passwd"). A trailing separator in the input is kept
// for matching so directory-style patterns ending in a separator still apply,
// and the slash-separated form is also tried so patterns written with "/"
// keep working where the OS separator differs.
//
// On a match it returns the cleaned path, which is what must be handed to the
// host filesystem.
func (w *whitelistFs) hostPath(path string) (string, bool) {
	clean := filepath.Clean(path)

	candidate := clean
	if path != "" && os.IsPathSeparator(path[len(path)-1]) && !os.IsPathSeparator(clean[len(clean)-1]) {
		candidate += string(filepath.Separator)
	}
	slashed := filepath.ToSlash(candidate)

	for _, p := range w.patterns {
		if p.MatchString(candidate) || (slashed != candidate && p.MatchString(slashed)) {
			return clean, true
		}
	}
	return "", false
}

// matches reports whether path, after lexical cleaning, matches any whitelist
// pattern.
func (w *whitelistFs) matches(path string) bool {
	_, ok := w.hostPath(path)
	return ok
}

// ReadFile reads whitelisted paths from the host and everything else from the
// sandbox.
func (w *whitelistFs) ReadFile(path string) ([]byte, error) {
	if hostPath, ok := w.hostPath(path); ok {
		return w.host.ReadFile(hostPath)
	}
	return w.sandbox.ReadFile(path)
}

// WriteFile writes whitelisted paths on the host and everything else in the
// sandbox.
func (w *whitelistFs) WriteFile(path string, data []byte) error {
	if hostPath, ok := w.hostPath(path); ok {
		return w.host.WriteFile(hostPath, data)
	}
	return w.sandbox.WriteFile(path, data)
}

// ReadDir lists whitelisted paths on the host and everything else in the
// sandbox.
func (w *whitelistFs) ReadDir(path string) ([]os.DirEntry, error) {
	if hostPath, ok := w.hostPath(path); ok {
		return w.host.ReadDir(hostPath)
	}
	return w.sandbox.ReadDir(path)
}

// Open opens whitelisted paths on the host and everything else in the sandbox.
func (w *whitelistFs) Open(path string) (fs.File, error) {
	if hostPath, ok := w.hostPath(path); ok {
		return w.host.Open(hostPath)
	}
	return w.sandbox.Open(path)
}

// buildFs returns the appropriate fileSystem implementation based on restriction
// settings and optional path whitelist patterns.
func buildFs(workspace string, restrict bool, patterns []*regexp.Regexp) fileSystem { if !restrict { return &hostFs{} } sandbox := &sandboxFs{workspace: workspace} if len(patterns) > 0 { return &whitelistFs{sandbox: sandbox, patterns: patterns} } return sandbox } // Helper to get a safe relative path for os.Root usage func getSafeRelPath(workspace, path string) (string, error) { if workspace == "" { return "", fmt.Errorf("workspace is not defined") } rel := filepath.Clean(path) if filepath.IsAbs(rel) { var err error rel, err = filepath.Rel(workspace, rel) if err != nil { return "", fmt.Errorf("failed to calculate relative path: %w", err) } } if !filepath.IsLocal(rel) { return "", fmt.Errorf("path escapes workspace: %s", path) } return rel, nil } // isBinaryFile uses common heuristics to determine if the content is a binary file. func isBinaryFile(content []byte) bool { if len(content) == 0 { return false } // Sample the first 512 bytes (or less if the file is smaller) limit := len(content) if limit > 512 { limit = 512 } sample := content[:limit] // Check for NUL bytes in the sample (standard binary detection) if bytes.IndexByte(sample, 0) != -1 { return true } // Use standard library content type detection to catch specific formats like PDF contentType := http.DetectContentType(sample) if contentType == "application/pdf" || strings.HasPrefix(contentType, "image/") || strings.HasPrefix(contentType, "video/") || strings.HasPrefix(contentType, "audio/") { return true } return false }