package tools import ( "bytes" "context" "fmt" "io" "io/fs" "net/http" "os" "path/filepath" "regexp" "strconv" "strings" "time" "github.com/sipeed/picoclaw/pkg/fileutil" ) const MaxReadFileSize = 128 * 1024 // 64KB limit to avoid context overflow // validatePath ensures the given path is within the workspace if restrict is true. func validatePath(path, workspace string, restrict bool) (string, error) { if workspace == "" { return path, fmt.Errorf("workspace is not defined") } absWorkspace, err := filepath.Abs(workspace) if err != nil { return "", fmt.Errorf("failed to resolve workspace path: %w", err) } var absPath string if filepath.IsAbs(path) { absPath = filepath.Clean(path) } else { absPath, err = filepath.Abs(filepath.Join(absWorkspace, path)) if err != nil { return "", fmt.Errorf("failed to resolve file path: %w", err) } } if restrict { if !isWithinWorkspace(absPath, absWorkspace) { return "", fmt.Errorf("access denied: path is outside the workspace") } var resolved string workspaceReal := absWorkspace if resolved, err = filepath.EvalSymlinks(absWorkspace); err == nil { workspaceReal = resolved } if resolved, err = filepath.EvalSymlinks(absPath); err == nil { if !isWithinWorkspace(resolved, workspaceReal) { return "", fmt.Errorf("access denied: symlink resolves outside workspace") } } else if os.IsNotExist(err) { var parentResolved string if parentResolved, err = resolveExistingAncestor(filepath.Dir(absPath)); err == nil { if !isWithinWorkspace(parentResolved, workspaceReal) { return "", fmt.Errorf("access denied: symlink resolves outside workspace") } } else if !os.IsNotExist(err) { return "", fmt.Errorf("failed to resolve path: %w", err) } } else { return "", fmt.Errorf("failed to resolve path: %w", err) } } return absPath, nil } func resolveExistingAncestor(path string) (string, error) { for current := filepath.Clean(path); ; current = filepath.Dir(current) { if resolved, err := filepath.EvalSymlinks(current); err == nil { return resolved, nil } else if 
!os.IsNotExist(err) { return "", err } if filepath.Dir(current) == current { return "", os.ErrNotExist } } } func isWithinWorkspace(candidate, workspace string) bool { rel, err := filepath.Rel(filepath.Clean(workspace), filepath.Clean(candidate)) return err == nil && filepath.IsLocal(rel) } type ReadFileTool struct { fs fileSystem } func NewReadFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ReadFileTool { var patterns []*regexp.Regexp if len(allowPaths) > 0 { patterns = allowPaths[0] } return &ReadFileTool{fs: buildFs(workspace, restrict, patterns)} } func (t *ReadFileTool) Name() string { return "read_file" } func (t *ReadFileTool) Description() string { return "Read the contents of a file. Supports pagination via `offset` and `length` " + "for files larger than the per-call limit. If the response header indicates the " + "file is TRUNCATED, use the provided offset in your next call to continue reading." } func (t *ReadFileTool) Parameters() map[string]any { return map[string]any{ "type": "object", "properties": map[string]any{ "path": map[string]any{ "type": "string", "description": "Path to the file to read.", }, "offset": map[string]any{ "type": "integer", "description": "Byte offset to start reading from (default: 0).", "default": 0, }, "length": map[string]any{ "type": "integer", "description": fmt.Sprintf( "Maximum number of bytes to read (default / max: %d).", MaxReadFileSize, ), "default": MaxReadFileSize, }, }, "required": []string{"path"}, } } func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult { path, ok := args["path"].(string) if !ok { return ErrorResult("path is required") } // offset (optional, default 0) offset, err := getInt64Arg(args, "offset", 0) if err != nil { return ErrorResult(err.Error()) } if offset < 0 { return ErrorResult("offset must be >= 0") } // length (optional, capped at MaxReadFileSize) length, err := getInt64Arg(args, "length", MaxReadFileSize) if err != nil { return 
ErrorResult(err.Error()) } if length <= 0 { return ErrorResult("length must be > 0") } if length > MaxReadFileSize { length = MaxReadFileSize } file, err := t.fs.Open(path) if err != nil { return ErrorResult(err.Error()) } defer file.Close() // measure total size totalSize := int64(-1) // -1 means unknown if info, err := file.Stat(); err == nil { totalSize = info.Size() } else { return ErrorResult(fmt.Sprintf("failed to get file info: %v", err)) } // seek to offset if seeker, ok := file.(io.Seeker); ok { if _, err := seeker.Seek(offset, io.SeekStart); err != nil { return ErrorResult(fmt.Sprintf("failed to seek to offset %d: %v", offset, err)) } } else if offset > 0 { // Fallback for non-seekable streams: discard leading bytes. if _, err := io.CopyN(io.Discard, file, offset); err != nil { return ErrorResult(fmt.Sprintf("failed to advance to offset %d: %v", offset, err)) } } // read up to `length` bytes data, err := io.ReadAll(io.LimitReader(file, length)) if err != nil { return ErrorResult(fmt.Sprintf("failed to read file content: %v", err)) } if len(data) == 0 && offset > 0 { return NewToolResult("[END OF FILE — no content at this offset]") } // build metadata header readEnd := offset + int64(len(data)) hasMore := int64(len(data)) == length && (totalSize < 0 || readEnd < totalSize) // Calculates the reading range avoiding negative numbers if the file is empty var readRange string if len(data) == 0 { readRange = "0 bytes" } else { readRange = fmt.Sprintf("bytes %d–%d", offset, readEnd-1) } var header string if totalSize >= 0 { header = fmt.Sprintf( "[file: %s | total: %d bytes | read: %s]", path, totalSize, readRange, ) } else { header = fmt.Sprintf( "[file: %s | read: %s | total size unknown]", path, readRange, ) } if hasMore { header += fmt.Sprintf( "\n[TRUNCATED — file has more content. 
Call read_file again with offset=%d to continue.]", readEnd, ) } else { header += "\n[END OF FILE — no further content.]" } return NewToolResult(header + "\n\n" + string(data)) } // getInt64Arg extracts an integer argument from the args map, returning the // provided default if the key is absent. func getInt64Arg(args map[string]any, key string, defaultVal int64) (int64, error) { raw, exists := args[key] if !exists { return defaultVal, nil } switch v := raw.(type) { case float64: return int64(v), nil case int: return int64(v), nil case int64: return v, nil case string: parsed, err := strconv.ParseInt(v, 10, 64) if err != nil { return 0, fmt.Errorf("invalid integer format for %s parameter: %w", key, err) } return parsed, nil default: return 0, fmt.Errorf("unsupported type %T for %s parameter", raw, key) } } type WriteFileTool struct { fs fileSystem } func NewWriteFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *WriteFileTool { var patterns []*regexp.Regexp if len(allowPaths) > 0 { patterns = allowPaths[0] } return &WriteFileTool{fs: buildFs(workspace, restrict, patterns)} } func (t *WriteFileTool) Name() string { return "write_file" } func (t *WriteFileTool) Description() string { return "Write content to a file" } func (t *WriteFileTool) Parameters() map[string]any { return map[string]any{ "type": "object", "properties": map[string]any{ "path": map[string]any{ "type": "string", "description": "Path to the file to write", }, "content": map[string]any{ "type": "string", "description": "Content to write to the file", }, }, "required": []string{"path", "content"}, } } func (t *WriteFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult { path, ok := args["path"].(string) if !ok { return ErrorResult("path is required") } content, ok := args["content"].(string) if !ok { return ErrorResult("content is required") } if err := t.fs.WriteFile(path, []byte(content)); err != nil { return ErrorResult(err.Error()) } return 
SilentResult(fmt.Sprintf("File written: %s", path)) } type ListDirTool struct { fs fileSystem } func NewListDirTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ListDirTool { var patterns []*regexp.Regexp if len(allowPaths) > 0 { patterns = allowPaths[0] } return &ListDirTool{fs: buildFs(workspace, restrict, patterns)} } func (t *ListDirTool) Name() string { return "list_dir" } func (t *ListDirTool) Description() string { return "List files and directories in a path" } func (t *ListDirTool) Parameters() map[string]any { return map[string]any{ "type": "object", "properties": map[string]any{ "path": map[string]any{ "type": "string", "description": "Path to list", }, }, "required": []string{"path"}, } } func (t *ListDirTool) Execute(ctx context.Context, args map[string]any) *ToolResult { path, ok := args["path"].(string) if !ok { path = "." } entries, err := t.fs.ReadDir(path) if err != nil { return ErrorResult(fmt.Sprintf("failed to read directory: %v", err)) } return formatDirEntries(entries) } func formatDirEntries(entries []os.DirEntry) *ToolResult { var result strings.Builder for _, entry := range entries { if entry.IsDir() { result.WriteString("DIR: " + entry.Name() + "\n") } else { result.WriteString("FILE: " + entry.Name() + "\n") } } return NewToolResult(result.String()) } // fileSystem abstracts reading, writing, and listing files, allowing both // unrestricted (host filesystem) and sandbox (os.Root) implementations to share the same polymorphic interface. type fileSystem interface { ReadFile(path string) ([]byte, error) WriteFile(path string, data []byte) error ReadDir(path string) ([]os.DirEntry, error) Open(path string) (fs.File, error) } // hostFs is an unrestricted fileReadWriter that operates directly on the host filesystem. 
type hostFs struct{} func (h *hostFs) ReadFile(path string) ([]byte, error) { content, err := os.ReadFile(path) if err != nil { if os.IsNotExist(err) { return nil, fmt.Errorf("failed to read file: file not found: %w", err) } if os.IsPermission(err) { return nil, fmt.Errorf("failed to read file: access denied: %w", err) } return nil, fmt.Errorf("failed to read file: %w", err) } return content, nil } func (h *hostFs) ReadDir(path string) ([]os.DirEntry, error) { return os.ReadDir(path) } func (h *hostFs) WriteFile(path string, data []byte) error { // Use unified atomic write utility with explicit sync for flash storage reliability. // Using 0o600 (owner read/write only) for secure default permissions. return fileutil.WriteFileAtomic(path, data, 0o600) } func (h *hostFs) Open(path string) (fs.File, error) { f, err := os.Open(path) if err != nil { if os.IsNotExist(err) { return nil, fmt.Errorf("failed to open file: file not found: %w", err) } if os.IsPermission(err) { return nil, fmt.Errorf("failed to open file: access denied: %w", err) } return nil, fmt.Errorf("failed to open file: %w", err) } return f, nil } // sandboxFs is a sandboxed fileSystem that operates within a strictly defined workspace using os.Root. 
type sandboxFs struct { workspace string } func (r *sandboxFs) execute(path string, fn func(root *os.Root, relPath string) error) error { if r.workspace == "" { return fmt.Errorf("workspace is not defined") } root, err := os.OpenRoot(r.workspace) if err != nil { return fmt.Errorf("failed to open workspace: %w", err) } defer root.Close() relPath, err := getSafeRelPath(r.workspace, path) if err != nil { return err } return fn(root, relPath) } func (r *sandboxFs) ReadFile(path string) ([]byte, error) { var content []byte err := r.execute(path, func(root *os.Root, relPath string) error { fileContent, err := root.ReadFile(relPath) if err != nil { if os.IsNotExist(err) { return fmt.Errorf("failed to read file: file not found: %w", err) } // os.Root returns "escapes from parent" for paths outside the root if os.IsPermission(err) || strings.Contains(err.Error(), "escapes from parent") || strings.Contains(err.Error(), "permission denied") { return fmt.Errorf("failed to read file: access denied: %w", err) } return fmt.Errorf("failed to read file: %w", err) } content = fileContent return nil }) return content, err } func (r *sandboxFs) WriteFile(path string, data []byte) error { return r.execute(path, func(root *os.Root, relPath string) error { dir := filepath.Dir(relPath) if dir != "." && dir != "/" { if err := root.MkdirAll(dir, 0o755); err != nil { return fmt.Errorf("failed to create parent directories: %w", err) } } // Use atomic write pattern with explicit sync for flash storage reliability. // Using 0o600 (owner read/write only) for secure default permissions. 
tmpRelPath := fmt.Sprintf(".tmp-%d-%d", os.Getpid(), time.Now().UnixNano()) tmpFile, err := root.OpenFile(tmpRelPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600) if err != nil { root.Remove(tmpRelPath) return fmt.Errorf("failed to open temp file: %w", err) } if _, err := tmpFile.Write(data); err != nil { tmpFile.Close() root.Remove(tmpRelPath) return fmt.Errorf("failed to write temp file: %w", err) } // CRITICAL: Force sync to storage medium before rename. // This ensures data is physically written to disk, not just cached. if err := tmpFile.Sync(); err != nil { tmpFile.Close() root.Remove(tmpRelPath) return fmt.Errorf("failed to sync temp file: %w", err) } if err := tmpFile.Close(); err != nil { root.Remove(tmpRelPath) return fmt.Errorf("failed to close temp file: %w", err) } if err := root.Rename(tmpRelPath, relPath); err != nil { root.Remove(tmpRelPath) return fmt.Errorf("failed to rename temp file over target: %w", err) } // Sync directory to ensure rename is durable if dirFile, err := root.Open("."); err == nil { _ = dirFile.Sync() dirFile.Close() } return nil }) } func (r *sandboxFs) ReadDir(path string) ([]os.DirEntry, error) { var entries []os.DirEntry err := r.execute(path, func(root *os.Root, relPath string) error { dirEntries, err := fs.ReadDir(root.FS(), relPath) if err != nil { return err } entries = dirEntries return nil }) return entries, err } func (r *sandboxFs) Open(path string) (fs.File, error) { var f fs.File err := r.execute(path, func(root *os.Root, relPath string) error { file, err := root.Open(relPath) if err != nil { if os.IsNotExist(err) { return fmt.Errorf("failed to open file: file not found: %w", err) } if os.IsPermission(err) || strings.Contains(err.Error(), "escapes from parent") || strings.Contains(err.Error(), "permission denied") { return fmt.Errorf("failed to open file: access denied: %w", err) } return fmt.Errorf("failed to open file: %w", err) } f = file return nil }) return f, err } // whitelistFs wraps a sandboxFs and allows 
access to specific paths outside // the workspace when they match any of the provided patterns. type whitelistFs struct { sandbox *sandboxFs host hostFs patterns []*regexp.Regexp } func (w *whitelistFs) matches(path string) bool { for _, p := range w.patterns { if p.MatchString(path) { return true } } return false } func (w *whitelistFs) ReadFile(path string) ([]byte, error) { if w.matches(path) { return w.host.ReadFile(path) } return w.sandbox.ReadFile(path) } func (w *whitelistFs) WriteFile(path string, data []byte) error { if w.matches(path) { return w.host.WriteFile(path, data) } return w.sandbox.WriteFile(path, data) } func (w *whitelistFs) ReadDir(path string) ([]os.DirEntry, error) { if w.matches(path) { return w.host.ReadDir(path) } return w.sandbox.ReadDir(path) } func (w *whitelistFs) Open(path string) (fs.File, error) { if w.matches(path) { return w.host.Open(path) } return w.sandbox.Open(path) } // buildFs returns the appropriate fileSystem implementation based on restriction // settings and optional path whitelist patterns. func buildFs(workspace string, restrict bool, patterns []*regexp.Regexp) fileSystem { if !restrict { return &hostFs{} } sandbox := &sandboxFs{workspace: workspace} if len(patterns) > 0 { return &whitelistFs{sandbox: sandbox, patterns: patterns} } return sandbox } // Helper to get a safe relative path for os.Root usage func getSafeRelPath(workspace, path string) (string, error) { if workspace == "" { return "", fmt.Errorf("workspace is not defined") } rel := filepath.Clean(path) if filepath.IsAbs(rel) { var err error rel, err = filepath.Rel(workspace, rel) if err != nil { return "", fmt.Errorf("failed to calculate relative path: %w", err) } } if !filepath.IsLocal(rel) { return "", fmt.Errorf("path escapes workspace: %s", path) } return rel, nil } // isBinaryFile uses common heuristics to determine if the content is a binary file. 
func isBinaryFile(content []byte) bool {
	// Only the leading bytes are inspected; this is also the most that
	// http.DetectContentType looks at.
	const sniffLen = 512

	head := content
	if len(head) > sniffLen {
		head = head[:sniffLen]
	}
	if len(head) == 0 {
		return false
	}

	// A NUL byte in the sample is the classic binary marker.
	if bytes.IndexByte(head, 0) >= 0 {
		return true
	}

	// Let the standard library recognise specific formats such as PDF.
	mime := http.DetectContentType(head)
	if mime == "application/pdf" {
		return true
	}
	for _, family := range []string{"image/", "video/", "audio/"} {
		if strings.HasPrefix(mime, family) {
			return true
		}
	}
	return false
}