Files
picoclaw/pkg/tools/filesystem.go
T
2026-03-07 00:33:27 +01:00

669 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package tools
import (
"bytes"
"context"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"github.com/sipeed/picoclaw/pkg/fileutil"
)
// MaxReadFileSize is the largest number of bytes ReadFileTool.Execute returns
// in a single call; larger `length` arguments are capped to this value.
const MaxReadFileSize = 128 * 1024 // 128 KiB limit to avoid context overflow
// validatePath resolves path against workspace and, when restrict is true,
// verifies that the result stays inside the workspace.
//
// A relative path is joined onto the absolute workspace; an absolute path is
// only cleaned. With restrict set, two checks are applied:
//  1. a lexical check that the absolute path lies under the workspace, and
//  2. a symlink check: the path (or, if it does not exist yet, its nearest
//     existing ancestor) is resolved with filepath.EvalSymlinks and must still
//     lie under the symlink-resolved workspace.
//
// It returns the absolute (not symlink-resolved) path on success. On any
// error the returned path is always empty.
func validatePath(path, workspace string, restrict bool) (string, error) {
	if workspace == "" {
		// Return "" like every other failure path so callers can never pick
		// up an unvalidated path alongside the error.
		return "", fmt.Errorf("workspace is not defined")
	}

	absWorkspace, err := filepath.Abs(workspace)
	if err != nil {
		return "", fmt.Errorf("failed to resolve workspace path: %w", err)
	}

	var absPath string
	if filepath.IsAbs(path) {
		absPath = filepath.Clean(path)
	} else {
		absPath, err = filepath.Abs(filepath.Join(absWorkspace, path))
		if err != nil {
			return "", fmt.Errorf("failed to resolve file path: %w", err)
		}
	}

	if !restrict {
		return absPath, nil
	}

	if !isWithinWorkspace(absPath, absWorkspace) {
		return "", fmt.Errorf("access denied: path is outside the workspace")
	}

	// Fall back to the unresolved workspace if it cannot be resolved.
	workspaceReal := absWorkspace
	if resolved, err := filepath.EvalSymlinks(absWorkspace); err == nil {
		workspaceReal = resolved
	}

	resolved, err := filepath.EvalSymlinks(absPath)
	switch {
	case err == nil:
		if !isWithinWorkspace(resolved, workspaceReal) {
			return "", fmt.Errorf("access denied: symlink resolves outside workspace")
		}
	case os.IsNotExist(err):
		// The target does not exist yet (e.g. a file about to be created):
		// validate the closest ancestor that does exist instead.
		parentResolved, err := resolveExistingAncestor(filepath.Dir(absPath))
		switch {
		case err == nil:
			if !isWithinWorkspace(parentResolved, workspaceReal) {
				return "", fmt.Errorf("access denied: symlink resolves outside workspace")
			}
		case !os.IsNotExist(err):
			return "", fmt.Errorf("failed to resolve path: %w", err)
		}
	default:
		return "", fmt.Errorf("failed to resolve path: %w", err)
	}

	return absPath, nil
}
// resolveExistingAncestor walks upward from path until it finds a path that
// filepath.EvalSymlinks can resolve, and returns that resolved path.
//
// Errors other than "does not exist" are returned immediately. If the walk
// reaches a path whose parent is itself without resolving anything,
// os.ErrNotExist is returned.
func resolveExistingAncestor(path string) (string, error) {
	candidate := filepath.Clean(path)
	for {
		resolved, err := filepath.EvalSymlinks(candidate)
		if err == nil {
			return resolved, nil
		}
		if !os.IsNotExist(err) {
			return "", err
		}

		parent := filepath.Dir(candidate)
		if parent == candidate {
			// Nothing left to climb to.
			return "", os.ErrNotExist
		}
		candidate = parent
	}
}
// isWithinWorkspace reports whether candidate lies at or below workspace.
// Both paths are cleaned first; the check is purely lexical (the relative
// path from workspace to candidate must be local per filepath.IsLocal).
func isWithinWorkspace(candidate, workspace string) bool {
	base := filepath.Clean(workspace)
	target := filepath.Clean(candidate)

	rel, err := filepath.Rel(base, target)
	if err != nil {
		return false
	}
	return filepath.IsLocal(rel)
}
// ReadFileTool is the "read_file" tool. It reads file contents through the
// fileSystem backend selected when the tool is constructed.
type ReadFileTool struct {
fs fileSystem // backend used by Execute to open the requested file
}
// NewReadFileTool builds a ReadFileTool whose filesystem backend is chosen by
// buildFs from workspace and restrict. Only the first allowPaths argument, if
// any, is used as the whitelist pattern set; further arguments are ignored.
func NewReadFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ReadFileTool {
	var whitelist []*regexp.Regexp // stays nil when no pattern set was supplied
	if len(allowPaths) != 0 {
		whitelist = allowPaths[0]
	}

	backend := buildFs(workspace, restrict, whitelist)
	return &ReadFileTool{fs: backend}
}
// Name returns the tool's name, "read_file".
func (t *ReadFileTool) Name() string {
	const toolName = "read_file"
	return toolName
}
// Description returns the human-readable summary of the tool, including how
// to paginate through large files with `offset` and `length`.
func (t *ReadFileTool) Description() string {
	const description = "Read the contents of a file. " +
		"Supports pagination via `offset` and `length` for files larger than the per-call limit. " +
		"If the response header indicates the file is TRUNCATED, use the provided offset in your next call to continue reading."
	return description
}
// Parameters returns the JSON-schema-style description of the tool's
// arguments: a required string "path" plus optional integer "offset"
// (default 0) and "length" (default MaxReadFileSize).
func (t *ReadFileTool) Parameters() map[string]any {
	pathProp := map[string]any{
		"type":        "string",
		"description": "Path to the file to read.",
	}
	offsetProp := map[string]any{
		"type":        "integer",
		"description": "Byte offset to start reading from (default: 0).",
		"default":     0,
	}
	lengthProp := map[string]any{
		"type": "integer",
		"description": fmt.Sprintf(
			"Maximum number of bytes to read (default / max: %d).", MaxReadFileSize,
		),
		"default": MaxReadFileSize,
	}

	return map[string]any{
		"type": "object",
		"properties": map[string]any{
			"path":   pathProp,
			"offset": offsetProp,
			"length": lengthProp,
		},
		"required": []string{"path"},
	}
}
// Execute reads up to `length` bytes of the file at `path`, starting at byte
// `offset`, and returns them prefixed by a metadata header.
//
// Arguments read from args:
//   - "path" (string, required): file to read.
//   - "offset" (integer, optional, default 0): starting byte; must be >= 0.
//   - "length" (integer, optional, default MaxReadFileSize): maximum number of
//     bytes to return; must be > 0 and is capped at MaxReadFileSize.
//
// The header reports the path, the total file size and the inclusive byte
// range that was returned, followed by either a TRUNCATED notice carrying the
// offset for the next call or an END OF FILE notice. A non-zero offset that
// yields no data returns a bare END OF FILE message. Every failure is
// reported through ErrorResult.
func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	path, ok := args["path"].(string)
	if !ok {
		return ErrorResult("path is required")
	}

	// offset (optional, default 0)
	offset, err := getInt64Arg(args, "offset", 0)
	if err != nil {
		return ErrorResult(err.Error())
	}
	if offset < 0 {
		return ErrorResult("offset must be >= 0")
	}

	// length (optional, capped at MaxReadFileSize)
	length, err := getInt64Arg(args, "length", MaxReadFileSize)
	if err != nil {
		return ErrorResult(err.Error())
	}
	if length <= 0 {
		return ErrorResult("length must be > 0")
	}
	if length > MaxReadFileSize {
		length = MaxReadFileSize
	}

	file, err := t.fs.Open(path)
	if err != nil {
		return ErrorResult(err.Error())
	}
	defer file.Close()

	// A Stat failure aborts the call, so the total size is always known past
	// this point and no "size unknown" handling is needed below.
	info, err := file.Stat()
	if err != nil {
		return ErrorResult(fmt.Sprintf("failed to get file info: %v", err))
	}
	totalSize := info.Size()

	// Position the reader at offset.
	if seeker, ok := file.(io.Seeker); ok {
		if _, err := seeker.Seek(offset, io.SeekStart); err != nil {
			return ErrorResult(fmt.Sprintf("failed to seek to offset %d: %v", offset, err))
		}
	} else if offset > 0 {
		// Fallback for non-seekable streams: discard leading bytes.
		if _, err := io.CopyN(io.Discard, file, offset); err != nil {
			return ErrorResult(fmt.Sprintf("failed to advance to offset %d: %v", offset, err))
		}
	}

	// Read at most `length` bytes.
	data, err := io.ReadAll(io.LimitReader(file, length))
	if err != nil {
		return ErrorResult(fmt.Sprintf("failed to read file content: %v", err))
	}
	if len(data) == 0 && offset > 0 {
		return NewToolResult("[END OF FILE — no content at this offset]")
	}

	readEnd := offset + int64(len(data))
	// More content remains only if the read filled the whole window and
	// stopped short of the reported file size.
	hasMore := int64(len(data)) == length && readEnd < totalSize

	// An empty file has no byte range; avoid printing a negative end index.
	readRange := "0 bytes"
	if len(data) > 0 {
		// The separator matters: without it offset 0 / end 127 would render
		// as the ambiguous "bytes 0127".
		readRange = fmt.Sprintf("bytes %d-%d", offset, readEnd-1)
	}

	header := fmt.Sprintf(
		"[file: %s | total: %d bytes | read: %s]",
		path, totalSize, readRange,
	)
	if hasMore {
		header += fmt.Sprintf(
			"\n[TRUNCATED — file has more content. Call read_file again with offset=%d to continue.]",
			readEnd,
		)
	} else {
		header += "\n[END OF FILE — no further content.]"
	}

	return NewToolResult(header + "\n\n" + string(data))
}
// getInt64Arg extracts an integer argument from the args map, returning the
// provided default if the key is absent.
//
// Accepted value types are float64 (the type encoding/json produces for
// numbers), int, int64 and base-10 strings. A float64 is accepted only when it
// is a whole number inside the int64 range: fractional values, NaN, infinities
// and out-of-range values are rejected with an error rather than silently
// truncated, because converting such values to int64 loses information or is
// implementation-dependent. Any other type yields an error.
func getInt64Arg(args map[string]any, key string, defaultVal int64) (int64, error) {
	raw, exists := args[key]
	if !exists {
		return defaultVal, nil
	}

	switch v := raw.(type) {
	case float64:
		// int64 covers [-2^63, 2^63); both bounds are exactly representable
		// as float64. NaN fails the range comparison and is rejected too.
		const limit = float64(1 << 63)
		if !(v >= -limit && v < limit) || v != float64(int64(v)) {
			return 0, fmt.Errorf("invalid integer value %v for %s parameter", v, key)
		}
		return int64(v), nil
	case int:
		return int64(v), nil
	case int64:
		return v, nil
	case string:
		parsed, err := strconv.ParseInt(v, 10, 64)
		if err != nil {
			return 0, fmt.Errorf("invalid integer format for %s parameter: %w", key, err)
		}
		return parsed, nil
	default:
		return 0, fmt.Errorf("unsupported type %T for %s parameter", raw, key)
	}
}
// WriteFileTool is the "write_file" tool. It writes content through the
// fileSystem backend selected when the tool is constructed.
type WriteFileTool struct {
fs fileSystem // backend used by Execute to write the file
}
// NewWriteFileTool builds a WriteFileTool whose filesystem backend is chosen
// by buildFs from workspace and restrict. Only the first allowPaths argument,
// if any, is used as the whitelist pattern set; further arguments are ignored.
func NewWriteFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *WriteFileTool {
	var whitelist []*regexp.Regexp // stays nil when no pattern set was supplied
	if len(allowPaths) != 0 {
		whitelist = allowPaths[0]
	}

	backend := buildFs(workspace, restrict, whitelist)
	return &WriteFileTool{fs: backend}
}
// Name returns the tool's name, "write_file".
func (t *WriteFileTool) Name() string {
	const toolName = "write_file"
	return toolName
}
// Description returns the one-line human-readable summary of the tool.
func (t *WriteFileTool) Description() string {
	const description = "Write content to a file"
	return description
}
// Parameters returns the JSON-schema-style description of the tool's
// arguments: two required strings, "path" and "content".
func (t *WriteFileTool) Parameters() map[string]any {
	pathProp := map[string]any{
		"type":        "string",
		"description": "Path to the file to write",
	}
	contentProp := map[string]any{
		"type":        "string",
		"description": "Content to write to the file",
	}

	return map[string]any{
		"type": "object",
		"properties": map[string]any{
			"path":    pathProp,
			"content": contentProp,
		},
		"required": []string{"path", "content"},
	}
}
// Execute writes the string argument "content" to the file named by the
// string argument "path" using the tool's filesystem backend. A missing or
// non-string argument, or a write failure, is reported via ErrorResult; on
// success a SilentResult naming the written path is returned.
func (t *WriteFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	target, hasPath := args["path"].(string)
	if !hasPath {
		return ErrorResult("path is required")
	}

	body, hasContent := args["content"].(string)
	if !hasContent {
		return ErrorResult("content is required")
	}

	writeErr := t.fs.WriteFile(target, []byte(body))
	if writeErr != nil {
		return ErrorResult(writeErr.Error())
	}

	message := fmt.Sprintf("File written: %s", target)
	return SilentResult(message)
}
// ListDirTool is the "list_dir" tool. It lists directory entries through the
// fileSystem backend selected when the tool is constructed.
type ListDirTool struct {
fs fileSystem // backend used by Execute to read the directory
}
// NewListDirTool builds a ListDirTool whose filesystem backend is chosen by
// buildFs from workspace and restrict. Only the first allowPaths argument, if
// any, is used as the whitelist pattern set; further arguments are ignored.
func NewListDirTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ListDirTool {
	var whitelist []*regexp.Regexp // stays nil when no pattern set was supplied
	if len(allowPaths) != 0 {
		whitelist = allowPaths[0]
	}

	backend := buildFs(workspace, restrict, whitelist)
	return &ListDirTool{fs: backend}
}
// Name returns the tool's name, "list_dir".
func (t *ListDirTool) Name() string {
	const toolName = "list_dir"
	return toolName
}
// Description returns the one-line human-readable summary of the tool.
func (t *ListDirTool) Description() string {
	const description = "List files and directories in a path"
	return description
}
// Parameters returns the JSON-schema-style description of the tool's single
// string argument, "path", which the schema marks as required.
func (t *ListDirTool) Parameters() map[string]any {
	pathProp := map[string]any{
		"type":        "string",
		"description": "Path to list",
	}

	return map[string]any{
		"type": "object",
		"properties": map[string]any{
			"path": pathProp,
		},
		"required": []string{"path"},
	}
}
// Execute lists the directory named by the string argument "path". When the
// argument is missing or not a string, "." is listed instead. A read failure
// is reported via ErrorResult; otherwise the entries are rendered by
// formatDirEntries.
func (t *ListDirTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	dir := "."
	if requested, isString := args["path"].(string); isString {
		dir = requested
	}

	listing, readErr := t.fs.ReadDir(dir)
	if readErr != nil {
		return ErrorResult(fmt.Sprintf("failed to read directory: %v", readErr))
	}
	return formatDirEntries(listing)
}
// formatDirEntries renders one line per entry, "DIR: <name>" for directories
// and "FILE: <name>" for everything else, and wraps the text in a ToolResult.
func formatDirEntries(entries []os.DirEntry) *ToolResult {
	var listing strings.Builder
	for _, item := range entries {
		label := "FILE: "
		if item.IsDir() {
			label = "DIR: "
		}
		listing.WriteString(label)
		listing.WriteString(item.Name())
		listing.WriteByte('\n')
	}
	return NewToolResult(listing.String())
}
// fileSystem abstracts reading, writing, and listing files, allowing both
// unrestricted (host filesystem) and sandbox (os.Root) implementations to share the same polymorphic interface.
// In this file it is implemented by hostFs, sandboxFs and whitelistFs.
type fileSystem interface {
// ReadFile returns the entire contents of the file at path.
ReadFile(path string) ([]byte, error)
// WriteFile writes data to the file at path.
WriteFile(path string, data []byte) error
// ReadDir returns the entries of the directory at path.
ReadDir(path string) ([]os.DirEntry, error)
// Open opens the file at path for reading; the caller must close it.
Open(path string) (fs.File, error)
}
// hostFs is an unrestricted fileSystem implementation that operates directly
// on the host filesystem; it applies no workspace checks to the paths it is given.
type hostFs struct{}
// ReadFile reads the whole file at path from the host filesystem. Errors are
// wrapped with a "file not found" or "access denied" hint when os.IsNotExist
// or os.IsPermission apply, and with a generic prefix otherwise.
func (h *hostFs) ReadFile(path string) ([]byte, error) {
	data, readErr := os.ReadFile(path)
	switch {
	case readErr == nil:
		return data, nil
	case os.IsNotExist(readErr):
		return nil, fmt.Errorf("failed to read file: file not found: %w", readErr)
	case os.IsPermission(readErr):
		return nil, fmt.Errorf("failed to read file: access denied: %w", readErr)
	default:
		return nil, fmt.Errorf("failed to read file: %w", readErr)
	}
}
// ReadDir lists the directory at path on the host filesystem, returning the
// result of os.ReadDir unchanged.
func (h *hostFs) ReadDir(path string) ([]os.DirEntry, error) {
	entries, err := os.ReadDir(path)
	return entries, err
}
// WriteFile writes data to path on the host filesystem by delegating to
// fileutil.WriteFileAtomic.
// NOTE(review): the original comment states the helper writes atomically with
// an explicit sync for flash-storage reliability; that helper is not visible
// here, so confirm against pkg/fileutil.
func (h *hostFs) WriteFile(path string, data []byte) error {
	// Owner read/write only: a secure default for newly written files.
	const ownerOnly = 0o600
	return fileutil.WriteFileAtomic(path, data, ownerOnly)
}
// Open opens the file at path on the host filesystem for reading. Errors are
// wrapped with a "file not found" or "access denied" hint when os.IsNotExist
// or os.IsPermission apply, and with a generic prefix otherwise.
func (h *hostFs) Open(path string) (fs.File, error) {
	handle, openErr := os.Open(path)
	switch {
	case openErr == nil:
		return handle, nil
	case os.IsNotExist(openErr):
		return nil, fmt.Errorf("failed to open file: file not found: %w", openErr)
	case os.IsPermission(openErr):
		return nil, fmt.Errorf("failed to open file: access denied: %w", openErr)
	default:
		return nil, fmt.Errorf("failed to open file: %w", openErr)
	}
}
// sandboxFs is a sandboxed fileSystem that operates within a strictly defined workspace using os.Root.
// Every operation opens a fresh os.Root on the workspace (see execute) and
// addresses files by a workspace-relative path.
type sandboxFs struct {
workspace string // directory opened as the os.Root; must be non-empty
}
// execute runs fn against an os.Root opened on the workspace, passing it the
// workspace-relative form of path computed by getSafeRelPath. The root is
// closed when execute returns. It fails without calling fn if the workspace is
// empty, cannot be opened, or path cannot be made a safe relative path.
func (r *sandboxFs) execute(path string, fn func(root *os.Root, relPath string) error) error {
	if r.workspace == "" {
		return fmt.Errorf("workspace is not defined")
	}

	sandboxRoot, openErr := os.OpenRoot(r.workspace)
	if openErr != nil {
		return fmt.Errorf("failed to open workspace: %w", openErr)
	}
	defer sandboxRoot.Close()

	safeRel, relErr := getSafeRelPath(r.workspace, path)
	if relErr != nil {
		return relErr
	}

	return fn(sandboxRoot, safeRel)
}
// ReadFile reads the whole file at path through the workspace root. Errors
// are classified as "file not found", "access denied" (permission errors and
// os.Root's "escapes from parent" rejection) or a generic read failure.
func (r *sandboxFs) ReadFile(path string) ([]byte, error) {
	var content []byte
	err := r.execute(path, func(root *os.Root, relPath string) error {
		data, readErr := root.ReadFile(relPath)
		switch {
		case readErr == nil:
			content = data
			return nil
		case os.IsNotExist(readErr):
			return fmt.Errorf("failed to read file: file not found: %w", readErr)
		// os.Root returns "escapes from parent" for paths outside the root
		case os.IsPermission(readErr),
			strings.Contains(readErr.Error(), "escapes from parent"),
			strings.Contains(readErr.Error(), "permission denied"):
			return fmt.Errorf("failed to read file: access denied: %w", readErr)
		default:
			return fmt.Errorf("failed to read file: %w", readErr)
		}
	})
	return content, err
}
// WriteFile atomically replaces the file at path (relative to the workspace
// root) with data.
//
// Missing parent directories are created with mode 0o755. The data is written
// to a uniquely named temp file (mode 0o600) in the SAME directory as the
// target, synced, closed and then renamed over the target; finally the
// containing directory is synced on a best-effort basis so the rename itself
// is durable. Keeping the temp file beside the target means the rename never
// has to cross directories and the directory that gets synced is the one that
// actually changed.
//
// On any failure after the temp file was created it is removed again. If the
// temp file could not be created nothing is removed: O_EXCL guarantees this
// call did not create it, so any file at that path belongs to someone else.
func (r *sandboxFs) WriteFile(path string, data []byte) error {
	return r.execute(path, func(root *os.Root, relPath string) error {
		dir := filepath.Dir(relPath)
		if dir != "." && dir != "/" {
			if err := root.MkdirAll(dir, 0o755); err != nil {
				return fmt.Errorf("failed to create parent directories: %w", err)
			}
		}

		// Use atomic write pattern with explicit sync for flash storage reliability.
		// Using 0o600 (owner read/write only) for secure default permissions.
		tmpName := fmt.Sprintf(".tmp-%d-%d", os.Getpid(), time.Now().UnixNano())
		tmpRelPath := filepath.Join(dir, tmpName)
		tmpFile, err := root.OpenFile(tmpRelPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600)
		if err != nil {
			return fmt.Errorf("failed to open temp file: %w", err)
		}

		// From here on the temp file is ours: clean it up on every failure.
		renamed := false
		defer func() {
			if !renamed {
				// Best-effort cleanup; the primary error is already being returned.
				_ = root.Remove(tmpRelPath)
			}
		}()

		if _, err := tmpFile.Write(data); err != nil {
			tmpFile.Close()
			return fmt.Errorf("failed to write temp file: %w", err)
		}

		// CRITICAL: Force sync to storage medium before rename.
		// This ensures data is physically written to disk, not just cached.
		if err := tmpFile.Sync(); err != nil {
			tmpFile.Close()
			return fmt.Errorf("failed to sync temp file: %w", err)
		}

		if err := tmpFile.Close(); err != nil {
			return fmt.Errorf("failed to close temp file: %w", err)
		}

		if err := root.Rename(tmpRelPath, relPath); err != nil {
			return fmt.Errorf("failed to rename temp file over target: %w", err)
		}
		renamed = true

		// Sync the directory that received the rename so the new directory
		// entry is durable. Best effort: the data itself is already synced.
		if dirFile, err := root.Open(dir); err == nil {
			_ = dirFile.Sync()
			dirFile.Close()
		}
		return nil
	})
}
// ReadDir lists the directory at path through the workspace root's fs.FS
// view. Errors from fs.ReadDir are returned unwrapped.
func (r *sandboxFs) ReadDir(path string) ([]os.DirEntry, error) {
	var listing []os.DirEntry
	err := r.execute(path, func(root *os.Root, relPath string) error {
		found, readErr := fs.ReadDir(root.FS(), relPath)
		if readErr == nil {
			listing = found
		}
		return readErr
	})
	return listing, err
}
// Open opens the file at path for reading through the workspace root. Errors
// are classified as "file not found", "access denied" (permission errors and
// os.Root's "escapes from parent" rejection) or a generic open failure. The
// returned file is nil whenever an error is returned.
func (r *sandboxFs) Open(path string) (fs.File, error) {
	var opened fs.File
	err := r.execute(path, func(root *os.Root, relPath string) error {
		handle, openErr := root.Open(relPath)
		switch {
		case openErr == nil:
			opened = handle
			return nil
		case os.IsNotExist(openErr):
			return fmt.Errorf("failed to open file: file not found: %w", openErr)
		case os.IsPermission(openErr),
			strings.Contains(openErr.Error(), "escapes from parent"),
			strings.Contains(openErr.Error(), "permission denied"):
			return fmt.Errorf("failed to open file: access denied: %w", openErr)
		default:
			return fmt.Errorf("failed to open file: %w", openErr)
		}
	})
	return opened, err
}
// whitelistFs wraps a sandboxFs and allows access to specific paths outside
// the workspace when they match any of the provided patterns.
// Matching paths are served by the unrestricted host backend; all others go
// through the sandbox.
type whitelistFs struct {
sandbox *sandboxFs // handles every path that matches no pattern
host hostFs // handles paths that match a pattern
patterns []*regexp.Regexp // whitelist; a path matching any entry bypasses the sandbox
}
// matches reports whether path is whitelisted, i.e. whether any pattern
// matches it.
//
// A path containing a ".." element never matches. Patterns are applied to
// the path text as given, so without this guard a path such as
// "/allowed/../etc/passwd" would satisfy a pattern anchored on "/allowed/"
// and then be handed to the unrestricted host backend, which would resolve
// the ".." and leave the whitelisted area. Such paths fall through to the
// sandbox instead.
// NOTE(review): this check is purely lexical; symlinks inside a whitelisted
// directory are not resolved here.
func (w *whitelistFs) matches(path string) bool {
	for _, elem := range strings.Split(filepath.ToSlash(path), "/") {
		if elem == ".." {
			return false
		}
	}

	for _, p := range w.patterns {
		if p.MatchString(path) {
			return true
		}
	}
	return false
}
// ReadFile reads path via the host backend when it is whitelisted and via
// the sandbox otherwise.
func (w *whitelistFs) ReadFile(path string) ([]byte, error) {
	if !w.matches(path) {
		return w.sandbox.ReadFile(path)
	}
	return w.host.ReadFile(path)
}
// WriteFile writes data to path via the host backend when it is whitelisted
// and via the sandbox otherwise.
func (w *whitelistFs) WriteFile(path string, data []byte) error {
	if !w.matches(path) {
		return w.sandbox.WriteFile(path, data)
	}
	return w.host.WriteFile(path, data)
}
// ReadDir lists path via the host backend when it is whitelisted and via the
// sandbox otherwise.
func (w *whitelistFs) ReadDir(path string) ([]os.DirEntry, error) {
	if !w.matches(path) {
		return w.sandbox.ReadDir(path)
	}
	return w.host.ReadDir(path)
}
// Open opens path via the host backend when it is whitelisted and via the
// sandbox otherwise.
func (w *whitelistFs) Open(path string) (fs.File, error) {
	if !w.matches(path) {
		return w.sandbox.Open(path)
	}
	return w.host.Open(path)
}
// buildFs selects the fileSystem backend for a tool:
//   - restrict == false: an unrestricted hostFs;
//   - restrict == true with at least one pattern: a whitelistFs wrapping a
//     sandboxFs for workspace;
//   - restrict == true without patterns: a plain sandboxFs for workspace.
func buildFs(workspace string, restrict bool, patterns []*regexp.Regexp) fileSystem {
	switch {
	case !restrict:
		return &hostFs{}
	case len(patterns) > 0:
		return &whitelistFs{
			sandbox:  &sandboxFs{workspace: workspace},
			patterns: patterns,
		}
	default:
		return &sandboxFs{workspace: workspace}
	}
}
// getSafeRelPath converts path into a cleaned, workspace-relative path
// suitable for os.Root methods. An absolute path is rewritten relative to
// workspace; a relative path is only cleaned. The result must be local per
// filepath.IsLocal, otherwise a "path escapes workspace" error is returned.
// An empty workspace is rejected.
func getSafeRelPath(workspace, path string) (string, error) {
	if workspace == "" {
		return "", fmt.Errorf("workspace is not defined")
	}

	candidate := filepath.Clean(path)
	if filepath.IsAbs(candidate) {
		relative, err := filepath.Rel(workspace, candidate)
		if err != nil {
			return "", fmt.Errorf("failed to calculate relative path: %w", err)
		}
		candidate = relative
	}

	if filepath.IsLocal(candidate) {
		return candidate, nil
	}
	return "", fmt.Errorf("path escapes workspace: %s", path)
}
// isBinaryFile reports whether content looks like binary data. Only the first
// 512 bytes are inspected: content is binary if that sample contains a NUL
// byte, or if http.DetectContentType classifies it as a PDF, image, video or
// audio type. Empty content is never binary.
func isBinaryFile(content []byte) bool {
	if len(content) == 0 {
		return false
	}

	// Inspect at most the leading 512 bytes.
	sample := content
	if len(sample) > 512 {
		sample = sample[:512]
	}

	// A NUL byte is the standard signal for binary content.
	if bytes.IndexByte(sample, 0) >= 0 {
		return true
	}

	// Content sniffing catches formats such as PDF whose header has no NUL.
	switch detected := http.DetectContentType(sample); {
	case detected == "application/pdf",
		strings.HasPrefix(detected, "image/"),
		strings.HasPrefix(detected, "video/"),
		strings.HasPrefix(detected, "audio/"):
		return true
	default:
		return false
	}
}