mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
572 lines
15 KiB
Go
572 lines
15 KiB
Go
package tools
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"io/fs"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/fileutil"
|
|
)
|
|
|
|
// validatePath ensures the given path is within the workspace if restrict is true.
|
|
func validatePath(path, workspace string, restrict bool) (string, error) {
|
|
if workspace == "" {
|
|
return path, fmt.Errorf("workspace is not defined")
|
|
}
|
|
|
|
absWorkspace, err := filepath.Abs(workspace)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to resolve workspace path: %w", err)
|
|
}
|
|
|
|
var absPath string
|
|
if filepath.IsAbs(path) {
|
|
absPath = filepath.Clean(path)
|
|
} else {
|
|
absPath, err = filepath.Abs(filepath.Join(absWorkspace, path))
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to resolve file path: %w", err)
|
|
}
|
|
}
|
|
|
|
if restrict {
|
|
if !isWithinWorkspace(absPath, absWorkspace) {
|
|
return "", fmt.Errorf("access denied: path is outside the workspace")
|
|
}
|
|
|
|
var resolved string
|
|
workspaceReal := absWorkspace
|
|
if resolved, err = filepath.EvalSymlinks(absWorkspace); err == nil {
|
|
workspaceReal = resolved
|
|
}
|
|
|
|
if resolved, err = filepath.EvalSymlinks(absPath); err == nil {
|
|
if !isWithinWorkspace(resolved, workspaceReal) {
|
|
return "", fmt.Errorf("access denied: symlink resolves outside workspace")
|
|
}
|
|
} else if os.IsNotExist(err) {
|
|
var parentResolved string
|
|
if parentResolved, err = resolveExistingAncestor(filepath.Dir(absPath)); err == nil {
|
|
if !isWithinWorkspace(parentResolved, workspaceReal) {
|
|
return "", fmt.Errorf("access denied: symlink resolves outside workspace")
|
|
}
|
|
} else if !os.IsNotExist(err) {
|
|
return "", fmt.Errorf("failed to resolve path: %w", err)
|
|
}
|
|
} else {
|
|
return "", fmt.Errorf("failed to resolve path: %w", err)
|
|
}
|
|
}
|
|
|
|
return absPath, nil
|
|
}
|
|
|
|
// resolveExistingAncestor walks upward from path until filepath.EvalSymlinks
// succeeds and returns that resolved location.
//
// Any error other than "does not exist" is returned immediately. If the walk
// reaches a directory that is its own parent without resolving anything,
// os.ErrNotExist is returned.
func resolveExistingAncestor(path string) (string, error) {
	current := filepath.Clean(path)
	for {
		resolved, err := filepath.EvalSymlinks(current)
		if err == nil {
			return resolved, nil
		}
		if !os.IsNotExist(err) {
			return "", err
		}

		parent := filepath.Dir(current)
		if parent == current {
			// Reached the top of the tree; nothing left to try.
			return "", os.ErrNotExist
		}
		current = parent
	}
}
|
|
|
|
// isWithinWorkspace reports whether candidate is workspace itself or lies
// beneath it. The comparison is purely lexical: both paths are cleaned and
// the relative path between them must be local.
func isWithinWorkspace(candidate, workspace string) bool {
	base := filepath.Clean(workspace)
	target := filepath.Clean(candidate)

	relative, relErr := filepath.Rel(base, target)
	if relErr != nil {
		return false
	}
	return filepath.IsLocal(relative)
}
|
|
|
|
type ReadFileTool struct {
|
|
fs fileSystem
|
|
}
|
|
|
|
func NewReadFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ReadFileTool {
|
|
var patterns []*regexp.Regexp
|
|
if len(allowPaths) > 0 {
|
|
patterns = allowPaths[0]
|
|
}
|
|
return &ReadFileTool{fs: buildFs(workspace, restrict, patterns)}
|
|
}
|
|
|
|
func (t *ReadFileTool) Name() string {
|
|
return "read_file"
|
|
}
|
|
|
|
func (t *ReadFileTool) Description() string {
|
|
return "Read the contents of a file"
|
|
}
|
|
|
|
func (t *ReadFileTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"path": map[string]any{
|
|
"type": "string",
|
|
"description": "Path to the file to read",
|
|
},
|
|
},
|
|
"required": []string{"path"},
|
|
}
|
|
}
|
|
|
|
func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
|
path, ok := args["path"].(string)
|
|
if !ok {
|
|
return ErrorResult("path is required")
|
|
}
|
|
|
|
// open file instead of loading it all into memory
|
|
file, err := t.fs.Open(path)
|
|
if err != nil {
|
|
return ErrorResult(err.Error())
|
|
}
|
|
defer file.Close()
|
|
|
|
// read only an initial chunk (512 bytes is the standard for MIME sniffing)
|
|
header := make([]byte, 512)
|
|
n, err := file.Read(header)
|
|
if err != nil && err != io.EOF {
|
|
return ErrorResult(fmt.Sprintf("failed to read file header: %v", err))
|
|
}
|
|
header = header[:n]
|
|
|
|
// Lock the binaries now before using more RAM
|
|
if isBinaryFile(header) {
|
|
return ErrorResult(
|
|
fmt.Sprintf(
|
|
"cannot read file %q: appears to be a binary file (e.g., PDF, image, executable)",
|
|
filepath.Base(path),
|
|
),
|
|
)
|
|
}
|
|
|
|
// If it is text, let's read the rest of the file
|
|
// (io.ReadAll will continue reading starting from byte 513)
|
|
rest, err := io.ReadAll(file)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("failed to read file content: %v", err))
|
|
}
|
|
|
|
// Recompose the complete content by merging the header and the rest
|
|
fullContent := make([]byte, 0, len(header)+len(rest))
|
|
fullContent = append(fullContent, header...)
|
|
fullContent = append(fullContent, rest...)
|
|
|
|
return NewToolResult(string(fullContent))
|
|
}
|
|
|
|
type WriteFileTool struct {
|
|
fs fileSystem
|
|
}
|
|
|
|
func NewWriteFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *WriteFileTool {
|
|
var patterns []*regexp.Regexp
|
|
if len(allowPaths) > 0 {
|
|
patterns = allowPaths[0]
|
|
}
|
|
return &WriteFileTool{fs: buildFs(workspace, restrict, patterns)}
|
|
}
|
|
|
|
func (t *WriteFileTool) Name() string {
|
|
return "write_file"
|
|
}
|
|
|
|
func (t *WriteFileTool) Description() string {
|
|
return "Write content to a file"
|
|
}
|
|
|
|
func (t *WriteFileTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"path": map[string]any{
|
|
"type": "string",
|
|
"description": "Path to the file to write",
|
|
},
|
|
"content": map[string]any{
|
|
"type": "string",
|
|
"description": "Content to write to the file",
|
|
},
|
|
},
|
|
"required": []string{"path", "content"},
|
|
}
|
|
}
|
|
|
|
func (t *WriteFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
|
path, ok := args["path"].(string)
|
|
if !ok {
|
|
return ErrorResult("path is required")
|
|
}
|
|
|
|
content, ok := args["content"].(string)
|
|
if !ok {
|
|
return ErrorResult("content is required")
|
|
}
|
|
|
|
if err := t.fs.WriteFile(path, []byte(content)); err != nil {
|
|
return ErrorResult(err.Error())
|
|
}
|
|
|
|
return SilentResult(fmt.Sprintf("File written: %s", path))
|
|
}
|
|
|
|
type ListDirTool struct {
|
|
fs fileSystem
|
|
}
|
|
|
|
func NewListDirTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ListDirTool {
|
|
var patterns []*regexp.Regexp
|
|
if len(allowPaths) > 0 {
|
|
patterns = allowPaths[0]
|
|
}
|
|
return &ListDirTool{fs: buildFs(workspace, restrict, patterns)}
|
|
}
|
|
|
|
func (t *ListDirTool) Name() string {
|
|
return "list_dir"
|
|
}
|
|
|
|
func (t *ListDirTool) Description() string {
|
|
return "List files and directories in a path"
|
|
}
|
|
|
|
func (t *ListDirTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"path": map[string]any{
|
|
"type": "string",
|
|
"description": "Path to list",
|
|
},
|
|
},
|
|
"required": []string{"path"},
|
|
}
|
|
}
|
|
|
|
func (t *ListDirTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
|
path, ok := args["path"].(string)
|
|
if !ok {
|
|
path = "."
|
|
}
|
|
|
|
entries, err := t.fs.ReadDir(path)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("failed to read directory: %v", err))
|
|
}
|
|
return formatDirEntries(entries)
|
|
}
|
|
|
|
func formatDirEntries(entries []os.DirEntry) *ToolResult {
|
|
var result strings.Builder
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
result.WriteString("DIR: " + entry.Name() + "\n")
|
|
} else {
|
|
result.WriteString("FILE: " + entry.Name() + "\n")
|
|
}
|
|
}
|
|
return NewToolResult(result.String())
|
|
}
|
|
|
|
// fileSystem is the common interface for reading, writing, listing, and
// opening files. It lets the unrestricted host implementation and the
// os.Root-based sandbox implementation be used interchangeably.
type fileSystem interface {
	// Open opens the file at path for reading.
	Open(path string) (fs.File, error)
	// ReadFile returns the full content of the file at path.
	ReadFile(path string) ([]byte, error)
	// ReadDir returns the entries of the directory at path.
	ReadDir(path string) ([]os.DirEntry, error)
	// WriteFile writes data to the file at path.
	WriteFile(path string, data []byte) error
}
|
|
|
|
// hostFs is the unrestricted fileSystem implementation: every operation acts
// directly on the host filesystem with no workspace confinement.
type hostFs struct{}
|
|
|
|
func (h *hostFs) ReadFile(path string) ([]byte, error) {
|
|
content, err := os.ReadFile(path)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, fmt.Errorf("failed to read file: file not found: %w", err)
|
|
}
|
|
if os.IsPermission(err) {
|
|
return nil, fmt.Errorf("failed to read file: access denied: %w", err)
|
|
}
|
|
return nil, fmt.Errorf("failed to read file: %w", err)
|
|
}
|
|
return content, nil
|
|
}
|
|
|
|
func (h *hostFs) ReadDir(path string) ([]os.DirEntry, error) {
|
|
return os.ReadDir(path)
|
|
}
|
|
|
|
func (h *hostFs) WriteFile(path string, data []byte) error {
|
|
// Use unified atomic write utility with explicit sync for flash storage reliability.
|
|
// Using 0o600 (owner read/write only) for secure default permissions.
|
|
return fileutil.WriteFileAtomic(path, data, 0o600)
|
|
}
|
|
|
|
func (h *hostFs) Open(path string) (fs.File, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, fmt.Errorf("failed to open file: file not found: %w", err)
|
|
}
|
|
if os.IsPermission(err) {
|
|
return nil, fmt.Errorf("failed to open file: access denied: %w", err)
|
|
}
|
|
return nil, fmt.Errorf("failed to open file: %w", err)
|
|
}
|
|
return f, nil
|
|
}
|
|
|
|
// sandboxFs is the confined fileSystem implementation: each operation opens
// the workspace as an os.Root and acts only on paths relative to it.
type sandboxFs struct {
	// workspace is the directory opened as the root for every operation.
	workspace string
}
|
|
|
|
func (r *sandboxFs) execute(path string, fn func(root *os.Root, relPath string) error) error {
|
|
if r.workspace == "" {
|
|
return fmt.Errorf("workspace is not defined")
|
|
}
|
|
|
|
root, err := os.OpenRoot(r.workspace)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to open workspace: %w", err)
|
|
}
|
|
defer root.Close()
|
|
|
|
relPath, err := getSafeRelPath(r.workspace, path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return fn(root, relPath)
|
|
}
|
|
|
|
func (r *sandboxFs) ReadFile(path string) ([]byte, error) {
|
|
var content []byte
|
|
err := r.execute(path, func(root *os.Root, relPath string) error {
|
|
fileContent, err := root.ReadFile(relPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return fmt.Errorf("failed to read file: file not found: %w", err)
|
|
}
|
|
// os.Root returns "escapes from parent" for paths outside the root
|
|
if os.IsPermission(err) || strings.Contains(err.Error(), "escapes from parent") ||
|
|
strings.Contains(err.Error(), "permission denied") {
|
|
return fmt.Errorf("failed to read file: access denied: %w", err)
|
|
}
|
|
return fmt.Errorf("failed to read file: %w", err)
|
|
}
|
|
content = fileContent
|
|
return nil
|
|
})
|
|
return content, err
|
|
}
|
|
|
|
func (r *sandboxFs) WriteFile(path string, data []byte) error {
|
|
return r.execute(path, func(root *os.Root, relPath string) error {
|
|
dir := filepath.Dir(relPath)
|
|
if dir != "." && dir != "/" {
|
|
if err := root.MkdirAll(dir, 0o755); err != nil {
|
|
return fmt.Errorf("failed to create parent directories: %w", err)
|
|
}
|
|
}
|
|
|
|
// Use atomic write pattern with explicit sync for flash storage reliability.
|
|
// Using 0o600 (owner read/write only) for secure default permissions.
|
|
tmpRelPath := fmt.Sprintf(".tmp-%d-%d", os.Getpid(), time.Now().UnixNano())
|
|
|
|
tmpFile, err := root.OpenFile(tmpRelPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600)
|
|
if err != nil {
|
|
root.Remove(tmpRelPath)
|
|
return fmt.Errorf("failed to open temp file: %w", err)
|
|
}
|
|
|
|
if _, err := tmpFile.Write(data); err != nil {
|
|
tmpFile.Close()
|
|
root.Remove(tmpRelPath)
|
|
return fmt.Errorf("failed to write temp file: %w", err)
|
|
}
|
|
|
|
// CRITICAL: Force sync to storage medium before rename.
|
|
// This ensures data is physically written to disk, not just cached.
|
|
if err := tmpFile.Sync(); err != nil {
|
|
tmpFile.Close()
|
|
root.Remove(tmpRelPath)
|
|
return fmt.Errorf("failed to sync temp file: %w", err)
|
|
}
|
|
|
|
if err := tmpFile.Close(); err != nil {
|
|
root.Remove(tmpRelPath)
|
|
return fmt.Errorf("failed to close temp file: %w", err)
|
|
}
|
|
|
|
if err := root.Rename(tmpRelPath, relPath); err != nil {
|
|
root.Remove(tmpRelPath)
|
|
return fmt.Errorf("failed to rename temp file over target: %w", err)
|
|
}
|
|
|
|
// Sync directory to ensure rename is durable
|
|
if dirFile, err := root.Open("."); err == nil {
|
|
_ = dirFile.Sync()
|
|
dirFile.Close()
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func (r *sandboxFs) ReadDir(path string) ([]os.DirEntry, error) {
|
|
var entries []os.DirEntry
|
|
err := r.execute(path, func(root *os.Root, relPath string) error {
|
|
dirEntries, err := fs.ReadDir(root.FS(), relPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
entries = dirEntries
|
|
return nil
|
|
})
|
|
return entries, err
|
|
}
|
|
|
|
func (r *sandboxFs) Open(path string) (fs.File, error) {
|
|
var f fs.File
|
|
err := r.execute(path, func(root *os.Root, relPath string) error {
|
|
file, err := root.Open(relPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return fmt.Errorf("failed to open file: file not found: %w", err)
|
|
}
|
|
if os.IsPermission(err) || strings.Contains(err.Error(), "escapes from parent") ||
|
|
strings.Contains(err.Error(), "permission denied") {
|
|
return fmt.Errorf("failed to open file: access denied: %w", err)
|
|
}
|
|
return fmt.Errorf("failed to open file: %w", err)
|
|
}
|
|
f = file
|
|
return nil
|
|
})
|
|
return f, err
|
|
}
|
|
|
|
// whitelistFs wraps a sandboxFs and allows access to specific paths outside
|
|
// the workspace when they match any of the provided patterns.
|
|
type whitelistFs struct {
|
|
sandbox *sandboxFs
|
|
host hostFs
|
|
patterns []*regexp.Regexp
|
|
}
|
|
|
|
func (w *whitelistFs) matches(path string) bool {
|
|
for _, p := range w.patterns {
|
|
if p.MatchString(path) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (w *whitelistFs) ReadFile(path string) ([]byte, error) {
|
|
if w.matches(path) {
|
|
return w.host.ReadFile(path)
|
|
}
|
|
return w.sandbox.ReadFile(path)
|
|
}
|
|
|
|
func (w *whitelistFs) WriteFile(path string, data []byte) error {
|
|
if w.matches(path) {
|
|
return w.host.WriteFile(path, data)
|
|
}
|
|
return w.sandbox.WriteFile(path, data)
|
|
}
|
|
|
|
func (w *whitelistFs) ReadDir(path string) ([]os.DirEntry, error) {
|
|
if w.matches(path) {
|
|
return w.host.ReadDir(path)
|
|
}
|
|
return w.sandbox.ReadDir(path)
|
|
}
|
|
|
|
func (w *whitelistFs) Open(path string) (fs.File, error) {
|
|
if w.matches(path) {
|
|
return w.host.Open(path)
|
|
}
|
|
return w.sandbox.Open(path)
|
|
}
|
|
|
|
// buildFs returns the appropriate fileSystem implementation based on restriction
|
|
// settings and optional path whitelist patterns.
|
|
func buildFs(workspace string, restrict bool, patterns []*regexp.Regexp) fileSystem {
|
|
if !restrict {
|
|
return &hostFs{}
|
|
}
|
|
sandbox := &sandboxFs{workspace: workspace}
|
|
if len(patterns) > 0 {
|
|
return &whitelistFs{sandbox: sandbox, patterns: patterns}
|
|
}
|
|
return sandbox
|
|
}
|
|
|
|
// getSafeRelPath converts path into a cleaned path relative to workspace,
// suitable for use with os.Root.
//
// Relative paths are only cleaned. Absolute paths are made relative to the
// workspace; the workspace is first resolved to an absolute path so that a
// relative workspace setting does not make filepath.Rel fail. An error is
// returned when the workspace is empty, when the relative path cannot be
// computed, or when the result is not local to the workspace.
func getSafeRelPath(workspace, path string) (string, error) {
	if workspace == "" {
		return "", fmt.Errorf("workspace is not defined")
	}

	rel := filepath.Clean(path)
	if filepath.IsAbs(rel) {
		// filepath.Rel requires both arguments to be absolute or both
		// relative, so anchor the workspace before comparing.
		absWorkspace, err := filepath.Abs(workspace)
		if err != nil {
			return "", fmt.Errorf("failed to resolve workspace path: %w", err)
		}
		rel, err = filepath.Rel(absWorkspace, rel)
		if err != nil {
			return "", fmt.Errorf("failed to calculate relative path: %w", err)
		}
	}

	if !filepath.IsLocal(rel) {
		return "", fmt.Errorf("path escapes workspace: %s", path)
	}

	return rel, nil
}
|
|
|
|
// isBinaryFile reports whether content looks like binary data. Only the first
// 512 bytes are examined. Content is considered binary when that sample
// contains a NUL byte, or when http.DetectContentType classifies it as a PDF,
// image, video, or audio type. Empty content is not binary.
func isBinaryFile(content []byte) bool {
	if len(content) == 0 {
		return false
	}

	// Restrict the inspection to the leading 512 bytes.
	sample := content
	if len(sample) > 512 {
		sample = sample[:512]
	}

	// A NUL byte is the classic marker of non-text data.
	if bytes.IndexByte(sample, 0) >= 0 {
		return true
	}

	// Let the standard library recognise specific formats such as PDF.
	mime := http.DetectContentType(sample)
	if mime == "application/pdf" {
		return true
	}
	for _, family := range []string{"image/", "video/", "audio/"} {
		if strings.HasPrefix(mime, family) {
			return true
		}
	}
	return false
}
|