mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
refined code
This commit is contained in:
@@ -70,7 +70,8 @@ func NewAgentInstance(
|
||||
toolsRegistry := tools.NewToolRegistry()
|
||||
|
||||
if cfg.Tools.IsToolEnabled("read_file") {
|
||||
toolsRegistry.Register(tools.NewReadFileTool(workspace, readRestrict, allowReadPaths))
|
||||
maxReadFileSize := cfg.Tools.ReadFile.MaxReadFileSize
|
||||
toolsRegistry.Register(tools.NewReadFileTool(workspace, readRestrict, maxReadFileSize, allowReadPaths))
|
||||
}
|
||||
if cfg.Tools.IsToolEnabled("write_file") {
|
||||
toolsRegistry.Register(tools.NewWriteFileTool(workspace, restrict, allowWritePaths))
|
||||
|
||||
@@ -660,6 +660,11 @@ type MediaCleanupConfig struct {
|
||||
Interval int ` env:"PICOCLAW_MEDIA_CLEANUP_INTERVAL" json:"interval_minutes"`
|
||||
}
|
||||
|
||||
type ReadFileToolConfig struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
MaxReadFileSize int `json:"max_read_file_size"`
|
||||
}
|
||||
|
||||
type ToolsConfig struct {
|
||||
AllowReadPaths []string `json:"allow_read_paths" env:"PICOCLAW_TOOLS_ALLOW_READ_PATHS"`
|
||||
AllowWritePaths []string `json:"allow_write_paths" env:"PICOCLAW_TOOLS_ALLOW_WRITE_PATHS"`
|
||||
@@ -676,7 +681,7 @@ type ToolsConfig struct {
|
||||
InstallSkill ToolConfig `json:"install_skill" envPrefix:"PICOCLAW_TOOLS_INSTALL_SKILL_"`
|
||||
ListDir ToolConfig `json:"list_dir" envPrefix:"PICOCLAW_TOOLS_LIST_DIR_"`
|
||||
Message ToolConfig `json:"message" envPrefix:"PICOCLAW_TOOLS_MESSAGE_"`
|
||||
ReadFile ToolConfig `json:"read_file" envPrefix:"PICOCLAW_TOOLS_READ_FILE_"`
|
||||
ReadFile ReadFileToolConfig `json:"read_file" envPrefix:"PICOCLAW_TOOLS_READ_FILE_"`
|
||||
SendFile ToolConfig `json:"send_file" envPrefix:"PICOCLAW_TOOLS_SEND_FILE_"`
|
||||
Spawn ToolConfig `json:"spawn" envPrefix:"PICOCLAW_TOOLS_SPAWN_"`
|
||||
SPI ToolConfig `json:"spi" envPrefix:"PICOCLAW_TOOLS_SPI_"`
|
||||
|
||||
@@ -458,8 +458,9 @@ func DefaultConfig() *Config {
|
||||
Message: ToolConfig{
|
||||
Enabled: true,
|
||||
},
|
||||
ReadFile: ToolConfig{
|
||||
Enabled: true,
|
||||
ReadFile: ReadFileToolConfig{
|
||||
Enabled: true,
|
||||
MaxReadFileSize: 64 * 1024, // 64KB
|
||||
},
|
||||
Spawn: ToolConfig{
|
||||
Enabled: true,
|
||||
|
||||
+98
-72
@@ -1,12 +1,12 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"net/http"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
@@ -15,9 +15,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/fileutil"
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
)
|
||||
|
||||
const MaxReadFileSize = 128 * 1024 // 64KB limit to avoid context overflow
|
||||
const MaxReadFileSize = 64 * 1024 // 64KB limit to avoid context overflow
|
||||
|
||||
// validatePath ensures the given path is within the workspace if restrict is true.
|
||||
func validatePath(path, workspace string, restrict bool) (string, error) {
|
||||
@@ -91,15 +92,30 @@ func isWithinWorkspace(candidate, workspace string) bool {
|
||||
}
|
||||
|
||||
type ReadFileTool struct {
|
||||
fs fileSystem
|
||||
fs fileSystem
|
||||
maxSize int64
|
||||
}
|
||||
|
||||
func NewReadFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *ReadFileTool {
|
||||
func NewReadFileTool(
|
||||
workspace string,
|
||||
restrict bool,
|
||||
maxReadFileSize int,
|
||||
allowPaths ...[]*regexp.Regexp,
|
||||
) *ReadFileTool {
|
||||
var patterns []*regexp.Regexp
|
||||
if len(allowPaths) > 0 {
|
||||
patterns = allowPaths[0]
|
||||
}
|
||||
return &ReadFileTool{fs: buildFs(workspace, restrict, patterns)}
|
||||
|
||||
maxSize := int64(maxReadFileSize)
|
||||
if maxSize <= 0 {
|
||||
maxSize = MaxReadFileSize
|
||||
}
|
||||
|
||||
return &ReadFileTool{
|
||||
fs: buildFs(workspace, restrict, patterns),
|
||||
maxSize: maxSize,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *ReadFileTool) Name() string {
|
||||
@@ -107,9 +123,7 @@ func (t *ReadFileTool) Name() string {
|
||||
}
|
||||
|
||||
func (t *ReadFileTool) Description() string {
|
||||
return "Read the contents of a file. Supports pagination via `offset` and `length` " +
|
||||
"for files larger than the per-call limit. If the response header indicates the " +
|
||||
"file is TRUNCATED, use the provided offset in your next call to continue reading."
|
||||
return "Read the contents of a file. Supports pagination via `offset` and `length`."
|
||||
}
|
||||
|
||||
func (t *ReadFileTool) Parameters() map[string]any {
|
||||
@@ -122,15 +136,13 @@ func (t *ReadFileTool) Parameters() map[string]any {
|
||||
},
|
||||
"offset": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Byte offset to start reading from (default: 0).",
|
||||
"description": "Byte offset to start reading from.",
|
||||
"default": 0,
|
||||
},
|
||||
"length": map[string]any{
|
||||
"type": "integer",
|
||||
"description": fmt.Sprintf(
|
||||
"Maximum number of bytes to read (default / max: %d).", MaxReadFileSize,
|
||||
),
|
||||
"default": MaxReadFileSize,
|
||||
"type": "integer",
|
||||
"description": "Maximum number of bytes to read.",
|
||||
"default": t.maxSize,
|
||||
},
|
||||
},
|
||||
"required": []string{"path"},
|
||||
@@ -153,15 +165,15 @@ func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolRe
|
||||
}
|
||||
|
||||
// length (optional, capped at MaxReadFileSize)
|
||||
length, err := getInt64Arg(args, "length", MaxReadFileSize)
|
||||
length, err := getInt64Arg(args, "length", t.maxSize)
|
||||
if err != nil {
|
||||
return ErrorResult(err.Error())
|
||||
}
|
||||
if length <= 0 {
|
||||
return ErrorResult("length must be > 0")
|
||||
}
|
||||
if length > MaxReadFileSize {
|
||||
length = MaxReadFileSize
|
||||
if length > t.maxSize {
|
||||
length = t.maxSize
|
||||
}
|
||||
|
||||
file, err := t.fs.Open(path)
|
||||
@@ -174,66 +186,104 @@ func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolRe
|
||||
totalSize := int64(-1) // -1 means unknown
|
||||
if info, err := file.Stat(); err == nil {
|
||||
totalSize = info.Size()
|
||||
} else {
|
||||
return ErrorResult(fmt.Sprintf("failed to get file info: %v", err))
|
||||
}
|
||||
|
||||
// seek to offset
|
||||
// sniff the first 512 bytes to detect binary content before loading
|
||||
// it into the LLM context. Seeking back to 0 afterwards restores state.
|
||||
sniff := make([]byte, 512)
|
||||
sniffN, _ := file.Read(sniff)
|
||||
|
||||
// Reset read position to beginning before applying the caller's offset.
|
||||
if seeker, ok := file.(io.Seeker); ok {
|
||||
if _, err := seeker.Seek(0, io.SeekStart); err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to reset file position after sniff: %v", err))
|
||||
}
|
||||
} else {
|
||||
// Non-seekable: we consumed sniffN bytes above; account for them when
|
||||
// discarding to reach the requested offset below.
|
||||
// If offset < sniffN the data we already read covers it, which we
|
||||
// cannot replay on a non-seekable stream — return a clear error.
|
||||
if offset < int64(sniffN) && offset > 0 {
|
||||
return ErrorResult(
|
||||
"non-seekable file: cannot seek to an offset within the first 512 bytes after binary detection",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Seek to the requested offset.
|
||||
if seeker, ok := file.(io.Seeker); ok {
|
||||
if _, err := seeker.Seek(offset, io.SeekStart); err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to seek to offset %d: %v", offset, err))
|
||||
}
|
||||
} else if offset > 0 {
|
||||
// Fallback for non-seekable streams: discard leading bytes.
|
||||
if _, err := io.CopyN(io.Discard, file, offset); err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to advance to offset %d: %v", offset, err))
|
||||
// sniffN bytes were already consumed above, so subtract them.
|
||||
remaining := offset - int64(sniffN)
|
||||
if remaining > 0 {
|
||||
if _, err := io.CopyN(io.Discard, file, remaining); err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to advance to offset %d: %v", offset, err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read up to `length` bytes
|
||||
data, err := io.ReadAll(io.LimitReader(file, length))
|
||||
if err != nil {
|
||||
// read length+1 bytes to reliably detect whether more content exists
|
||||
// without relying on totalSize (which may be -1 for non-seekable streams).
|
||||
// This avoids the false-positive TRUNCATED message on the last page.
|
||||
probe := make([]byte, length+1)
|
||||
n, err := io.ReadFull(file, probe)
|
||||
// FIX: io.ReadFull returns io.ErrUnexpectedEOF for partial reads (0 < n < len),
|
||||
// and io.EOF only when n == 0. Both are normal terminal conditions — only
|
||||
// other errors are genuine failures.
|
||||
if err != nil && err != io.EOF && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return ErrorResult(fmt.Sprintf("failed to read file content: %v", err))
|
||||
}
|
||||
|
||||
if len(data) == 0 && offset > 0 {
|
||||
return NewToolResult("[END OF FILE — no content at this offset]")
|
||||
}
|
||||
// hasMore is true only when we actually got the extra probe byte.
|
||||
hasMore := int64(n) > length
|
||||
data := probe[:min(int64(n), length)]
|
||||
|
||||
// build metadata header
|
||||
readEnd := offset + int64(len(data))
|
||||
hasMore := int64(len(data)) == length && (totalSize < 0 || readEnd < totalSize)
|
||||
|
||||
// Calculates the reading range avoiding negative numbers if the file is empty
|
||||
var readRange string
|
||||
if len(data) == 0 {
|
||||
readRange = "0 bytes"
|
||||
} else {
|
||||
readRange = fmt.Sprintf("bytes %d–%d", offset, readEnd-1)
|
||||
return NewToolResult("[END OF FILE - no content at this offset]")
|
||||
}
|
||||
|
||||
// Build metadata header.
|
||||
// use filepath.Base(path) instead of the raw path to avoid leaking
|
||||
// internal filesystem structure into the LLM context.
|
||||
readEnd := offset + int64(len(data))
|
||||
// use ASCII hyphen-minus instead of en-dash (U+2013) to keep the
|
||||
// header parseable by downstream tools and log processors.
|
||||
readRange := fmt.Sprintf("bytes %d-%d", offset, readEnd-1)
|
||||
|
||||
displayPath := filepath.Base(path)
|
||||
var header string
|
||||
if totalSize >= 0 {
|
||||
header = fmt.Sprintf(
|
||||
"[file: %s | total: %d bytes | read: %s]",
|
||||
path, totalSize, readRange,
|
||||
displayPath, totalSize, readRange,
|
||||
)
|
||||
} else {
|
||||
header = fmt.Sprintf(
|
||||
"[file: %s | read: %s | total size unknown]",
|
||||
path, readRange,
|
||||
displayPath, readRange,
|
||||
)
|
||||
}
|
||||
|
||||
if hasMore {
|
||||
header += fmt.Sprintf(
|
||||
"\n[TRUNCATED — file has more content. Call read_file again with offset=%d to continue.]",
|
||||
"\n[TRUNCATED - file has more content. Call read_file again with offset=%d to continue.]",
|
||||
readEnd,
|
||||
)
|
||||
} else {
|
||||
header += "\n[END OF FILE — no further content.]"
|
||||
header += "\n[END OF FILE - no further content.]"
|
||||
}
|
||||
|
||||
logger.DebugCF("tool", "ReadFileTool execution completed successfully",
|
||||
map[string]any{
|
||||
"path": path,
|
||||
"bytes_read": len(data),
|
||||
"has_more": hasMore,
|
||||
})
|
||||
|
||||
return NewToolResult(header + "\n\n" + string(data))
|
||||
}
|
||||
|
||||
@@ -247,6 +297,12 @@ func getInt64Arg(args map[string]any, key string, defaultVal int64) (int64, erro
|
||||
|
||||
switch v := raw.(type) {
|
||||
case float64:
|
||||
if v != math.Trunc(v) {
|
||||
return 0, fmt.Errorf("%s must be an integer, got float %v", key, v)
|
||||
}
|
||||
if v > math.MaxInt64 || v < math.MinInt64 {
|
||||
return 0, fmt.Errorf("%s value %v overflows int64", key, v)
|
||||
}
|
||||
return int64(v), nil
|
||||
case int:
|
||||
return int64(v), nil
|
||||
@@ -636,33 +692,3 @@ func getSafeRelPath(workspace, path string) (string, error) {
|
||||
|
||||
return rel, nil
|
||||
}
|
||||
|
||||
// isBinaryFile uses common heuristics to determine if the content is a binary file.
|
||||
func isBinaryFile(content []byte) bool {
|
||||
if len(content) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Sample the first 512 bytes (or less if the file is smaller)
|
||||
limit := len(content)
|
||||
if limit > 512 {
|
||||
limit = 512
|
||||
}
|
||||
sample := content[:limit]
|
||||
|
||||
// Check for NUL bytes in the sample (standard binary detection)
|
||||
if bytes.IndexByte(sample, 0) != -1 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Use standard library content type detection to catch specific formats like PDF
|
||||
contentType := http.DetectContentType(sample)
|
||||
if contentType == "application/pdf" ||
|
||||
strings.HasPrefix(contentType, "image/") ||
|
||||
strings.HasPrefix(contentType, "video/") ||
|
||||
strings.HasPrefix(contentType, "audio/") {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ func TestFilesystemTool_ReadFile_Success(t *testing.T) {
|
||||
testFile := filepath.Join(tmpDir, "test.txt")
|
||||
os.WriteFile(testFile, []byte("test content"), 0o644)
|
||||
|
||||
tool := NewReadFileTool("", false)
|
||||
tool := NewReadFileTool("", false, MaxReadFileSize)
|
||||
ctx := context.Background()
|
||||
args := map[string]any{
|
||||
"path": testFile,
|
||||
@@ -45,7 +45,7 @@ func TestFilesystemTool_ReadFile_Success(t *testing.T) {
|
||||
|
||||
// TestFilesystemTool_ReadFile_NotFound verifies error handling for missing file
|
||||
func TestFilesystemTool_ReadFile_NotFound(t *testing.T) {
|
||||
tool := NewReadFileTool("", false)
|
||||
tool := NewReadFileTool("", false, MaxReadFileSize)
|
||||
ctx := context.Background()
|
||||
args := map[string]any{
|
||||
"path": "/nonexistent_file_12345.txt",
|
||||
@@ -271,7 +271,7 @@ func TestFilesystemTool_ReadFile_RejectsSymlinkEscape(t *testing.T) {
|
||||
t.Skipf("symlink not supported in this environment: %v", err)
|
||||
}
|
||||
|
||||
tool := NewReadFileTool(workspace, true)
|
||||
tool := NewReadFileTool(workspace, true, MaxReadFileSize)
|
||||
result := tool.Execute(context.Background(), map[string]any{
|
||||
"path": link,
|
||||
})
|
||||
@@ -289,7 +289,7 @@ func TestFilesystemTool_ReadFile_RejectsSymlinkEscape(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFilesystemTool_EmptyWorkspace_AccessDenied(t *testing.T) {
|
||||
tool := NewReadFileTool("", true) // restrict=true but workspace=""
|
||||
tool := NewReadFileTool("", true, MaxReadFileSize) // restrict=true but workspace=""
|
||||
|
||||
// Try to read a sensitive file (simulated by a temp file outside workspace)
|
||||
tmpDir := t.TempDir()
|
||||
@@ -499,7 +499,7 @@ func TestWhitelistFs_AllowsMatchingPaths(t *testing.T) {
|
||||
// Pattern allows access to the outsideDir.
|
||||
patterns := []*regexp.Regexp{regexp.MustCompile(`^` + regexp.QuoteMeta(outsideDir))}
|
||||
|
||||
tool := NewReadFileTool(workspace, true, patterns)
|
||||
tool := NewReadFileTool(workspace, true, MaxReadFileSize, patterns)
|
||||
|
||||
// Read from whitelisted path should succeed.
|
||||
result := tool.Execute(context.Background(), map[string]any{"path": outsideFile})
|
||||
@@ -521,69 +521,6 @@ func TestWhitelistFs_AllowsMatchingPaths(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsBinaryFile(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content []byte
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "empty content",
|
||||
content: []byte(""),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "plain text",
|
||||
content: []byte("This is a normal text file with punctuation and 12345 numbers."),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "contains null byte",
|
||||
content: []byte("plain text\x00followed by a null byte"),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "pdf header",
|
||||
content: []byte("%PDF-1.4\n%\xE2\xE3\xCF\xD3\n1 0 obj\n<</Type/Catalog/Pages 2 0 R>>"),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "png magic bytes",
|
||||
content: []byte("\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00"),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "jpeg magic bytes",
|
||||
content: []byte("\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H"),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "html text (not binary)",
|
||||
content: []byte("<!DOCTYPE html><html><body><h1>Ciao</h1></body></html>"),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "json text (not binary)",
|
||||
content: []byte(`{"key": "value", "number": 42}`),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "markdown text (not binary)",
|
||||
content: []byte("# Markdown Title\n\nThis is a **bold text** and a [link](https://example.com)."),
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := isBinaryFile(tt.content)
|
||||
if result != tt.expected {
|
||||
t.Errorf("isBinaryFile() for %q returned %v, expected %v", tt.name, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadFileTool_ChunkedReading verifies the pagination logic of the tool
|
||||
// by reading a file in multiple chunks using 'offset' and 'length'.
|
||||
func TestReadFileTool_ChunkedReading(t *testing.T) {
|
||||
@@ -597,7 +534,7 @@ func TestReadFileTool_ChunkedReading(t *testing.T) {
|
||||
t.Fatalf("Failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
tool := NewReadFileTool(tmpDir, false)
|
||||
tool := NewReadFileTool(tmpDir, false, MaxReadFileSize)
|
||||
ctx := context.Background()
|
||||
|
||||
// --- Step 1: Read the first chunk (10 bytes) ---
|
||||
@@ -686,7 +623,7 @@ func TestReadFileTool_OffsetBeyondEOF(t *testing.T) {
|
||||
t.Fatalf("Failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
tool := NewReadFileTool(tmpDir, false)
|
||||
tool := NewReadFileTool(tmpDir, false, MaxReadFileSize)
|
||||
ctx := context.Background()
|
||||
|
||||
args := map[string]any{
|
||||
@@ -702,7 +639,7 @@ func TestReadFileTool_OffsetBeyondEOF(t *testing.T) {
|
||||
}
|
||||
|
||||
// Must return EXACTLY the string provided in the code
|
||||
expectedMsg := "[END OF FILE — no content at this offset]"
|
||||
expectedMsg := "[END OF FILE - no content at this offset]"
|
||||
if result.ForLLM != expectedMsg {
|
||||
t.Errorf("The message %q was expected, obtained: %q", expectedMsg, result.ForLLM)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user