From 674f00ec63ce8e90d9458da1f0d8a44bde534fb2 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 00:33:27 +0100 Subject: [PATCH] set offset and length in read_file tool --- pkg/tools/filesystem.go | 151 ++++++++++++++++++++++++++++------- pkg/tools/filesystem_test.go | 123 +++++++++++++++++++++++++--- 2 files changed, 234 insertions(+), 40 deletions(-) diff --git a/pkg/tools/filesystem.go b/pkg/tools/filesystem.go index 17f67e3b0..92c5c4f17 100644 --- a/pkg/tools/filesystem.go +++ b/pkg/tools/filesystem.go @@ -10,12 +10,15 @@ import ( "os" "path/filepath" "regexp" + "strconv" "strings" "time" "github.com/sipeed/picoclaw/pkg/fileutil" ) +const MaxReadFileSize = 128 * 1024 // 64KB limit to avoid context overflow + // validatePath ensures the given path is within the workspace if restrict is true. func validatePath(path, workspace string, restrict bool) (string, error) { if workspace == "" { @@ -104,7 +107,9 @@ func (t *ReadFileTool) Name() string { } func (t *ReadFileTool) Description() string { - return "Read the contents of a file" + return "Read the contents of a file. Supports pagination via `offset` and `length` " + + "for files larger than the per-call limit. If the response header indicates the " + + "file is TRUNCATED, use the provided offset in your next call to continue reading." } func (t *ReadFileTool) Parameters() map[string]any { @@ -113,7 +118,19 @@ func (t *ReadFileTool) Parameters() map[string]any { "properties": map[string]any{ "path": map[string]any{ "type": "string", - "description": "Path to the file to read", + "description": "Path to the file to read.", + }, + "offset": map[string]any{ + "type": "integer", + "description": "Byte offset to start reading from (default: 0).", + "default": 0, + }, + "length": map[string]any{ + "type": "integer", + "description": fmt.Sprintf( + "Maximum number of bytes to read (default / max: %d).", MaxReadFileSize, + ), + "default": MaxReadFileSize, }, }, "required": []string{"path"}, @@ -126,44 +143,124 @@ func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolRe return ErrorResult("path is required") } - // open file instead of loading it all into memory + // offset (optional, default 0) + offset, err := getInt64Arg(args, "offset", 0) + if err != nil { + return ErrorResult(err.Error()) + } + if offset < 0 { + return ErrorResult("offset must be >= 0") + } + + // length (optional, capped at MaxReadFileSize) + length, err := getInt64Arg(args, "length", MaxReadFileSize) + if err != nil { + return ErrorResult(err.Error()) + } + if length <= 0 { + return ErrorResult("length must be > 0") + } + if length > MaxReadFileSize { + length = MaxReadFileSize + } + file, err := t.fs.Open(path) if err != nil { return ErrorResult(err.Error()) } defer file.Close() - // read only an initial chunk (512 bytes is the standard for MIME sniffing) - header := make([]byte, 512) - n, err := file.Read(header) - if err != nil && err != io.EOF { - return ErrorResult(fmt.Sprintf("failed to read file header: %v", err)) - } - header = header[:n] - - // Lock the binaries now before using more RAM - if isBinaryFile(header) { - return ErrorResult( - fmt.Sprintf( - "cannot read file %q: appears to be a binary file (e.g., PDF, image, executable)", - filepath.Base(path), - ), - ) + // measure total size + totalSize := int64(-1) // -1 means unknown + if info, err := file.Stat(); err == nil { + totalSize = info.Size() + } else { + return ErrorResult(fmt.Sprintf("failed to get file info: %v", err)) } - // If it is text, let's read the rest of the file - // (io.ReadAll will continue reading starting from byte 513) - rest, err := io.ReadAll(file) + // seek to offset + if seeker, ok := file.(io.Seeker); ok { + if _, err := seeker.Seek(offset, io.SeekStart); err != nil { + return ErrorResult(fmt.Sprintf("failed to seek to offset %d: %v", offset, err)) + } + } else if offset > 0 { + // Fallback for non-seekable streams: discard leading bytes. + if _, err := io.CopyN(io.Discard, file, offset); err != nil { + return ErrorResult(fmt.Sprintf("failed to advance to offset %d: %v", offset, err)) + } + } + + // read up to `length` bytes + data, err := io.ReadAll(io.LimitReader(file, length)) if err != nil { return ErrorResult(fmt.Sprintf("failed to read file content: %v", err)) } - // Recompose the complete content by merging the header and the rest - fullContent := make([]byte, 0, len(header)+len(rest)) - fullContent = append(fullContent, header...) - fullContent = append(fullContent, rest...) + if len(data) == 0 && offset > 0 { + return NewToolResult("[END OF FILE — no content at this offset]") + } - return NewToolResult(string(fullContent)) + // build metadata header + readEnd := offset + int64(len(data)) + hasMore := int64(len(data)) == length && (totalSize < 0 || readEnd < totalSize) + + // Calculates the reading range avoiding negative numbers if the file is empty + var readRange string + if len(data) == 0 { + readRange = "0 bytes" + } else { + readRange = fmt.Sprintf("bytes %d–%d", offset, readEnd-1) + } + + var header string + if totalSize >= 0 { + header = fmt.Sprintf( + "[file: %s | total: %d bytes | read: %s]", + path, totalSize, readRange, + ) + } else { + header = fmt.Sprintf( + "[file: %s | read: %s | total size unknown]", + path, readRange, + ) + } + + if hasMore { + header += fmt.Sprintf( + "\n[TRUNCATED — file has more content. Call read_file again with offset=%d to continue.]", + readEnd, + ) + } else { + header += "\n[END OF FILE — no further content.]" + } + + return NewToolResult(header + "\n\n" + string(data)) +} + +// getInt64Arg extracts an integer argument from the args map, returning the +// provided default if the key is absent. +func getInt64Arg(args map[string]any, key string, defaultVal int64) (int64, error) { + raw, exists := args[key] + if !exists { + return defaultVal, nil + } + + switch v := raw.(type) { + case float64: + return int64(v), nil + case int: + return int64(v), nil + case int64: + return v, nil + case string: + parsed, err := strconv.ParseInt(v, 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid integer format for %s parameter: %w", key, err) + } + return parsed, nil + default: + return 0, fmt.Errorf("unsupported type %T for %s parameter", raw, key) + } } type WriteFileTool struct { diff --git a/pkg/tools/filesystem_test.go b/pkg/tools/filesystem_test.go index 2868431e0..f02483e25 100644 --- a/pkg/tools/filesystem_test.go +++ b/pkg/tools/filesystem_test.go @@ -584,29 +584,126 @@ func TestIsBinaryFile(t *testing.T) { } } -func TestFilesystemTool_ReadFile_BlocksBinary(t *testing.T) { +// TestReadFileTool_ChunkedReading verifies the pagination logic of the tool +// by reading a file in multiple chunks using 'offset' and 'length'. +func TestReadFileTool_ChunkedReading(t *testing.T) { tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "pagination_test.txt") - // Create a dummy binary file (e.g., a PDF). - testFile := filepath.Join(tmpDir, "fake_document.pdf") - fakePDFContent := []byte("%PDF-1.4\n% Some null test bytes\x00\x00\x00") - os.WriteFile(testFile, fakePDFContent, 0o644) + // Create a test file with exactly 26 bytes of content + fullContent := "abcdefghijklmnopqrstuvwxyz" + err := os.WriteFile(testFile, []byte(fullContent), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } - tool := NewReadFileTool(tmpDir, true) + tool := NewReadFileTool(tmpDir, false) ctx := context.Background() + + // --- Step 1: Read the first chunk (10 bytes) --- + args1 := map[string]any{ + "path": testFile, + "offset": 0, + "length": 10, + } + result1 := tool.Execute(ctx, args1) + + if result1.IsError { + t.Fatalf("Chunk 1 failed: %s", result1.ForLLM) + } + + // Expect the first 10 characters + if !strings.Contains(result1.ForLLM, "abcdefghij") { + t.Errorf("Chunk 1 should contain 'abcdefghij', got: %s", result1.ForLLM) + } + // Expect the header to indicate the file is truncated + if !strings.Contains(result1.ForLLM, "[TRUNCATED") { + t.Errorf("Chunk 1 header should indicate truncation, got: %s", result1.ForLLM) + } + // Expect the header to suggest the next offset (10) + if !strings.Contains(result1.ForLLM, "offset=10") { + t.Errorf("Chunk 1 header should suggest next offset=10, got: %s", result1.ForLLM) + } + + // Step 2: Read the second chunk (10 bytes) --- + args2 := map[string]any{ + "path": testFile, + "offset": 10, + "length": 10, + } + result2 := tool.Execute(ctx, args2) + + if result2.IsError { + t.Fatalf("Chunk 2 failed: %s", result2.ForLLM) + } + + // Expect the next 10 characters + if !strings.Contains(result2.ForLLM, "klmnopqrst") { + t.Errorf("Chunk 2 should contain 'klmnopqrst', got: %s", result2.ForLLM) + } + // Expect the header to suggest the next offset (20) + if !strings.Contains(result2.ForLLM, "offset=20") { + t.Errorf("Chunk 2 header should suggest next offset=20, got: %s", result2.ForLLM) + } + + // Step 3: Read the final chunk (remaining 6 bytes) --- + // We ask for 10 bytes, but only 6 are left in the file + args3 := map[string]any{ + "path": testFile, + "offset": 20, + "length": 10, + } + result3 := tool.Execute(ctx, args3) + + if result3.IsError { + t.Fatalf("Chunk 3 failed: %s", result3.ForLLM) + } + + // Expect the last 6 characters + if !strings.Contains(result3.ForLLM, "uvwxyz") { + t.Errorf("Chunk 3 should contain 'uvwxyz', got: %s", result3.ForLLM) + } + // Expect the header to indicate the end of the file + if !strings.Contains(result3.ForLLM, "[END OF FILE") { + t.Errorf("Chunk 3 header should indicate end of file, got: %s", result3.ForLLM) + } + + // Ensure no TRUNCATED message is present in the final chunk + if strings.Contains(result3.ForLLM, "[TRUNCATED") { + t.Errorf("Chunk 3 header should NOT indicate truncation, got: %s", result3.ForLLM) + } +} + +// TestReadFileTool_OffsetBeyondEOF checks the behavior when requesting +// An offset that exceeds the total file size. +func TestReadFileTool_OffsetBeyondEOF(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "short.txt") + + // create a file of only 5 bytes + err := os.WriteFile(testFile, []byte("12345"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileTool(tmpDir, false) + ctx := context.Background() + args := map[string]any{ - "path": testFile, + "path": testFile, + "offset": int64(100), // Offset beyond the end of the file } result := tool.Execute(ctx, args) - if !result.IsError { - t.Errorf("An error was expected when trying to read a binary file, but instead it was successful") + // It should not be classified as a tool execution error + if result.IsError { + t.Errorf("A mistake was not expected, obtained IsError=true: %s", result.ForLLM) } - // The error should mention that it is a binary file - expectedMsg := "appears to be a binary file" - if !strings.Contains(result.ForLLM, expectedMsg) { - t.Errorf("The error message '%s' was expected, obtained: %s", expectedMsg, result.ForLLM) + // Must return EXACTLY the string provided in the code + expectedMsg := "[END OF FILE — no content at this offset]" + if result.ForLLM != expectedMsg { + t.Errorf("The message %q was expected, obtained: %q", expectedMsg, result.ForLLM) } }