mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
set offset and length in read_file tool
This commit is contained in:
+124
-27
@@ -10,12 +10,15 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/fileutil"
|
||||
)
|
||||
|
||||
const MaxReadFileSize = 128 * 1024 // 64KB limit to avoid context overflow
|
||||
|
||||
// validatePath ensures the given path is within the workspace if restrict is true.
|
||||
func validatePath(path, workspace string, restrict bool) (string, error) {
|
||||
if workspace == "" {
|
||||
@@ -104,7 +107,9 @@ func (t *ReadFileTool) Name() string {
|
||||
}
|
||||
|
||||
func (t *ReadFileTool) Description() string {
|
||||
return "Read the contents of a file"
|
||||
return "Read the contents of a file. Supports pagination via `offset` and `length` " +
|
||||
"for files larger than the per-call limit. If the response header indicates the " +
|
||||
"file is TRUNCATED, use the provided offset in your next call to continue reading."
|
||||
}
|
||||
|
||||
func (t *ReadFileTool) Parameters() map[string]any {
|
||||
@@ -113,7 +118,19 @@ func (t *ReadFileTool) Parameters() map[string]any {
|
||||
"properties": map[string]any{
|
||||
"path": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Path to the file to read",
|
||||
"description": "Path to the file to read.",
|
||||
},
|
||||
"offset": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Byte offset to start reading from (default: 0).",
|
||||
"default": 0,
|
||||
},
|
||||
"length": map[string]any{
|
||||
"type": "integer",
|
||||
"description": fmt.Sprintf(
|
||||
"Maximum number of bytes to read (default / max: %d).", MaxReadFileSize,
|
||||
),
|
||||
"default": MaxReadFileSize,
|
||||
},
|
||||
},
|
||||
"required": []string{"path"},
|
||||
@@ -126,44 +143,124 @@ func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolRe
|
||||
return ErrorResult("path is required")
|
||||
}
|
||||
|
||||
// open file instead of loading it all into memory
|
||||
// offset (optional, default 0)
|
||||
offset, err := getInt64Arg(args, "offset", 0)
|
||||
if err != nil {
|
||||
return ErrorResult(err.Error())
|
||||
}
|
||||
if offset < 0 {
|
||||
return ErrorResult("offset must be >= 0")
|
||||
}
|
||||
|
||||
// length (optional, capped at MaxReadFileSize)
|
||||
length, err := getInt64Arg(args, "length", MaxReadFileSize)
|
||||
if err != nil {
|
||||
return ErrorResult(err.Error())
|
||||
}
|
||||
if length <= 0 {
|
||||
return ErrorResult("length must be > 0")
|
||||
}
|
||||
if length > MaxReadFileSize {
|
||||
length = MaxReadFileSize
|
||||
}
|
||||
|
||||
file, err := t.fs.Open(path)
|
||||
if err != nil {
|
||||
return ErrorResult(err.Error())
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// read only an initial chunk (512 bytes is the standard for MIME sniffing)
|
||||
header := make([]byte, 512)
|
||||
n, err := file.Read(header)
|
||||
if err != nil && err != io.EOF {
|
||||
return ErrorResult(fmt.Sprintf("failed to read file header: %v", err))
|
||||
}
|
||||
header = header[:n]
|
||||
|
||||
// Lock the binaries now before using more RAM
|
||||
if isBinaryFile(header) {
|
||||
return ErrorResult(
|
||||
fmt.Sprintf(
|
||||
"cannot read file %q: appears to be a binary file (e.g., PDF, image, executable)",
|
||||
filepath.Base(path),
|
||||
),
|
||||
)
|
||||
// measure total size
|
||||
totalSize := int64(-1) // -1 means unknown
|
||||
if info, err := file.Stat(); err == nil {
|
||||
totalSize = info.Size()
|
||||
} else {
|
||||
return ErrorResult(fmt.Sprintf("failed to get file info: %v", err))
|
||||
}
|
||||
|
||||
// If it is text, let's read the rest of the file
|
||||
// (io.ReadAll will continue reading starting from byte 513)
|
||||
rest, err := io.ReadAll(file)
|
||||
// seek to offset
|
||||
if seeker, ok := file.(io.Seeker); ok {
|
||||
if _, err := seeker.Seek(offset, io.SeekStart); err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to seek to offset %d: %v", offset, err))
|
||||
}
|
||||
} else if offset > 0 {
|
||||
// Fallback for non-seekable streams: discard leading bytes.
|
||||
if _, err := io.CopyN(io.Discard, file, offset); err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to advance to offset %d: %v", offset, err))
|
||||
}
|
||||
}
|
||||
|
||||
// read up to `length` bytes
|
||||
data, err := io.ReadAll(io.LimitReader(file, length))
|
||||
if err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to read file content: %v", err))
|
||||
}
|
||||
|
||||
// Recompose the complete content by merging the header and the rest
|
||||
fullContent := make([]byte, 0, len(header)+len(rest))
|
||||
fullContent = append(fullContent, header...)
|
||||
fullContent = append(fullContent, rest...)
|
||||
if len(data) == 0 && offset > 0 {
|
||||
return NewToolResult("[END OF FILE — no content at this offset]")
|
||||
}
|
||||
|
||||
return NewToolResult(string(fullContent))
|
||||
// build metadata header
|
||||
readEnd := offset + int64(len(data))
|
||||
hasMore := int64(len(data)) == length && (totalSize < 0 || readEnd < totalSize)
|
||||
|
||||
// Calculates the reading range avoiding negative numbers if the file is empty
|
||||
var readRange string
|
||||
if len(data) == 0 {
|
||||
readRange = "0 bytes"
|
||||
} else {
|
||||
readRange = fmt.Sprintf("bytes %d–%d", offset, readEnd-1)
|
||||
}
|
||||
|
||||
var header string
|
||||
if totalSize >= 0 {
|
||||
header = fmt.Sprintf(
|
||||
"[file: %s | total: %d bytes | read: %s]",
|
||||
path, totalSize, readRange,
|
||||
)
|
||||
} else {
|
||||
header = fmt.Sprintf(
|
||||
"[file: %s | read: %s | total size unknown]",
|
||||
path, readRange,
|
||||
)
|
||||
}
|
||||
|
||||
if hasMore {
|
||||
header += fmt.Sprintf(
|
||||
"\n[TRUNCATED — file has more content. Call read_file again with offset=%d to continue.]",
|
||||
readEnd,
|
||||
)
|
||||
} else {
|
||||
header += "\n[END OF FILE — no further content.]"
|
||||
}
|
||||
|
||||
return NewToolResult(header + "\n\n" + string(data))
|
||||
}
|
||||
|
||||
// getInt64Arg extracts an integer argument from the args map, returning the
|
||||
// provided default if the key is absent.
|
||||
func getInt64Arg(args map[string]any, key string, defaultVal int64) (int64, error) {
|
||||
raw, exists := args[key]
|
||||
if !exists {
|
||||
return defaultVal, nil
|
||||
}
|
||||
|
||||
switch v := raw.(type) {
|
||||
case float64:
|
||||
return int64(v), nil
|
||||
case int:
|
||||
return int64(v), nil
|
||||
case int64:
|
||||
return v, nil
|
||||
case string:
|
||||
parsed, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid integer format for %s parameter: %w", key, err)
|
||||
}
|
||||
return parsed, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unsupported type %T for %s parameter", raw, key)
|
||||
}
|
||||
}
|
||||
|
||||
type WriteFileTool struct {
|
||||
|
||||
+110
-13
@@ -584,29 +584,126 @@ func TestIsBinaryFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilesystemTool_ReadFile_BlocksBinary(t *testing.T) {
|
||||
// TestReadFileTool_ChunkedReading verifies the pagination logic of the tool
|
||||
// by reading a file in multiple chunks using 'offset' and 'length'.
|
||||
func TestReadFileTool_ChunkedReading(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
testFile := filepath.Join(tmpDir, "pagination_test.txt")
|
||||
|
||||
// Create a dummy binary file (e.g., a PDF).
|
||||
testFile := filepath.Join(tmpDir, "fake_document.pdf")
|
||||
fakePDFContent := []byte("%PDF-1.4\n% Some null test bytes\x00\x00\x00")
|
||||
os.WriteFile(testFile, fakePDFContent, 0o644)
|
||||
// Create a test file with exactly 26 bytes of content
|
||||
fullContent := "abcdefghijklmnopqrstuvwxyz"
|
||||
err := os.WriteFile(testFile, []byte(fullContent), 0o644)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
tool := NewReadFileTool(tmpDir, true)
|
||||
tool := NewReadFileTool(tmpDir, false)
|
||||
ctx := context.Background()
|
||||
|
||||
// --- Step 1: Read the first chunk (10 bytes) ---
|
||||
args1 := map[string]any{
|
||||
"path": testFile,
|
||||
"offset": 0,
|
||||
"length": 10,
|
||||
}
|
||||
result1 := tool.Execute(ctx, args1)
|
||||
|
||||
if result1.IsError {
|
||||
t.Fatalf("Chunk 1 failed: %s", result1.ForLLM)
|
||||
}
|
||||
|
||||
// Expect the first 10 characters
|
||||
if !strings.Contains(result1.ForLLM, "abcdefghij") {
|
||||
t.Errorf("Chunk 1 should contain 'abcdefghij', got: %s", result1.ForLLM)
|
||||
}
|
||||
// Expect the header to indicate the file is truncated
|
||||
if !strings.Contains(result1.ForLLM, "[TRUNCATED") {
|
||||
t.Errorf("Chunk 1 header should indicate truncation, got: %s", result1.ForLLM)
|
||||
}
|
||||
// Expect the header to suggest the next offset (10)
|
||||
if !strings.Contains(result1.ForLLM, "offset=10") {
|
||||
t.Errorf("Chunk 1 header should suggest next offset=10, got: %s", result1.ForLLM)
|
||||
}
|
||||
|
||||
// Step 2: Read the second chunk (10 bytes) ---
|
||||
args2 := map[string]any{
|
||||
"path": testFile,
|
||||
"offset": 10,
|
||||
"length": 10,
|
||||
}
|
||||
result2 := tool.Execute(ctx, args2)
|
||||
|
||||
if result2.IsError {
|
||||
t.Fatalf("Chunk 2 failed: %s", result2.ForLLM)
|
||||
}
|
||||
|
||||
// Expect the next 10 characters
|
||||
if !strings.Contains(result2.ForLLM, "klmnopqrst") {
|
||||
t.Errorf("Chunk 2 should contain 'klmnopqrst', got: %s", result2.ForLLM)
|
||||
}
|
||||
// Expect the header to suggest the next offset (20)
|
||||
if !strings.Contains(result2.ForLLM, "offset=20") {
|
||||
t.Errorf("Chunk 2 header should suggest next offset=20, got: %s", result2.ForLLM)
|
||||
}
|
||||
|
||||
// Step 3: Read the final chunk (remaining 6 bytes) ---
|
||||
// We ask for 10 bytes, but only 6 are left in the file
|
||||
args3 := map[string]any{
|
||||
"path": testFile,
|
||||
"offset": 20,
|
||||
"length": 10,
|
||||
}
|
||||
result3 := tool.Execute(ctx, args3)
|
||||
|
||||
if result3.IsError {
|
||||
t.Fatalf("Chunk 3 failed: %s", result3.ForLLM)
|
||||
}
|
||||
|
||||
// Expect the last 6 characters
|
||||
if !strings.Contains(result3.ForLLM, "uvwxyz") {
|
||||
t.Errorf("Chunk 3 should contain 'uvwxyz', got: %s", result3.ForLLM)
|
||||
}
|
||||
// Expect the header to indicate the end of the file
|
||||
if !strings.Contains(result3.ForLLM, "[END OF FILE") {
|
||||
t.Errorf("Chunk 3 header should indicate end of file, got: %s", result3.ForLLM)
|
||||
}
|
||||
|
||||
// Ensure no TRUNCATED message is present in the final chunk
|
||||
if strings.Contains(result3.ForLLM, "[TRUNCATED") {
|
||||
t.Errorf("Chunk 3 header should NOT indicate truncation, got: %s", result3.ForLLM)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadFileTool_OffsetBeyondEOF checks the behavior when requesting
|
||||
// An offset that exceeds the total file size.
|
||||
func TestReadFileTool_OffsetBeyondEOF(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
testFile := filepath.Join(tmpDir, "short.txt")
|
||||
|
||||
// create a file of only 5 bytes
|
||||
err := os.WriteFile(testFile, []byte("12345"), 0o644)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
tool := NewReadFileTool(tmpDir, false)
|
||||
ctx := context.Background()
|
||||
|
||||
args := map[string]any{
|
||||
"path": testFile,
|
||||
"path": testFile,
|
||||
"offset": int64(100), // Offset beyond the end of the file
|
||||
}
|
||||
|
||||
result := tool.Execute(ctx, args)
|
||||
|
||||
if !result.IsError {
|
||||
t.Errorf("An error was expected when trying to read a binary file, but instead it was successful")
|
||||
// It should not be classified as a tool execution error
|
||||
if result.IsError {
|
||||
t.Errorf("A mistake was not expected, obtained IsError=true: %s", result.ForLLM)
|
||||
}
|
||||
|
||||
// The error should mention that it is a binary file
|
||||
expectedMsg := "appears to be a binary file"
|
||||
if !strings.Contains(result.ForLLM, expectedMsg) {
|
||||
t.Errorf("The error message '%s' was expected, obtained: %s", expectedMsg, result.ForLLM)
|
||||
// Must return EXACTLY the string provided in the code
|
||||
expectedMsg := "[END OF FILE — no content at this offset]"
|
||||
if result.ForLLM != expectedMsg {
|
||||
t.Errorf("The message %q was expected, obtained: %q", expectedMsg, result.ForLLM)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user