From bae4342af1d0a27aa37c52c6e1689d340aaa7048 Mon Sep 17 00:00:00 2001 From: Mauro Date: Thu, 2 Apr 2026 12:49:08 +0200 Subject: [PATCH] Feat/tool read_file by lines (#1981) * feat(tool): read_file tool by lines * fix test * restore old bytes read_file tool * unified read_file tool * revert * fix doc * fix test * fix doc * fix offset * fix default start_line * fix line format * fix bug * removed legacy test * enhanced infos * improvements * feat(tool): read_file tool by lines --- config/config.example.json | 3 +- docs/configuration.md | 60 +++++ docs/it/configuration.md | 219 ----------------- pkg/agent/instance.go | 7 +- pkg/agent/instance_test.go | 41 ++++ pkg/config/config.go | 21 +- pkg/config/config_test.go | 7 + pkg/config/defaults.go | 1 + pkg/tools/filesystem.go | 382 ++++++++++++++++++++++++++++- pkg/tools/filesystem_test.go | 449 ++++++++++++++++++++++++++++++++--- 10 files changed, 936 insertions(+), 254 deletions(-) delete mode 100644 docs/it/configuration.md diff --git a/config/config.example.json b/config/config.example.json index bedd543d7..f0cce6d72 100644 --- a/config/config.example.json +++ b/config/config.example.json @@ -421,7 +421,8 @@ "enabled": true }, "read_file": { - "enabled": true + "enabled": true, + "mode": "bytes" }, "send_tts": { "enabled": false diff --git a/docs/configuration.md b/docs/configuration.md index 58930cbfa..7a5902f58 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -301,6 +301,66 @@ Even with `restrict_to_workspace: false`, the `exec` tool blocks these dangerous | `tools.allow_read_paths` | string[] | `[]` | Additional paths allowed for reading outside workspace | | `tools.allow_write_paths` | string[] | `[]` | Additional paths allowed for writing outside workspace | +### Read File Mode + +`read_file` has two mutually exclusive implementations selected by config. PicoClaw registers exactly one of them at startup: + +| Config Key | Type | Default | Description | +|------------|------|---------|-------------| +| `tools.read_file.enabled` | bool | `true` | Enables the `read_file` tool | +| `tools.read_file.mode` | string | `bytes` | Selects the `read_file` implementation: `bytes` or `lines` | +| `tools.read_file.max_read_file_size` | int | `65536` | Maximum bytes returned by `read_file` | + +#### Mode: `bytes` + +Optimized for arbitrary files and binary-safe pagination. + +Parameters: + +* `path` (required): File path +* `offset` (optional): Starting byte offset, default `0` +* `length` (optional): Maximum number of bytes to read, default `max_read_file_size` + +Use `bytes` when: + +* You may read binary files +* You want deterministic byte-range pagination + +#### Mode: `lines` + +Text-oriented behavior, optimized for source files, markdown, logs, and configs. The tool reads sequentially by line and stops when the configured byte budget is reached. + +Parameters: + +* `path` (required): File path +* `start_line` (optional): Starting line number, 1-indexed and inclusive, default `1` +* `max_lines` (optional): Maximum number of lines to read, default = all remaining lines until EOF or byte budget + +Behavior notes: + +* Binary-looking files are rejected with guidance to switch `read_file` to `mode = bytes` +* Extremely long single lines are truncated rather than skipped + +Use `mode = lines` when: + +* The agent mostly reads text files +* You want line-based pagination in prompts and tool calls +* You want cleaner chunks for code review, logs, and documentation + +#### Example + +```json +{ + "tools": { + "read_file": { + "enabled": true, + "mode": "lines", + "max_read_file_size": 65536 + } + } +} +``` + ### Exec Security | Config Key | Type | Default | Description | diff --git a/docs/it/configuration.md b/docs/it/configuration.md deleted file mode 100644 index 6a79a9543..000000000 --- a/docs/it/configuration.md +++ /dev/null @@ -1,219 +0,0 @@ -# ⚙️ Guida alla Configurazione - -> Torna al [README](../../README.md) - -## ⚙️ Configurazione - -File di configurazione: `~/.picoclaw/config.json` - -### Variabili d'Ambiente - -Puoi sovrascrivere i percorsi predefiniti usando variabili d'ambiente. Questo è utile per installazioni portatili, distribuzioni containerizzate, o per eseguire picoclaw come servizio di sistema. Queste variabili sono indipendenti e controllano percorsi diversi. - -| Variabile | Descrizione | Percorso Predefinito | -|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------| -| `PICOCLAW_CONFIG` | Sovrascrive il percorso al file di configurazione. Indica direttamente a picoclaw quale `config.json` caricare, ignorando tutte le altre posizioni. | `~/.picoclaw/config.json` | -| `PICOCLAW_HOME` | Sovrascrive la directory radice per i dati di picoclaw. Modifica la posizione predefinita del `workspace` e delle altre directory dati. | `~/.picoclaw` | - -**Esempi:** - -```bash -# Esegui picoclaw usando un file di configurazione specifico -# Il percorso del workspace verrà letto da quel file di configurazione -PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway - -# Esegui picoclaw con tutti i dati salvati in /opt/picoclaw -# La configurazione verrà caricata dal percorso predefinito ~/.picoclaw/config.json -# Il workspace verrà creato in /opt/picoclaw/workspace -PICOCLAW_HOME=/opt/picoclaw picoclaw agent - -# Usa entrambi per un setup completamente personalizzato -PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway -``` - -### Struttura del Workspace - -PicoClaw salva i dati nel workspace configurato (predefinito: `~/.picoclaw/workspace`): - -``` -~/.picoclaw/workspace/ -├── sessions/ # Sessioni di conversazione e cronologia -├── memory/ # Memoria a lungo termine (MEMORY.md) -├── state/ # Stato persistente (ultimo canale, ecc.) -├── cron/ # Database dei job pianificati -├── skills/ # Skill personalizzate -├── AGENTS.md # Guida al comportamento dell'agent -├── HEARTBEAT.md # Prompt per task periodici (controllato ogni 30 min) -├── IDENTITY.md # Identità dell'agent -├── SOUL.md # Anima dell'agent -└── USER.md # Preferenze dell'utente -``` - -> **Nota:** Le modifiche a `AGENTS.md`, `SOUL.md`, `USER.md`, `IDENTITY.md` e `memory/MEMORY.md` vengono rilevate automaticamente a runtime tramite il tracciamento della data di modifica (mtime). **Non è necessario riavviare il gateway** dopo aver modificato questi file — l'agent caricherà il nuovo contenuto alla prossima richiesta. - -### Sorgenti delle Skill - -Per impostazione predefinita, le skill vengono caricate da: - -1. `~/.picoclaw/workspace/skills` (workspace) -2. `~/.picoclaw/skills` (globale) -3. `/skills` (builtin) - -Per configurazioni avanzate/di test, puoi sovrascrivere la directory radice delle skill builtin con: - -```bash -export PICOCLAW_BUILTIN_SKILLS=/path/to/skills -``` - -### Politica Unificata di Esecuzione dei Comandi - -- I comandi slash generici vengono eseguiti tramite un unico percorso in `pkg/agent/loop.go` via `commands.Executor`. -- Gli adattatori dei canali non consumano più localmente i comandi generici; inoltrano il testo in entrata al percorso bus/agent. Telegram registra ancora automaticamente i comandi supportati all'avvio. -- Un comando slash sconosciuto (ad esempio `/foo`) viene passato all'elaborazione LLM come se fosse un messaggio dell'utente. -- Un comando registrato ma non supportato sul canale corrente (ad esempio `/show` su WhatsApp) restituisce un errore esplicito all'utente e interrompe l'elaborazione. - -### 🔒 Sandbox di Sicurezza - -PicoClaw esegue in un ambiente sandboxed per impostazione predefinita. L'agent può accedere solo ai file ed eseguire comandi all'interno del workspace configurato. - -#### Configurazione Predefinita - -```json -{ - "agents": { - "defaults": { - "workspace": "~/.picoclaw/workspace", - "restrict_to_workspace": true - } - } -} -``` - -| Opzione | Predefinito | Descrizione | -| ----------------------- | ----------------------- | ---------------------------------------------------- | -| `workspace` | `~/.picoclaw/workspace` | Directory di lavoro dell'agent | -| `restrict_to_workspace` | `true` | Limita l'accesso a file/comandi al workspace | - -#### Strumenti Protetti - -Quando `restrict_to_workspace: true`, i seguenti strumenti sono in sandbox: - -| Strumento | Funzione | Restrizione | -| ------------- | ------------------------- | ---------------------------------------------------- | -| `read_file` | Legge file | Solo file all'interno del workspace | -| `write_file` | Scrive file | Solo file all'interno del workspace | -| `list_dir` | Elenca directory | Solo directory all'interno del workspace | -| `edit_file` | Modifica file | Solo file all'interno del workspace | -| `append_file` | Aggiunge ai file | Solo file all'interno del workspace | -| `exec` | Esegue comandi | I percorsi dei comandi devono essere nel workspace | - -#### Protezione Exec Aggiuntiva - -Anche con `restrict_to_workspace: false`, lo strumento `exec` blocca questi comandi pericolosi: - -* `rm -rf`, `del /f`, `rmdir /s` — Cancellazione di massa -* `format`, `mkfs`, `diskpart` — Formattazione del disco -* `dd if=` — Imaging del disco -* Scrittura su `/dev/sd[a-z]` — Scritture dirette su disco -* `shutdown`, `reboot`, `poweroff` — Spegnimento del sistema -* Fork bomb `:(){ :|:& };:` - -### Controllo Accesso ai File - -| Chiave di configurazione | Tipo | Predefinito | Descrizione | -|--------------------------|------|-------------|-------------| -| `tools.allow_read_paths` | string[] | `[]` | Percorsi aggiuntivi consentiti per la lettura al di fuori del workspace | -| `tools.allow_write_paths` | string[] | `[]` | Percorsi aggiuntivi consentiti per la scrittura al di fuori del workspace | - -### Sicurezza Exec - -| Chiave di configurazione | Tipo | Predefinito | Descrizione | -|--------------------------|------|-------------|-------------| -| `tools.exec.allow_remote` | bool | `false` | Consente lo strumento exec da canali remoti (Telegram/Discord ecc.) | -| `tools.exec.enable_deny_patterns` | bool | `true` | Abilita l'intercettazione dei comandi pericolosi | -| `tools.exec.custom_deny_patterns` | string[] | `[]` | Pattern regex personalizzati da bloccare | -| `tools.exec.custom_allow_patterns` | string[] | `[]` | Pattern regex personalizzati da consentire | - -> **Nota di sicurezza:** La protezione dei symlink è abilitata per impostazione predefinita — tutti i percorsi file vengono risolti tramite `filepath.EvalSymlinks` prima del confronto con la whitelist, prevenendo attacchi di escape tramite symlink. - -#### Limitazione Nota: Processi Figlio degli Strumenti di Build - -Il controllo di sicurezza exec ispeziona solo la riga di comando avviata direttamente da PicoClaw. Non ispeziona ricorsivamente i processi figlio generati da strumenti di sviluppo consentiti come `make`, `go run`, `cargo`, `npm run` o script di build personalizzati. - -Ciò significa che un comando di primo livello può comunque compilare o avviare altri binari dopo aver superato il controllo iniziale. In pratica, tratta gli script di build, i Makefile, gli script di pacchetti e i binari generati come codice eseguibile che richiede lo stesso livello di revisione di un comando shell diretto. - -Per ambienti ad alto rischio: - -* Esamina gli script di build prima dell'esecuzione. -* Preferisci l'approvazione/revisione manuale per i workflow di compilazione ed esecuzione. -* Esegui PicoClaw in un container o VM se hai bisogno di un isolamento più forte di quello fornito dal controllo integrato. - -#### Esempi di Errore - -``` -[ERROR] tool: Tool execution failed -{tool=exec, error=Command blocked by safety guard (path outside working dir)} -``` - -``` -[ERROR] tool: Tool execution failed -{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)} -``` - -#### Disabilitare le Restrizioni (Rischio di Sicurezza) - -Se hai bisogno che l'agent acceda a percorsi al di fuori del workspace: - -**Metodo 1: File di configurazione** - -```json -{ - "agents": { - "defaults": { - "restrict_to_workspace": false - } - } -} -``` - -**Metodo 2: Variabile d'ambiente** - -```bash -export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false -``` - -> ⚠️ **Attenzione**: Disabilitare questa restrizione consente all'agent di accedere a qualsiasi percorso sul tuo sistema. Usare con cautela solo in ambienti controllati. - -#### Coerenza dei Confini di Sicurezza - -L'impostazione `restrict_to_workspace` si applica in modo coerente a tutti i percorsi di esecuzione: - -| Percorso di esecuzione | Confine di sicurezza | -| ---------------------- | --------------------------------- | -| Main Agent | `restrict_to_workspace` ✅ | -| Subagent / Spawn | Eredita la stessa restrizione ✅ | -| Heartbeat tasks | Eredita la stessa restrizione ✅ | - -Tutti i percorsi condividono la stessa restrizione del workspace — non è possibile aggirare il confine di sicurezza tramite subagent o task pianificati. - -### Heartbeat (Task Periodici) - -PicoClaw può eseguire task periodici automaticamente. Crea un file `HEARTBEAT.md` nel tuo workspace: - -```markdown -# Periodic Tasks - -- Check my email for important messages -- Review my calendar for upcoming events -- Check the weather forecast -``` - -L'agent leggerà questo file ogni 30 minuti (configurabile) ed eseguirà tutti i task usando gli strumenti disponibili. - -#### Task Asincroni con Spawn - -Per task di lunga durata (ricerca web, chiamate API), usa lo strumento `spawn` per creare un **subagent**: - -```markdown -# Periodic Tasks -``` diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go index 880725660..bacfa49c5 100644 --- a/pkg/agent/instance.go +++ b/pkg/agent/instance.go @@ -77,7 +77,12 @@ func NewAgentInstance( if cfg.Tools.IsToolEnabled("read_file") { maxReadFileSize := cfg.Tools.ReadFile.MaxReadFileSize - toolsRegistry.Register(tools.NewReadFileTool(workspace, readRestrict, maxReadFileSize, allowReadPaths)) + switch cfg.Tools.ReadFile.EffectiveMode() { + case config.ReadFileModeLines: + toolsRegistry.Register(tools.NewReadFileLinesTool(workspace, readRestrict, maxReadFileSize, allowReadPaths)) + default: + toolsRegistry.Register(tools.NewReadFileBytesTool(workspace, readRestrict, maxReadFileSize, allowReadPaths)) + } } if cfg.Tools.IsToolEnabled("write_file") { toolsRegistry.Register(tools.NewWriteFileTool(workspace, restrict, allowWritePaths)) diff --git a/pkg/agent/instance_test.go b/pkg/agent/instance_test.go index e296a18cb..7c043d88f 100644 --- a/pkg/agent/instance_test.go +++ b/pkg/agent/instance_test.go @@ -248,6 +248,47 @@ func TestNewAgentInstance_AllowsMediaTempDirForReadListAndExec(t *testing.T) { } } +func TestNewAgentInstance_ReadFileModeSelectsSchema(t *testing.T) { + workspace := t.TempDir() + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: workspace, + ModelName: "test-model", + }, + }, + Tools: config.ToolsConfig{ + ReadFile: config.ReadFileToolConfig{ + Enabled: true, + Mode: config.ReadFileModeLines, + MaxReadFileSize: 4096, + }, + }, + } + + agent := NewAgentInstance(nil, &cfg.Agents.Defaults, cfg, &mockProvider{}) + readTool, ok := agent.Tools.Get("read_file") + if !ok { + t.Fatal("read_file tool not registered") + } + + params := readTool.Parameters() + props, _ := params["properties"].(map[string]any) + if _, ok := props["start_line"]; !ok { + t.Fatalf("expected line-mode schema to expose start_line, got %#v", props) + } + if _, ok := props["max_lines"]; !ok { + t.Fatalf("expected line-mode schema to expose max_lines, got %#v", props) + } + if _, ok := props["offset"]; ok { + t.Fatalf("did not expect line-mode schema to expose offset, got %#v", props) + } + if _, ok := props["length"]; ok { + t.Fatalf("did not expect line-mode schema to expose length, got %#v", props) + } +} + func TestNewAgentInstance_InvalidExecConfigDoesNotExit(t *testing.T) { workspace := t.TempDir() diff --git a/pkg/config/config.go b/pkg/config/config.go index fcedf45b9..30e5e1dd9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -803,8 +803,25 @@ type MediaCleanupConfig struct { } type ReadFileToolConfig struct { - Enabled bool `json:"enabled"` - MaxReadFileSize int `json:"max_read_file_size"` + Enabled bool `json:"enabled"` + Mode string `json:"mode"` + MaxReadFileSize int `json:"max_read_file_size"` +} + +const ( + ReadFileModeBytes = "bytes" + ReadFileModeLines = "lines" +) + +func (c ReadFileToolConfig) EffectiveMode() string { + switch strings.ToLower(strings.TrimSpace(c.Mode)) { + case ReadFileModeLines: + return ReadFileModeLines + case "", ReadFileModeBytes: + return ReadFileModeBytes + default: + return ReadFileModeBytes + } } type ToolsConfig struct { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 278dfa43a..a1410f940 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -317,6 +317,13 @@ func TestDefaultConfig_WebTools(t *testing.T) { } } +func TestDefaultConfig_ReadFileMode(t *testing.T) { + cfg := DefaultConfig() + if cfg.Tools.ReadFile.EffectiveMode() != ReadFileModeBytes { + t.Fatalf("expected default read_file mode %q, got %q", ReadFileModeBytes, cfg.Tools.ReadFile.EffectiveMode()) + } +} + func TestSaveConfig_FilePermissions(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("file permission bits are not enforced on Windows") diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index a9a107975..39cdb89e6 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -487,6 +487,7 @@ func DefaultConfig() *Config { }, ReadFile: ReadFileToolConfig{ Enabled: true, + Mode: ReadFileModeBytes, MaxReadFileSize: 64 * 1024, // 64KB }, Spawn: ToolConfig{ diff --git a/pkg/tools/filesystem.go b/pkg/tools/filesystem.go index 39d45013d..0b9a16950 100644 --- a/pkg/tools/filesystem.go +++ b/pkg/tools/filesystem.go @@ -1,18 +1,22 @@ package tools import ( + "bufio" + "bytes" "context" "errors" "fmt" "io" "io/fs" "math" + "net/http" "os" "path/filepath" "regexp" "strconv" "strings" "time" + "unicode/utf8" "github.com/sipeed/picoclaw/pkg/fileutil" "github.com/sipeed/picoclaw/pkg/logger" @@ -20,7 +24,11 @@ import ( const MaxReadFileSize = 64 * 1024 // 64KB limit to avoid context overflow -func validatePathWithAllowPaths(path, workspace string, restrict bool, patterns []*regexp.Regexp) (string, error) { +func validatePathWithAllowPaths( + path, workspace string, + restrict bool, + patterns []*regexp.Regexp, +) (string, error) { if workspace == "" { return path, fmt.Errorf("workspace is not defined") } @@ -253,6 +261,11 @@ type ReadFileTool struct { maxSize int64 } +type ReadFileLinesTool struct { + fs fileSystem + maxSize int64 +} + func NewReadFileTool( workspace string, restrict bool, @@ -275,14 +288,53 @@ func NewReadFileTool( } } +func NewReadFileBytesTool( + workspace string, + restrict bool, + maxReadFileSize int, + allowPaths ...[]*regexp.Regexp, +) *ReadFileTool { + return NewReadFileTool(workspace, restrict, maxReadFileSize, allowPaths...) +} + +func NewReadFileLinesTool( + workspace string, + restrict bool, + maxReadFileSize int, + allowPaths ...[]*regexp.Regexp, +) *ReadFileLinesTool { + var patterns []*regexp.Regexp + if len(allowPaths) > 0 { + patterns = allowPaths[0] + } + + maxSize := int64(maxReadFileSize) + if maxSize <= 0 { + maxSize = MaxReadFileSize + } + + return &ReadFileLinesTool{ + fs: buildFs(workspace, restrict, patterns), + maxSize: maxSize, + } +} + func (t *ReadFileTool) Name() string { return "read_file" } +func (t *ReadFileLinesTool) Name() string { + return "read_file" +} + func (t *ReadFileTool) Description() string { return "Read the contents of a file. Supports pagination via `offset` and `length`." } +func (t *ReadFileLinesTool) Description() string { + return "Read a UTF-8 text file from the filesystem. Output always includes line numbers in the format `LINE_NUMBER|LINE_CONTENT` (1-indexed). Supports partial reads via `start_line` and `max_lines` for large text files." +} + func (t *ReadFileTool) Parameters() map[string]any { return map[string]any{ "type": "object", @@ -306,6 +358,28 @@ func (t *ReadFileTool) Parameters() map[string]any { } } +func (t *ReadFileLinesTool) Parameters() map[string]any { + return map[string]any{ + "type": "object", + "properties": map[string]any{ + "path": map[string]any{ + "type": "string", + "description": "Path to the file to read.", + }, + "start_line": map[string]any{ + "type": "integer", + "description": "Line number to start reading from (1-indexed, inclusive).", + "default": 1, + }, + "max_lines": map[string]any{ + "type": "integer", + "description": "Maximum number of lines to read.", + }, + }, + "required": []string{"path"}, + } +} + func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult { path, ok := args["path"].(string) if !ok { @@ -447,6 +521,302 @@ func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *ToolRe return NewToolResult(header + "\n\n" + string(data)) } +func (t *ReadFileLinesTool) Execute(ctx context.Context, args map[string]any) *ToolResult { + path, ok := args["path"].(string) + if !ok { + return ErrorResult("path is required") + } + + startLine, err := getInt64Arg(args, "start_line", 1) + if err != nil { + return ErrorResult(err.Error()) + } + if startLine < 1 { + return ErrorResult("start_line must be >= 1") + } + if _, exists := args["offset"]; exists { + return ErrorResult("offset is not supported in line mode; use start_line") + } + if _, exists := args["length"]; exists { + return ErrorResult("length is not supported in line mode; use max_lines") + } + if _, exists := args["limit"]; exists { + return ErrorResult("limit is not supported in line mode; use max_lines") + } + + limit := int64(-1) + if raw, exists := args["max_lines"]; exists && raw != nil { + limit, err = getInt64Arg(args, "max_lines", -1) + if err != nil { + return ErrorResult(err.Error()) + } + if limit <= 0 { + return ErrorResult("max_lines, if provided, must be > 0") + } + } + + file, err := t.fs.Open(path) + if err != nil { + return ErrorResult(err.Error()) + } + defer file.Close() + + if info, statErr := file.Stat(); statErr == nil && info.IsDir() { + return ErrorResult(fmt.Sprintf("failed to open file: path is a directory: %s", path)) + } + + sample := make([]byte, 512) + sampleN, readErr := file.Read(sample) + if readErr != nil && readErr != io.EOF { + return ErrorResult(fmt.Sprintf("failed to read file: %v", readErr)) + } + sample = sample[:sampleN] + if isBinaryReadFileData(sample) { + return ErrorResult("file appears to be binary; switch read_file mode to 'bytes' for byte-based inspection") + } + + reader := bufio.NewReaderSize(io.MultiReader(bytes.NewReader(sample), file), 32*1024) + + var content strings.Builder + lineIndex := int64(1) + var linesRead int64 + var fileBytesRead int64 + var outputBytesRead int64 + var reachedEOF bool + var byteBudgetTruncated bool + var lineTruncated bool + + for lineIndex < startLine { + hasLine, consumeErr := consumeNextLine(reader) + if consumeErr != nil { + return ErrorResult(fmt.Sprintf("failed to read file content: %v", consumeErr)) + } + if !hasLine { + reachedEOF = true + break + } + lineIndex++ + } + + for !reachedEOF && (limit < 0 || linesRead < limit) { + prefix := formatReadFileLinePrefix(lineIndex) + remaining := t.maxSize - outputBytesRead - int64(len(prefix)) + if remaining <= 0 { + byteBudgetTruncated = true + break + } + + line, complete, hasLine, readLineErr := readNextLinePrefix(reader, remaining) + if readLineErr != nil { + return ErrorResult(fmt.Sprintf("failed to read file content: %v", readLineErr)) + } + if !hasLine { + reachedEOF = true + break + } + + content.WriteString(prefix) + content.Write(line) + fileBytesRead += int64(len(line)) + outputBytesRead += int64(len(prefix) + len(line)) + linesRead++ + lineIndex++ + + if !complete { + byteBudgetTruncated = true + lineTruncated = true + break + } + } + + if !reachedEOF && !lineTruncated { + hasMoreContent, peekErr := readerHasMoreContent(reader) + if peekErr != nil { + return ErrorResult(fmt.Sprintf("failed to inspect remaining file content: %v", peekErr)) + } + if !hasMoreContent { + reachedEOF = true + byteBudgetTruncated = false + } + } + + if linesRead == 0 && content.Len() == 0 { + return NewToolResult(fmt.Sprintf("[END OF FILE - no content at or after start_line=%d]", startLine)) + } + + start := startLine + endLine := startLine + linesRead - 1 + displayPath := filepath.Base(path) + header := fmt.Sprintf( + "[file: %s | read: lines %d-%d (1-indexed) | file_bytes: %d | output_bytes: %d]", + displayPath, start, endLine, fileBytesRead, outputBytesRead, + ) + + switch { + case lineTruncated: + header += fmt.Sprintf( + "\n[TRUNCATED - line %d exceeded the %d byte read budget and was cut mid-line.]", + endLine, + t.maxSize, + ) + case byteBudgetTruncated: + if limit > 0 { + header += fmt.Sprintf( + "\n[TRUNCATED - byte budget reached. Call read_file again with start_line=%d and max_lines=%d to continue at the next line.]", + startLine+linesRead, + limit, + ) + } else { + header += fmt.Sprintf( + "\n[TRUNCATED - byte budget reached. Call read_file again with start_line=%d to continue at the next line.]", + startLine+linesRead, + ) + } + case !reachedEOF && limit > 0 && linesRead >= limit: + header += fmt.Sprintf( + "\n[PARTIAL - more content remains. Call read_file again with start_line=%d and max_lines=%d to continue.]", + startLine+linesRead, + limit, + ) + default: + header += "\n[END OF FILE - no further content.]" + } + + logger.DebugCF("tool", "ReadFileTool execution completed successfully", + map[string]any{ + "path": path, + "lines_read": linesRead, + "file_bytes_read": fileBytesRead, + "output_bytes_read": outputBytesRead, + "truncated": byteBudgetTruncated, + "tool": t.Name(), + }) + + return NewToolResult(header + "\n\n" + content.String()) +} + +func formatReadFileLinePrefix(lineNumber int64) string { + return strconv.FormatInt(lineNumber, 10) + "|" +} + +func isBinaryReadFileData(data []byte) bool { + if len(data) == 0 { + return false + } + + sample := data + if len(sample) > 512 { + sample = sample[:512] + } + + if bytes.IndexByte(sample, 0) >= 0 { + return true + } + + contentType := http.DetectContentType(sample) + if strings.HasPrefix(contentType, "text/") { + return false + } + if strings.HasSuffix(contentType, "/json") || + strings.HasSuffix(contentType, "+json") || + strings.HasSuffix(contentType, "/xml") || + strings.HasSuffix(contentType, "+xml") || + strings.Contains(contentType, "javascript") { + return false + } + + if !utf8.Valid(sample) { + return true + } + + controlChars := 0 + for _, b := range sample { + if b < 0x20 && b != '\n' && b != '\r' && b != '\t' && b != '\f' && b != '\b' { + controlChars++ + } + } + + return float64(controlChars)/float64(len(sample)) > 0.1 +} + +func consumeNextLine(reader *bufio.Reader) (bool, error) { + sawData := false + + for { + fragment, err := reader.ReadSlice('\n') + if len(fragment) > 0 { + sawData = true + } + + switch { + case err == nil: + return true, nil + case errors.Is(err, bufio.ErrBufferFull): + continue + case errors.Is(err, io.EOF): + return sawData, nil + default: + return false, err + } + } +} + +func readNextLinePrefix(reader *bufio.Reader, maxBytes int64) ([]byte, bool, bool, error) { + if maxBytes <= 0 { + return nil, false, false, nil + } + + var out bytes.Buffer + sawData := false + complete := true + + for { + fragment, err := reader.ReadSlice('\n') + if len(fragment) > 0 { + sawData = true + if remaining := maxBytes - int64(out.Len()); remaining > 0 { + take := len(fragment) + if int64(take) > remaining { + take = int(remaining) + complete = false + } + out.Write(fragment[:take]) + } else { + complete = false + } + } + + switch { + case err == nil: + return out.Bytes(), complete, sawData, nil + case errors.Is(err, bufio.ErrBufferFull): + if !complete { + return out.Bytes(), false, true, nil + } + continue + case errors.Is(err, io.EOF): + if !sawData { + return nil, true, false, nil + } + return out.Bytes(), complete, true, nil + default: + return nil, false, false, err + } + } +} + +func readerHasMoreContent(reader *bufio.Reader) (bool, error) { + _, err := reader.Peek(1) + switch { + case err == nil: + return true, nil + case errors.Is(err, io.EOF): + return false, nil + default: + return false, err + } +} + // getInt64Arg extracts an integer argument from the args map, returning the // provided default if the key is absent. func getInt64Arg(args map[string]any, key string, defaultVal int64) (int64, error) { @@ -483,7 +853,11 @@ type WriteFileTool struct { fs fileSystem } -func NewWriteFileTool(workspace string, restrict bool, allowPaths ...[]*regexp.Regexp) *WriteFileTool { +func NewWriteFileTool( + workspace string, + restrict bool, + allowPaths ...[]*regexp.Regexp, +) *WriteFileTool { var patterns []*regexp.Regexp if len(allowPaths) > 0 { patterns = allowPaths[0] @@ -536,7 +910,9 @@ func (t *WriteFileTool) Execute(ctx context.Context, args map[string]any) *ToolR if !overwrite { if _, err := t.fs.Open(path); err == nil { - return ErrorResult(fmt.Sprintf("file: %s already exists. Set overwrite=true to replace.", path)) + return ErrorResult( + fmt.Sprintf("file: %s already exists. Set overwrite=true to replace.", path), + ) } } diff --git a/pkg/tools/filesystem_test.go b/pkg/tools/filesystem_test.go index 0b4dd310b..bfbc1f46e 100644 --- a/pkg/tools/filesystem_test.go +++ b/pkg/tools/filesystem_test.go @@ -18,7 +18,7 @@ func TestFilesystemTool_ReadFile_Success(t *testing.T) { testFile := filepath.Join(tmpDir, "test.txt") os.WriteFile(testFile, []byte("test content"), 0o644) - tool := NewReadFileTool("", false, MaxReadFileSize) + tool := NewReadFileBytesTool("", false, MaxReadFileSize) ctx := context.Background() args := map[string]any{ "path": testFile, @@ -45,7 +45,7 @@ func TestFilesystemTool_ReadFile_Success(t *testing.T) { // TestFilesystemTool_ReadFile_NotFound verifies error handling for missing file func TestFilesystemTool_ReadFile_NotFound(t *testing.T) { - tool := NewReadFileTool("", false, MaxReadFileSize) + tool := NewReadFileBytesTool("", false, MaxReadFileSize) ctx := context.Background() args := map[string]any{ "path": "/nonexistent_file_12345.txt", @@ -59,8 +59,13 @@ func TestFilesystemTool_ReadFile_NotFound(t *testing.T) { } // Should contain error message - if !strings.Contains(result.ForLLM, "failed to open file") && !strings.Contains(result.ForUser, "failed to read") { - t.Errorf("Expected error message, got ForLLM: %s, ForUser: %s", result.ForLLM, result.ForUser) + if !strings.Contains(result.ForLLM, "failed to open file") && + !strings.Contains(result.ForUser, "failed to open") { + t.Errorf( + "Expected error message, got ForLLM: %s, ForUser: %s", + result.ForLLM, + result.ForUser, + ) } } @@ -78,7 +83,8 @@ func TestFilesystemTool_ReadFile_MissingPath(t *testing.T) { } // Should mention required parameter - if !strings.Contains(result.ForLLM, "path is required") && !strings.Contains(result.ForUser, "path is required") { + if !strings.Contains(result.ForLLM, "path is required") && + !strings.Contains(result.ForUser, "path is required") { t.Errorf("Expected 'path is required' message, got ForLLM: %s", result.ForLLM) } } @@ -297,7 +303,12 @@ func TestFilesystemTool_WriteFile_OverwriteSandboxed(t *testing.T) { "content": "replaced in sandbox", "overwrite": true, }) - assert.False(t, result.IsError, "expected success in sandbox mode with overwrite=true, got: %s", result.ForLLM) + assert.False( + t, + result.IsError, + "expected success in sandbox mode with overwrite=true, got: %s", + result.ForLLM, + ) data, err := os.ReadFile(filepath.Join(workspace, testFile)) assert.NoError(t, err) @@ -325,7 +336,8 @@ func TestFilesystemTool_ListDir_Success(t *testing.T) { } // Should list files and directories - if !strings.Contains(result.ForLLM, "file1.txt") || !strings.Contains(result.ForLLM, "file2.txt") { + if !strings.Contains(result.ForLLM, "file1.txt") || + !strings.Contains(result.ForLLM, "file2.txt") { t.Errorf("Expected files in listing, got: %s", result.ForLLM) } if !strings.Contains(result.ForLLM, "subdir") { @@ -349,8 +361,13 @@ func TestFilesystemTool_ListDir_NotFound(t *testing.T) { } // Should contain error message - if !strings.Contains(result.ForLLM, "failed to read") && !strings.Contains(result.ForUser, "failed to read") { - t.Errorf("Expected error message, got ForLLM: %s, ForUser: %s", result.ForLLM, result.ForUser) + if !strings.Contains(result.ForLLM, "failed to read") && + !strings.Contains(result.ForUser, "failed to read") { + t.Errorf( + "Expected error message, got ForLLM: %s, ForUser: %s", + result.ForLLM, + result.ForUser, + ) } } @@ -397,7 +414,8 @@ func TestFilesystemTool_ReadFile_RejectsSymlinkEscape(t *testing.T) { // os.Root might return different errors depending on platform/implementation // but it definitely should error. // Our wrapper returns "access denied or file not found" - if !strings.Contains(result.ForLLM, "access denied") && !strings.Contains(result.ForLLM, "file not found") && + if !strings.Contains(result.ForLLM, "access denied") && + !strings.Contains(result.ForLLM, "file not found") && !strings.Contains(result.ForLLM, "no such file") { t.Fatalf("expected symlink escape error, got: %s", result.ForLLM) } @@ -416,10 +434,20 @@ func TestFilesystemTool_EmptyWorkspace_AccessDenied(t *testing.T) { }) // We EXPECT IsError=true (access blocked due to empty workspace) - assert.True(t, result.IsError, "Security Regression: Empty workspace allowed access! content: %s", result.ForLLM) + assert.True( + t, + result.IsError, + "Security Regression: Empty workspace allowed access! content: %s", + result.ForLLM, + ) // Verify it failed for the right reason - assert.Contains(t, result.ForLLM, "workspace is not defined", "Expected 'workspace is not defined' error") + assert.Contains( + t, + result.ForLLM, + "workspace is not defined", + "Expected 'workspace is not defined' error", + ) } // TestRootMkdirAll verifies that root.MkdirAll (used by atomicWriteFileInRoot) handles all cases: @@ -653,7 +681,10 @@ func TestWhitelistFs_BlocksSymlinkEscapeInAllowedDir(t *testing.T) { patterns := []*regexp.Regexp{regexp.MustCompile(`^` + regexp.QuoteMeta(allowedDir))} tool := NewReadFileTool(workspace, true, MaxReadFileSize, patterns) - result := tool.Execute(context.Background(), map[string]any{"path": filepath.Join(linkPath, "secret.txt")}) + result := tool.Execute( + context.Background(), + map[string]any{"path": filepath.Join(linkPath, "secret.txt")}, + ) if !result.IsError { t.Fatalf("expected symlink escape from allowed dir to be blocked, got: %s", result.ForLLM) } @@ -726,7 +757,6 @@ func TestReadFileTool_ChunkedReading(t *testing.T) { tmpDir := t.TempDir() testFile := filepath.Join(tmpDir, "pagination_test.txt") - // Create a test file with exactly 26 bytes of content fullContent := "abcdefghijklmnopqrstuvwxyz" err := os.WriteFile(testFile, []byte(fullContent), 0o644) if err != nil { @@ -748,15 +778,12 @@ func TestReadFileTool_ChunkedReading(t *testing.T) { t.Fatalf("Chunk 1 failed: %s", result1.ForLLM) } - // Expect the first 10 characters if !strings.Contains(result1.ForLLM, "abcdefghij") { t.Errorf("Chunk 1 should contain 'abcdefghij', got: %s", result1.ForLLM) } - // Expect the header to indicate the file is truncated if !strings.Contains(result1.ForLLM, "[TRUNCATED") { t.Errorf("Chunk 1 header should indicate truncation, got: %s", result1.ForLLM) } - // Expect the header to suggest the next offset (10) if !strings.Contains(result1.ForLLM, "offset=10") { t.Errorf("Chunk 1 header should suggest next offset=10, got: %s", result1.ForLLM) } @@ -773,17 +800,14 @@ func TestReadFileTool_ChunkedReading(t *testing.T) { t.Fatalf("Chunk 2 failed: %s", result2.ForLLM) } - // Expect the next 10 characters if !strings.Contains(result2.ForLLM, "klmnopqrst") { t.Errorf("Chunk 2 should contain 'klmnopqrst', got: %s", result2.ForLLM) } - // Expect the header to suggest the next offset (20) if !strings.Contains(result2.ForLLM, "offset=20") { t.Errorf("Chunk 2 header should suggest next offset=20, got: %s", result2.ForLLM) } // Step 3: Read the final chunk (remaining 6 bytes) --- - // We ask for 10 bytes, but only 6 are left in the file args3 := map[string]any{ "path": testFile, "offset": 20, @@ -795,16 +819,12 @@ func TestReadFileTool_ChunkedReading(t *testing.T) { t.Fatalf("Chunk 3 failed: %s", result3.ForLLM) } - // Expect the last 6 characters if !strings.Contains(result3.ForLLM, "uvwxyz") { t.Errorf("Chunk 3 should contain 'uvwxyz', got: %s", result3.ForLLM) } - // Expect the header to indicate the end of the file if !strings.Contains(result3.ForLLM, "[END OF FILE") { t.Errorf("Chunk 3 header should indicate end of file, got: %s", result3.ForLLM) } - - // Ensure no TRUNCATED message is present in the final chunk if strings.Contains(result3.ForLLM, "[TRUNCATED") { t.Errorf("Chunk 3 header should NOT indicate truncation, got: %s", result3.ForLLM) } @@ -816,7 +836,6 @@ func TestReadFileTool_OffsetBeyondEOF(t *testing.T) { tmpDir := t.TempDir() testFile := filepath.Join(tmpDir, "short.txt") - // create a file of only 5 bytes err := os.WriteFile(testFile, []byte("12345"), 0o644) if err != nil { t.Fatalf("Failed to write test file: %v", err) @@ -827,19 +846,393 @@ func TestReadFileTool_OffsetBeyondEOF(t *testing.T) { args := map[string]any{ "path": testFile, - "offset": int64(100), // Offset beyond the end of the file + "offset": int64(100), } result := tool.Execute(ctx, args) - // It should not be classified as a tool execution error if result.IsError { t.Errorf("A mistake was not expected, obtained IsError=true: %s", result.ForLLM) } - // Must return EXACTLY the string provided in the code expectedMsg := "[END OF FILE - no content at this offset]" if result.ForLLM != expectedMsg { t.Errorf("The message %q was expected, obtained: %q", expectedMsg, result.ForLLM) } } + +func TestReadFileLinesTool_ChunkedReading(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "pagination_lines.txt") + + fullContent := strings.Join([]string{ + "line 1", + "line 2", + "line 3", + "line 4", + "line 5", + "line 6", + }, "\n") + "\n" + err := os.WriteFile(testFile, []byte(fullContent), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + + result1 := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + "max_lines": 2, + }) + if result1.IsError { + t.Fatalf("Chunk 1 failed: %s", result1.ForLLM) + } + if !strings.Contains(result1.ForLLM, "1|line 1\n2|line 2\n") { + t.Fatalf("expected first two lines, got: %s", result1.ForLLM) + } + if !strings.Contains(result1.ForLLM, "lines 1-2") { + t.Fatalf("expected line range 1-2, got: %s", result1.ForLLM) + } + if !strings.Contains(result1.ForLLM, "start_line=3") { + t.Fatalf("expected continuation start_line=3, got: %s", result1.ForLLM) + } + if !strings.Contains(result1.ForLLM, "max_lines=2") { + t.Fatalf("expected continuation max_lines=2, got: %s", result1.ForLLM) + } + + result2 := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 3, + "max_lines": 2, + }) + if result2.IsError { + t.Fatalf("Chunk 2 failed: %s", result2.ForLLM) + } + if !strings.Contains(result2.ForLLM, "3|line 3\n4|line 4\n") { + t.Fatalf("expected middle chunk, got: %s", result2.ForLLM) + } + if !strings.Contains(result2.ForLLM, "start_line=5") { + t.Fatalf("expected continuation start_line=5, got: %s", result2.ForLLM) + } + if !strings.Contains(result2.ForLLM, "max_lines=2") { + t.Fatalf("expected continuation max_lines=2, got: %s", result2.ForLLM) + } + + result3 := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 5, + "max_lines": 2, + }) + if result3.IsError { + t.Fatalf("Chunk 3 failed: %s", result3.ForLLM) + } + if !strings.Contains(result3.ForLLM, "5|line 5\n6|line 6\n") { + t.Fatalf("expected final chunk, got: %s", result3.ForLLM) + } + if !strings.Contains(result3.ForLLM, "[END OF FILE") { + t.Fatalf("expected EOF marker, got: %s", result3.ForLLM) + } +} + +func TestReadFileLinesTool_DefaultOffsetAndRemainingLines(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "default_lines.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2\nline 3\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + }) + if result.IsError { + t.Fatalf("Execute() error = %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "1|line 1\n2|line 2\n3|line 3\n") { + t.Fatalf("expected remaining lines by default, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "lines 1-3") { + t.Fatalf("expected line range 1-3, got: %s", result.ForLLM) + } +} + +func TestReadFileTool_LegacyLengthUsesByteModeForText(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "legacy_bytes.txt") + + err := os.WriteFile(testFile, []byte("abcdefghijklmnopqrstuvwxyz"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileBytesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "offset": 10, + "length": 5, + }) + if result.IsError { + t.Fatalf("Execute() error = %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "read: bytes 10-14") { + t.Fatalf("expected byte-based header, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "klmno") { + t.Fatalf("expected byte chunk content, got: %s", result.ForLLM) + } + if strings.Contains(result.ForLLM, "lines ") { + t.Fatalf("expected legacy byte mode, got line-based header: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_OffsetBeyondEOF(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "short_lines.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": int64(100), + }) + if result.IsError { + t.Fatalf("unexpected error: %s", result.ForLLM) + } + if result.ForLLM != "[END OF FILE - no content at or after start_line=100]" { + t.Fatalf("unexpected EOF message: %q", result.ForLLM) + } +} + +func TestReadFileLinesTool_RegistryValidationSupportsMaxLinesAndRejectsLimit(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "registry_lines.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2\nline 3\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + reg := NewToolRegistry() + reg.Register(NewReadFileLinesTool(tmpDir, false, MaxReadFileSize)) + + result := reg.Execute(context.Background(), "read_file", map[string]any{ + "path": testFile, + "start_line": 1, + "max_lines": 1, + }) + if result.IsError { + t.Fatalf("expected max_lines to pass registry validation, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "1|line 1\n") { + t.Fatalf("expected first line via max_lines, got: %s", result.ForLLM) + } + + result = reg.Execute(context.Background(), "read_file", map[string]any{ + "path": testFile, + "start_line": 2, + "limit": 1, + }) + if !result.IsError { + t.Fatalf("expected limit to be rejected, got success: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "unexpected property \"limit\"") { + t.Fatalf("expected registry validation error for limit, got: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_RejectsOffset(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "legacy_offset.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + "offset": 1, + }) + if !result.IsError { + t.Fatalf("expected offset to be rejected, got success: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "offset is not supported in line mode; use start_line") { + t.Fatalf("unexpected error for offset in line mode: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_RejectsLength(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "legacy_length.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + "length": 1, + }) + if !result.IsError { + t.Fatalf("expected length to be rejected, got success: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "length is not supported in line mode; use max_lines") { + t.Fatalf("unexpected error for length in line mode: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_RejectsLimit(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "legacy_limit.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + "limit": 1, + }) + if !result.IsError { + t.Fatalf("expected limit to be rejected, got success: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "limit is not supported in line mode; use max_lines") { + t.Fatalf("unexpected error for limit in line mode: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_BinaryFileRejected(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "binary.dat") + + data := []byte{0x00, 0x01, 'A', 'B', 'C', 'D', 'E', 'F'} + err := os.WriteFile(testFile, data, 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + }) + if !result.IsError { + t.Fatalf("expected binary file rejection in line mode, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "switch read_file mode to 'bytes'") { + t.Fatalf("expected binary file rejection message, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "mode to 'bytes'") { + t.Fatalf("expected suggestion to switch read_file mode, got: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_TruncatesSingleLongLineAtByteBudget(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "long_line.txt") + + content := "first line\n" + strings.Repeat("x", 70*1024) + "\n" + err := os.WriteFile(testFile, []byte(content), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + }) + if result.IsError { + t.Fatalf("Execute() error = %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "was cut mid-line") { + t.Fatalf("expected explicit mid-line truncation warning, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "1|first line\n") { + t.Fatalf("expected the first line with line prefix, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "2|") { + t.Fatalf("expected line prefix for the truncated line, got: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_NoTrailingNewline(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "no_trailing_newline.txt") + + err := os.WriteFile(testFile, []byte("line 1\nline 2"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, MaxReadFileSize) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + }) + if result.IsError { + t.Fatalf("Execute() error = %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "1|line 1\n2|line 2") { + t.Fatalf( + "expected final line without trailing newline to be preserved, got: %s", + result.ForLLM, + ) + } + if !strings.Contains(result.ForLLM, "[END OF FILE - no further content.]") { + t.Fatalf("expected EOF marker, got: %s", result.ForLLM) + } +} + +func TestReadFileLinesTool_ExactByteBudgetBoundaryIncludesPrefix(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "exact_boundary.txt") + + err := os.WriteFile(testFile, []byte("1234567\nsecond line\n"), 0o644) + if err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + tool := NewReadFileLinesTool(tmpDir, false, 10) + result := tool.Execute(context.Background(), map[string]any{ + "path": testFile, + "start_line": 1, + }) + if result.IsError { + t.Fatalf("Execute() error = %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "1|1234567\n") { + t.Fatalf( + "expected first line to fit exactly in the byte budget with its prefix, got: %s", + result.ForLLM, + ) + } + if strings.Contains(result.ForLLM, "2|") { + t.Fatalf( + "expected second line to be excluded once the exact output byte budget was reached, got: %s", + result.ForLLM, + ) + } + if !strings.Contains(result.ForLLM, "file_bytes: 8 | output_bytes: 10") { + t.Fatalf("expected separate file/output byte counters, got: %s", result.ForLLM) + } + if !strings.Contains(result.ForLLM, "start_line=2") { + t.Fatalf("expected continuation at line 2, got: %s", result.ForLLM) + } +}