diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go
index 966668227..19d13b2bb 100644
--- a/pkg/agent/loop.go
+++ b/pkg/agent/loop.go
@@ -183,6 +183,17 @@ func registerSharedTools(
 			agent.Tools.Register(messageTool)
 		}
 
+		// send_file tool: delivers local files as outbound media. The MediaStore
+		// argument is nil here; SetMediaStore injects the real store later.
+		if cfg.Tools.IsToolEnabled("send_file") {
+			agent.Tools.Register(tools.NewSendFileTool(
+				agent.Workspace,
+				cfg.Agents.Defaults.RestrictToWorkspace,
+				cfg.Agents.Defaults.GetMaxMediaSize(),
+				nil,
+			))
+		}
+
 		// Skill discovery and installation tools
 		skills_enabled := cfg.Tools.IsToolEnabled("skills")
 		find_skills_enable := cfg.Tools.IsToolEnabled("find_skills")
@@ -384,6 +395,17 @@ func (al *AgentLoop) SetChannelManager(cm *channels.Manager) {
 // SetMediaStore injects a MediaStore for media lifecycle management.
 func (al *AgentLoop) SetMediaStore(s media.MediaStore) {
 	al.mediaStore = s
+
+	// Every agent's send_file tool is registered without a store, so hand
+	// each of them the one received here.
+	injectStore := func(registered tools.Tool) {
+		sendFile, isSendFile := registered.(*tools.SendFileTool)
+		if !isSendFile {
+			return
+		}
+		sendFile.SetMediaStore(s)
+	}
+	al.registry.ForEachTool("send_file", injectStore)
 }
 
 // SetTranscriber injects a voice transcriber for agent-level audio transcription.
@@ -1167,7 +1189,7 @@ func (al *AgentLoop) runLLMIteration(
 			}
 
 			// If tool returned media refs, publish them as outbound media
-			if len(r.result.Media) > 0 && opts.SendResponse {
+			if len(r.result.Media) > 0 {
 				parts := make([]bus.MediaPart, 0, len(r.result.Media))
 				for _, ref := range r.result.Media {
 					part := bus.MediaPart{Ref: ref}
diff --git a/pkg/agent/registry.go b/pkg/agent/registry.go
index 77b846832..0e7973dc3 100644
--- a/pkg/agent/registry.go
+++ b/pkg/agent/registry.go
@@ -7,6 +7,7 @@ import (
 	"github.com/sipeed/picoclaw/pkg/logger"
 	"github.com/sipeed/picoclaw/pkg/providers"
 	"github.com/sipeed/picoclaw/pkg/routing"
+	"github.com/sipeed/picoclaw/pkg/tools"
 )
 
 // AgentRegistry manages multiple agent instances and routes messages to them.
@@ -100,6 +101,23 @@ func (r *AgentRegistry) CanSpawnSubagent(parentAgentID, targetAgentID string) bo
 	return false
 }
 
+// ForEachTool calls fn for every tool registered under the given name
+// across all agents. This is useful for propagating dependencies (e.g.
+// MediaStore) to tools after registry construction.
+//
+// fn runs while the registry read lock is held, so fn must not attempt to
+// take the registry's write lock. Agents that have no tool registered under
+// name are skipped.
+func (r *AgentRegistry) ForEachTool(name string, fn func(tools.Tool)) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	for _, agent := range r.agents {
+		if t, ok := agent.Tools.Get(name); ok {
+			fn(t)
+		}
+	}
+}
+
 // GetDefaultAgent returns the default agent instance.
 func (r *AgentRegistry) GetDefaultAgent() *AgentInstance {
 	r.mu.RLock()
diff --git a/pkg/config/config.go b/pkg/config/config.go
index cff81a3a7..72af3e2fb 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -640,6 +640,7 @@ type ToolsConfig struct {
 	ListDir  ToolConfig `json:"list_dir" envPrefix:"PICOCLAW_TOOLS_LIST_DIR_"`
 	Message  ToolConfig `json:"message" envPrefix:"PICOCLAW_TOOLS_MESSAGE_"`
 	ReadFile ToolConfig `json:"read_file" envPrefix:"PICOCLAW_TOOLS_READ_FILE_"`
+	SendFile ToolConfig `json:"send_file" envPrefix:"PICOCLAW_TOOLS_SEND_FILE_"`
 	Spawn    ToolConfig `json:"spawn" envPrefix:"PICOCLAW_TOOLS_SPAWN_"`
 	SPI      ToolConfig `json:"spi" envPrefix:"PICOCLAW_TOOLS_SPI_"`
 	Subagent ToolConfig `json:"subagent" envPrefix:"PICOCLAW_TOOLS_SUBAGENT_"`
@@ -913,6 +914,8 @@ func (t *ToolsConfig) IsToolEnabled(name string) bool {
 		return t.Subagent.Enabled
 	case "web_fetch":
 		return t.WebFetch.Enabled
+	case "send_file":
+		return t.SendFile.Enabled
 	case "write_file":
 		return t.WriteFile.Enabled
 	case "mcp":
diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go
index c4c04d41a..1902480c5 100644
--- a/pkg/config/defaults.go
+++ b/pkg/config/defaults.go
@@ -404,6 +404,9 @@ func DefaultConfig() *Config {
 					TTLSeconds: 300,
 				},
 			},
+			SendFile: ToolConfig{
+				Enabled: true,
+			},
 			MCP: MCPConfig{
 				ToolConfig: ToolConfig{
 					Enabled: false,
diff --git a/pkg/tools/send_file.go b/pkg/tools/send_file.go
new file mode 100644
index 000000000..1a03e58ed
--- /dev/null
+++ b/pkg/tools/send_file.go
@@ -0,0 +1,189 @@
+package tools
+
+import (
+	"context"
+	"fmt"
+	"mime"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/h2non/filetype"
+
+	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/media"
+)
+
+// SendFileTool allows the LLM to send a local file (image, document, etc.)
+// to the user on the current chat channel via the MediaStore pipeline.
+//
+// SetContext and SetMediaStore assign fields without synchronization, so
+// callers must not invoke them concurrently with Execute.
+type SendFileTool struct {
+	workspace   string
+	restrict    bool
+	maxFileSize int
+	mediaStore  media.MediaStore
+
+	defaultChannel string
+	defaultChatID  string
+}
+
+// NewSendFileTool creates a SendFileTool for the given workspace. A
+// non-positive maxFileSize selects config.DefaultMaxMediaSize. store may be
+// nil, in which case Execute fails until SetMediaStore provides one.
+func NewSendFileTool(workspace string, restrict bool, maxFileSize int, store media.MediaStore) *SendFileTool {
+	if maxFileSize <= 0 {
+		maxFileSize = config.DefaultMaxMediaSize
+	}
+	return &SendFileTool{
+		workspace:   workspace,
+		restrict:    restrict,
+		maxFileSize: maxFileSize,
+		mediaStore:  store,
+	}
+}
+
+// Name returns the name the tool is registered under.
+func (t *SendFileTool) Name() string { return "send_file" }
+
+// Description returns a one-line summary of what the tool does.
+func (t *SendFileTool) Description() string {
+	return "Send a local file (image, document, etc.) to the user on the current chat channel."
+}
+
+// Parameters returns the JSON schema of the arguments accepted by Execute:
+// a required "path" and an optional "filename".
+func (t *SendFileTool) Parameters() map[string]any {
+	return map[string]any{
+		"type": "object",
+		"properties": map[string]any{
+			"path": map[string]any{
+				"type":        "string",
+				"description": "Path to the local file. Relative paths are resolved from workspace.",
+			},
+			"filename": map[string]any{
+				"type":        "string",
+				"description": "Optional display filename. Defaults to the basename of path.",
+			},
+		},
+		"required": []string{"path"},
+	}
+}
+
+// SetContext stores the channel and chat ID that Execute falls back to when
+// the execution context does not carry them.
+func (t *SendFileTool) SetContext(channel, chatID string) {
+	t.defaultChannel = channel
+	t.defaultChatID = chatID
+}
+
+// SetMediaStore sets the store that Execute registers files with.
+func (t *SendFileTool) SetMediaStore(store media.MediaStore) {
+	t.mediaStore = store
+}
+
+// Execute registers the file named by args["path"] with the media store and
+// returns a media result holding the resulting reference.
+//
+// args["filename"] optionally overrides the display name; only its final
+// path element is used. An error result is returned when the path is empty or
+// rejected by validatePath, when no channel/chat ID or media store is
+// available, when the file cannot be accessed, is not a regular file or
+// exceeds the size limit, or when the store fails.
+func (t *SendFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
+	path, _ := args["path"].(string)
+	if strings.TrimSpace(path) == "" {
+		return ErrorResult("path is required")
+	}
+
+	// Prefer context-injected channel/chatID (set by ExecuteWithContext), fall back to SetContext values.
+	channel := ToolChannel(ctx)
+	if channel == "" {
+		channel = t.defaultChannel
+	}
+	chatID := ToolChatID(ctx)
+	if chatID == "" {
+		chatID = t.defaultChatID
+	}
+	if channel == "" || chatID == "" {
+		return ErrorResult("no target channel/chat available")
+	}
+
+	if t.mediaStore == nil {
+		return ErrorResult("media store not configured")
+	}
+
+	resolved, err := validatePath(path, t.workspace, t.restrict)
+	if err != nil {
+		return ErrorResult(fmt.Sprintf("invalid path: %v", err))
+	}
+
+	info, err := os.Stat(resolved)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return ErrorResult(fmt.Sprintf("file not found: %v", err))
+		}
+		return ErrorResult(fmt.Sprintf("cannot access file: %v", err))
+	}
+	if info.IsDir() {
+		return ErrorResult("path is a directory, expected a file")
+	}
+	// Devices, FIFOs and sockets report a size that says nothing about how
+	// much data they yield, so the size limit below cannot guard them.
+	if !info.Mode().IsRegular() {
+		return ErrorResult("path is not a regular file")
+	}
+	if info.Size() > int64(t.maxFileSize) {
+		return ErrorResult(fmt.Sprintf(
+			"file too large: %d bytes (max %d bytes)",
+			info.Size(), t.maxFileSize,
+		))
+	}
+
+	requested, _ := args["filename"].(string)
+	filename := displayName(requested, resolved)
+
+	mediaType := detectMediaType(resolved)
+	scope := fmt.Sprintf("tool:send_file:%s:%s", channel, chatID)
+
+	ref, err := t.mediaStore.Store(resolved, media.MediaMeta{
+		Filename:    filename,
+		ContentType: mediaType,
+		Source:      "tool:send_file",
+	}, scope)
+	if err != nil {
+		return ErrorResult(fmt.Sprintf("failed to register media: %v", err))
+	}
+
+	return MediaResult(fmt.Sprintf("File %q sent to user", filename), []string{ref})
+}
+
+// displayName returns the final path element of requested, or the basename
+// of resolved when requested is blank or names no file.
+func displayName(requested, resolved string) string {
+	name := filepath.Base(strings.TrimSpace(requested))
+	switch name {
+	case ".", "..", string(filepath.Separator):
+		return filepath.Base(resolved)
+	}
+	return name
+}
+
+// detectMediaType determines the MIME type of a file.
+// Uses magic-bytes detection (h2non/filetype) first, then falls back to
+// extension-based lookup via mime.TypeByExtension.
+func detectMediaType(path string) string {
+	kind, err := filetype.MatchFile(path)
+	if err == nil && kind != filetype.Unknown {
+		return kind.MIME.Value
+	}
+
+	if ext := filepath.Ext(path); ext != "" {
+		if t := mime.TypeByExtension(ext); t != "" {
+			return t
+		}
+	}
+
+	return "application/octet-stream"
+}
diff --git a/pkg/tools/send_file_test.go b/pkg/tools/send_file_test.go
new file mode 100644
index 000000000..08d129674
--- /dev/null
+++ b/pkg/tools/send_file_test.go
@@ -0,0 +1,241 @@
+package tools
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+
+	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/media"
+)
+
+func TestSendFileTool_MissingPath(t *testing.T) {
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool("/tmp", false, 0, store)
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{})
+	if !result.IsError {
+		t.Fatal("expected error for missing path")
+	}
+}
+
+func TestSendFileTool_NoContext(t *testing.T) {
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool("/tmp", false, 0, store)
+	// no SetContext call
+
+	result := tool.Execute(context.Background(), map[string]any{"path": "/tmp/test.txt"})
+	if !result.IsError {
+		t.Fatal("expected error when no channel context")
+	}
+}
+
+func TestSendFileTool_NoMediaStore(t *testing.T) {
+	tool := NewSendFileTool("/tmp", false, 0, nil)
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{"path": "/tmp/test.txt"})
+	if !result.IsError {
+		t.Fatal("expected error when no media store")
+	}
+}
+
+func TestSendFileTool_Directory(t *testing.T) {
+	// A temp dir is guaranteed to exist, unlike a hard-coded /tmp.
+	dir := t.TempDir()
+
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool(dir, false, 0, store)
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{"path": dir})
+	if !result.IsError {
+		t.Fatal("expected error for directory path")
+	}
+	if !strings.Contains(result.ForLLM, "directory") {
+		t.Errorf("expected 'directory' in error, got %q", result.ForLLM)
+	}
+}
+
+func TestSendFileTool_FileNotFound(t *testing.T) {
+	dir := t.TempDir()
+
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool(dir, false, 0, store)
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{
+		"path": filepath.Join(dir, "missing.txt"),
+	})
+	if !result.IsError {
+		t.Fatal("expected error for missing file")
+	}
+	if !strings.Contains(result.ForLLM, "file not found") {
+		t.Errorf("expected 'file not found' in error, got %q", result.ForLLM)
+	}
+}
+
+func TestSendFileTool_NonRegularFile(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("os.DevNull is a reserved name rather than a file path on Windows")
+	}
+
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool(t.TempDir(), false, 0, store)
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{"path": os.DevNull})
+	if !result.IsError {
+		t.Fatal("expected error for device file")
+	}
+	if !strings.Contains(result.ForLLM, "regular file") {
+		t.Errorf("expected 'regular file' in error, got %q", result.ForLLM)
+	}
+}
+
+func TestSendFileTool_FileTooLarge(t *testing.T) {
+	dir := t.TempDir()
+	testFile := filepath.Join(dir, "big.bin")
+	// Create a file larger than the limit
+	if err := os.WriteFile(testFile, make([]byte, 1024), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool(dir, false, 512, store) // 512 byte limit
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{"path": testFile})
+	if !result.IsError {
+		t.Fatal("expected error for oversized file")
+	}
+	if !strings.Contains(result.ForLLM, "too large") {
+		t.Errorf("expected 'too large' in error, got %q", result.ForLLM)
+	}
+}
+
+func TestSendFileTool_DefaultMaxSize(t *testing.T) {
+	tool := NewSendFileTool("/tmp", false, 0, nil)
+	if tool.maxFileSize != config.DefaultMaxMediaSize {
+		t.Errorf("expected default max size %d, got %d", config.DefaultMaxMediaSize, tool.maxFileSize)
+	}
+}
+
+func TestSendFileTool_Success(t *testing.T) {
+	dir := t.TempDir()
+	testFile := filepath.Join(dir, "photo.png")
+	if err := os.WriteFile(testFile, []byte("fake png"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool(dir, false, 0, store)
+	tool.SetContext("feishu", "chat123")
+
+	result := tool.Execute(context.Background(), map[string]any{"path": testFile})
+	if result.IsError {
+		t.Fatalf("unexpected error: %s", result.ForLLM)
+	}
+	if len(result.Media) != 1 {
+		t.Fatalf("expected 1 media ref, got %d", len(result.Media))
+	}
+	// HasPrefix cannot panic on a ref shorter than the prefix, unlike slicing.
+	if !strings.HasPrefix(result.Media[0], "media://") {
+		t.Errorf("expected media:// ref, got %q", result.Media[0])
+	}
+}
+
+func TestSendFileTool_CustomFilename(t *testing.T) {
+	dir := t.TempDir()
+	testFile := filepath.Join(dir, "img.jpg")
+	if err := os.WriteFile(testFile, []byte("fake jpg"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	store := media.NewFileMediaStore()
+	tool := NewSendFileTool(dir, false, 0, store)
+	tool.SetContext("telegram", "chat456")
+
+	result := tool.Execute(context.Background(), map[string]any{
+		"path":     testFile,
+		"filename": "my-photo.jpg",
+	})
+	if result.IsError {
+		t.Fatalf("unexpected error: %s", result.ForLLM)
+	}
+	if len(result.Media) != 1 {
+		t.Fatalf("expected 1 media ref, got %d", len(result.Media))
+	}
+}
+
+func TestDisplayName(t *testing.T) {
+	resolved := filepath.Join("work", "img.jpg")
+	cases := []struct {
+		requested string
+		want      string
+	}{
+		{"", "img.jpg"},
+		{"   ", "img.jpg"},
+		{"my-photo.jpg", "my-photo.jpg"},
+		{"  my-photo.jpg  ", "my-photo.jpg"},
+		{filepath.Join("nested", "dir", "report.pdf"), "report.pdf"},
+		{"..", "img.jpg"},
+	}
+	for _, tc := range cases {
+		if got := displayName(tc.requested, resolved); got != tc.want {
+			t.Errorf("displayName(%q) = %q, want %q", tc.requested, got, tc.want)
+		}
+	}
+}
+
+func TestDetectMediaType_MagicBytes(t *testing.T) {
+	dir := t.TempDir()
+
+	// Minimal valid PNG header
+	pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}
+	pngFile := filepath.Join(dir, "image.dat") // wrong extension, but valid PNG bytes
+	if err := os.WriteFile(pngFile, pngHeader, 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	got := detectMediaType(pngFile)
+	if got != "image/png" {
+		t.Errorf("expected image/png from magic bytes, got %q", got)
+	}
+}
+
+func TestDetectMediaType_FallbackToExtension(t *testing.T) {
+	dir := t.TempDir()
+
+	// Unrecognizable content with an extension from Go's built-in MIME table,
+	// so the lookup does not depend on the host's mime.types files.
+	jsonFile := filepath.Join(dir, "data.json")
+	if err := os.WriteFile(jsonFile, []byte(`{"hello":"world"}`), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	got := detectMediaType(jsonFile)
+	// Just verify it's not application/octet-stream
+	if got == "application/octet-stream" {
+		t.Errorf("expected extension-based MIME for .json, got %q", got)
+	}
+}
+
+func TestDetectMediaType_UnknownFallsToOctetStream(t *testing.T) {
+	dir := t.TempDir()
+
+	// File with no extension and random bytes
+	unknownFile := filepath.Join(dir, "mystery")
+	if err := os.WriteFile(unknownFile, []byte{0x00, 0x01, 0x02}, 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	got := detectMediaType(unknownFile)
+	if got != "application/octet-stream" {
+		t.Errorf("expected application/octet-stream, got %q", got)
+	}
+}