mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
feat(feishu,tools): add outbound media delivery via send_file tool (#1156)
* feat(feishu): implement SendMedia and add send_file tool Add outbound media support for the Feishu channel so the agent can send images and files to users via the MediaStore pipeline. Feishu channel: - SendMedia dispatches media parts as image or file uploads - sendImage uploads via Image.Create then sends image message - sendFile uploads via File.Create then sends file message - feishuFileType maps extensions to Feishu file_type values send_file tool: - New tool lets the LLM send a local file to the current chat - Validates path, registers file in MediaStore, returns media ref - Agent loop wires tool registration, MediaStore propagation, and context updates Tested on Radxa Cubie A7A (arm64) with Feishu websocket channel. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(agent): publish outbound media regardless of SendResponse flag The SendResponse flag controls whether the agent loop publishes the final text response (callers that publish it themselves set this to false). However, the media publish path was also gated behind this flag, which meant tool-produced media was silently dropped for normal channel messages. Media should be published immediately when a tool returns media refs, independent of how the text response is delivered. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(tools): use magic-bytes MIME detection and add file size limit to send_file - Replace hardcoded extension-to-MIME map with h2non/filetype (magic bytes) + mime.TypeByExtension fallback, consistent with the vision pipeline in resolveMediaRefs - Add configurable max file size check (defaults to config.DefaultMaxMediaSize, 20 MB) to prevent oversized uploads - Add tests for magic-bytes detection, extension fallback, size limit, and default max size Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor(agent): add ForEachTool to AgentRegistry for cross-agent tool lookup Extract the pattern of iterating agents to find a named tool into AgentRegistry.ForEachTool, simplifying SetMediaStore propagation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(agent,tools): adapt send_file to ctx-based channel injection after upstream refactor Replace ContextualTool interface (removed upstream) with direct ctx reading in SendFileTool.Execute, using ToolChannel/ToolChatID helpers. Remove updateToolContexts which is no longer needed since ExecuteWithContext already injects channel/chatID into ctx for all tools. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat(tools): support toggling send_file tool via config Add SendFileConfig with Enabled field to ToolsConfig, defaulting to true. Wrap send_file tool registration in loop.go with the config check, consistent with the pattern used by other tools. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+19
-1
@@ -183,6 +183,17 @@ func registerSharedTools(
|
||||
agent.Tools.Register(messageTool)
|
||||
}
|
||||
|
||||
// Send file tool (outbound media via MediaStore — store injected later by SetMediaStore)
|
||||
if cfg.Tools.IsToolEnabled("send_file") {
|
||||
sendFileTool := tools.NewSendFileTool(
|
||||
agent.Workspace,
|
||||
cfg.Agents.Defaults.RestrictToWorkspace,
|
||||
cfg.Agents.Defaults.GetMaxMediaSize(),
|
||||
nil,
|
||||
)
|
||||
agent.Tools.Register(sendFileTool)
|
||||
}
|
||||
|
||||
// Skill discovery and installation tools
|
||||
skills_enabled := cfg.Tools.IsToolEnabled("skills")
|
||||
find_skills_enable := cfg.Tools.IsToolEnabled("find_skills")
|
||||
@@ -384,6 +395,13 @@ func (al *AgentLoop) SetChannelManager(cm *channels.Manager) {
|
||||
// SetMediaStore injects a MediaStore for media lifecycle management.
|
||||
func (al *AgentLoop) SetMediaStore(s media.MediaStore) {
|
||||
al.mediaStore = s
|
||||
|
||||
// Propagate store to send_file tools in all agents.
|
||||
al.registry.ForEachTool("send_file", func(t tools.Tool) {
|
||||
if sf, ok := t.(*tools.SendFileTool); ok {
|
||||
sf.SetMediaStore(s)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// SetTranscriber injects a voice transcriber for agent-level audio transcription.
|
||||
@@ -1167,7 +1185,7 @@ func (al *AgentLoop) runLLMIteration(
|
||||
}
|
||||
|
||||
// If tool returned media refs, publish them as outbound media
|
||||
if len(r.result.Media) > 0 && opts.SendResponse {
|
||||
if len(r.result.Media) > 0 {
|
||||
parts := make([]bus.MediaPart, 0, len(r.result.Media))
|
||||
for _, ref := range r.result.Media {
|
||||
part := bus.MediaPart{Ref: ref}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/providers"
|
||||
"github.com/sipeed/picoclaw/pkg/routing"
|
||||
"github.com/sipeed/picoclaw/pkg/tools"
|
||||
)
|
||||
|
||||
// AgentRegistry manages multiple agent instances and routes messages to them.
|
||||
@@ -100,6 +101,19 @@ func (r *AgentRegistry) CanSpawnSubagent(parentAgentID, targetAgentID string) bo
|
||||
return false
|
||||
}
|
||||
|
||||
// ForEachTool calls fn for every tool registered under the given name
|
||||
// across all agents. This is useful for propagating dependencies (e.g.
|
||||
// MediaStore) to tools after registry construction.
|
||||
func (r *AgentRegistry) ForEachTool(name string, fn func(tools.Tool)) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
for _, agent := range r.agents {
|
||||
if t, ok := agent.Tools.Get(name); ok {
|
||||
fn(t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetDefaultAgent returns the default agent instance.
|
||||
func (r *AgentRegistry) GetDefaultAgent() *AgentInstance {
|
||||
r.mu.RLock()
|
||||
|
||||
@@ -640,6 +640,7 @@ type ToolsConfig struct {
|
||||
ListDir ToolConfig `json:"list_dir" envPrefix:"PICOCLAW_TOOLS_LIST_DIR_"`
|
||||
Message ToolConfig `json:"message" envPrefix:"PICOCLAW_TOOLS_MESSAGE_"`
|
||||
ReadFile ToolConfig `json:"read_file" envPrefix:"PICOCLAW_TOOLS_READ_FILE_"`
|
||||
SendFile ToolConfig `json:"send_file" envPrefix:"PICOCLAW_TOOLS_SEND_FILE_"`
|
||||
Spawn ToolConfig `json:"spawn" envPrefix:"PICOCLAW_TOOLS_SPAWN_"`
|
||||
SPI ToolConfig `json:"spi" envPrefix:"PICOCLAW_TOOLS_SPI_"`
|
||||
Subagent ToolConfig `json:"subagent" envPrefix:"PICOCLAW_TOOLS_SUBAGENT_"`
|
||||
@@ -913,6 +914,8 @@ func (t *ToolsConfig) IsToolEnabled(name string) bool {
|
||||
return t.Subagent.Enabled
|
||||
case "web_fetch":
|
||||
return t.WebFetch.Enabled
|
||||
case "send_file":
|
||||
return t.SendFile.Enabled
|
||||
case "write_file":
|
||||
return t.WriteFile.Enabled
|
||||
case "mcp":
|
||||
|
||||
@@ -404,6 +404,9 @@ func DefaultConfig() *Config {
|
||||
TTLSeconds: 300,
|
||||
},
|
||||
},
|
||||
SendFile: ToolConfig{
|
||||
Enabled: true,
|
||||
},
|
||||
MCP: MCPConfig{
|
||||
ToolConfig: ToolConfig{
|
||||
Enabled: false,
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"mime"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/h2non/filetype"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/config"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
)
|
||||
|
||||
// SendFileTool allows the LLM to send a local file (image, document, etc.)
|
||||
// to the user on the current chat channel via the MediaStore pipeline.
|
||||
type SendFileTool struct {
|
||||
workspace string
|
||||
restrict bool
|
||||
maxFileSize int
|
||||
mediaStore media.MediaStore
|
||||
|
||||
defaultChannel string
|
||||
defaultChatID string
|
||||
}
|
||||
|
||||
func NewSendFileTool(workspace string, restrict bool, maxFileSize int, store media.MediaStore) *SendFileTool {
|
||||
if maxFileSize <= 0 {
|
||||
maxFileSize = config.DefaultMaxMediaSize
|
||||
}
|
||||
return &SendFileTool{
|
||||
workspace: workspace,
|
||||
restrict: restrict,
|
||||
maxFileSize: maxFileSize,
|
||||
mediaStore: store,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the identifier under which the tool is registered.
func (t *SendFileTool) Name() string {
	const toolName = "send_file"
	return toolName
}
|
||||
func (t *SendFileTool) Description() string {
|
||||
return "Send a local file (image, document, etc.) to the user on the current chat channel."
|
||||
}
|
||||
|
||||
func (t *SendFileTool) Parameters() map[string]any {
|
||||
return map[string]any{
|
||||
"type": "object",
|
||||
"properties": map[string]any{
|
||||
"path": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Path to the local file. Relative paths are resolved from workspace.",
|
||||
},
|
||||
"filename": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Optional display filename. Defaults to the basename of path.",
|
||||
},
|
||||
},
|
||||
"required": []string{"path"},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *SendFileTool) SetContext(channel, chatID string) {
|
||||
t.defaultChannel = channel
|
||||
t.defaultChatID = chatID
|
||||
}
|
||||
|
||||
func (t *SendFileTool) SetMediaStore(store media.MediaStore) {
|
||||
t.mediaStore = store
|
||||
}
|
||||
|
||||
func (t *SendFileTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
||||
path, _ := args["path"].(string)
|
||||
if strings.TrimSpace(path) == "" {
|
||||
return ErrorResult("path is required")
|
||||
}
|
||||
|
||||
// Prefer context-injected channel/chatID (set by ExecuteWithContext), fall back to SetContext values.
|
||||
channel := ToolChannel(ctx)
|
||||
if channel == "" {
|
||||
channel = t.defaultChannel
|
||||
}
|
||||
chatID := ToolChatID(ctx)
|
||||
if chatID == "" {
|
||||
chatID = t.defaultChatID
|
||||
}
|
||||
if channel == "" || chatID == "" {
|
||||
return ErrorResult("no target channel/chat available")
|
||||
}
|
||||
|
||||
if t.mediaStore == nil {
|
||||
return ErrorResult("media store not configured")
|
||||
}
|
||||
|
||||
resolved, err := validatePath(path, t.workspace, t.restrict)
|
||||
if err != nil {
|
||||
return ErrorResult(fmt.Sprintf("invalid path: %v", err))
|
||||
}
|
||||
|
||||
info, err := os.Stat(resolved)
|
||||
if err != nil {
|
||||
return ErrorResult(fmt.Sprintf("file not found: %v", err))
|
||||
}
|
||||
if info.IsDir() {
|
||||
return ErrorResult("path is a directory, expected a file")
|
||||
}
|
||||
if info.Size() > int64(t.maxFileSize) {
|
||||
return ErrorResult(fmt.Sprintf(
|
||||
"file too large: %d bytes (max %d bytes)",
|
||||
info.Size(), t.maxFileSize,
|
||||
))
|
||||
}
|
||||
|
||||
filename, _ := args["filename"].(string)
|
||||
if filename == "" {
|
||||
filename = filepath.Base(resolved)
|
||||
}
|
||||
|
||||
mediaType := detectMediaType(resolved)
|
||||
scope := fmt.Sprintf("tool:send_file:%s:%s", channel, chatID)
|
||||
|
||||
ref, err := t.mediaStore.Store(resolved, media.MediaMeta{
|
||||
Filename: filename,
|
||||
ContentType: mediaType,
|
||||
Source: "tool:send_file",
|
||||
}, scope)
|
||||
if err != nil {
|
||||
return ErrorResult(fmt.Sprintf("failed to register media: %v", err))
|
||||
}
|
||||
|
||||
return MediaResult(fmt.Sprintf("File %q sent to user", filename), []string{ref})
|
||||
}
|
||||
|
||||
// detectMediaType determines the MIME type of a file.
|
||||
// Uses magic-bytes detection (h2non/filetype) first, then falls back to
|
||||
// extension-based lookup via mime.TypeByExtension.
|
||||
func detectMediaType(path string) string {
|
||||
kind, err := filetype.MatchFile(path)
|
||||
if err == nil && kind != filetype.Unknown {
|
||||
return kind.MIME.Value
|
||||
}
|
||||
|
||||
if ext := filepath.Ext(path); ext != "" {
|
||||
if t := mime.TypeByExtension(ext); t != "" {
|
||||
return t
|
||||
}
|
||||
}
|
||||
|
||||
return "application/octet-stream"
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/config"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
)
|
||||
|
||||
func TestSendFileTool_MissingPath(t *testing.T) {
|
||||
store := media.NewFileMediaStore()
|
||||
tool := NewSendFileTool("/tmp", false, 0, store)
|
||||
tool.SetContext("feishu", "chat123")
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{})
|
||||
if !result.IsError {
|
||||
t.Fatal("expected error for missing path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_NoContext(t *testing.T) {
|
||||
store := media.NewFileMediaStore()
|
||||
tool := NewSendFileTool("/tmp", false, 0, store)
|
||||
// no SetContext call
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{"path": "/tmp/test.txt"})
|
||||
if !result.IsError {
|
||||
t.Fatal("expected error when no channel context")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_NoMediaStore(t *testing.T) {
|
||||
tool := NewSendFileTool("/tmp", false, 0, nil)
|
||||
tool.SetContext("feishu", "chat123")
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{"path": "/tmp/test.txt"})
|
||||
if !result.IsError {
|
||||
t.Fatal("expected error when no media store")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_Directory(t *testing.T) {
|
||||
store := media.NewFileMediaStore()
|
||||
tool := NewSendFileTool("/tmp", false, 0, store)
|
||||
tool.SetContext("feishu", "chat123")
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{"path": "/tmp"})
|
||||
if !result.IsError {
|
||||
t.Fatal("expected error for directory path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_FileTooLarge(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
testFile := filepath.Join(dir, "big.bin")
|
||||
// Create a file larger than the limit
|
||||
if err := os.WriteFile(testFile, make([]byte, 1024), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store := media.NewFileMediaStore()
|
||||
tool := NewSendFileTool(dir, false, 512, store) // 512 byte limit
|
||||
tool.SetContext("feishu", "chat123")
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{"path": testFile})
|
||||
if !result.IsError {
|
||||
t.Fatal("expected error for oversized file")
|
||||
}
|
||||
if !strings.Contains(result.ForLLM, "too large") {
|
||||
t.Errorf("expected 'too large' in error, got %q", result.ForLLM)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_DefaultMaxSize(t *testing.T) {
|
||||
tool := NewSendFileTool("/tmp", false, 0, nil)
|
||||
if tool.maxFileSize != config.DefaultMaxMediaSize {
|
||||
t.Errorf("expected default max size %d, got %d", config.DefaultMaxMediaSize, tool.maxFileSize)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_Success(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
testFile := filepath.Join(dir, "photo.png")
|
||||
if err := os.WriteFile(testFile, []byte("fake png"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store := media.NewFileMediaStore()
|
||||
tool := NewSendFileTool(dir, false, 0, store)
|
||||
tool.SetContext("feishu", "chat123")
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{"path": testFile})
|
||||
if result.IsError {
|
||||
t.Fatalf("unexpected error: %s", result.ForLLM)
|
||||
}
|
||||
if len(result.Media) != 1 {
|
||||
t.Fatalf("expected 1 media ref, got %d", len(result.Media))
|
||||
}
|
||||
if result.Media[0][:8] != "media://" {
|
||||
t.Errorf("expected media:// ref, got %q", result.Media[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendFileTool_CustomFilename(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
testFile := filepath.Join(dir, "img.jpg")
|
||||
if err := os.WriteFile(testFile, []byte("fake jpg"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store := media.NewFileMediaStore()
|
||||
tool := NewSendFileTool(dir, false, 0, store)
|
||||
tool.SetContext("telegram", "chat456")
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{
|
||||
"path": testFile,
|
||||
"filename": "my-photo.jpg",
|
||||
})
|
||||
if result.IsError {
|
||||
t.Fatalf("unexpected error: %s", result.ForLLM)
|
||||
}
|
||||
if len(result.Media) != 1 {
|
||||
t.Fatalf("expected 1 media ref, got %d", len(result.Media))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectMediaType_MagicBytes(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Minimal valid PNG header
|
||||
pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}
|
||||
pngFile := filepath.Join(dir, "image.dat") // wrong extension, but valid PNG bytes
|
||||
if err := os.WriteFile(pngFile, pngHeader, 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
got := detectMediaType(pngFile)
|
||||
if got != "image/png" {
|
||||
t.Errorf("expected image/png from magic bytes, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectMediaType_FallbackToExtension(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// File with unrecognizable content but known extension
|
||||
txtFile := filepath.Join(dir, "readme.txt")
|
||||
if err := os.WriteFile(txtFile, []byte("hello world"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
got := detectMediaType(txtFile)
|
||||
// text/plain or similar — just verify it's not application/octet-stream
|
||||
if got == "application/octet-stream" {
|
||||
t.Errorf("expected extension-based MIME for .txt, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectMediaType_UnknownFallsToOctetStream(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// File with no extension and random bytes
|
||||
unknownFile := filepath.Join(dir, "mystery")
|
||||
if err := os.WriteFile(unknownFile, []byte{0x00, 0x01, 0x02}, 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
got := detectMediaType(unknownFile)
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("expected application/octet-stream, got %q", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user