fix(tool): route binary outputs through the media pipeline.

This commit is contained in:
afjcjsbx
2026-03-22 12:05:28 +01:00
parent c0bb8d6df9
commit df4f322f09
14 changed files with 1462 additions and 64 deletions
+269 -11
View File
@@ -5,9 +5,13 @@ import (
"encoding/json"
"fmt"
"hash/fnv"
"os"
"strings"
"time"
"github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/sipeed/picoclaw/pkg/media"
)
// MCPManager defines the interface for MCP manager operations
@@ -25,6 +29,7 @@ type MCPTool struct {
manager MCPManager
serverName string
tool *mcp.Tool
mediaStore media.MediaStore
}
// NewMCPTool creates a new MCP tool wrapper
@@ -36,6 +41,10 @@ func NewMCPTool(manager MCPManager, serverName string, tool *mcp.Tool) *MCPTool
}
}
// SetMediaStore sets the media store the tool uses to register binary
// content returned by the MCP server. While no store is set, binary content
// is reported to the model with a note and is not stored.
func (t *MCPTool) SetMediaStore(store media.MediaStore) {
	t.mediaStore = store
}
// sanitizeIdentifierComponent normalizes a string so it can be safely used
// as part of a tool/function identifier for downstream providers.
// It:
@@ -218,13 +227,7 @@ func (t *MCPTool) Execute(ctx context.Context, args map[string]any) *ToolResult
WithError(fmt.Errorf("MCP tool error: %s", errMsg))
}
// Extract text content from result
output := extractContentText(result.Content)
return &ToolResult{
ForLLM: output,
IsError: false,
}
return t.normalizeResultContent(ctx, result.Content)
}
// extractContentText extracts text from MCP content array
@@ -233,14 +236,269 @@ func extractContentText(content []mcp.Content) string {
for _, c := range content {
switch v := c.(type) {
case *mcp.TextContent:
parts = append(parts, v.Text)
parts = append(parts, sanitizeToolLLMContent(v.Text))
case *mcp.ImageContent:
// For images, just indicate that an image was returned
parts = append(parts, fmt.Sprintf("[Image: %s]", v.MIMEType))
parts = append(parts, fmt.Sprintf("[Image: %s]", normalizedMIMEType(v.MIMEType)))
case *mcp.AudioContent:
parts = append(parts, fmt.Sprintf("[Audio: %s]", normalizedMIMEType(v.MIMEType)))
case *mcp.ResourceLink:
parts = append(parts, summarizeResourceLink(v))
case *mcp.EmbeddedResource:
parts = append(parts, summarizeEmbeddedResource(v))
default:
// For other content types, use string representation
parts = append(parts, fmt.Sprintf("[Content: %T]", v))
}
}
return strings.Join(parts, "\n")
return sanitizeToolLLMContent(strings.Join(parts, "\n"))
}
// normalizeResultContent turns the content items of an MCP tool result into
// a ToolResult.
//
// Text items are sanitized, trimmed and forwarded to the model when
// non-empty. Image, audio and embedded-resource items are handed to the
// storage helpers, which return a media reference and/or a short note for
// the model. Resource links and unrecognized content types are replaced by
// a one-line summary.
//
// The returned ToolResult carries the notes joined by newlines in ForLLM and
// the collected media references in Media.
func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Content) *ToolResult {
	notes := make([]string, 0, len(content))
	refs := make([]string, 0, len(content))

	// record keeps whichever halves of a (ref, note) pair are non-empty.
	record := func(ref, note string) {
		if ref != "" {
			refs = append(refs, ref)
		}
		if note != "" {
			notes = append(notes, note)
		}
	}

	for _, item := range content {
		switch v := item.(type) {
		case *mcp.TextContent:
			if text := strings.TrimSpace(sanitizeToolLLMContent(v.Text)); text != "" {
				notes = append(notes, text)
			}
		case *mcp.ImageContent:
			record(t.storeBinaryContent(ctx, "image", normalizedMIMEType(v.MIMEType), v.Data, v.Annotations))
		case *mcp.AudioContent:
			record(t.storeBinaryContent(ctx, "audio", normalizedMIMEType(v.MIMEType), v.Data, v.Annotations))
		case *mcp.ResourceLink:
			notes = append(notes, summarizeResourceLink(v))
		case *mcp.EmbeddedResource:
			record(t.storeEmbeddedResource(ctx, v))
		default:
			notes = append(notes, fmt.Sprintf("[MCP returned unsupported content type %T]", v))
		}
	}

	return &ToolResult{
		ForLLM: strings.Join(compactStrings(notes), "\n"),
		Media:  refs,
	}
}
// storeEmbeddedResource handles one embedded resource from an MCP result and
// returns (media reference, note for the model); either may be empty.
//
//   - A resource carrying a blob is passed to storeBinaryContent.
//   - Otherwise, a resource with non-blank text yields that text, sanitized,
//     as the note.
//   - Otherwise a one-line summary of the resource is returned as the note.
//
// A nil content or nil Resource yields a fixed "without data" note.
func (t *MCPTool) storeEmbeddedResource(ctx context.Context, content *mcp.EmbeddedResource) (string, string) {
	if content == nil || content.Resource == nil {
		return "", "[MCP returned an embedded resource without data.]"
	}

	res := content.Resource
	switch {
	case len(res.Blob) > 0:
		return t.storeBinaryContent(
			ctx,
			"resource",
			normalizedMIMEType(res.MIMEType),
			res.Blob,
			content.Annotations,
		)
	case strings.TrimSpace(res.Text) != "":
		return "", sanitizeToolLLMContent(res.Text)
	default:
		return "", summarizeEmbeddedResource(content)
	}
}
// storeBinaryContent writes binary MCP content to a temporary file and
// registers it with the tool's media store.
//
// It returns (media reference, note for the model). The reference is
// non-empty only when the content was registered; the note always describes
// what happened, using kind and mimeType.
//
// Nothing is written, and only a note is returned, when the data is empty,
// when the annotations exclude the "user" audience, when no media store is
// set, or when the context carries no channel or chat ID. A temporary file
// that was created is removed again if writing, closing or registering
// fails.
func (t *MCPTool) storeBinaryContent(
	ctx context.Context,
	kind string,
	mimeType string,
	data []byte,
	annotations *mcp.Annotations,
) (string, string) {
	// note fills a message template with the content kind and MIME type.
	note := func(format string) string {
		return fmt.Sprintf(format, kind, mimeType)
	}
	const storeFailed = "[MCP returned %s content (%s) but it could not be stored.]"

	switch {
	case len(data) == 0:
		return "", note("[MCP returned %s content (%s) but it was empty.]")
	case !annotationsAllowUser(annotations):
		return "", note("[MCP returned %s content (%s) for non-user audience; omitted from model context.]")
	case t.mediaStore == nil:
		return "", note(
			"[MCP returned %s content (%s); omitted from model context because media delivery is unavailable.]",
		)
	}

	channel, chatID := ToolChannel(ctx), ToolChatID(ctx)
	if channel == "" || chatID == "" {
		return "", note(
			"[MCP returned %s content (%s); omitted from model context because no target chat was available.]",
		)
	}

	tmpDir := media.TempDir()
	if err := os.MkdirAll(tmpDir, 0o700); err != nil {
		return "", note(storeFailed)
	}

	ext := extensionForMIMEType(mimeType)
	file, err := os.CreateTemp(tmpDir, "mcp-*"+ext)
	if err != nil {
		return "", note(storeFailed)
	}
	path := file.Name()

	// The file is always closed; a failure in either step discards it.
	_, writeErr := file.Write(data)
	closeErr := file.Close()
	if writeErr != nil || closeErr != nil {
		_ = os.Remove(path) // best-effort cleanup of the partial file
		return "", note(storeFailed)
	}

	server := sanitizeIdentifierComponent(t.serverName)
	toolName := sanitizeIdentifierComponent(t.tool.Name)

	// The nanosecond timestamp gives every stored artifact its own scope.
	scope := fmt.Sprintf("tool:mcp:%s:%s:%s:%d", server, channel, chatID, time.Now().UnixNano())
	meta := media.MediaMeta{
		Filename:    fmt.Sprintf("%s_%s%s", server, toolName, ext),
		ContentType: mimeType,
		Source:      fmt.Sprintf("tool:mcp:%s:%s", server, toolName),
	}

	ref, err := t.mediaStore.Store(path, meta, scope)
	if err != nil {
		_ = os.Remove(path) // best-effort cleanup; the store did not take the file
		return "", note("[MCP returned %s content (%s) but it could not be registered as media.]")
	}

	return ref, note(
		"[MCP returned %s content (%s); omitted from model context and stored as a local media artifact.]",
	)
}
// summarizeResourceLink renders an MCP resource link as a single bracketed
// line for the model, listing whichever of name, uri, mime and description
// are set. Each value is quoted with %q.
//
// The description is trimmed of surrounding whitespace and omitted when
// nothing remains. Descriptions longer than 200 bytes are cut to at most 200
// bytes and suffixed with "...". The cut is moved back to the nearest rune
// boundary so a multi-byte UTF-8 character is never split.
//
// A nil link yields a fixed "empty resource link" message.
func summarizeResourceLink(content *mcp.ResourceLink) string {
	if content == nil {
		return "[MCP returned an empty resource link.]"
	}

	const maxDescriptionBytes = 200

	parts := []string{"[MCP returned resource link"}
	if content.Name != "" {
		parts = append(parts, fmt.Sprintf("name=%q", content.Name))
	}
	if content.URI != "" {
		parts = append(parts, fmt.Sprintf("uri=%q", content.URI))
	}
	if content.MIMEType != "" {
		parts = append(parts, fmt.Sprintf("mime=%q", content.MIMEType))
	}
	if desc := strings.TrimSpace(content.Description); desc != "" {
		if len(desc) > maxDescriptionBytes {
			// Ranging over a string yields the byte offset of each rune
			// start; keep the last one that does not exceed the limit.
			cut := 0
			for i := range desc {
				if i > maxDescriptionBytes {
					break
				}
				cut = i
			}
			desc = desc[:cut] + "..."
		}
		parts = append(parts, fmt.Sprintf("description=%q", desc))
	}

	return strings.Join(parts, ", ") + "]"
}
// summarizeEmbeddedResource renders an embedded MCP resource as a single
// bracketed line containing its MIME type and, when set, its quoted URI.
// A nil content or nil Resource yields a generic message without details.
func summarizeEmbeddedResource(content *mcp.EmbeddedResource) string {
	if content == nil || content.Resource == nil {
		return "[MCP returned an embedded resource.]"
	}

	uri := content.Resource.URI
	mime := normalizedMIMEType(content.Resource.MIMEType)
	if uri == "" {
		return fmt.Sprintf("[MCP returned embedded resource (%s).]", mime)
	}
	return fmt.Sprintf("[MCP returned embedded resource %q (%s).]", uri, mime)
}
// annotationsAllowUser reports whether content with the given annotations
// may be delivered to the user. Missing annotations or an empty audience
// list allow delivery; otherwise at least one audience entry must equal
// "user", compared case-insensitively.
func annotationsAllowUser(annotations *mcp.Annotations) bool {
	if annotations == nil {
		return true
	}

	audiences := annotations.Audience
	if len(audiences) == 0 {
		return true
	}

	for i := range audiences {
		if strings.EqualFold(string(audiences[i]), "user") {
			return true
		}
	}
	return false
}
// normalizedMIMEType returns mimeType with surrounding whitespace removed,
// or "application/octet-stream" when nothing remains after trimming.
func normalizedMIMEType(mimeType string) string {
	if trimmed := strings.TrimSpace(mimeType); trimmed != "" {
		return trimmed
	}
	return "application/octet-stream"
}
// compactStrings returns a new slice holding the elements of parts that are
// not empty or whitespace-only, in their original order. Kept elements are
// returned unmodified. The result is never nil.
func compactStrings(parts []string) []string {
	kept := make([]string, 0, len(parts))
	for i := range parts {
		if s := parts[i]; strings.TrimSpace(s) != "" {
			kept = append(kept, s)
		}
	}
	return kept
}