mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
fix(tool): route binary outputs through the media pipeline.
This commit is contained in:
+269
-11
@@ -5,9 +5,13 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
)
|
||||
|
||||
// MCPManager defines the interface for MCP manager operations
|
||||
@@ -25,6 +29,7 @@ type MCPTool struct {
|
||||
manager MCPManager
|
||||
serverName string
|
||||
tool *mcp.Tool
|
||||
mediaStore media.MediaStore
|
||||
}
|
||||
|
||||
// NewMCPTool creates a new MCP tool wrapper
|
||||
@@ -36,6 +41,10 @@ func NewMCPTool(manager MCPManager, serverName string, tool *mcp.Tool) *MCPTool
|
||||
}
|
||||
}
|
||||
|
||||
func (t *MCPTool) SetMediaStore(store media.MediaStore) {
|
||||
t.mediaStore = store
|
||||
}
|
||||
|
||||
// sanitizeIdentifierComponent normalizes a string so it can be safely used
|
||||
// as part of a tool/function identifier for downstream providers.
|
||||
// It:
|
||||
@@ -218,13 +227,7 @@ func (t *MCPTool) Execute(ctx context.Context, args map[string]any) *ToolResult
|
||||
WithError(fmt.Errorf("MCP tool error: %s", errMsg))
|
||||
}
|
||||
|
||||
// Extract text content from result
|
||||
output := extractContentText(result.Content)
|
||||
|
||||
return &ToolResult{
|
||||
ForLLM: output,
|
||||
IsError: false,
|
||||
}
|
||||
return t.normalizeResultContent(ctx, result.Content)
|
||||
}
|
||||
|
||||
// extractContentText extracts text from MCP content array
|
||||
@@ -233,14 +236,269 @@ func extractContentText(content []mcp.Content) string {
|
||||
for _, c := range content {
|
||||
switch v := c.(type) {
|
||||
case *mcp.TextContent:
|
||||
parts = append(parts, v.Text)
|
||||
parts = append(parts, sanitizeToolLLMContent(v.Text))
|
||||
case *mcp.ImageContent:
|
||||
// For images, just indicate that an image was returned
|
||||
parts = append(parts, fmt.Sprintf("[Image: %s]", v.MIMEType))
|
||||
parts = append(parts, fmt.Sprintf("[Image: %s]", normalizedMIMEType(v.MIMEType)))
|
||||
case *mcp.AudioContent:
|
||||
parts = append(parts, fmt.Sprintf("[Audio: %s]", normalizedMIMEType(v.MIMEType)))
|
||||
case *mcp.ResourceLink:
|
||||
parts = append(parts, summarizeResourceLink(v))
|
||||
case *mcp.EmbeddedResource:
|
||||
parts = append(parts, summarizeEmbeddedResource(v))
|
||||
default:
|
||||
// For other content types, use string representation
|
||||
parts = append(parts, fmt.Sprintf("[Content: %T]", v))
|
||||
}
|
||||
}
|
||||
return strings.Join(parts, "\n")
|
||||
return sanitizeToolLLMContent(strings.Join(parts, "\n"))
|
||||
}
|
||||
|
||||
func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Content) *ToolResult {
|
||||
llmParts := make([]string, 0, len(content))
|
||||
mediaRefs := make([]string, 0, len(content))
|
||||
|
||||
for _, c := range content {
|
||||
switch v := c.(type) {
|
||||
case *mcp.TextContent:
|
||||
text := strings.TrimSpace(sanitizeToolLLMContent(v.Text))
|
||||
if text != "" {
|
||||
llmParts = append(llmParts, text)
|
||||
}
|
||||
case *mcp.ImageContent:
|
||||
ref, note := t.storeBinaryContent(
|
||||
ctx,
|
||||
"image",
|
||||
normalizedMIMEType(v.MIMEType),
|
||||
v.Data,
|
||||
v.Annotations,
|
||||
)
|
||||
if ref != "" {
|
||||
mediaRefs = append(mediaRefs, ref)
|
||||
}
|
||||
if note != "" {
|
||||
llmParts = append(llmParts, note)
|
||||
}
|
||||
case *mcp.AudioContent:
|
||||
ref, note := t.storeBinaryContent(
|
||||
ctx,
|
||||
"audio",
|
||||
normalizedMIMEType(v.MIMEType),
|
||||
v.Data,
|
||||
v.Annotations,
|
||||
)
|
||||
if ref != "" {
|
||||
mediaRefs = append(mediaRefs, ref)
|
||||
}
|
||||
if note != "" {
|
||||
llmParts = append(llmParts, note)
|
||||
}
|
||||
case *mcp.ResourceLink:
|
||||
llmParts = append(llmParts, summarizeResourceLink(v))
|
||||
case *mcp.EmbeddedResource:
|
||||
ref, note := t.storeEmbeddedResource(ctx, v)
|
||||
if ref != "" {
|
||||
mediaRefs = append(mediaRefs, ref)
|
||||
}
|
||||
if note != "" {
|
||||
llmParts = append(llmParts, note)
|
||||
}
|
||||
default:
|
||||
llmParts = append(llmParts, fmt.Sprintf("[MCP returned unsupported content type %T]", v))
|
||||
}
|
||||
}
|
||||
|
||||
result := &ToolResult{
|
||||
ForLLM: strings.Join(compactStrings(llmParts), "\n"),
|
||||
Media: mediaRefs,
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (t *MCPTool) storeEmbeddedResource(ctx context.Context, content *mcp.EmbeddedResource) (string, string) {
|
||||
if content == nil || content.Resource == nil {
|
||||
return "", "[MCP returned an embedded resource without data.]"
|
||||
}
|
||||
|
||||
resource := content.Resource
|
||||
if len(resource.Blob) > 0 {
|
||||
return t.storeBinaryContent(
|
||||
ctx,
|
||||
"resource",
|
||||
normalizedMIMEType(resource.MIMEType),
|
||||
resource.Blob,
|
||||
content.Annotations,
|
||||
)
|
||||
}
|
||||
|
||||
if strings.TrimSpace(resource.Text) != "" {
|
||||
return "", sanitizeToolLLMContent(resource.Text)
|
||||
}
|
||||
|
||||
return "", summarizeEmbeddedResource(content)
|
||||
}
|
||||
|
||||
func (t *MCPTool) storeBinaryContent(
|
||||
ctx context.Context,
|
||||
kind string,
|
||||
mimeType string,
|
||||
data []byte,
|
||||
annotations *mcp.Annotations,
|
||||
) (string, string) {
|
||||
if len(data) == 0 {
|
||||
return "", fmt.Sprintf("[MCP returned %s content (%s) but it was empty.]", kind, mimeType)
|
||||
}
|
||||
if !annotationsAllowUser(annotations) {
|
||||
return "", fmt.Sprintf(
|
||||
"[MCP returned %s content (%s) for non-user audience; omitted from model context.]",
|
||||
kind,
|
||||
mimeType,
|
||||
)
|
||||
}
|
||||
if t.mediaStore == nil {
|
||||
return "", fmt.Sprintf(
|
||||
"[MCP returned %s content (%s); omitted from model context because media delivery is unavailable.]",
|
||||
kind,
|
||||
mimeType,
|
||||
)
|
||||
}
|
||||
|
||||
channel := ToolChannel(ctx)
|
||||
chatID := ToolChatID(ctx)
|
||||
if channel == "" || chatID == "" {
|
||||
return "", fmt.Sprintf(
|
||||
"[MCP returned %s content (%s); omitted from model context because no target chat was available.]",
|
||||
kind,
|
||||
mimeType,
|
||||
)
|
||||
}
|
||||
|
||||
dir := media.TempDir()
|
||||
if err := os.MkdirAll(dir, 0o700); err != nil {
|
||||
return "", fmt.Sprintf("[MCP returned %s content (%s) but it could not be stored.]", kind, mimeType)
|
||||
}
|
||||
|
||||
ext := extensionForMIMEType(mimeType)
|
||||
tmpFile, err := os.CreateTemp(dir, "mcp-*"+ext)
|
||||
if err != nil {
|
||||
return "", fmt.Sprintf("[MCP returned %s content (%s) but it could not be stored.]", kind, mimeType)
|
||||
}
|
||||
tmpPath := tmpFile.Name()
|
||||
if _, err = tmpFile.Write(data); err != nil {
|
||||
_ = tmpFile.Close()
|
||||
_ = os.Remove(tmpPath)
|
||||
return "", fmt.Sprintf("[MCP returned %s content (%s) but it could not be stored.]", kind, mimeType)
|
||||
}
|
||||
if err = tmpFile.Close(); err != nil {
|
||||
_ = os.Remove(tmpPath)
|
||||
return "", fmt.Sprintf("[MCP returned %s content (%s) but it could not be stored.]", kind, mimeType)
|
||||
}
|
||||
|
||||
scope := fmt.Sprintf(
|
||||
"tool:mcp:%s:%s:%s:%d",
|
||||
sanitizeIdentifierComponent(t.serverName),
|
||||
channel,
|
||||
chatID,
|
||||
time.Now().UnixNano(),
|
||||
)
|
||||
filename := fmt.Sprintf(
|
||||
"%s_%s%s",
|
||||
sanitizeIdentifierComponent(t.serverName),
|
||||
sanitizeIdentifierComponent(t.tool.Name),
|
||||
ext,
|
||||
)
|
||||
|
||||
ref, err := t.mediaStore.Store(tmpPath, media.MediaMeta{
|
||||
Filename: filename,
|
||||
ContentType: mimeType,
|
||||
Source: fmt.Sprintf(
|
||||
"tool:mcp:%s:%s",
|
||||
sanitizeIdentifierComponent(t.serverName),
|
||||
sanitizeIdentifierComponent(t.tool.Name),
|
||||
),
|
||||
}, scope)
|
||||
if err != nil {
|
||||
_ = os.Remove(tmpPath)
|
||||
return "", fmt.Sprintf(
|
||||
"[MCP returned %s content (%s) but it could not be registered as media.]",
|
||||
kind,
|
||||
mimeType,
|
||||
)
|
||||
}
|
||||
|
||||
return ref, fmt.Sprintf(
|
||||
"[MCP returned %s content (%s); omitted from model context and stored as a local media artifact.]",
|
||||
kind,
|
||||
mimeType,
|
||||
)
|
||||
}
|
||||
|
||||
func summarizeResourceLink(content *mcp.ResourceLink) string {
|
||||
if content == nil {
|
||||
return "[MCP returned an empty resource link.]"
|
||||
}
|
||||
|
||||
parts := []string{"[MCP returned resource link"}
|
||||
if content.Name != "" {
|
||||
parts = append(parts, fmt.Sprintf("name=%q", content.Name))
|
||||
}
|
||||
if content.URI != "" {
|
||||
parts = append(parts, fmt.Sprintf("uri=%q", content.URI))
|
||||
}
|
||||
if content.MIMEType != "" {
|
||||
parts = append(parts, fmt.Sprintf("mime=%q", content.MIMEType))
|
||||
}
|
||||
if content.Description != "" {
|
||||
desc := strings.TrimSpace(content.Description)
|
||||
if len(desc) > 200 {
|
||||
desc = desc[:200] + "..."
|
||||
}
|
||||
parts = append(parts, fmt.Sprintf("description=%q", desc))
|
||||
}
|
||||
return strings.Join(parts, ", ") + "]"
|
||||
}
|
||||
|
||||
func summarizeEmbeddedResource(content *mcp.EmbeddedResource) string {
|
||||
if content == nil || content.Resource == nil {
|
||||
return "[MCP returned an embedded resource.]"
|
||||
}
|
||||
|
||||
resource := content.Resource
|
||||
if resource.URI != "" {
|
||||
return fmt.Sprintf(
|
||||
"[MCP returned embedded resource %q (%s).]",
|
||||
resource.URI,
|
||||
normalizedMIMEType(resource.MIMEType),
|
||||
)
|
||||
}
|
||||
return fmt.Sprintf("[MCP returned embedded resource (%s).]", normalizedMIMEType(resource.MIMEType))
|
||||
}
|
||||
|
||||
func annotationsAllowUser(annotations *mcp.Annotations) bool {
|
||||
if annotations == nil || len(annotations.Audience) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, audience := range annotations.Audience {
|
||||
if strings.EqualFold(string(audience), "user") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// normalizedMIMEType returns mimeType with surrounding whitespace removed,
// or "application/octet-stream" when nothing remains after trimming.
//
// Returning the trimmed value keeps padding out of the notes, file
// extension lookup and stored metadata that consume the result.
func normalizedMIMEType(mimeType string) string {
	if trimmed := strings.TrimSpace(mimeType); trimmed != "" {
		return trimmed
	}
	return "application/octet-stream"
}
|
||||
|
||||
// compactStrings returns a new slice holding the entries of parts that are
// not blank (empty or whitespace-only), in their original order. Kept
// entries are copied unchanged; the input slice is not modified.
func compactStrings(parts []string) []string {
	kept := make([]string, 0, len(parts))
	for i := range parts {
		if strings.TrimSpace(parts[i]) != "" {
			kept = append(kept, parts[i])
		}
	}
	return kept
}
|
||||
|
||||
Reference in New Issue
Block a user