mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
feat(mcp): store oversized text results as artifacts (#2308)
* feat(mcp): store oversized text results as artifacts * feat(mcp): fix doc * fix(mcp): preserve raw MCP payload in text artifacts * fix(mcp): avoid leaking large text when artifact persistence fails * chore(mcp): clarify inline text limit and cover artifact edge cases
This commit is contained in:
@@ -528,6 +528,9 @@ For example:
|
||||
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
|
||||
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
|
||||
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
|
||||
- `PICOCLAW_TOOLS_MCP_MAX_INLINE_TEXT_CHARS=16384`
|
||||
|
||||
Note: Nested map-style config (for example `tools.mcp.servers.<name>.*`) is configured in `config.json` rather than
|
||||
environment variables.
|
||||
|
||||
For MCP tools, `tools.mcp.max_inline_text_chars` controls how much text result is kept inline in model context. The threshold is counted in Unicode characters (Go runes), not bytes. For example, `16384` means up to 16,384 characters inline, which may occupy more than 16 KB for multibyte text such as CJK. Above this threshold, PicoClaw saves the MCP text result as a local artifact in the agent workspace and gives the model a short note plus a structured `[file:...]` artifact path instead of injecting the full payload into context.
|
||||
|
||||
@@ -126,6 +126,8 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
|
||||
}
|
||||
|
||||
mcpTool := tools.NewMCPTool(mcpManager, serverName, tool)
|
||||
mcpTool.SetWorkspace(agent.Workspace)
|
||||
mcpTool.SetMaxInlineTextRunes(al.cfg.Tools.MCP.GetMaxInlineTextChars())
|
||||
|
||||
if registerAsHidden {
|
||||
agent.Tools.RegisterHidden(mcpTool)
|
||||
|
||||
@@ -943,10 +943,21 @@ type MCPServerConfig struct {
|
||||
type MCPConfig struct {
|
||||
ToolConfig ` envPrefix:"PICOCLAW_TOOLS_MCP_"`
|
||||
Discovery ToolDiscoveryConfig ` json:"discovery"`
|
||||
// MaxInlineTextChars controls how much MCP text stays inline before it is saved as an artifact.
|
||||
MaxInlineTextChars int `json:"max_inline_text_chars,omitempty" env:"PICOCLAW_TOOLS_MCP_MAX_INLINE_TEXT_CHARS"`
|
||||
// Servers is a map of server name to server configuration
|
||||
Servers map[string]MCPServerConfig `json:"servers,omitempty"`
|
||||
}
|
||||
|
||||
const DefaultMCPMaxInlineTextChars = 16 * 1024
|
||||
|
||||
func (c *MCPConfig) GetMaxInlineTextChars() int {
|
||||
if c.MaxInlineTextChars > 0 {
|
||||
return c.MaxInlineTextChars
|
||||
}
|
||||
return DefaultMCPMaxInlineTextChars
|
||||
}
|
||||
|
||||
func LoadConfig(path string) (*Config, error) {
|
||||
logger.Debugf("loading config from %s", path)
|
||||
|
||||
|
||||
@@ -198,6 +198,41 @@ func TestAgentConfig_FullParse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_MCPMaxInlineTextChars(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
if cfg.Tools.MCP.GetMaxInlineTextChars() != DefaultMCPMaxInlineTextChars {
|
||||
t.Fatalf(
|
||||
"DefaultConfig().Tools.MCP.GetMaxInlineTextChars() = %d, want %d",
|
||||
cfg.Tools.MCP.GetMaxInlineTextChars(),
|
||||
DefaultMCPMaxInlineTextChars,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_MCPMaxInlineTextChars(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
configPath := filepath.Join(dir, "config.json")
|
||||
raw := `{
|
||||
"tools": {
|
||||
"mcp": {
|
||||
"enabled": true,
|
||||
"max_inline_text_chars": 2048
|
||||
}
|
||||
}
|
||||
}`
|
||||
if err := os.WriteFile(configPath, []byte(raw), 0o644); err != nil {
|
||||
t.Fatalf("WriteFile(configPath): %v", err)
|
||||
}
|
||||
|
||||
cfg, err := LoadConfig(configPath)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadConfig() error: %v", err)
|
||||
}
|
||||
if got := cfg.Tools.MCP.GetMaxInlineTextChars(); got != 2048 {
|
||||
t.Fatalf("cfg.Tools.MCP.GetMaxInlineTextChars() = %d, want 2048", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfig_BackwardCompat_NoAgentsList(t *testing.T) {
|
||||
jsonData := `{
|
||||
"agents": {
|
||||
|
||||
@@ -462,7 +462,8 @@ func DefaultConfig() *Config {
|
||||
UseBM25: true,
|
||||
UseRegex: false,
|
||||
},
|
||||
Servers: map[string]MCPServerConfig{},
|
||||
MaxInlineTextChars: DefaultMCPMaxInlineTextChars,
|
||||
Servers: map[string]MCPServerConfig{},
|
||||
},
|
||||
AppendFile: ToolConfig{
|
||||
Enabled: true,
|
||||
|
||||
+115
-18
@@ -6,11 +6,14 @@ import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
)
|
||||
|
||||
@@ -26,18 +29,21 @@ type MCPManager interface {
|
||||
|
||||
// MCPTool wraps an MCP tool to implement the Tool interface
|
||||
type MCPTool struct {
|
||||
manager MCPManager
|
||||
serverName string
|
||||
tool *mcp.Tool
|
||||
mediaStore media.MediaStore
|
||||
manager MCPManager
|
||||
serverName string
|
||||
tool *mcp.Tool
|
||||
mediaStore media.MediaStore
|
||||
workspace string
|
||||
maxInlineTextRunes int
|
||||
}
|
||||
|
||||
// NewMCPTool creates a new MCP tool wrapper
|
||||
func NewMCPTool(manager MCPManager, serverName string, tool *mcp.Tool) *MCPTool {
|
||||
return &MCPTool{
|
||||
manager: manager,
|
||||
serverName: serverName,
|
||||
tool: tool,
|
||||
manager: manager,
|
||||
serverName: serverName,
|
||||
tool: tool,
|
||||
maxInlineTextRunes: maxMCPInlineTextRunes,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,6 +51,18 @@ func (t *MCPTool) SetMediaStore(store media.MediaStore) {
|
||||
t.mediaStore = store
|
||||
}
|
||||
|
||||
func (t *MCPTool) SetWorkspace(workspace string) {
|
||||
t.workspace = strings.TrimSpace(workspace)
|
||||
}
|
||||
|
||||
func (t *MCPTool) SetMaxInlineTextRunes(limit int) {
|
||||
if limit > 0 {
|
||||
t.maxInlineTextRunes = limit
|
||||
}
|
||||
}
|
||||
|
||||
const maxMCPInlineTextRunes = 16 * 1024
|
||||
|
||||
// sanitizeIdentifierComponent normalizes a string so it can be safely used
|
||||
// as part of a tool/function identifier for downstream providers.
|
||||
// It:
|
||||
@@ -255,14 +273,19 @@ func extractContentText(content []mcp.Content) string {
|
||||
|
||||
func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Content) *ToolResult {
|
||||
llmParts := make([]string, 0, len(content))
|
||||
rawTextParts := make([]string, 0, len(content))
|
||||
mediaRefs := make([]string, 0, len(content))
|
||||
|
||||
for _, c := range content {
|
||||
switch v := c.(type) {
|
||||
case *mcp.TextContent:
|
||||
text := strings.TrimSpace(sanitizeToolLLMContent(v.Text))
|
||||
if text != "" {
|
||||
llmParts = append(llmParts, text)
|
||||
rawText := strings.TrimSpace(v.Text)
|
||||
if rawText != "" {
|
||||
rawTextParts = append(rawTextParts, rawText)
|
||||
}
|
||||
safeText := strings.TrimSpace(sanitizeToolLLMContent(v.Text))
|
||||
if safeText != "" {
|
||||
llmParts = append(llmParts, safeText)
|
||||
}
|
||||
case *mcp.ImageContent:
|
||||
ref, note := t.storeBinaryContent(
|
||||
@@ -295,10 +318,13 @@ func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Cont
|
||||
case *mcp.ResourceLink:
|
||||
llmParts = append(llmParts, summarizeResourceLink(v))
|
||||
case *mcp.EmbeddedResource:
|
||||
ref, note := t.storeEmbeddedResource(ctx, v)
|
||||
ref, note, rawText := t.storeEmbeddedResource(ctx, v)
|
||||
if ref != "" {
|
||||
mediaRefs = append(mediaRefs, ref)
|
||||
}
|
||||
if rawText != "" {
|
||||
rawTextParts = append(rawTextParts, rawText)
|
||||
}
|
||||
if note != "" {
|
||||
llmParts = append(llmParts, note)
|
||||
}
|
||||
@@ -307,34 +333,105 @@ func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Cont
|
||||
}
|
||||
}
|
||||
|
||||
forLLM := strings.Join(compactStrings(llmParts), "\n")
|
||||
rawText := strings.Join(compactStrings(rawTextParts), "\n")
|
||||
if artifactResult := t.persistLargeTextArtifact(rawText); artifactResult != nil {
|
||||
artifactResult.Media = mediaRefs
|
||||
return artifactResult
|
||||
}
|
||||
|
||||
result := &ToolResult{
|
||||
ForLLM: strings.Join(compactStrings(llmParts), "\n"),
|
||||
ForLLM: forLLM,
|
||||
Media: mediaRefs,
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (t *MCPTool) storeEmbeddedResource(ctx context.Context, content *mcp.EmbeddedResource) (string, string) {
|
||||
func (t *MCPTool) persistLargeTextArtifact(text string) *ToolResult {
|
||||
text = strings.TrimSpace(text)
|
||||
limit := t.maxInlineTextRunes
|
||||
if limit <= 0 {
|
||||
limit = maxMCPInlineTextRunes
|
||||
}
|
||||
size := utf8.RuneCountInString(text)
|
||||
if text == "" || size <= limit || t.workspace == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
dir := filepath.Join(t.workspace, ".artifacts", "mcp")
|
||||
if err := os.MkdirAll(dir, 0o700); err != nil {
|
||||
return t.largeTextArtifactFallback(text, err)
|
||||
}
|
||||
// TODO: Add lifecycle cleanup/retention for MCP artifact files.
|
||||
|
||||
pattern := fmt.Sprintf(
|
||||
"%s_%s_*.txt",
|
||||
sanitizeIdentifierComponent(t.serverName),
|
||||
sanitizeIdentifierComponent(t.tool.Name),
|
||||
)
|
||||
tmpFile, err := os.CreateTemp(dir, pattern)
|
||||
if err != nil {
|
||||
return t.largeTextArtifactFallback(text, err)
|
||||
}
|
||||
path := tmpFile.Name()
|
||||
if _, err = tmpFile.WriteString(text); err != nil {
|
||||
_ = tmpFile.Close()
|
||||
_ = os.Remove(path)
|
||||
return t.largeTextArtifactFallback(text, err)
|
||||
}
|
||||
if err = tmpFile.Close(); err != nil {
|
||||
_ = os.Remove(path)
|
||||
return t.largeTextArtifactFallback(text, err)
|
||||
}
|
||||
|
||||
return &ToolResult{
|
||||
ForLLM: fmt.Sprintf(
|
||||
"[MCP returned a large text result (%d chars); omitted from model context and saved as a local artifact.]",
|
||||
size,
|
||||
),
|
||||
ArtifactTags: []string{"[file:" + path + "]"},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *MCPTool) largeTextArtifactFallback(text string, err error) *ToolResult {
|
||||
size := utf8.RuneCountInString(text)
|
||||
logger.WarnCF("tool", "Failed to persist large MCP text artifact", map[string]any{
|
||||
"server": t.serverName,
|
||||
"tool": t.tool.Name,
|
||||
"chars": size,
|
||||
"error": err.Error(),
|
||||
})
|
||||
return &ToolResult{
|
||||
ForLLM: fmt.Sprintf(
|
||||
"[MCP returned a large text result (%d chars); omitted from model context because artifact persistence failed.]",
|
||||
size,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *MCPTool) storeEmbeddedResource(ctx context.Context, content *mcp.EmbeddedResource) (string, string, string) {
|
||||
if content == nil || content.Resource == nil {
|
||||
return "", "[MCP returned an embedded resource without data.]"
|
||||
return "", "[MCP returned an embedded resource without data.]", ""
|
||||
}
|
||||
|
||||
resource := content.Resource
|
||||
if len(resource.Blob) > 0 {
|
||||
return t.storeBinaryContent(
|
||||
ref, note := t.storeBinaryContent(
|
||||
ctx,
|
||||
"resource",
|
||||
normalizedMIMEType(resource.MIMEType),
|
||||
resource.Blob,
|
||||
content.Annotations,
|
||||
)
|
||||
return ref, note, ""
|
||||
}
|
||||
|
||||
if strings.TrimSpace(resource.Text) != "" {
|
||||
return "", sanitizeToolLLMContent(resource.Text)
|
||||
rawText := strings.TrimSpace(resource.Text)
|
||||
if rawText != "" {
|
||||
return "", sanitizeToolLLMContent(resource.Text), rawText
|
||||
}
|
||||
|
||||
return "", summarizeEmbeddedResource(content)
|
||||
return "", summarizeEmbeddedResource(content), ""
|
||||
}
|
||||
|
||||
func (t *MCPTool) storeBinaryContent(
|
||||
|
||||
@@ -634,3 +634,177 @@ func TestMCPTool_Execute_LargeBase64TextIsOmittedFromContext(t *testing.T) {
|
||||
t.Fatalf("expected sanitized large base64 note, got %q", result.ForLLM)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPTool_Execute_LargeBase64TextArtifactPreservesRawPayload(t *testing.T) {
|
||||
workspace := t.TempDir()
|
||||
largeBase64 := strings.Repeat("QUJD", 400)
|
||||
manager := &MockMCPManager{
|
||||
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
&mcp.TextContent{Text: largeBase64},
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
|
||||
mcpTool.SetWorkspace(workspace)
|
||||
mcpTool.SetMaxInlineTextRunes(32)
|
||||
|
||||
result := mcpTool.Execute(context.Background(), nil)
|
||||
|
||||
if !strings.Contains(result.ForLLM, "saved as a local artifact") {
|
||||
t.Fatalf("expected artifact note, got %q", result.ForLLM)
|
||||
}
|
||||
if result.ForLLM == largeBase64OmittedMessage {
|
||||
t.Fatalf("expected artifact note instead of sanitized base64 placeholder")
|
||||
}
|
||||
if len(result.ArtifactTags) != 1 {
|
||||
t.Fatalf("expected 1 artifact tag, got %d", len(result.ArtifactTags))
|
||||
}
|
||||
tag := result.ArtifactTags[0]
|
||||
const prefix = "[file:"
|
||||
if !strings.HasPrefix(tag, prefix) || !strings.HasSuffix(tag, "]") {
|
||||
t.Fatalf("expected file artifact tag, got %q", tag)
|
||||
}
|
||||
path := strings.TrimSuffix(strings.TrimPrefix(tag, prefix), "]")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("expected artifact file to be readable: %v", err)
|
||||
}
|
||||
if string(data) != largeBase64 {
|
||||
t.Fatalf("expected artifact file contents to preserve raw MCP payload")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPTool_Execute_LargeTextStoredAsArtifact(t *testing.T) {
|
||||
workspace := t.TempDir()
|
||||
largeText := strings.Repeat("This is a large MCP text payload.\n", 800)
|
||||
manager := &MockMCPManager{
|
||||
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
&mcp.TextContent{Text: largeText},
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
|
||||
mcpTool.SetWorkspace(workspace)
|
||||
|
||||
result := mcpTool.Execute(context.Background(), nil)
|
||||
|
||||
if strings.Contains(result.ForLLM, "This is a large MCP text payload") {
|
||||
t.Fatalf("expected large MCP text to be omitted from ForLLM, got %q", result.ForLLM)
|
||||
}
|
||||
if !strings.Contains(result.ForLLM, "saved as a local artifact") {
|
||||
t.Fatalf("expected artifact note, got %q", result.ForLLM)
|
||||
}
|
||||
if len(result.ArtifactTags) != 1 {
|
||||
t.Fatalf("expected 1 artifact tag, got %d", len(result.ArtifactTags))
|
||||
}
|
||||
tag := result.ArtifactTags[0]
|
||||
const prefix = "[file:"
|
||||
if !strings.HasPrefix(tag, prefix) || !strings.HasSuffix(tag, "]") {
|
||||
t.Fatalf("expected file artifact tag, got %q", tag)
|
||||
}
|
||||
path := strings.TrimSuffix(strings.TrimPrefix(tag, prefix), "]")
|
||||
if !strings.HasPrefix(path, workspace) {
|
||||
t.Fatalf("expected artifact inside workspace, got %q", path)
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("expected artifact file to be readable: %v", err)
|
||||
}
|
||||
if string(data) != strings.TrimSpace(largeText) {
|
||||
t.Fatalf("expected artifact file contents to match source text")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPTool_Execute_CustomInlineTextThreshold(t *testing.T) {
|
||||
workspace := t.TempDir()
|
||||
text := strings.Repeat("small custom threshold text\n", 20)
|
||||
manager := &MockMCPManager{
|
||||
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
&mcp.TextContent{Text: text},
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
|
||||
mcpTool.SetWorkspace(workspace)
|
||||
mcpTool.SetMaxInlineTextRunes(32)
|
||||
|
||||
result := mcpTool.Execute(context.Background(), nil)
|
||||
|
||||
if len(result.ArtifactTags) != 1 {
|
||||
t.Fatalf("expected custom threshold to persist artifact, got %+v", result)
|
||||
}
|
||||
if strings.Contains(result.ForLLM, "small custom threshold text") {
|
||||
t.Fatalf("expected text to be omitted from ForLLM, got %q", result.ForLLM)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPTool_Execute_LargeTextArtifactFailureStillOmitsContext(t *testing.T) {
|
||||
workspaceRoot := t.TempDir()
|
||||
workspaceFile := filepath.Join(workspaceRoot, "not-a-directory")
|
||||
if err := os.WriteFile(workspaceFile, []byte("x"), 0o600); err != nil {
|
||||
t.Fatalf("failed to create workspace file: %v", err)
|
||||
}
|
||||
|
||||
largeText := strings.Repeat("This is a large MCP text payload.\n", 800)
|
||||
manager := &MockMCPManager{
|
||||
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
&mcp.TextContent{Text: largeText},
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
|
||||
mcpTool.SetWorkspace(workspaceFile)
|
||||
|
||||
result := mcpTool.Execute(context.Background(), nil)
|
||||
|
||||
if strings.Contains(result.ForLLM, "This is a large MCP text payload") {
|
||||
t.Fatalf("expected large MCP text to be omitted from ForLLM, got %q", result.ForLLM)
|
||||
}
|
||||
if !strings.Contains(result.ForLLM, "artifact persistence failed") {
|
||||
t.Fatalf("expected persistence failure note, got %q", result.ForLLM)
|
||||
}
|
||||
if len(result.ArtifactTags) != 0 {
|
||||
t.Fatalf("expected no artifact tags on persistence failure, got %+v", result.ArtifactTags)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPTool_Execute_WhitespaceWorkspaceDisablesArtifactPersistence(t *testing.T) {
|
||||
largeText := strings.Repeat("This is a large MCP text payload.\n", 800)
|
||||
manager := &MockMCPManager{
|
||||
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
&mcp.TextContent{Text: largeText},
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
|
||||
mcpTool.SetWorkspace(" \n\t ")
|
||||
|
||||
result := mcpTool.Execute(context.Background(), nil)
|
||||
|
||||
if len(result.ArtifactTags) != 0 {
|
||||
t.Fatalf("expected no artifact tags for whitespace workspace, got %+v", result.ArtifactTags)
|
||||
}
|
||||
if !strings.Contains(result.ForLLM, "This is a large MCP text payload") {
|
||||
t.Fatalf("expected large text to remain inline when workspace is blank, got %q", result.ForLLM)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user