feat(mcp): store oversized text results as artifacts (#2308)

* feat(mcp): store oversized text results as artifacts

* feat(mcp): fix doc

* fix(mcp): preserve raw MCP payload in text artifacts

* fix(mcp): avoid leaking large text when artifact persistence fails

* chore(mcp): clarify inline text limit and cover artifact edge cases
This commit is contained in:
Mauro
2026-04-03 19:30:36 +02:00
committed by GitHub
parent bd56e10bb8
commit d8c5183d9a
7 changed files with 342 additions and 19 deletions
+3
View File
@@ -528,6 +528,9 @@ For example:
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
- `PICOCLAW_TOOLS_MCP_MAX_INLINE_TEXT_CHARS=16384`
Note: Nested map-style config (for example `tools.mcp.servers.<name>.*`) is configured in `config.json` rather than
environment variables.
For MCP tools, `tools.mcp.max_inline_text_chars` controls how much text result is kept inline in model context. The threshold is counted in Unicode characters (Go runes), not bytes. For example, `16384` means up to 16,384 characters inline, which may occupy more than 16 KB for multibyte text such as CJK. Above this threshold, PicoClaw saves the MCP text result as a local artifact in the agent workspace and gives the model a short note plus a structured `[file:...]` artifact path instead of injecting the full payload into context.
+2
View File
@@ -126,6 +126,8 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
}
mcpTool := tools.NewMCPTool(mcpManager, serverName, tool)
mcpTool.SetWorkspace(agent.Workspace)
mcpTool.SetMaxInlineTextRunes(al.cfg.Tools.MCP.GetMaxInlineTextChars())
if registerAsHidden {
agent.Tools.RegisterHidden(mcpTool)
+11
View File
@@ -943,10 +943,21 @@ type MCPServerConfig struct {
type MCPConfig struct {
ToolConfig ` envPrefix:"PICOCLAW_TOOLS_MCP_"`
Discovery ToolDiscoveryConfig ` json:"discovery"`
// MaxInlineTextChars controls how much MCP text stays inline before it is saved as an artifact.
MaxInlineTextChars int `json:"max_inline_text_chars,omitempty" env:"PICOCLAW_TOOLS_MCP_MAX_INLINE_TEXT_CHARS"`
// Servers is a map of server name to server configuration
Servers map[string]MCPServerConfig `json:"servers,omitempty"`
}
const DefaultMCPMaxInlineTextChars = 16 * 1024
func (c *MCPConfig) GetMaxInlineTextChars() int {
if c.MaxInlineTextChars > 0 {
return c.MaxInlineTextChars
}
return DefaultMCPMaxInlineTextChars
}
func LoadConfig(path string) (*Config, error) {
logger.Debugf("loading config from %s", path)
+35
View File
@@ -198,6 +198,41 @@ func TestAgentConfig_FullParse(t *testing.T) {
}
}
func TestDefaultConfig_MCPMaxInlineTextChars(t *testing.T) {
cfg := DefaultConfig()
if cfg.Tools.MCP.GetMaxInlineTextChars() != DefaultMCPMaxInlineTextChars {
t.Fatalf(
"DefaultConfig().Tools.MCP.GetMaxInlineTextChars() = %d, want %d",
cfg.Tools.MCP.GetMaxInlineTextChars(),
DefaultMCPMaxInlineTextChars,
)
}
}
func TestLoadConfig_MCPMaxInlineTextChars(t *testing.T) {
dir := t.TempDir()
configPath := filepath.Join(dir, "config.json")
raw := `{
"tools": {
"mcp": {
"enabled": true,
"max_inline_text_chars": 2048
}
}
}`
if err := os.WriteFile(configPath, []byte(raw), 0o644); err != nil {
t.Fatalf("WriteFile(configPath): %v", err)
}
cfg, err := LoadConfig(configPath)
if err != nil {
t.Fatalf("LoadConfig() error: %v", err)
}
if got := cfg.Tools.MCP.GetMaxInlineTextChars(); got != 2048 {
t.Fatalf("cfg.Tools.MCP.GetMaxInlineTextChars() = %d, want 2048", got)
}
}
func TestConfig_BackwardCompat_NoAgentsList(t *testing.T) {
jsonData := `{
"agents": {
+2 -1
View File
@@ -462,7 +462,8 @@ func DefaultConfig() *Config {
UseBM25: true,
UseRegex: false,
},
Servers: map[string]MCPServerConfig{},
MaxInlineTextChars: DefaultMCPMaxInlineTextChars,
Servers: map[string]MCPServerConfig{},
},
AppendFile: ToolConfig{
Enabled: true,
+115 -18
View File
@@ -6,11 +6,14 @@ import (
"fmt"
"hash/fnv"
"os"
"path/filepath"
"strings"
"time"
"unicode/utf8"
"github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/media"
)
@@ -26,18 +29,21 @@ type MCPManager interface {
// MCPTool wraps an MCP tool to implement the Tool interface
type MCPTool struct {
manager MCPManager
serverName string
tool *mcp.Tool
mediaStore media.MediaStore
manager MCPManager
serverName string
tool *mcp.Tool
mediaStore media.MediaStore
workspace string
maxInlineTextRunes int
}
// NewMCPTool creates a new MCP tool wrapper
func NewMCPTool(manager MCPManager, serverName string, tool *mcp.Tool) *MCPTool {
return &MCPTool{
manager: manager,
serverName: serverName,
tool: tool,
manager: manager,
serverName: serverName,
tool: tool,
maxInlineTextRunes: maxMCPInlineTextRunes,
}
}
@@ -45,6 +51,18 @@ func (t *MCPTool) SetMediaStore(store media.MediaStore) {
t.mediaStore = store
}
func (t *MCPTool) SetWorkspace(workspace string) {
t.workspace = strings.TrimSpace(workspace)
}
func (t *MCPTool) SetMaxInlineTextRunes(limit int) {
if limit > 0 {
t.maxInlineTextRunes = limit
}
}
const maxMCPInlineTextRunes = 16 * 1024
// sanitizeIdentifierComponent normalizes a string so it can be safely used
// as part of a tool/function identifier for downstream providers.
// It:
@@ -255,14 +273,19 @@ func extractContentText(content []mcp.Content) string {
func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Content) *ToolResult {
llmParts := make([]string, 0, len(content))
rawTextParts := make([]string, 0, len(content))
mediaRefs := make([]string, 0, len(content))
for _, c := range content {
switch v := c.(type) {
case *mcp.TextContent:
text := strings.TrimSpace(sanitizeToolLLMContent(v.Text))
if text != "" {
llmParts = append(llmParts, text)
rawText := strings.TrimSpace(v.Text)
if rawText != "" {
rawTextParts = append(rawTextParts, rawText)
}
safeText := strings.TrimSpace(sanitizeToolLLMContent(v.Text))
if safeText != "" {
llmParts = append(llmParts, safeText)
}
case *mcp.ImageContent:
ref, note := t.storeBinaryContent(
@@ -295,10 +318,13 @@ func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Cont
case *mcp.ResourceLink:
llmParts = append(llmParts, summarizeResourceLink(v))
case *mcp.EmbeddedResource:
ref, note := t.storeEmbeddedResource(ctx, v)
ref, note, rawText := t.storeEmbeddedResource(ctx, v)
if ref != "" {
mediaRefs = append(mediaRefs, ref)
}
if rawText != "" {
rawTextParts = append(rawTextParts, rawText)
}
if note != "" {
llmParts = append(llmParts, note)
}
@@ -307,34 +333,105 @@ func (t *MCPTool) normalizeResultContent(ctx context.Context, content []mcp.Cont
}
}
forLLM := strings.Join(compactStrings(llmParts), "\n")
rawText := strings.Join(compactStrings(rawTextParts), "\n")
if artifactResult := t.persistLargeTextArtifact(rawText); artifactResult != nil {
artifactResult.Media = mediaRefs
return artifactResult
}
result := &ToolResult{
ForLLM: strings.Join(compactStrings(llmParts), "\n"),
ForLLM: forLLM,
Media: mediaRefs,
}
return result
}
func (t *MCPTool) storeEmbeddedResource(ctx context.Context, content *mcp.EmbeddedResource) (string, string) {
func (t *MCPTool) persistLargeTextArtifact(text string) *ToolResult {
text = strings.TrimSpace(text)
limit := t.maxInlineTextRunes
if limit <= 0 {
limit = maxMCPInlineTextRunes
}
size := utf8.RuneCountInString(text)
if text == "" || size <= limit || t.workspace == "" {
return nil
}
dir := filepath.Join(t.workspace, ".artifacts", "mcp")
if err := os.MkdirAll(dir, 0o700); err != nil {
return t.largeTextArtifactFallback(text, err)
}
// TODO: Add lifecycle cleanup/retention for MCP artifact files.
pattern := fmt.Sprintf(
"%s_%s_*.txt",
sanitizeIdentifierComponent(t.serverName),
sanitizeIdentifierComponent(t.tool.Name),
)
tmpFile, err := os.CreateTemp(dir, pattern)
if err != nil {
return t.largeTextArtifactFallback(text, err)
}
path := tmpFile.Name()
if _, err = tmpFile.WriteString(text); err != nil {
_ = tmpFile.Close()
_ = os.Remove(path)
return t.largeTextArtifactFallback(text, err)
}
if err = tmpFile.Close(); err != nil {
_ = os.Remove(path)
return t.largeTextArtifactFallback(text, err)
}
return &ToolResult{
ForLLM: fmt.Sprintf(
"[MCP returned a large text result (%d chars); omitted from model context and saved as a local artifact.]",
size,
),
ArtifactTags: []string{"[file:" + path + "]"},
}
}
func (t *MCPTool) largeTextArtifactFallback(text string, err error) *ToolResult {
size := utf8.RuneCountInString(text)
logger.WarnCF("tool", "Failed to persist large MCP text artifact", map[string]any{
"server": t.serverName,
"tool": t.tool.Name,
"chars": size,
"error": err.Error(),
})
return &ToolResult{
ForLLM: fmt.Sprintf(
"[MCP returned a large text result (%d chars); omitted from model context because artifact persistence failed.]",
size,
),
}
}
func (t *MCPTool) storeEmbeddedResource(ctx context.Context, content *mcp.EmbeddedResource) (string, string, string) {
if content == nil || content.Resource == nil {
return "", "[MCP returned an embedded resource without data.]"
return "", "[MCP returned an embedded resource without data.]", ""
}
resource := content.Resource
if len(resource.Blob) > 0 {
return t.storeBinaryContent(
ref, note := t.storeBinaryContent(
ctx,
"resource",
normalizedMIMEType(resource.MIMEType),
resource.Blob,
content.Annotations,
)
return ref, note, ""
}
if strings.TrimSpace(resource.Text) != "" {
return "", sanitizeToolLLMContent(resource.Text)
rawText := strings.TrimSpace(resource.Text)
if rawText != "" {
return "", sanitizeToolLLMContent(resource.Text), rawText
}
return "", summarizeEmbeddedResource(content)
return "", summarizeEmbeddedResource(content), ""
}
func (t *MCPTool) storeBinaryContent(
+174
View File
@@ -634,3 +634,177 @@ func TestMCPTool_Execute_LargeBase64TextIsOmittedFromContext(t *testing.T) {
t.Fatalf("expected sanitized large base64 note, got %q", result.ForLLM)
}
}
func TestMCPTool_Execute_LargeBase64TextArtifactPreservesRawPayload(t *testing.T) {
workspace := t.TempDir()
largeBase64 := strings.Repeat("QUJD", 400)
manager := &MockMCPManager{
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
return &mcp.CallToolResult{
Content: []mcp.Content{
&mcp.TextContent{Text: largeBase64},
},
}, nil
},
}
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
mcpTool.SetWorkspace(workspace)
mcpTool.SetMaxInlineTextRunes(32)
result := mcpTool.Execute(context.Background(), nil)
if !strings.Contains(result.ForLLM, "saved as a local artifact") {
t.Fatalf("expected artifact note, got %q", result.ForLLM)
}
if result.ForLLM == largeBase64OmittedMessage {
t.Fatalf("expected artifact note instead of sanitized base64 placeholder")
}
if len(result.ArtifactTags) != 1 {
t.Fatalf("expected 1 artifact tag, got %d", len(result.ArtifactTags))
}
tag := result.ArtifactTags[0]
const prefix = "[file:"
if !strings.HasPrefix(tag, prefix) || !strings.HasSuffix(tag, "]") {
t.Fatalf("expected file artifact tag, got %q", tag)
}
path := strings.TrimSuffix(strings.TrimPrefix(tag, prefix), "]")
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("expected artifact file to be readable: %v", err)
}
if string(data) != largeBase64 {
t.Fatalf("expected artifact file contents to preserve raw MCP payload")
}
}
func TestMCPTool_Execute_LargeTextStoredAsArtifact(t *testing.T) {
workspace := t.TempDir()
largeText := strings.Repeat("This is a large MCP text payload.\n", 800)
manager := &MockMCPManager{
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
return &mcp.CallToolResult{
Content: []mcp.Content{
&mcp.TextContent{Text: largeText},
},
}, nil
},
}
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
mcpTool.SetWorkspace(workspace)
result := mcpTool.Execute(context.Background(), nil)
if strings.Contains(result.ForLLM, "This is a large MCP text payload") {
t.Fatalf("expected large MCP text to be omitted from ForLLM, got %q", result.ForLLM)
}
if !strings.Contains(result.ForLLM, "saved as a local artifact") {
t.Fatalf("expected artifact note, got %q", result.ForLLM)
}
if len(result.ArtifactTags) != 1 {
t.Fatalf("expected 1 artifact tag, got %d", len(result.ArtifactTags))
}
tag := result.ArtifactTags[0]
const prefix = "[file:"
if !strings.HasPrefix(tag, prefix) || !strings.HasSuffix(tag, "]") {
t.Fatalf("expected file artifact tag, got %q", tag)
}
path := strings.TrimSuffix(strings.TrimPrefix(tag, prefix), "]")
if !strings.HasPrefix(path, workspace) {
t.Fatalf("expected artifact inside workspace, got %q", path)
}
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("expected artifact file to be readable: %v", err)
}
if string(data) != strings.TrimSpace(largeText) {
t.Fatalf("expected artifact file contents to match source text")
}
}
func TestMCPTool_Execute_CustomInlineTextThreshold(t *testing.T) {
workspace := t.TempDir()
text := strings.Repeat("small custom threshold text\n", 20)
manager := &MockMCPManager{
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
return &mcp.CallToolResult{
Content: []mcp.Content{
&mcp.TextContent{Text: text},
},
}, nil
},
}
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
mcpTool.SetWorkspace(workspace)
mcpTool.SetMaxInlineTextRunes(32)
result := mcpTool.Execute(context.Background(), nil)
if len(result.ArtifactTags) != 1 {
t.Fatalf("expected custom threshold to persist artifact, got %+v", result)
}
if strings.Contains(result.ForLLM, "small custom threshold text") {
t.Fatalf("expected text to be omitted from ForLLM, got %q", result.ForLLM)
}
}
func TestMCPTool_Execute_LargeTextArtifactFailureStillOmitsContext(t *testing.T) {
workspaceRoot := t.TempDir()
workspaceFile := filepath.Join(workspaceRoot, "not-a-directory")
if err := os.WriteFile(workspaceFile, []byte("x"), 0o600); err != nil {
t.Fatalf("failed to create workspace file: %v", err)
}
largeText := strings.Repeat("This is a large MCP text payload.\n", 800)
manager := &MockMCPManager{
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
return &mcp.CallToolResult{
Content: []mcp.Content{
&mcp.TextContent{Text: largeText},
},
}, nil
},
}
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
mcpTool.SetWorkspace(workspaceFile)
result := mcpTool.Execute(context.Background(), nil)
if strings.Contains(result.ForLLM, "This is a large MCP text payload") {
t.Fatalf("expected large MCP text to be omitted from ForLLM, got %q", result.ForLLM)
}
if !strings.Contains(result.ForLLM, "artifact persistence failed") {
t.Fatalf("expected persistence failure note, got %q", result.ForLLM)
}
if len(result.ArtifactTags) != 0 {
t.Fatalf("expected no artifact tags on persistence failure, got %+v", result.ArtifactTags)
}
}
func TestMCPTool_Execute_WhitespaceWorkspaceDisablesArtifactPersistence(t *testing.T) {
largeText := strings.Repeat("This is a large MCP text payload.\n", 800)
manager := &MockMCPManager{
callToolFunc: func(ctx context.Context, serverName, toolName string, arguments map[string]any) (*mcp.CallToolResult, error) {
return &mcp.CallToolResult{
Content: []mcp.Content{
&mcp.TextContent{Text: largeText},
},
}, nil
},
}
mcpTool := NewMCPTool(manager, "test_server", &mcp.Tool{Name: "dump_payload"})
mcpTool.SetWorkspace(" \n\t ")
result := mcpTool.Execute(context.Background(), nil)
if len(result.ArtifactTags) != 0 {
t.Fatalf("expected no artifact tags for whitespace workspace, got %+v", result.ArtifactTags)
}
if !strings.Contains(result.ForLLM, "This is a large MCP text payload") {
t.Fatalf("expected large text to remain inline when workspace is blank, got %q", result.ForLLM)
}
}