From 6997edc82e1cd555187f982701d03ebc51e26bba Mon Sep 17 00:00:00 2001 From: shikihane Date: Sun, 1 Mar 2026 19:19:31 +0800 Subject: [PATCH] feat(agent): wire Media through agent pipeline (cherry-pick PR #555) Add Media field to processOptions, pass msg.Media from inbound messages through to BuildMessages and serializeMessages so vision-capable LLMs receive image_url content parts. Based on work by @as3k in sipeed/picoclaw#555. Co-Authored-By: Claude Opus 4.6 --- pkg/agent/context.go | 3 ++- pkg/agent/loop.go | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/agent/context.go b/pkg/agent/context.go index 6fccbaf53..8868d6bf4 100644 --- a/pkg/agent/context.go +++ b/pkg/agent/context.go @@ -465,10 +465,11 @@ func (cb *ContextBuilder) BuildMessages( messages = append(messages, history...) // Add current user message - if strings.TrimSpace(currentMessage) != "" { + if strings.TrimSpace(currentMessage) != "" || len(media) > 0 { messages = append(messages, providers.Message{ Role: "user", Content: currentMessage, + Media: media, }) } diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 00b0f096a..52a72d0f1 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -46,11 +46,12 @@ type AgentLoop struct { // processOptions configures how a message is processed type processOptions struct { - SessionKey string // Session identifier for history/context - Channel string // Target channel for tool execution - ChatID string // Target chat ID for tool execution - UserMessage string // User message content (may include prefix) - DefaultResponse string // Response when LLM returns empty + SessionKey string // Session identifier for history/context + Channel string // Target channel for tool execution + ChatID string // Target chat ID for tool execution + UserMessage string // User message content (may include prefix) + Media []string // Media URLs attached to the user message + DefaultResponse string // Response when LLM returns empty EnableSummary bool // Whether to trigger summarization SendResponse bool // Whether to send response via bus NoHistory bool // If true, don't load session history (for heartbeat) @@ -417,6 +418,7 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) Channel: msg.Channel, ChatID: msg.ChatID, UserMessage: msg.Content, + Media: msg.Media, DefaultResponse: defaultResponse, EnableSummary: true, SendResponse: false, @@ -509,7 +511,7 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, agent *AgentInstance, opt history, summary, opts.UserMessage, - nil, + opts.Media, opts.Channel, opts.ChatID, )