feat(agent): add resolveMediaRefs to convert media:// refs to base64 data URLs

Without this function, media:// refs stored by MediaStore are passed
directly to the LLM API, which rejects them as invalid URLs.

resolveMediaRefs() runs after BuildMessages() and before the LLM call,
converting each media:// ref to a data:image/...;base64,... URL that
vision-capable models can process.

Also adds a mimeFromExtension() helper that infers the MIME type from the
file extension when ContentType metadata is not available (unrecognized
extensions fall back to image/jpeg).
This commit is contained in:
shikihane
2026-03-02 18:08:32 +08:00
parent a4e5c391bd
commit 18b36af934
+76
View File
@@ -8,9 +8,11 @@ package agent
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
@@ -515,6 +517,7 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, agent *AgentInstance, opt
opts.Channel,
opts.ChatID,
)
messages = resolveMediaRefs(messages, al.mediaStore)
// 3. Save user message to session
agent.Sessions.AddMessage(opts.SessionKey, "user", opts.UserMessage)
@@ -1352,3 +1355,76 @@ func extractParentPeer(msg bus.InboundMessage) *routing.RoutePeer {
}
return &routing.RoutePeer{Kind: parentKind, ID: parentID}
}
// resolveMediaRefs returns a copy of messages in which every "media://" entry
// of a message's Media field has been swapped for an inline
// "data:<mime>;base64,<payload>" URL.
//
// Entries that do not start with "media://" are carried over untouched. A ref
// that the store cannot resolve, or whose file cannot be read, is logged as a
// warning and left out of the resulting Media list. The MIME type is taken
// from the store's metadata and, when that is empty, inferred from the file
// extension. When store is nil the input slice is returned as-is.
//
// The input slice and its Media slices are never written to; messages that
// carry media receive a freshly allocated Media slice in the returned copy.
func resolveMediaRefs(messages []providers.Message, store media.MediaStore) []providers.Message {
	if store == nil {
		return messages
	}

	out := make([]providers.Message, len(messages))
	copy(out, messages)

	for idx := range out {
		refs := out[idx].Media
		if len(refs) == 0 {
			continue
		}

		urls := make([]string, 0, len(refs))
		for _, ref := range refs {
			// Anything that is not a store reference passes through unchanged.
			if !strings.HasPrefix(ref, "media://") {
				urls = append(urls, ref)
				continue
			}

			filePath, meta, resolveErr := store.ResolveWithMeta(ref)
			if resolveErr != nil {
				logger.WarnCF("agent", "Failed to resolve media ref", map[string]any{
					"ref":   ref,
					"error": resolveErr.Error(),
				})
				continue
			}

			raw, readErr := os.ReadFile(filePath)
			if readErr != nil {
				logger.WarnCF("agent", "Failed to read media file", map[string]any{
					"path":  filePath,
					"error": readErr.Error(),
				})
				continue
			}

			// Prefer the recorded content type; fall back to the extension.
			contentType := meta.ContentType
			if contentType == "" {
				contentType = mimeFromExtension(filepath.Ext(filePath))
			}

			payload := base64.StdEncoding.EncodeToString(raw)
			urls = append(urls, "data:"+contentType+";base64,"+payload)
		}

		out[idx].Media = urls
	}

	return out
}
// mimeFromExtension maps a file extension (including its leading dot, matched
// case-insensitively) to an image MIME type.
//
// ".png", ".gif", ".webp" and ".bmp" map to "image/<ext>". ".jpg", ".jpeg" and
// every other value, including the empty string, map to "image/jpeg".
func mimeFromExtension(ext string) string {
	// JPEG doubles as the answer for .jpg/.jpeg and as the fallback.
	mimeType := "image/jpeg"

	switch lowered := strings.ToLower(ext); lowered {
	case ".png", ".gif", ".webp", ".bmp":
		// For these formats the MIME subtype is the extension minus its dot.
		mimeType = "image/" + strings.TrimPrefix(lowered, ".")
	}

	return mimeType
}