// PicoClaw - Ultra-lightweight personal AI agent // Inspired by and based on nanobot: https://github.com/HKUDS/nanobot // License: MIT // // Copyright (c) 2026 PicoClaw contributors package agent import ( "bytes" "encoding/base64" "io" "os" "strings" "github.com/h2non/filetype" "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/providers" ) // resolveMediaRefs resolves media:// refs in messages. // Images are base64-encoded into the Media array for multimodal LLMs. // Non-image files (documents, audio, video) have their local path injected // into Content so the agent can access them via file tools like read_file. // Returns a new slice; original messages are not mutated. func resolveMediaRefs(messages []providers.Message, store media.MediaStore, maxSize int) []providers.Message { if store == nil { return messages } result := make([]providers.Message, len(messages)) copy(result, messages) for i, m := range result { if len(m.Media) == 0 { continue } resolved := make([]string, 0, len(m.Media)) var pathTags []string for _, ref := range m.Media { if !strings.HasPrefix(ref, "media://") { resolved = append(resolved, ref) continue } localPath, meta, err := store.ResolveWithMeta(ref) if err != nil { logger.WarnCF("agent", "Failed to resolve media ref", map[string]any{ "ref": ref, "error": err.Error(), }) continue } info, err := os.Stat(localPath) if err != nil { logger.WarnCF("agent", "Failed to stat media file", map[string]any{ "path": localPath, "error": err.Error(), }) continue } mime := detectMIME(localPath, meta) if strings.HasPrefix(mime, "image/") { dataURL := encodeImageToDataURL(localPath, mime, info, maxSize) if dataURL != "" { resolved = append(resolved, dataURL) } continue } pathTags = append(pathTags, buildPathTag(mime, localPath)) } result[i].Media = resolved if len(pathTags) > 0 { result[i].Content = injectPathTags(result[i].Content, pathTags) } } return result } // detectMIME determines the MIME 
type from metadata or magic-bytes detection. // Returns empty string if detection fails. func detectMIME(localPath string, meta media.MediaMeta) string { if meta.ContentType != "" { return meta.ContentType } kind, err := filetype.MatchFile(localPath) if err != nil || kind == filetype.Unknown { return "" } return kind.MIME.Value } // encodeImageToDataURL base64-encodes an image file into a data URL. // Returns empty string if the file exceeds maxSize or encoding fails. func encodeImageToDataURL(localPath, mime string, info os.FileInfo, maxSize int) string { if info.Size() > int64(maxSize) { logger.WarnCF("agent", "Media file too large, skipping", map[string]any{ "path": localPath, "size": info.Size(), "max_size": maxSize, }) return "" } f, err := os.Open(localPath) if err != nil { logger.WarnCF("agent", "Failed to open media file", map[string]any{ "path": localPath, "error": err.Error(), }) return "" } defer f.Close() prefix := "data:" + mime + ";base64," encodedLen := base64.StdEncoding.EncodedLen(int(info.Size())) var buf bytes.Buffer buf.Grow(len(prefix) + encodedLen) buf.WriteString(prefix) encoder := base64.NewEncoder(base64.StdEncoding, &buf) if _, err := io.Copy(encoder, f); err != nil { logger.WarnCF("agent", "Failed to encode media file", map[string]any{ "path": localPath, "error": err.Error(), }) return "" } encoder.Close() return buf.String() } // buildPathTag creates a structured tag exposing the local file path. // Tag type is derived from MIME: [audio:/path], [video:/path], or [file:/path]. func buildPathTag(mime, localPath string) string { switch { case strings.HasPrefix(mime, "audio/"): return "[audio:" + localPath + "]" case strings.HasPrefix(mime, "video/"): return "[video:" + localPath + "]" default: return "[file:" + localPath + "]" } } // injectPathTags replaces generic media tags in content with path-bearing versions, // or appends if no matching generic tag is found. 
// Placeholders ("[audio]", "[video]", "[file]") are looked up in the original
// content only, so text inside an already injected tag (for example a path
// containing "[file]") is never mistaken for a placeholder. Tags are consumed
// in order: the n-th tag of a kind replaces the n-th placeholder of that kind.
// Tags left without a placeholder, and tags of an unrecognized kind, are
// appended in their original order, separated by single spaces.
func injectPathTags(content string, tags []string) string {
	if len(tags) == 0 {
		return content
	}

	placeholders := [...]string{"[audio]", "[video]", "[file]"}

	// Count how many placeholders of each kind the original content offers.
	free := make(map[string]int, len(placeholders))
	for _, p := range placeholders {
		free[p] = strings.Count(content, p)
	}

	// Split the tags into those that claim a placeholder (queued per kind, in
	// order) and those that must be appended after the content.
	queued := make(map[string][]string, len(placeholders))
	var trailing []string
	for _, tag := range tags {
		var generic string
		switch {
		case strings.HasPrefix(tag, "[audio:"):
			generic = "[audio]"
		case strings.HasPrefix(tag, "[video:"):
			generic = "[video]"
		case strings.HasPrefix(tag, "[file:"):
			generic = "[file]"
		}
		if generic != "" && free[generic] > 0 {
			free[generic]--
			queued[generic] = append(queued[generic], tag)
			continue
		}
		trailing = append(trailing, tag)
	}

	// Single left-to-right pass over the original content. Injected text is
	// written to the builder and never rescanned.
	var b strings.Builder
	rest := content
	for {
		open := strings.IndexByte(rest, '[')
		if open < 0 {
			break
		}
		b.WriteString(rest[:open])
		rest = rest[open:]

		replaced := false
		for _, p := range placeholders {
			if q := queued[p]; len(q) > 0 && strings.HasPrefix(rest, p) {
				b.WriteString(q[0])
				queued[p] = q[1:]
				rest = rest[len(p):]
				replaced = true
				break
			}
		}
		if !replaced {
			// Not a claimed placeholder: keep the bracket and move on.
			b.WriteByte('[')
			rest = rest[1:]
		}
	}
	b.WriteString(rest)

	for _, tag := range trailing {
		if b.Len() > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(tag)
	}
	return b.String()
}