From 6fd65825e731cf27a24b477bb5516dd2f2ea52b2 Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 14:01:52 +0800 Subject: [PATCH] feat(agent): implement resolveMediaRefs with streaming base64 and filetype detection Co-Authored-By: Claude Opus 4.6 --- pkg/agent/loop_media.go | 121 ++++++++++++++++++++++++++++++++++ pkg/agent/loop_test.go | 140 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 pkg/agent/loop_media.go diff --git a/pkg/agent/loop_media.go b/pkg/agent/loop_media.go new file mode 100644 index 000000000..813feef69 --- /dev/null +++ b/pkg/agent/loop_media.go @@ -0,0 +1,121 @@ +// PicoClaw - Ultra-lightweight personal AI agent +// Inspired by and based on nanobot: https://github.com/HKUDS/nanobot +// License: MIT +// +// Copyright (c) 2026 PicoClaw contributors + +package agent + +import ( + "bytes" + "encoding/base64" + "io" + "os" + "strings" + + "github.com/h2non/filetype" + "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/media" + "github.com/sipeed/picoclaw/pkg/providers" +) + +// resolveMediaRefs replaces media:// refs in message Media fields with base64 data URLs. +// Uses streaming base64 encoding (file handle → encoder → buffer) to avoid holding +// both raw bytes and encoded string in memory simultaneously. +// Returns a new slice; original messages are not mutated. 
+//
+// Refs without the media:// prefix are copied through unchanged. A media:// ref
+// that cannot be resolved, is larger than maxSize bytes, has no detectable type,
+// or fails to encode is logged as a warning and dropped from the message. When
+// store is nil nothing can be resolved and messages itself is returned.
+func resolveMediaRefs(messages []providers.Message, store media.MediaStore, maxSize int) []providers.Message {
+	if store == nil {
+		return messages
+	}
+
+	result := make([]providers.Message, len(messages))
+	copy(result, messages)
+
+	for i, m := range result {
+		if len(m.Media) == 0 {
+			continue
+		}
+
+		// Build a fresh Media slice so the caller's backing array is never written.
+		resolved := make([]string, 0, len(m.Media))
+		for _, ref := range m.Media {
+			if !strings.HasPrefix(ref, "media://") {
+				resolved = append(resolved, ref)
+				continue
+			}
+
+			localPath, meta, err := store.ResolveWithMeta(ref)
+			if err != nil {
+				logger.WarnCF("agent", "Failed to resolve media ref", map[string]any{
+					"ref":   ref,
+					"error": err.Error(),
+				})
+				continue
+			}
+
+			if dataURL, ok := encodeMediaDataURL(localPath, meta.ContentType, maxSize); ok {
+				resolved = append(resolved, dataURL)
+			}
+		}
+
+		result[i].Media = resolved
+	}
+
+	return result
+}
+
+// encodeMediaDataURL streams the file at localPath through a base64 encoder and
+// returns it as a "data:MIME;base64,PAYLOAD" URL. mime is used as-is when it is
+// non-empty; otherwise the type is detected from the file's magic bytes. Files
+// larger than maxSize bytes are rejected. Every failure is logged as a warning
+// and reported as ok == false so the caller can drop just this attachment.
+func encodeMediaDataURL(localPath, mime string, maxSize int) (dataURL string, ok bool) {
+	f, err := os.Open(localPath)
+	if err != nil {
+		logger.WarnCF("agent", "Failed to open media file", map[string]any{
+			"path":  localPath,
+			"error": err.Error(),
+		})
+		return "", false
+	}
+	defer f.Close()
+
+	// Stat the open handle rather than the path, so the size that is checked
+	// belongs to the file that is actually read below.
+	info, err := f.Stat()
+	if err != nil {
+		logger.WarnCF("agent", "Failed to stat media file", map[string]any{
+			"path":  localPath,
+			"error": err.Error(),
+		})
+		return "", false
+	}
+	size := info.Size()
+	if size > int64(maxSize) {
+		logger.WarnCF("agent", "Media file too large, skipping", map[string]any{
+			"path":     localPath,
+			"size":     size,
+			"max_size": maxSize,
+		})
+		return "", false
+	}
+
+	// Prefer the stored content type; fall back to magic-bytes detection.
+	if mime == "" {
+		kind, err := filetype.MatchFile(localPath)
+		if err != nil || kind == filetype.Unknown {
+			logger.WarnCF("agent", "Unknown media type, skipping", map[string]any{
+				"path": localPath,
+			})
+			return "", false
+		}
+		mime = kind.MIME.Value
+	}
+
+	// Pre-size the buffer for prefix + encoded payload so it is allocated once.
+	prefix := "data:" + mime + ";base64,"
+	var buf bytes.Buffer
+	buf.Grow(len(prefix) + base64.StdEncoding.EncodedLen(int(size)))
+	buf.WriteString(prefix)
+
+	// Cap the read at the size checked above: a file that grows mid-read can
+	// neither exceed maxSize nor outgrow the pre-sized buffer.
+	encoder := base64.NewEncoder(base64.StdEncoding, &buf)
+	_, err = io.Copy(encoder, io.LimitReader(f, size))
+	if err == nil {
+		// Close flushes the final, partially filled base64 group.
+		err = encoder.Close()
+	}
+	if err != nil {
+		logger.WarnCF("agent", "Failed to encode media file", map[string]any{
+			"path":  localPath,
+			"error": err.Error(),
+		})
+		return "", false
+	}
+
+	return buf.String(), true
+}
diff --git a/pkg/agent/loop_test.go b/pkg/agent/loop_test.go
index 3565314fe..4076c6e7c 100644
--- a/pkg/agent/loop_test.go
+++ b/pkg/agent/loop_test.go
@@ -6,12 +6,14 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/sipeed/picoclaw/pkg/bus"
 	"github.com/sipeed/picoclaw/pkg/channels"
 	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/media"
 	"github.com/sipeed/picoclaw/pkg/providers"
 	"github.com/sipeed/picoclaw/pkg/tools"
 )
@@ -808,3 +810,141 @@ func TestHandleReasoning(t *testing.T) {
 		}
 	})
 }
+
+// TestResolveMediaRefs_ResolvesToBase64 checks that a media:// ref stored without
+// a content type is detected as PNG and replaced by a base64 data URL.
+func TestResolveMediaRefs_ResolvesToBase64(t *testing.T) {
+	store := media.NewFileMediaStore()
+	dir := t.TempDir()
+
+	// Minimal PNG prefix; the signature is what type detection keys on.
+	pngPath := filepath.Join(dir, "test.png")
+	pngHeader := []byte{
+		// PNG signature: 0x89 P N G \r \n 0x1A \n
+		0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
+		// IHDR chunk length (13) and type
+		0x00, 0x00, 0x00, 0x0D,
+		0x49, 0x48, 0x44, 0x52,
+		// width 1, height 1, bit depth 8, colour type 2 (RGB)
+		0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02,
+		// compression, filter, interlace
+		0x00, 0x00, 0x00,
+		// CRC
+		0x90, 0x77, 0x53, 0xDE,
+	}
+	if err := os.WriteFile(pngPath, pngHeader, 0o644); err != nil {
+		t.Fatal(err)
+	}
+	ref, err := store.Store(pngPath, media.MediaMeta{}, "test")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	messages := []providers.Message{
+		{Role: "user", Content: "describe this", Media: []string{ref}},
+	}
+	result := resolveMediaRefs(messages, store, config.DefaultMaxMediaSize)
+
+	if len(result[0].Media) != 1 {
+		t.Fatalf("expected 1 resolved media, got %d", len(result[0].Media))
+	}
+	// Print the whole value on failure: slicing it could panic on a short string.
+	if !strings.HasPrefix(result[0].Media[0], "data:image/png;base64,") {
+		t.Fatalf("expected data:image/png;base64, prefix, got %q", result[0].Media[0])
+	}
+}
+
+// TestResolveMediaRefs_SkipsOversizedFile checks that a file one byte over the
+// limit is dropped from the message.
+func TestResolveMediaRefs_SkipsOversizedFile(t *testing.T) {
+	const limit = 1024 // bytes; tiny so the fixture stays small
+
+	store := media.NewFileMediaStore()
+	dir := t.TempDir()
+
+	// PNG signature followed by zero padding, one byte over the limit.
+	bigPath := filepath.Join(dir, "big.png")
+	data := make([]byte, limit+1)
+	copy(data, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
+	if err := os.WriteFile(bigPath, data, 0o644); err != nil {
+		t.Fatal(err)
+	}
+	ref, err := store.Store(bigPath, media.MediaMeta{}, "test")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	messages := []providers.Message{
+		{Role: "user", Content: "hi", Media: []string{ref}},
+	}
+	result := resolveMediaRefs(messages, store, limit)
+
+	if len(result[0].Media) != 0 {
+		t.Fatalf("expected 0 media (oversized), got %d", len(result[0].Media))
+	}
+}
+
+// TestResolveMediaRefs_SkipsUnknownType checks that a file with no stored
+// content type and no recognisable magic bytes is dropped.
+func TestResolveMediaRefs_SkipsUnknownType(t *testing.T) {
+	store := media.NewFileMediaStore()
+	dir := t.TempDir()
+
+	txtPath := filepath.Join(dir, "readme.txt")
+	if err := os.WriteFile(txtPath, []byte("hello world"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	ref, err := store.Store(txtPath, media.MediaMeta{}, "test")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	messages := []providers.Message{
+		{Role: "user", Content: "hi", Media: []string{ref}},
+	}
+	result := resolveMediaRefs(messages, store, config.DefaultMaxMediaSize)
+
+	if len(result[0].Media) != 0 {
+		t.Fatalf("expected 0 media (unknown type), got %d", len(result[0].Media))
+	}
+}
+
+// TestResolveMediaRefs_PassesThroughNonMediaRefs checks that entries without the
+// media:// prefix come back untouched.
+func TestResolveMediaRefs_PassesThroughNonMediaRefs(t *testing.T) {
+	const remoteURL = "https://example.com/img.png"
+
+	// A nil store returns before any ref is inspected, so a real store is needed
+	// as well to reach the per-ref passthrough branch.
+	cases := []struct {
+		name  string
+		store media.MediaStore
+	}{
+		{name: "nil store", store: nil},
+		{name: "file store", store: media.NewFileMediaStore()},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			messages := []providers.Message{
+				{Role: "user", Content: "hi", Media: []string{remoteURL}},
+			}
+			result := resolveMediaRefs(messages, tc.store, config.DefaultMaxMediaSize)
+
+			if len(result[0].Media) != 1 || result[0].Media[0] != remoteURL {
+				t.Fatalf("expected passthrough of non-media:// URL, got %v", result[0].Media)
+			}
+		})
+	}
+}
+
+// TestResolveMediaRefs_DoesNotMutateOriginal checks that resolving refs leaves
+// the caller's message slice and its Media entries as they were.
+func TestResolveMediaRefs_DoesNotMutateOriginal(t *testing.T) {
+	store := media.NewFileMediaStore()
+	dir := t.TempDir()
+
+	// PNG signature followed by a complete 1x1 IHDR chunk.
+	pngPath := filepath.Join(dir, "test.png")
+	pngHeader := []byte{
+		0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
+		0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
+		0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02,
+		0x00, 0x00, 0x00, 0x90, 0x77, 0x53, 0xDE,
+	}
+	if err := os.WriteFile(pngPath, pngHeader, 0o644); err != nil {
+		t.Fatal(err)
+	}
+	ref, err := store.Store(pngPath, media.MediaMeta{}, "test")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	original := []providers.Message{
+		{Role: "user", Content: "hi", Media: []string{ref}},
+	}
+	originalRef := original[0].Media[0]
+
+	result := resolveMediaRefs(original, store, config.DefaultMaxMediaSize)
+
+	// Check the length first so a mutation that empties Media fails cleanly
+	// instead of panicking on the index.
+	if len(original[0].Media) != 1 || original[0].Media[0] != originalRef {
+		t.Fatal("resolveMediaRefs mutated original message slice")
+	}
+	// Guard against a vacuous pass: if nothing was resolved, "not mutated"
+	// proves nothing.
+	if len(result[0].Media) != 1 || !strings.HasPrefix(result[0].Media[0], "data:") {
+		t.Fatalf("expected a resolved data URL in the result, got %v", result[0].Media)
+	}
+}
+
+// TestResolveMediaRefs_UsesMetaContentType checks that a content type supplied
+// in the stored metadata is used for the data URL.
+func TestResolveMediaRefs_UsesMetaContentType(t *testing.T) {
+	store := media.NewFileMediaStore()
+	dir := t.TempDir()
+
+	// Deliberately not JPEG bytes: with real JPEG magic the test would also pass
+	// when the metadata is ignored and the type is sniffed instead. Plain text
+	// is presumably unclassifiable by magic bytes, so only the stored content
+	// type can produce the expected prefix.
+	photoPath := filepath.Join(dir, "photo")
+	if err := os.WriteFile(photoPath, []byte("hello world"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	ref, err := store.Store(photoPath, media.MediaMeta{ContentType: "image/jpeg"}, "test")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	messages := []providers.Message{
+		{Role: "user", Content: "hi", Media: []string{ref}},
+	}
+	result := resolveMediaRefs(messages, store, config.DefaultMaxMediaSize)
+
+	if len(result[0].Media) != 1 {
+		t.Fatalf("expected 1 media, got %d", len(result[0].Media))
+	}
+	// Print the whole value on failure: slicing it could panic on a short string.
+	if !strings.HasPrefix(result[0].Media[0], "data:image/jpeg;base64,") {
+		t.Fatalf("expected jpeg prefix, got %q", result[0].Media[0])
+	}
+}
+