Files
picoclaw/pkg/agent/agent_media.go
T
sky5454 329e68e017 refactor(agent): Agent Looper refactor phase2, restructure pipeline and rename loop files to agent (#2585)
* refactor(agent): introduce interfaces for MessageBus and ChannelManager

Phase 2 of loop.go refactor — dependency inversion using adapter pattern.

- Add interfaces.MessageBus and interfaces.ChannelManager interfaces
- Create adapters/messagebus.go wrapping *bus.MessageBus
- Create adapters/channelmanager.go wrapping *channels.Manager
- Update AgentLoop to use interfaces instead of concrete types
- Update registerSharedTools to accept interfaces.MessageBus

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* refactor(agent): restructure pipeline and rename loop files

Pipeline refactoring:
- Split pipeline.go (1400 lines) into focused files:
  - pipeline_setup.go (~115 lines): SetupTurn method
  - pipeline_llm.go (~519 lines): CallLLM method
  - pipeline_execute.go (~693 lines): ExecuteTools method
  - pipeline_finalize.go (~78 lines): Finalize method
- Pipeline struct and NewPipeline remain in pipeline.go (~39 lines)

Agent file renaming:
- Rename loop_*.go to agent_*.go for consistent naming:
  - loop.go -> agent.go, loop_message.go -> agent_message.go, etc.
- Merge turn.go + turn_exec.go into turn_state.go
- Rename loop_turn.go -> turn_coord.go

Documentation:
- Update docs/pipeline-restructuring-plan.md
- Add docs/agent-rename-plan.md

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* fix(agent): code format  fixed

* refactor(agent): code test file added/renamed

* docs(agent): update agent refactor docs

* fix(agent): fix agent hardAbortX

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-21 10:55:50 +08:00

199 lines
5.0 KiB
Go

// PicoClaw - Ultra-lightweight personal AI agent
// Inspired by and based on nanobot: https://github.com/HKUDS/nanobot
// License: MIT
//
// Copyright (c) 2026 PicoClaw contributors
package agent
import (
"bytes"
"encoding/base64"
"io"
"os"
"strings"
"github.com/h2non/filetype"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/media"
"github.com/sipeed/picoclaw/pkg/providers"
)
// resolveMediaRefs resolves media:// refs in messages.
// Images are base64-encoded into the Media array for multimodal LLMs.
// Non-image files (documents, audio, video) have their local path injected
// into Content so the agent can access them via file tools like read_file.
// Returns a new slice; original messages are not mutated.
func resolveMediaRefs(messages []providers.Message, store media.MediaStore, maxSize int) []providers.Message {
if store == nil {
return messages
}
result := make([]providers.Message, len(messages))
copy(result, messages)
for i, m := range result {
if len(m.Media) == 0 {
continue
}
resolved := make([]string, 0, len(m.Media))
var pathTags []string
for _, ref := range m.Media {
if !strings.HasPrefix(ref, "media://") {
resolved = append(resolved, ref)
continue
}
localPath, meta, err := store.ResolveWithMeta(ref)
if err != nil {
logger.WarnCF("agent", "Failed to resolve media ref", map[string]any{
"ref": ref,
"error": err.Error(),
})
continue
}
info, err := os.Stat(localPath)
if err != nil {
logger.WarnCF("agent", "Failed to stat media file", map[string]any{
"path": localPath,
"error": err.Error(),
})
continue
}
mime := detectMIME(localPath, meta)
if strings.HasPrefix(mime, "image/") {
dataURL := encodeImageToDataURL(localPath, mime, info, maxSize)
if dataURL != "" {
resolved = append(resolved, dataURL)
}
continue
}
pathTags = append(pathTags, buildPathTag(mime, localPath))
}
result[i].Media = resolved
if len(pathTags) > 0 {
result[i].Content = injectPathTags(result[i].Content, pathTags)
}
}
return result
}
func buildArtifactTags(store media.MediaStore, refs []string) []string {
if store == nil || len(refs) == 0 {
return nil
}
tags := make([]string, 0, len(refs))
for _, ref := range refs {
localPath, meta, err := store.ResolveWithMeta(ref)
if err != nil {
continue
}
mime := detectMIME(localPath, meta)
tags = append(tags, buildPathTag(mime, localPath))
}
return tags
}
// detectMIME determines the MIME type from metadata or magic-bytes detection.
// Returns empty string if detection fails.
func detectMIME(localPath string, meta media.MediaMeta) string {
if meta.ContentType != "" {
return meta.ContentType
}
kind, err := filetype.MatchFile(localPath)
if err != nil || kind == filetype.Unknown {
return ""
}
return kind.MIME.Value
}
// encodeImageToDataURL base64-encodes an image file into a data URL.
// Returns empty string if the file exceeds maxSize or encoding fails.
func encodeImageToDataURL(localPath, mime string, info os.FileInfo, maxSize int) string {
if info.Size() > int64(maxSize) {
logger.WarnCF("agent", "Media file too large, skipping", map[string]any{
"path": localPath,
"size": info.Size(),
"max_size": maxSize,
})
return ""
}
f, err := os.Open(localPath)
if err != nil {
logger.WarnCF("agent", "Failed to open media file", map[string]any{
"path": localPath,
"error": err.Error(),
})
return ""
}
defer f.Close()
prefix := "data:" + mime + ";base64,"
encodedLen := base64.StdEncoding.EncodedLen(int(info.Size()))
var buf bytes.Buffer
buf.Grow(len(prefix) + encodedLen)
buf.WriteString(prefix)
encoder := base64.NewEncoder(base64.StdEncoding, &buf)
if _, err := io.Copy(encoder, f); err != nil {
logger.WarnCF("agent", "Failed to encode media file", map[string]any{
"path": localPath,
"error": err.Error(),
})
return ""
}
encoder.Close()
return buf.String()
}
// buildPathTag creates a structured tag exposing the local file path.
// Tag type is derived from MIME: [audio:/path], [video:/path], or [file:/path].
func buildPathTag(mime, localPath string) string {
switch {
case strings.HasPrefix(mime, "audio/"):
return "[audio:" + localPath + "]"
case strings.HasPrefix(mime, "video/"):
return "[video:" + localPath + "]"
default:
return "[file:" + localPath + "]"
}
}
// injectPathTags replaces generic media tags in content with path-bearing versions,
// or appends if no matching generic tag is found.
func injectPathTags(content string, tags []string) string {
for _, tag := range tags {
var generic string
switch {
case strings.HasPrefix(tag, "[audio:"):
generic = "[audio]"
case strings.HasPrefix(tag, "[video:"):
generic = "[video]"
case strings.HasPrefix(tag, "[file:"):
generic = "[file]"
}
if generic != "" && strings.Contains(content, generic) {
content = strings.Replace(content, generic, tag, 1)
} else if content == "" {
content = tag
} else {
content += " " + tag
}
}
return content
}