// Mirror of https://github.com/sipeed/picoclaw.git
// Synced 2026-06-12 18:08:54 +00:00
// 293 lines, 7.3 KiB, Go
package tools
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"fmt"
|
|
"mime"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
"unicode"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/media"
|
|
)
|
|
|
|
// Placeholder texts that stand in for bulky payloads removed from the
// model-facing part of a tool result.
const (
	// inlineMediaStoredMessage is a format string; its single %s verb
	// receives the MIME type of the inline media that was registered.
	inlineMediaStoredMessage = "[Tool returned inline media content (%s); omitted from model context and registered as a media attachment.]"

	// inlineMediaOmittedMessage marks content from which inline data URLs
	// were stripped.
	inlineMediaOmittedMessage = "[Tool returned inline media content; omitted from model context.]"

	// largeBase64OmittedMessage replaces content that looks like one large
	// base64 blob.
	largeBase64OmittedMessage = "[Tool returned a large base64-like payload; omitted from model context.]"
)
|
|
|
|
// Patterns used to locate inline data URLs in tool output. They are compiled
// once at package initialisation.
var (
	// inlineRawDataURLRe matches a bare base64 data URL: "data:", a media
	// type without ';' or whitespace, ";base64,", then a run of base64
	// alphabet characters that may be broken up by CR/LF.
	inlineRawDataURLRe = regexp.MustCompile(`data:[^;\s]+;base64,[A-Za-z0-9+/=\r\n]+`)

	// inlineMarkdownDataURLRe matches a Markdown image whose target is a
	// data URL; capture group 1 holds the data URL itself.
	inlineMarkdownDataURLRe = regexp.MustCompile(`!\[[^\]]*\]\((data:[^)]+)\)`)
)
|
|
|
|
func normalizeToolResult(
|
|
result *ToolResult,
|
|
toolName string,
|
|
store media.MediaStore,
|
|
channel string,
|
|
chatID string,
|
|
) *ToolResult {
|
|
if result == nil {
|
|
return nil
|
|
}
|
|
|
|
notes := make([]string, 0, 2)
|
|
seen := make(map[string]struct{})
|
|
|
|
if store != nil && channel != "" && chatID != "" {
|
|
var refs []string
|
|
var extractedNotes []string
|
|
|
|
result.ForLLM, refs, extractedNotes = extractInlineMediaRefs(
|
|
result.ForLLM,
|
|
toolName,
|
|
store,
|
|
channel,
|
|
chatID,
|
|
seen,
|
|
)
|
|
result.Media = append(result.Media, refs...)
|
|
notes = append(notes, extractedNotes...)
|
|
|
|
result.ForUser, refs, extractedNotes = extractInlineMediaRefs(
|
|
result.ForUser,
|
|
toolName,
|
|
store,
|
|
channel,
|
|
chatID,
|
|
seen,
|
|
)
|
|
result.Media = append(result.Media, refs...)
|
|
notes = append(notes, extractedNotes...)
|
|
}
|
|
|
|
result.ForLLM = sanitizeToolLLMContent(result.ForLLM)
|
|
|
|
if len(result.Media) > 0 && len(notes) > 0 {
|
|
if strings.TrimSpace(result.ForLLM) == "" {
|
|
result.ForLLM = strings.Join(notes, "\n")
|
|
} else {
|
|
result.ForLLM = strings.TrimSpace(result.ForLLM) + "\n" + strings.Join(notes, "\n")
|
|
}
|
|
}
|
|
if len(result.Media) > 0 && strings.TrimSpace(result.ForLLM) == "" {
|
|
result.ForLLM = "[Tool returned media content; omitted from model context and registered as a media attachment.]"
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func sanitizeToolLLMContent(text string) string {
|
|
trimmed := strings.TrimSpace(text)
|
|
if trimmed == "" {
|
|
return text
|
|
}
|
|
if inlineMarkdownDataURLRe.MatchString(trimmed) || inlineRawDataURLRe.MatchString(trimmed) {
|
|
cleaned := inlineMarkdownDataURLRe.ReplaceAllString(trimmed, "")
|
|
cleaned = inlineRawDataURLRe.ReplaceAllString(cleaned, "")
|
|
cleaned = strings.TrimSpace(cleaned)
|
|
if cleaned == "" {
|
|
return inlineMediaOmittedMessage
|
|
}
|
|
return cleaned + "\n" + inlineMediaOmittedMessage
|
|
}
|
|
if looksLikeLargeBase64Payload(trimmed) {
|
|
return largeBase64OmittedMessage
|
|
}
|
|
return text
|
|
}
|
|
|
|
// looksLikeLargeBase64Payload reports whether text, once trimmed, resembles a
// single large base64 blob. All of the following must hold:
//
//   - the trimmed text is at least 1024 bytes long;
//   - at least 97% of its non-whitespace runes are in the standard base64
//     alphabet (A-Z, a-z, 0-9, '+', '/', '=');
//   - it contains at most one whitespace rune per 128 bytes, which allows
//     line-wrapped base64 but rejects ordinary prose.
func looksLikeLargeBase64Payload(text string) bool {
	const minPayloadLen = 1024

	body := strings.TrimSpace(text)
	if len(body) < minPayloadLen {
		return false
	}

	var whitespace, visible, inAlphabet int
	for _, ch := range body {
		switch {
		case unicode.IsSpace(ch):
			whitespace++
		case ch >= 'A' && ch <= 'Z',
			ch >= 'a' && ch <= 'z',
			ch >= '0' && ch <= '9',
			ch == '+', ch == '/', ch == '=':
			visible++
			inAlphabet++
		default:
			visible++
		}
	}

	if visible == 0 {
		return false
	}
	if whitespace > len(body)/128 {
		return false
	}
	return float64(inAlphabet)/float64(visible) >= 0.97
}
|
|
|
|
func extractInlineMediaRefs(
|
|
text string,
|
|
toolName string,
|
|
store media.MediaStore,
|
|
channel string,
|
|
chatID string,
|
|
seen map[string]struct{},
|
|
) (cleaned string, refs []string, notes []string) {
|
|
cleaned = text
|
|
|
|
matches := inlineMarkdownDataURLRe.FindAllStringSubmatch(cleaned, -1)
|
|
for _, match := range matches {
|
|
if len(match) < 2 {
|
|
continue
|
|
}
|
|
dataURL := match[1]
|
|
ref, note := storeInlineDataURL(toolName, store, channel, chatID, dataURL, seen)
|
|
if ref != "" {
|
|
refs = append(refs, ref)
|
|
}
|
|
if note != "" {
|
|
notes = append(notes, note)
|
|
}
|
|
cleaned = strings.ReplaceAll(cleaned, match[0], "")
|
|
}
|
|
|
|
rawMatches := inlineRawDataURLRe.FindAllString(cleaned, -1)
|
|
for _, dataURL := range rawMatches {
|
|
ref, note := storeInlineDataURL(toolName, store, channel, chatID, dataURL, seen)
|
|
if ref != "" {
|
|
refs = append(refs, ref)
|
|
}
|
|
if note != "" {
|
|
notes = append(notes, note)
|
|
}
|
|
cleaned = strings.ReplaceAll(cleaned, dataURL, "")
|
|
}
|
|
|
|
return strings.TrimSpace(cleaned), refs, notes
|
|
}
|
|
|
|
func storeInlineDataURL(
|
|
toolName string,
|
|
store media.MediaStore,
|
|
channel string,
|
|
chatID string,
|
|
dataURL string,
|
|
seen map[string]struct{},
|
|
) (ref string, note string) {
|
|
dataURL = strings.TrimSpace(dataURL)
|
|
if _, ok := seen[dataURL]; ok {
|
|
return "", ""
|
|
}
|
|
seen[dataURL] = struct{}{}
|
|
|
|
if !strings.HasPrefix(strings.ToLower(dataURL), "data:") {
|
|
return "", ""
|
|
}
|
|
|
|
comma := strings.IndexByte(dataURL, ',')
|
|
if comma <= 5 {
|
|
return "", "[Tool returned inline media content that could not be parsed.]"
|
|
}
|
|
|
|
metaPart := dataURL[:comma]
|
|
payload := dataURL[comma+1:]
|
|
if !strings.Contains(strings.ToLower(metaPart), ";base64") {
|
|
return "", "[Tool returned inline media content that was not base64-encoded.]"
|
|
}
|
|
|
|
mimeType := strings.TrimSpace(strings.TrimPrefix(metaPart, "data:"))
|
|
if semi := strings.IndexByte(mimeType, ';'); semi >= 0 {
|
|
mimeType = mimeType[:semi]
|
|
}
|
|
if mimeType == "" {
|
|
mimeType = "application/octet-stream"
|
|
}
|
|
|
|
payload = strings.NewReplacer("\n", "", "\r", "", "\t", "", " ", "").Replace(payload)
|
|
decoded, err := base64.StdEncoding.DecodeString(payload)
|
|
if err != nil {
|
|
return "", fmt.Sprintf("[Tool returned inline media content (%s) that could not be decoded.]", mimeType)
|
|
}
|
|
|
|
dir := media.TempDir()
|
|
if err = os.MkdirAll(dir, 0o700); err != nil {
|
|
return "", fmt.Sprintf("[Tool returned inline media content (%s) but it could not be stored.]", mimeType)
|
|
}
|
|
|
|
ext := extensionForMIMEType(mimeType)
|
|
tmpFile, err := os.CreateTemp(dir, "tool-inline-*"+ext)
|
|
if err != nil {
|
|
return "", fmt.Sprintf("[Tool returned inline media content (%s) but it could not be stored.]", mimeType)
|
|
}
|
|
tmpPath := tmpFile.Name()
|
|
if _, err = tmpFile.Write(decoded); err != nil {
|
|
tmpFile.Close()
|
|
_ = os.Remove(tmpPath)
|
|
return "", fmt.Sprintf("[Tool returned inline media content (%s) but it could not be stored.]", mimeType)
|
|
}
|
|
if err = tmpFile.Close(); err != nil {
|
|
_ = os.Remove(tmpPath)
|
|
return "", fmt.Sprintf("[Tool returned inline media content (%s) but it could not be stored.]", mimeType)
|
|
}
|
|
|
|
filename := sanitizeIdentifierComponent(toolName) + ext
|
|
scope := fmt.Sprintf(
|
|
"tool:inline:%s:%s:%s:%d",
|
|
sanitizeIdentifierComponent(toolName),
|
|
channel,
|
|
chatID,
|
|
time.Now().UnixNano(),
|
|
)
|
|
|
|
ref, err = store.Store(tmpPath, media.MediaMeta{
|
|
Filename: filename,
|
|
ContentType: mimeType,
|
|
Source: fmt.Sprintf("tool:inline:%s", sanitizeIdentifierComponent(toolName)),
|
|
}, scope)
|
|
if err != nil {
|
|
_ = os.Remove(tmpPath)
|
|
return "", fmt.Sprintf("[Tool returned inline media content (%s) but it could not be registered.]", mimeType)
|
|
}
|
|
|
|
return ref, fmt.Sprintf(inlineMediaStoredMessage, mimeType)
|
|
}
|
|
|
|
// extensionForMIMEType returns a file extension, including the leading dot,
// for mimeType. It never returns an empty string.
//
// Lookup order:
//  1. A fixed table of common image/audio/video types. It is consulted first
//     so these types always map to their conventional extension;
//     mime.ExtensionsByType lists every extension it knows for a type, and
//     its first entry is often an unusual one.
//  2. The first extension reported by mime.ExtensionsByType.
//  3. Whatever follows the last '.' in the type itself, a rough guess for
//     unregistered types.
//  4. ".bin".
//
// The fixed table is matched case-insensitively and ignores surrounding
// whitespace and any ";parameter" suffix.
func extensionForMIMEType(mimeType string) string {
	const fallbackExt = ".bin"

	trimmed := strings.TrimSpace(mimeType)
	base := trimmed
	if semi := strings.IndexByte(base, ';'); semi >= 0 {
		base = base[:semi]
	}
	base = strings.ToLower(strings.TrimSpace(base))
	if base == "" {
		return fallbackExt
	}

	switch base {
	case "image/jpeg":
		return ".jpg"
	case "image/png":
		return ".png"
	case "image/gif":
		return ".gif"
	case "image/webp":
		return ".webp"
	case "audio/wav", "audio/x-wav":
		return ".wav"
	case "audio/mpeg":
		return ".mp3"
	case "audio/ogg":
		return ".ogg"
	case "video/mp4":
		return ".mp4"
	}

	if exts, err := mime.ExtensionsByType(trimmed); err == nil && len(exts) > 0 {
		return exts[0]
	}

	if ext := filepath.Ext(base); ext != "" {
		return ext
	}
	return fallbackExt
}
|