feat(web): download files on frontend (#2563)

* feat(web): download attachments in frontend

* fix: proxy pico media and force svg downloads

* feat(web): hide ephemeral media refs from persisted session history
This commit is contained in:
Mauro
2026-04-22 05:28:04 +02:00
committed by GitHub
parent 023ca2e4c1
commit 3316ee6923
18 changed files with 909 additions and 73 deletions
+19
View File
@@ -105,6 +105,25 @@ func buildArtifactTags(store media.MediaStore, refs []string) []string {
return tags
}
// buildProviderAttachments converts media refs into provider attachments.
//
// It returns nil when store is nil or refs is empty. Otherwise the result has
// one entry per ref, in the same order. Each entry always carries its Ref;
// Filename, ContentType and Type are filled in only when the store can
// resolve the ref, so an unresolvable ref still yields a Ref-only entry.
func buildProviderAttachments(store media.MediaStore, refs []string) []providers.Attachment {
	if len(refs) == 0 || store == nil {
		return nil
	}

	out := make([]providers.Attachment, len(refs))
	for i, ref := range refs {
		out[i].Ref = ref

		_, meta, err := store.ResolveWithMeta(ref)
		if err != nil {
			// Metadata is optional: keep the bare ref.
			continue
		}
		out[i].Filename = meta.Filename
		out[i].ContentType = meta.ContentType
		out[i].Type = inferMediaType(meta.Filename, meta.ContentType)
	}
	return out
}
// detectMIME determines the MIME type from metadata or magic-bytes detection.
// Returns empty string if detection fails.
func detectMIME(localPath string, meta media.MediaMeta) string {
+3
View File
@@ -1051,6 +1051,9 @@ func TestProcessMessage_MediaToolHandledSkipsFollowUpLLMAndFinalText(t *testing.
if last.Role != "assistant" || last.Content != "Requested output delivered via tool attachment." {
t.Fatalf("expected handled assistant summary in history, got %+v", last)
}
if len(last.Attachments) != 1 {
t.Fatalf("expected handled assistant summary attachments in history, got %+v", last.Attachments)
}
}
func TestProcessMessage_HandledToolProcessesQueuedSteeringBeforeReturning(t *testing.T) {
+15 -3
View File
@@ -33,6 +33,7 @@ func (p *Pipeline) ExecuteTools(
ts.setPhase(TurnPhaseTools)
messages := exec.messages
handledAttachments := make([]providers.Attachment, 0)
toolLoop:
for i, tc := range normalizedToolCalls {
@@ -144,6 +145,11 @@ toolLoop:
})
hookResult.IsError = true
hookResult.ForLLM = fmt.Sprintf("failed to deliver attachment: %v", err)
} else {
handledAttachments = append(
handledAttachments,
buildProviderAttachments(al.mediaStore, hookResult.Media)...,
)
}
} else if al.bus != nil {
al.bus.PublishOutboundMedia(ctx, outboundMedia)
@@ -503,6 +509,11 @@ toolLoop:
"error": err.Error(),
})
toolResult = tools.ErrorResult(fmt.Sprintf("failed to deliver attachment: %v", err)).WithError(err)
} else {
handledAttachments = append(
handledAttachments,
buildProviderAttachments(al.mediaStore, toolResult.Media)...,
)
}
} else if al.bus != nil {
al.bus.PublishOutboundMedia(ctx, outboundMedia)
@@ -656,11 +667,12 @@ toolLoop:
// No pending steering: finalize or break depending on allResponsesHandled
if exec.allResponsesHandled {
summaryMsg := providers.Message{
Role: "assistant",
Content: handledToolResponseSummary,
Role: "assistant",
Content: handledToolResponseSummary,
Attachments: append([]providers.Attachment(nil), handledAttachments...),
}
if !ts.opts.NoHistory {
ts.agent.Sessions.AddMessage(ts.sessionKey, summaryMsg.Role, summaryMsg.Content)
ts.agent.Sessions.AddFullMessage(ts.sessionKey, summaryMsg)
ts.recordPersistedMessage(summaryMsg)
ts.ingestMessage(turnCtx, al, summaryMsg)
if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
+208
View File
@@ -5,7 +5,11 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"mime"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
@@ -251,6 +255,10 @@ func (c *PicoChannel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
case "/ws", "/ws/":
c.handleWebSocket(w, r)
default:
if strings.HasPrefix(path, "/media/") {
c.handleMediaDownload(w, r)
return
}
http.NotFound(w, r)
}
}
@@ -317,6 +325,206 @@ func (c *PicoChannel) SendPlaceholder(ctx context.Context, chatID string) (strin
return msgID, nil
}
// SendMedia implements channels.MediaSender for the Pico web UI.
//
// Every part of msg that can be resolved in the media store is turned into a
// structured attachment (type, download URL, filename, content type). All
// attachments are then broadcast to the session named by msg.ChatID as one
// message-create event whose content is the first non-blank caption found
// among the delivered parts.
//
// Parts that cannot be resolved, or whose ref cannot be turned into a
// download URL, are logged and skipped. The call fails with
// channels.ErrNotRunning when the channel is stopped, and with an error
// wrapping channels.ErrSendFailed when no media store is configured or no
// part could be delivered. On success it returns the generated message ID.
func (c *PicoChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMessage) ([]string, error) {
	if !c.IsRunning() {
		return nil, channels.ErrNotRunning
	}

	mediaStore := c.GetMediaStore()
	if mediaStore == nil {
		return nil, fmt.Errorf("no media store available: %w", channels.ErrSendFailed)
	}

	var caption string
	entries := make([]map[string]any, 0, len(msg.Parts))

	for _, part := range msg.Parts {
		localPath, meta, resolveErr := mediaStore.ResolveWithMeta(part.Ref)
		if resolveErr != nil {
			logger.ErrorCF("pico", "Failed to resolve media ref", map[string]any{
				"ref":   part.Ref,
				"error": resolveErr.Error(),
			})
			continue
		}

		downloadURL, urlErr := picoDownloadURLForRef(part.Ref)
		if urlErr != nil {
			logger.ErrorCF("pico", "Failed to build media download URL", map[string]any{
				"ref":   part.Ref,
				"error": urlErr.Error(),
			})
			continue
		}

		// Filename precedence: explicit part value, stored metadata, then the
		// base name of the file on disk.
		name := strings.TrimSpace(part.Filename)
		if name == "" {
			if name = strings.TrimSpace(meta.Filename); name == "" {
				name = filepath.Base(localPath)
			}
		}

		// Content type precedence: explicit part value, stored metadata, then
		// a generic binary type.
		mimeType := strings.TrimSpace(part.ContentType)
		if mimeType == "" {
			if mimeType = strings.TrimSpace(meta.ContentType); mimeType == "" {
				mimeType = "application/octet-stream"
			}
		}

		kind := strings.TrimSpace(part.Type)
		if kind == "" {
			kind = picoInferAttachmentType(name, mimeType)
		}

		entries = append(entries, map[string]any{
			"type":         kind,
			"url":          downloadURL,
			"filename":     name,
			"content_type": mimeType,
		})

		// Only the first non-blank caption of a delivered part is kept.
		if caption == "" {
			caption = strings.TrimSpace(part.Caption)
		}
	}

	if len(entries) == 0 {
		return nil, fmt.Errorf("no deliverable media parts: %w", channels.ErrSendFailed)
	}

	msgID := uuid.New().String()
	payload := map[string]any{
		PayloadKeyContent: caption,
		"attachments":     entries,
		"message_id":      msgID,
	}
	if err := c.broadcastToSession(msg.ChatID, newMessage(TypeMessageCreate, payload)); err != nil {
		return nil, err
	}
	return []string{msgID}, nil
}
// picoDownloadURLForRef maps a media ref to the path of its Pico download
// endpoint, "/pico/media/<id>", with the ID path-escaped. It returns the
// validation error from picoMediaRefID when the ref is not acceptable.
func picoDownloadURLForRef(ref string) (string, error) {
	id, err := picoMediaRefID(ref)
	if err == nil {
		return "/pico/media/" + url.PathEscape(id), nil
	}
	return "", err
}
// picoMediaRefID extracts the bare identifier from a media ref by dropping a
// leading "media://" scheme and surrounding whitespace. An identifier that is
// empty or contains a "/" is rejected with an error quoting the original ref.
func picoMediaRefID(ref string) (string, error) {
	id := strings.TrimPrefix(ref, "media://")
	id = strings.TrimSpace(id)

	switch {
	case id == "", strings.Contains(id, "/"):
		return "", fmt.Errorf("invalid media ref %q", ref)
	}
	return id, nil
}
// picoInferAttachmentType classifies an attachment as "image", "audio",
// "video" or "file".
//
// A content type starting with image/, audio/ or video/ decides the result.
// Otherwise the filename extension is consulted, and anything unrecognized is
// reported as "file". Both inputs are compared case-insensitively with
// surrounding whitespace ignored.
func picoInferAttachmentType(filename, contentType string) string {
	contentType = strings.ToLower(strings.TrimSpace(contentType))
	filename = strings.ToLower(strings.TrimSpace(filename))

	switch {
	case strings.HasPrefix(contentType, "image/"):
		return "image"
	case strings.HasPrefix(contentType, "audio/"):
		return "audio"
	case strings.HasPrefix(contentType, "video/"):
		return "video"
	}

	switch ext := filepath.Ext(filename); ext {
	case ".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".svg":
		return "image"
	case ".mp3", ".wav", ".ogg", ".m4a", ".flac", ".aac", ".wma", ".opus":
		return "audio"
	case ".mp4", ".avi", ".mov", ".webm", ".mkv":
		return "video"
	default:
		return "file"
	}
}

// picoAllowsInlineDisplay reports whether a media file may be served with an
// inline Content-Disposition instead of being forced to download.
//
// Only non-SVG images qualify. The decision follows the content type the
// response will carry, not just the filename: a file named "x.png" whose
// stored content type is, say, text/html must not be shown inline, because
// the browser would render it as HTML on the application's origin. The
// filename extension is used only when the content type is empty or the
// generic application/octet-stream.
func picoAllowsInlineDisplay(filename, contentType string) bool {
	contentType = strings.ToLower(strings.TrimSpace(contentType))
	filename = strings.ToLower(strings.TrimSpace(filename))

	// SVG can embed script, so it is always downloaded.
	if strings.Contains(contentType, "svg") || filepath.Ext(filename) == ".svg" {
		return false
	}

	// Compare on the bare media type, without parameters such as charset.
	mediaType, _, _ := strings.Cut(contentType, ";")
	mediaType = strings.TrimSpace(mediaType)

	switch {
	case strings.HasPrefix(mediaType, "image/"):
		return true
	case mediaType == "", mediaType == "application/octet-stream":
		// No specific type is declared; fall back to the extension.
		return picoInferAttachmentType(filename, contentType) == "image"
	default:
		// A declared non-image type wins over an image-looking filename.
		return false
	}
}
// handleMediaDownload serves a stored media file for requests under
// /pico/media/<id>.
//
// The request must be authenticated and the channel must be running. The ID
// from the URL is resolved through the media store as "media://<id>". The
// file is streamed with http.ServeContent using the stored content type, or
// application/octet-stream when none is recorded.
//
// Content-Disposition is "inline" only when picoAllowsInlineDisplay approves
// the file; everything else is sent as "attachment" so the browser downloads
// it instead of rendering it.
//
// Responses: 503 when the channel or the media store is unavailable, 401 when
// authentication fails, 404 for a missing, malformed or unknown ID (or when
// the ref resolves to a directory), 500 when the file cannot be opened or
// inspected.
func (c *PicoChannel) handleMediaDownload(w http.ResponseWriter, r *http.Request) {
	if !c.IsRunning() {
		http.Error(w, "channel not running", http.StatusServiceUnavailable)
		return
	}
	if !c.authenticate(r) {
		http.Error(w, "unauthorized", http.StatusUnauthorized)
		return
	}

	refID := strings.TrimSpace(strings.TrimPrefix(strings.TrimPrefix(r.URL.Path, "/pico/media/"), "/"))
	// picoMediaRefID never produces IDs containing "/", so such a path cannot
	// name a valid download; reject it before touching the store.
	if refID == "" || strings.Contains(refID, "/") {
		http.NotFound(w, r)
		return
	}

	store := c.GetMediaStore()
	if store == nil {
		http.Error(w, "media store unavailable", http.StatusServiceUnavailable)
		return
	}

	localPath, meta, err := store.ResolveWithMeta("media://" + refID)
	if err != nil {
		http.NotFound(w, r)
		return
	}

	file, err := os.Open(localPath)
	if err != nil {
		http.Error(w, "failed to open media", http.StatusInternalServerError)
		return
	}
	defer file.Close()

	info, err := file.Stat()
	if err != nil {
		http.Error(w, "failed to stat media", http.StatusInternalServerError)
		return
	}
	// os.Open succeeds on directories, but there is no file content to serve.
	if info.IsDir() {
		http.NotFound(w, r)
		return
	}

	filename := strings.TrimSpace(meta.Filename)
	if filename == "" {
		filename = filepath.Base(localPath)
	}
	contentType := strings.TrimSpace(meta.ContentType)
	if contentType == "" {
		contentType = "application/octet-stream"
	}

	dispositionType := "attachment"
	if picoAllowsInlineDisplay(filename, contentType) {
		dispositionType = "inline"
	}
	cd := mime.FormatMediaType(dispositionType, map[string]string{"filename": filename})
	if cd == "" {
		// FormatMediaType returns "" when it cannot encode the parameters.
		// Still send the bare disposition so a forced download is not lost.
		cd = dispositionType
	}
	w.Header().Set("Content-Disposition", cd)
	w.Header().Set("Content-Type", contentType)

	http.ServeContent(w, r, filename, info.ModTime(), file)
}
// broadcastToSession sends a message to all connections with a matching session.
func (c *PicoChannel) broadcastToSession(chatID string, msg PicoMessage) error {
// chatID format: "pico:<sessionID>"
+97
View File
@@ -4,12 +4,17 @@ import (
"context"
"errors"
"fmt"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"sync"
"testing"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/channels"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/media"
)
func newTestPicoChannel(t *testing.T) *PicoChannel {
@@ -123,6 +128,98 @@ func TestBroadcastToSession_TargetsOnlyRequestedSession(t *testing.T) {
}
}
// TestSendMedia_ResolvesMediaBeforeDelivery stores a real file in the media
// store, registers only an already-closed connection for the target session,
// and expects SendMedia to report channels.ErrSendFailed.
func TestSendMedia_ResolvesMediaBeforeDelivery(t *testing.T) {
	ctx := context.Background()

	ch := newTestPicoChannel(t)
	store := media.NewFileMediaStore()
	ch.SetMediaStore(store)
	if err := ch.Start(ctx); err != nil {
		t.Fatalf("Start() error = %v", err)
	}
	defer ch.Stop(ctx)

	// Back the ref with a real file so that resolution succeeds.
	path := filepath.Join(t.TempDir(), "report.txt")
	if err := os.WriteFile(path, []byte("attachment body"), 0o600); err != nil {
		t.Fatalf("WriteFile() error = %v", err)
	}
	meta := media.MediaMeta{
		Filename:    "report.txt",
		ContentType: "text/plain",
	}
	ref, err := store.Store(path, meta, "test-scope")
	if err != nil {
		t.Fatalf("Store() error = %v", err)
	}

	// The session's only connection is marked closed.
	conn := &picoConn{id: "closed", sessionID: "sess-1"}
	conn.closed.Store(true)
	ch.addConnForTest(conn)

	part := bus.MediaPart{
		Ref:         ref,
		Type:        "file",
		Filename:    "report.txt",
		ContentType: "text/plain",
	}
	_, err = ch.SendMedia(ctx, bus.OutboundMediaMessage{
		ChatID: "pico:sess-1",
		Parts:  []bus.MediaPart{part},
	})
	if !errors.Is(err, channels.ErrSendFailed) {
		t.Fatalf("SendMedia() error = %v, want ErrSendFailed", err)
	}
}
// TestPicoDownloadURLForRef checks that a "media://" ref maps to its
// "/pico/media/<id>" download path.
func TestPicoDownloadURLForRef(t *testing.T) {
	const want = "/pico/media/attachment-1"

	got, err := picoDownloadURLForRef("media://attachment-1")
	if err != nil {
		t.Fatalf("picoDownloadURLForRef() error = %v", err)
	}
	if got != want {
		t.Fatalf("picoDownloadURLForRef() = %q, want %q", got, want)
	}
}
// TestHandleMediaDownload_ServesStoredFile stores a text file, requests it
// through ServeHTTP at /pico/media/<id> with a bearer token, and verifies the
// status code, the body and the Content-Type of the response.
func TestHandleMediaDownload_ServesStoredFile(t *testing.T) {
	const (
		wantBody = "downloadable"
		wantType = "text/plain"
	)
	ctx := context.Background()

	ch := newTestPicoChannel(t)
	store := media.NewFileMediaStore()
	ch.SetMediaStore(store)
	if err := ch.Start(ctx); err != nil {
		t.Fatalf("Start() error = %v", err)
	}
	defer ch.Stop(ctx)

	path := filepath.Join(t.TempDir(), "report.txt")
	if err := os.WriteFile(path, []byte("downloadable"), 0o600); err != nil {
		t.Fatalf("WriteFile() error = %v", err)
	}
	meta := media.MediaMeta{
		Filename:    "report.txt",
		ContentType: "text/plain",
	}
	ref, err := store.Store(path, meta, "test-scope")
	if err != nil {
		t.Fatalf("Store() error = %v", err)
	}

	// The download endpoint addresses files by the ref without its scheme.
	target := "/pico/media/" + strings.TrimPrefix(ref, "media://")
	req := httptest.NewRequest("GET", target, nil)
	req.Header.Set("Authorization", "Bearer test-token")
	rec := httptest.NewRecorder()

	ch.ServeHTTP(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status = %d, want 200", rec.Code)
	}
	if body := rec.Body.String(); body != wantBody {
		t.Fatalf("body = %q, want %q", body, wantBody)
	}
	if got := rec.Header().Get("Content-Type"); got != wantType {
		t.Fatalf("Content-Type = %q, want %q", got, wantType)
	}
}
func (c *PicoChannel) addConnForTest(pc *picoConn) {
c.connsMu.Lock()
defer c.connsMu.Unlock()
+9
View File
@@ -62,10 +62,19 @@ type ContentBlock struct {
CacheControl *CacheControl `json:"cache_control,omitempty"`
}
// Attachment describes a file associated with a Message (see
// Message.Attachments). Every field is optional and is left out of the JSON
// encoding when empty.
type Attachment struct {
// Type is a coarse media category for the attachment.
// NOTE(review): presumably one of "image", "audio", "video" or "file" — confirm against the producers.
Type string `json:"type,omitempty"`
// Ref is the media store reference the attachment was built from.
Ref string `json:"ref,omitempty"`
// URL is a location the attachment can be fetched from.
// NOTE(review): who populates this is not visible here — confirm.
URL string `json:"url,omitempty"`
// Filename is the attachment's file name.
Filename string `json:"filename,omitempty"`
// ContentType is the attachment's MIME type.
ContentType string `json:"content_type,omitempty"`
}
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
Media []string `json:"media,omitempty"`
Attachments []Attachment `json:"attachments,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
SystemParts []ContentBlock `json:"system_parts,omitempty"` // structured system blocks for cache-aware adapters
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
+1
View File
@@ -19,6 +19,7 @@ type (
GoogleExtra = protocoltypes.GoogleExtra
ContentBlock = protocoltypes.ContentBlock
CacheControl = protocoltypes.CacheControl
Attachment = protocoltypes.Attachment
)
type LLMProvider interface {