diff --git a/pkg/agent/agent_media.go b/pkg/agent/agent_media.go index e8314c10d..a773d2ebb 100644 --- a/pkg/agent/agent_media.go +++ b/pkg/agent/agent_media.go @@ -105,6 +105,25 @@ func buildArtifactTags(store media.MediaStore, refs []string) []string { return tags } +func buildProviderAttachments(store media.MediaStore, refs []string) []providers.Attachment { + if store == nil || len(refs) == 0 { + return nil + } + + attachments := make([]providers.Attachment, 0, len(refs)) + for _, ref := range refs { + attachment := providers.Attachment{Ref: ref} + if _, meta, err := store.ResolveWithMeta(ref); err == nil { + attachment.Filename = meta.Filename + attachment.ContentType = meta.ContentType + attachment.Type = inferMediaType(meta.Filename, meta.ContentType) + } + attachments = append(attachments, attachment) + } + + return attachments +} + // detectMIME determines the MIME type from metadata or magic-bytes detection. // Returns empty string if detection fails. func detectMIME(localPath string, meta media.MediaMeta) string { diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index 5cdac186c..61c8afa37 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -1051,6 +1051,9 @@ func TestProcessMessage_MediaToolHandledSkipsFollowUpLLMAndFinalText(t *testing. if last.Role != "assistant" || last.Content != "Requested output delivered via tool attachment." { t.Fatalf("expected handled assistant summary in history, got %+v", last) } + if len(last.Attachments) != 1 { + t.Fatalf("expected handled assistant summary attachments in history, got %+v", last.Attachments) + } } func TestProcessMessage_HandledToolProcessesQueuedSteeringBeforeReturning(t *testing.T) { diff --git a/pkg/agent/pipeline_execute.go b/pkg/agent/pipeline_execute.go index 76ada0e64..87254619c 100644 --- a/pkg/agent/pipeline_execute.go +++ b/pkg/agent/pipeline_execute.go @@ -33,6 +33,7 @@ func (p *Pipeline) ExecuteTools( ts.setPhase(TurnPhaseTools) messages := exec.messages + handledAttachments := make([]providers.Attachment, 0) toolLoop: for i, tc := range normalizedToolCalls { @@ -144,6 +145,11 @@ toolLoop: }) hookResult.IsError = true hookResult.ForLLM = fmt.Sprintf("failed to deliver attachment: %v", err) + } else { + handledAttachments = append( + handledAttachments, + buildProviderAttachments(al.mediaStore, hookResult.Media)..., + ) } } else if al.bus != nil { al.bus.PublishOutboundMedia(ctx, outboundMedia) @@ -503,6 +509,11 @@ toolLoop: "error": err.Error(), }) toolResult = tools.ErrorResult(fmt.Sprintf("failed to deliver attachment: %v", err)).WithError(err) + } else { + handledAttachments = append( + handledAttachments, + buildProviderAttachments(al.mediaStore, toolResult.Media)..., + ) } } else if al.bus != nil { al.bus.PublishOutboundMedia(ctx, outboundMedia) @@ -656,11 +667,12 @@ toolLoop: // No pending steering: finalize or break depending on allResponsesHandled if exec.allResponsesHandled { summaryMsg := providers.Message{ - Role: "assistant", - Content: handledToolResponseSummary, + Role: "assistant", + Content: handledToolResponseSummary, + Attachments: append([]providers.Attachment(nil), handledAttachments...), } if !ts.opts.NoHistory { - ts.agent.Sessions.AddMessage(ts.sessionKey, summaryMsg.Role, summaryMsg.Content) + ts.agent.Sessions.AddFullMessage(ts.sessionKey, summaryMsg) ts.recordPersistedMessage(summaryMsg) ts.ingestMessage(turnCtx, al, summaryMsg) if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil { diff --git a/pkg/channels/pico/pico.go b/pkg/channels/pico/pico.go index 8b41023f0..4d1fad1ed 100644 --- a/pkg/channels/pico/pico.go +++ b/pkg/channels/pico/pico.go @@ -5,7 +5,11 @@ import ( "encoding/base64" "encoding/json" "fmt" + "mime" "net/http" + "net/url" + "os" + "path/filepath" "strings" "sync" "sync/atomic" @@ -251,6 +255,10 @@ func (c *PicoChannel) ServeHTTP(w http.ResponseWriter, r *http.Request) { case "/ws", "/ws/": c.handleWebSocket(w, r) default: + if strings.HasPrefix(path, "/media/") { + c.handleMediaDownload(w, r) + return + } http.NotFound(w, r) } } @@ -317,6 +325,206 @@ func (c *PicoChannel) SendPlaceholder(ctx context.Context, chatID string) (strin return msgID, nil } +// SendMedia implements channels.MediaSender for the Pico web UI. +// Media is delivered as a normal assistant message carrying structured +// attachments plus an authenticated same-origin download URL. +func (c *PicoChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMessage) ([]string, error) { + if !c.IsRunning() { + return nil, channels.ErrNotRunning + } + + store := c.GetMediaStore() + if store == nil { + return nil, fmt.Errorf("no media store available: %w", channels.ErrSendFailed) + } + + attachments := make([]map[string]any, 0, len(msg.Parts)) + caption := "" + + for _, part := range msg.Parts { + localPath, meta, err := store.ResolveWithMeta(part.Ref) + if err != nil { + logger.ErrorCF("pico", "Failed to resolve media ref", map[string]any{ + "ref": part.Ref, + "error": err.Error(), + }) + continue + } + + filename := strings.TrimSpace(part.Filename) + if filename == "" { + filename = strings.TrimSpace(meta.Filename) + } + if filename == "" { + filename = filepath.Base(localPath) + } + + contentType := strings.TrimSpace(part.ContentType) + if contentType == "" { + contentType = strings.TrimSpace(meta.ContentType) + } + if contentType == "" { + contentType = "application/octet-stream" + } + + attachmentType := strings.TrimSpace(part.Type) + if attachmentType == "" { + attachmentType = picoInferAttachmentType(filename, contentType) + } + + attachmentURL, err := picoDownloadURLForRef(part.Ref) + if err != nil { + logger.ErrorCF("pico", "Failed to build media download URL", map[string]any{ + "ref": part.Ref, + "error": err.Error(), + }) + continue + } + + attachments = append(attachments, map[string]any{ + "type": attachmentType, + "url": attachmentURL, + "filename": filename, + "content_type": contentType, + }) + + if caption == "" && strings.TrimSpace(part.Caption) != "" { + caption = strings.TrimSpace(part.Caption) + } + } + + if len(attachments) == 0 { + return nil, fmt.Errorf("no deliverable media parts: %w", channels.ErrSendFailed) + } + + msgID := uuid.New().String() + outMsg := newMessage(TypeMessageCreate, map[string]any{ + PayloadKeyContent: caption, + "attachments": attachments, + "message_id": msgID, + }) + + if err := c.broadcastToSession(msg.ChatID, outMsg); err != nil { + return nil, err + } + + return []string{msgID}, nil +} + +func picoDownloadURLForRef(ref string) (string, error) { + refID, err := picoMediaRefID(ref) + if err != nil { + return "", err + } + return "/pico/media/" + url.PathEscape(refID), nil +} + +func picoMediaRefID(ref string) (string, error) { + refID := strings.TrimSpace(strings.TrimPrefix(ref, "media://")) + if refID == "" || strings.Contains(refID, "/") { + return "", fmt.Errorf("invalid media ref %q", ref) + } + return refID, nil +} + +func picoInferAttachmentType(filename, contentType string) string { + contentType = strings.ToLower(strings.TrimSpace(contentType)) + filename = strings.ToLower(strings.TrimSpace(filename)) + + switch { + case strings.HasPrefix(contentType, "image/"): + return "image" + case strings.HasPrefix(contentType, "audio/"): + return "audio" + case strings.HasPrefix(contentType, "video/"): + return "video" + } + + switch ext := filepath.Ext(filename); ext { + case ".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".svg": + return "image" + case ".mp3", ".wav", ".ogg", ".m4a", ".flac", ".aac", ".wma", ".opus": + return "audio" + case ".mp4", ".avi", ".mov", ".webm", ".mkv": + return "video" + default: + return "file" + } +} + +func picoAllowsInlineDisplay(filename, contentType string) bool { + contentType = strings.ToLower(strings.TrimSpace(contentType)) + filename = strings.ToLower(strings.TrimSpace(filename)) + + if strings.Contains(contentType, "svg") || filepath.Ext(filename) == ".svg" { + return false + } + + return picoInferAttachmentType(filename, contentType) == "image" +} + +func (c *PicoChannel) handleMediaDownload(w http.ResponseWriter, r *http.Request) { + if !c.IsRunning() { + http.Error(w, "channel not running", http.StatusServiceUnavailable) + return + } + if !c.authenticate(r) { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + + refID := strings.TrimSpace(strings.TrimPrefix(strings.TrimPrefix(r.URL.Path, "/pico/media/"), "/")) + if refID == "" { + http.NotFound(w, r) + return + } + + store := c.GetMediaStore() + if store == nil { + http.Error(w, "media store unavailable", http.StatusServiceUnavailable) + return + } + + localPath, meta, err := store.ResolveWithMeta("media://" + refID) + if err != nil { + http.NotFound(w, r) + return + } + + file, err := os.Open(localPath) + if err != nil { + http.Error(w, "failed to open media", http.StatusInternalServerError) + return + } + defer file.Close() + + info, err := file.Stat() + if err != nil { + http.Error(w, "failed to stat media", http.StatusInternalServerError) + return + } + + filename := strings.TrimSpace(meta.Filename) + if filename == "" { + filename = filepath.Base(localPath) + } + contentType := strings.TrimSpace(meta.ContentType) + if contentType == "" { + contentType = "application/octet-stream" + } + + dispositionType := "attachment" + if picoAllowsInlineDisplay(filename, contentType) { + dispositionType = "inline" + } + + if cd := mime.FormatMediaType(dispositionType, map[string]string{"filename": filename}); cd != "" { + w.Header().Set("Content-Disposition", cd) + } + w.Header().Set("Content-Type", contentType) + http.ServeContent(w, r, filename, info.ModTime(), file) +} + // broadcastToSession sends a message to all connections with a matching session. func (c *PicoChannel) broadcastToSession(chatID string, msg PicoMessage) error { // chatID format: "pico:" diff --git a/pkg/channels/pico/pico_test.go b/pkg/channels/pico/pico_test.go index 59db705eb..f0d179527 100644 --- a/pkg/channels/pico/pico_test.go +++ b/pkg/channels/pico/pico_test.go @@ -4,12 +4,17 @@ import ( "context" "errors" "fmt" + "net/http/httptest" + "os" + "path/filepath" + "strings" "sync" "testing" "github.com/sipeed/picoclaw/pkg/bus" "github.com/sipeed/picoclaw/pkg/channels" "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/media" ) func newTestPicoChannel(t *testing.T) *PicoChannel { @@ -123,6 +128,98 @@ func TestBroadcastToSession_TargetsOnlyRequestedSession(t *testing.T) { } } +func TestSendMedia_ResolvesMediaBeforeDelivery(t *testing.T) { + ch := newTestPicoChannel(t) + store := media.NewFileMediaStore() + ch.SetMediaStore(store) + + if err := ch.Start(context.Background()); err != nil { + t.Fatalf("Start() error = %v", err) + } + defer ch.Stop(context.Background()) + + localPath := filepath.Join(t.TempDir(), "report.txt") + if err := os.WriteFile(localPath, []byte("attachment body"), 0o600); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + ref, err := store.Store(localPath, media.MediaMeta{ + Filename: "report.txt", + ContentType: "text/plain", + }, "test-scope") + if err != nil { + t.Fatalf("Store() error = %v", err) + } + + closedConn := &picoConn{id: "closed", sessionID: "sess-1"} + closedConn.closed.Store(true) + ch.addConnForTest(closedConn) + + _, err = ch.SendMedia(context.Background(), bus.OutboundMediaMessage{ + ChatID: "pico:sess-1", + Parts: []bus.MediaPart{{ + Ref: ref, + Type: "file", + Filename: "report.txt", + ContentType: "text/plain", + }}, + }) + if !errors.Is(err, channels.ErrSendFailed) { + t.Fatalf("SendMedia() error = %v, want ErrSendFailed", err) + } +} + +func TestPicoDownloadURLForRef(t *testing.T) { + got, err := picoDownloadURLForRef("media://attachment-1") + if err != nil { + t.Fatalf("picoDownloadURLForRef() error = %v", err) + } + if got != "/pico/media/attachment-1" { + t.Fatalf("picoDownloadURLForRef() = %q, want %q", got, "/pico/media/attachment-1") + } +} + +func TestHandleMediaDownload_ServesStoredFile(t *testing.T) { + ch := newTestPicoChannel(t) + store := media.NewFileMediaStore() + ch.SetMediaStore(store) + + if err := ch.Start(context.Background()); err != nil { + t.Fatalf("Start() error = %v", err) + } + defer ch.Stop(context.Background()) + + localPath := filepath.Join(t.TempDir(), "report.txt") + if err := os.WriteFile(localPath, []byte("downloadable"), 0o600); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + ref, err := store.Store(localPath, media.MediaMeta{ + Filename: "report.txt", + ContentType: "text/plain", + }, "test-scope") + if err != nil { + t.Fatalf("Store() error = %v", err) + } + + refID := strings.TrimPrefix(ref, "media://") + req := httptest.NewRequest("GET", "/pico/media/"+refID, nil) + req.Header.Set("Authorization", "Bearer test-token") + rec := httptest.NewRecorder() + + ch.ServeHTTP(rec, req) + + if rec.Code != 200 { + t.Fatalf("status = %d, want 200", rec.Code) + } + if body := rec.Body.String(); body != "downloadable" { + t.Fatalf("body = %q, want %q", body, "downloadable") + } + if got := rec.Header().Get("Content-Type"); got != "text/plain" { + t.Fatalf("Content-Type = %q, want %q", got, "text/plain") + } +} + func (c *PicoChannel) addConnForTest(pc *picoConn) { c.connsMu.Lock() defer c.connsMu.Unlock() diff --git a/pkg/providers/protocoltypes/types.go b/pkg/providers/protocoltypes/types.go index 194c1aa6f..89f68928a 100644 --- a/pkg/providers/protocoltypes/types.go +++ b/pkg/providers/protocoltypes/types.go @@ -62,10 +62,19 @@ type ContentBlock struct { CacheControl *CacheControl `json:"cache_control,omitempty"` } +type Attachment struct { + Type string `json:"type,omitempty"` + Ref string `json:"ref,omitempty"` + URL string `json:"url,omitempty"` + Filename string `json:"filename,omitempty"` + ContentType string `json:"content_type,omitempty"` +} + type Message struct { Role string `json:"role"` Content string `json:"content"` Media []string `json:"media,omitempty"` + Attachments []Attachment `json:"attachments,omitempty"` ReasoningContent string `json:"reasoning_content,omitempty"` SystemParts []ContentBlock `json:"system_parts,omitempty"` // structured system blocks for cache-aware adapters ToolCalls []ToolCall `json:"tool_calls,omitempty"` diff --git a/pkg/providers/types.go b/pkg/providers/types.go index fae252d13..23406bc45 100644 --- a/pkg/providers/types.go +++ b/pkg/providers/types.go @@ -19,6 +19,7 @@ type ( GoogleExtra = protocoltypes.GoogleExtra ContentBlock = protocoltypes.ContentBlock CacheControl = protocoltypes.CacheControl + Attachment = protocoltypes.Attachment ) type LLMProvider interface { diff --git a/web/backend/api/pico.go b/web/backend/api/pico.go index ffd0796c7..66b0bb92d 100644 --- a/web/backend/api/pico.go +++ b/web/backend/api/pico.go @@ -24,6 +24,8 @@ func (h *Handler) registerPicoRoutes(mux *http.ServeMux) { // This allows the frontend to connect via the same port as the web UI, // avoiding the need to expose extra ports for WebSocket communication. mux.HandleFunc("GET /pico/ws", h.handleWebSocketProxy()) + mux.HandleFunc("GET /pico/media/{id}", h.handlePicoMediaProxy()) + mux.HandleFunc("HEAD /pico/media/{id}", h.handlePicoMediaProxy()) } // createWsProxy creates a reverse proxy to the current gateway WebSocket endpoint. @@ -55,6 +57,53 @@ func (h *Handler) createWsProxy(origProtocol string, upstreamProtocol string) *h return wsProxy } +func (h *Handler) createPicoHTTPProxy(token string) *httputil.ReverseProxy { + return &httputil.ReverseProxy{ + Rewrite: func(r *httputil.ProxyRequest) { + target := h.gatewayProxyURL() + r.SetURL(target) + r.Out.Header.Set("Authorization", "Bearer "+token) + }, + ErrorHandler: func(w http.ResponseWriter, r *http.Request, err error) { + logger.Errorf("Failed to proxy Pico HTTP request: %v", err) + http.Error(w, "Gateway unavailable: "+err.Error(), http.StatusBadGateway) + }, + } +} + +func (h *Handler) gatewayAvailableForProxy() bool { + gateway.mu.Lock() + ensurePicoTokenCachedLocked(h.configPath) + cachedPID := gateway.pidData + trackedCmd := gateway.cmd + gateway.mu.Unlock() + + if pidData := h.sanitizeGatewayPidData(ppid.ReadPidFileWithCheck(globalConfigDir()), nil); pidData != nil { + gateway.mu.Lock() + gateway.pidData = pidData + setGatewayRuntimeStatusLocked("running") + gateway.mu.Unlock() + return true + } + + if cachedPID == nil { + return false + } + + if isCmdProcessAliveLocked(trackedCmd) { + return true + } + + gateway.mu.Lock() + if gateway.cmd == trackedCmd { + gateway.pidData = nil + setGatewayRuntimeStatusLocked("stopped") + } + available := gateway.pidData != nil + gateway.mu.Unlock() + return available +} + func decodePicoSettings(cfg *config.Config) (config.PicoSettings, bool) { if cfg == nil { return config.PicoSettings{}, false @@ -101,37 +150,7 @@ func (h *Handler) writePicoInfoResponse( // on the upstream gateway request. func (h *Handler) handleWebSocketProxy() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - gateway.mu.Lock() - ensurePicoTokenCachedLocked(h.configPath) - cachedPID := gateway.pidData - trackedCmd := gateway.cmd - gateway.mu.Unlock() - - gatewayAvailable := false - // Prefer fresh PID file data when available. - if pidData := h.sanitizeGatewayPidData(ppid.ReadPidFileWithCheck(globalConfigDir()), nil); pidData != nil { - gateway.mu.Lock() - gateway.pidData = pidData - setGatewayRuntimeStatusLocked("running") - gatewayAvailable = true - gateway.mu.Unlock() - } else if cachedPID != nil { - // No PID file now: keep availability only while tracked process is - // still alive (covers short PID-file races at startup/restart). - if isCmdProcessAliveLocked(trackedCmd) { - gatewayAvailable = true - } else { - gateway.mu.Lock() - if gateway.cmd == trackedCmd { - gateway.pidData = nil - setGatewayRuntimeStatusLocked("stopped") - } - gatewayAvailable = gateway.pidData != nil - gateway.mu.Unlock() - } - } - - if !gatewayAvailable { + if !h.gatewayAvailableForProxy() { logger.Warnf("Gateway not available for WebSocket proxy") http.Error(w, "Gateway not available", http.StatusServiceUnavailable) return @@ -153,6 +172,29 @@ func (h *Handler) handleWebSocketProxy() http.HandlerFunc { } } +func (h *Handler) handlePicoMediaProxy() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if !h.gatewayAvailableForProxy() { + logger.Warnf("Gateway not available for Pico media proxy") + http.Error(w, "Gateway not available", http.StatusServiceUnavailable) + return + } + + gateway.mu.Lock() + uiToken := gateway.picoToken + gateway.mu.Unlock() + + token := tokenPrefix + uiToken + if token == "" { + logger.Warnf("Missing Pico token for media proxy") + http.Error(w, "Invalid Pico token", http.StatusForbidden) + return + } + + h.createPicoHTTPProxy(token).ServeHTTP(w, r) + } +} + // handleGetPicoInfo returns non-secret Pico connection info for the launcher UI. // // GET /api/pico/info diff --git a/web/backend/api/pico_test.go b/web/backend/api/pico_test.go index a56cd9ba2..6bdf0c6ca 100644 --- a/web/backend/api/pico_test.go +++ b/web/backend/api/pico_test.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "testing" "github.com/sipeed/picoclaw/pkg/config" @@ -649,6 +650,54 @@ func TestHandleWebSocketProxyLoadsPidDataOnDemand(t *testing.T) { } } +func TestCreatePicoHTTPProxyInjectsGatewayAuth(t *testing.T) { + configPath := filepath.Join(t.TempDir(), "config.json") + h := NewHandler(configPath) + + cfg := config.DefaultConfig() + cfg.Gateway.Host = "127.0.0.1" + cfg.Gateway.Port = 18790 + bc := cfg.Channels["pico"] + bc.Enabled = true + decoded, err := bc.GetDecoded() + if err != nil { + t.Fatalf("GetDecoded() error = %v", err) + } + decoded.(*config.PicoSettings).SetToken("ui-token") + if err := config.SaveConfig(configPath, cfg); err != nil { + t.Fatalf("SaveConfig() error = %v", err) + } + + proxy := h.createPicoHTTPProxy(tokenPrefix + "test-token" + "ui-token") + var capturedPath string + var capturedAuth string + proxy.Transport = roundTripFunc(func(req *http.Request) (*http.Response, error) { + capturedPath = req.URL.Path + capturedAuth = req.Header.Get("Authorization") + return &http.Response{ + StatusCode: http.StatusOK, + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader("proxied")), + Request: req, + }, nil + }) + + req := httptest.NewRequest(http.MethodGet, "/pico/media/attachment-1", nil) + rec := httptest.NewRecorder() + proxy.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK) + } + if capturedPath != "/pico/media/attachment-1" { + t.Fatalf("capturedPath = %q, want %q", capturedPath, "/pico/media/attachment-1") + } + expected := "Bearer " + tokenPrefix + "test-token" + "ui-token" + if capturedAuth != expected { + t.Fatalf("Authorization = %q, want %q", capturedAuth, expected) + } +} + func TestHandleWebSocketProxyRejectsStalePidDataAfterProcessExit(t *testing.T) { tmpDir := t.TempDir() t.Setenv("HOME", tmpDir) @@ -797,3 +846,9 @@ func mustGatewayTestPort(t *testing.T, rawURL string) int { return port } + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return fn(req) +} diff --git a/web/backend/api/session.go b/web/backend/api/session.go index 0143f5737..0483b57cc 100644 --- a/web/backend/api/session.go +++ b/web/backend/api/session.go @@ -46,9 +46,17 @@ type sessionListItem struct { } type sessionChatMessage struct { - Role string `json:"role"` - Content string `json:"content"` - Media []string `json:"media,omitempty"` + Role string `json:"role"` + Content string `json:"content"` + Media []string `json:"media,omitempty"` + Attachments []sessionChatAttachment `json:"attachments,omitempty"` +} + +type sessionChatAttachment struct { + Type string `json:"type,omitempty"` + URL string `json:"url,omitempty"` + Filename string `json:"filename,omitempty"` + ContentType string `json:"content_type,omitempty"` } // legacyPicoSessionPrefix is the legacy key prefix used by older Pico JSON/JSONL @@ -398,10 +406,12 @@ func (h *Handler) findLegacyPicoSession(dir, sessionID string) (picoLegacySessio } func buildSessionListItem(sessionID string, sess sessionFile, toolFeedbackMaxArgsLength int) sessionListItem { + transcript := visibleSessionMessages(sess.Messages, toolFeedbackMaxArgsLength) + preview := "" - for _, msg := range sess.Messages { + for _, msg := range transcript { if msg.Role == "user" { - preview = sessionMessagePreview(msg) + preview = sessionChatMessagePreview(msg) } if preview != "" { break @@ -414,13 +424,11 @@ func buildSessionListItem(sessionID string, sess sessionFile, toolFeedbackMaxArg } title := preview - validMessageCount := len(visibleSessionMessages(sess.Messages, toolFeedbackMaxArgsLength)) - return sessionListItem{ ID: sessionID, Title: title, Preview: preview, - MessageCount: validMessageCount, + MessageCount: len(transcript), Created: sess.Created.Format(time.RFC3339), Updated: sess.Updated.Format(time.RFC3339), } @@ -441,16 +449,25 @@ func truncateRunes(s string, maxLen int) string { return string(runes[:maxLen]) + "..." } -func sessionMessageVisible(msg providers.Message) bool { - return strings.TrimSpace(msg.Content) != "" || len(msg.Media) > 0 +func sessionChatMessageVisible(msg sessionChatMessage) bool { + return strings.TrimSpace(msg.Content) != "" || len(msg.Media) > 0 || len(msg.Attachments) > 0 } -func sessionMessagePreview(msg providers.Message) string { +func sessionChatMessagePreview(msg sessionChatMessage) string { if content := strings.TrimSpace(msg.Content); content != "" { return content } + if len(msg.Attachments) > 0 { + if strings.EqualFold(strings.TrimSpace(msg.Attachments[0].Type), "image") { + return "[image]" + } + return "[attachment]" + } if len(msg.Media) > 0 { - return "[image]" + if strings.HasPrefix(strings.TrimSpace(msg.Media[0]), "data:image/") { + return "[image]" + } + return "[attachment]" } return "" } @@ -459,17 +476,21 @@ func visibleSessionMessages(messages []providers.Message, toolFeedbackMaxArgsLen transcript := make([]sessionChatMessage, 0, len(messages)) for _, msg := range messages { + attachments := sessionAttachments(msg) + switch msg.Role { case "tool": continue case "user": - if sessionMessageVisible(msg) { - transcript = append(transcript, sessionChatMessage{ - Role: "user", - Content: msg.Content, - Media: append([]string(nil), msg.Media...), - }) + chatMsg := sessionChatMessage{ + Role: "user", + Content: msg.Content, + Media: append([]string(nil), msg.Media...), + Attachments: attachments, + } + if sessionChatMessageVisible(chatMsg) { + transcript = append(transcript, chatMsg) } case "assistant": @@ -492,15 +513,25 @@ func visibleSessionMessages(messages []providers.Message, toolFeedbackMaxArgsLen // Pico web chat can persist both visible `message` tool output and a // later plain assistant reply in the same turn. Hide only the fixed // internal summary that marks handled tool delivery. - if !sessionMessageVisible(msg) || assistantMessageInternalOnly(msg) { + content := msg.Content + if assistantMessageInternalOnly(msg) { + if len(attachments) == 0 { + continue + } + content = "" + } + + chatMsg := sessionChatMessage{ + Role: "assistant", + Content: content, + Media: append([]string(nil), msg.Media...), + Attachments: attachments, + } + if !sessionChatMessageVisible(chatMsg) { continue } - transcript = append(transcript, sessionChatMessage{ - Role: "assistant", - Content: msg.Content, - Media: append([]string(nil), msg.Media...), - }) + transcript = append(transcript, chatMsg) } } @@ -518,11 +549,88 @@ func filterSessionChatMessages(messages []sessionChatMessage) []sessionChatMessa return filtered } +func sessionAttachments(msg providers.Message) []sessionChatAttachment { + if len(msg.Attachments) == 0 { + return nil + } + + attachments := make([]sessionChatAttachment, 0, len(msg.Attachments)) + for _, attachment := range msg.Attachments { + urlValue, ok := sessionAttachmentURL(attachment) + if !ok { + continue + } + attachmentType := strings.TrimSpace(attachment.Type) + if attachmentType == "" { + attachmentType = sessionAttachmentType(attachment) + } + attachments = append(attachments, sessionChatAttachment{ + Type: attachmentType, + URL: urlValue, + Filename: strings.TrimSpace(attachment.Filename), + ContentType: strings.TrimSpace(attachment.ContentType), + }) + } + + if len(attachments) == 0 { + return nil + } + return attachments +} + +func sessionAttachmentURL(attachment providers.Attachment) (string, bool) { + if rawURL := strings.TrimSpace(attachment.URL); rawURL != "" { + return rawURL, true + } + + ref := strings.TrimSpace(attachment.Ref) + if ref == "" { + return "", false + } + if strings.HasPrefix(ref, "media://") { + // Persisted session history must only expose durable attachment locations. + // media:// refs depend on the live in-memory MediaStore and may stop + // resolving after a restart or cleanup, so omit them from reopened history. + return "", false + } + return ref, true +} + +func sessionAttachmentType(attachment providers.Attachment) string { + contentType := strings.ToLower(strings.TrimSpace(attachment.ContentType)) + filename := strings.ToLower(strings.TrimSpace(attachment.Filename)) + rawRef := strings.ToLower(strings.TrimSpace(attachment.Ref)) + rawURL := strings.ToLower(strings.TrimSpace(attachment.URL)) + + switch { + case strings.HasPrefix(contentType, "image/"), + strings.HasPrefix(rawRef, "data:image/"), + strings.HasPrefix(rawURL, "data:image/"): + return "image" + case strings.HasPrefix(contentType, "audio/"): + return "audio" + case strings.HasPrefix(contentType, "video/"): + return "video" + } + + switch ext := filepath.Ext(filename); ext { + case ".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".svg": + return "image" + case ".mp3", ".wav", ".ogg", ".m4a", ".flac", ".aac", ".wma", ".opus": + return "audio" + case ".mp4", ".avi", ".mov", ".webm", ".mkv": + return "video" + default: + return "file" + } +} + func assistantMessageTransientThought(msg providers.Message) bool { return strings.TrimSpace(msg.Content) == "" && strings.TrimSpace(msg.ReasoningContent) != "" && len(msg.ToolCalls) == 0 && - len(msg.Media) == 0 + len(msg.Media) == 0 && + len(msg.Attachments) == 0 } func assistantMessageInternalOnly(msg providers.Message) bool { diff --git a/web/backend/api/session_test.go b/web/backend/api/session_test.go index f6c643bde..d2efb3879 100644 --- a/web/backend/api/session_test.go +++ b/web/backend/api/session_test.go @@ -218,6 +218,136 @@ func TestHandleGetSession_JSONLStorage(t *testing.T) { } } +func TestHandleGetSession_HidesHandledToolAttachmentsBackedByMediaRefs(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := legacyPicoSessionPrefix + "attachment-history" + for _, msg := range []providers.Message{ + {Role: "user", Content: "send me the report"}, + { + Role: "assistant", + Content: handledToolResponseSummaryText, + Attachments: []providers.Attachment{{ + Type: "file", + Ref: "media://attachment-1", + Filename: "report.txt", + ContentType: "text/plain", + }}, + }, + } { + if err := store.AddFullMessage(nil, sessionKey, msg); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions/attachment-history", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var resp struct { + Messages []sessionChatMessage `json:"messages"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + + if len(resp.Messages) != 1 { + t.Fatalf("len(resp.Messages) = %d, want 1", len(resp.Messages)) + } + if resp.Messages[0].Role != "user" || resp.Messages[0].Content != "send me the report" { + t.Fatalf("message = %#v, want only user request", resp.Messages[0]) + } +} + +func TestHandleGetSession_ExposesHandledToolAttachmentsWithDurableURL(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := legacyPicoSessionPrefix + "attachment-history-durable" + for _, msg := range []providers.Message{ + {Role: "user", Content: "send me the report"}, + { + Role: "assistant", + Content: handledToolResponseSummaryText, + Attachments: []providers.Attachment{{ + Type: "file", + URL: "https://example.com/report.txt", + Filename: "report.txt", + ContentType: "text/plain", + }}, + }, + } { + if err := store.AddFullMessage(nil, sessionKey, msg); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions/attachment-history-durable", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var resp struct { + Messages []sessionChatMessage `json:"messages"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + + if len(resp.Messages) != 2 { + t.Fatalf("len(resp.Messages) = %d, want 2", len(resp.Messages)) + } + + assistant := resp.Messages[1] + if assistant.Role != "assistant" { + t.Fatalf("assistant role = %q, want assistant", assistant.Role) + } + if assistant.Content != "" { + t.Fatalf("assistant content = %q, want empty string", assistant.Content) + } + if len(assistant.Attachments) != 1 { + t.Fatalf("len(assistant.Attachments) = %d, want 1", len(assistant.Attachments)) + } + if assistant.Attachments[0].URL != "https://example.com/report.txt" { + t.Fatalf( + "attachment url = %q, want %q", + assistant.Attachments[0].URL, + "https://example.com/report.txt", + ) + } + if assistant.Attachments[0].Filename != "report.txt" { + t.Fatalf("attachment filename = %q, want %q", assistant.Attachments[0].Filename, "report.txt") + } +} + func TestHandleSessions_JSONLScopeDiscovery(t *testing.T) { configPath, cleanup := setupOAuthTestEnv(t) defer cleanup() diff --git a/web/frontend/src/api/sessions.ts b/web/frontend/src/api/sessions.ts index dd0fa1f53..912fbecd8 100644 --- a/web/frontend/src/api/sessions.ts +++ b/web/frontend/src/api/sessions.ts @@ -15,6 +15,12 @@ export interface SessionDetail { role: "user" | "assistant" content: string media?: string[] + attachments?: { + type?: "image" | "audio" | "video" | "file" + url: string + filename?: string + content_type?: string + }[] }[] summary: string created: string diff --git a/web/frontend/src/components/chat/assistant-message.tsx b/web/frontend/src/components/chat/assistant-message.tsx index 5c2235982..2901b574a 100644 --- a/web/frontend/src/components/chat/assistant-message.tsx +++ b/web/frontend/src/components/chat/assistant-message.tsx @@ -3,6 +3,8 @@ import { IconCheck, IconChevronDown, IconCopy, + IconDownload, + IconFileText, } from "@tabler/icons-react" import { useAtom } from "jotai" import { useState } from "react" @@ -16,21 +18,30 @@ import remarkGfm from "remark-gfm" import { Button } from "@/components/ui/button" import { formatMessageTime } from "@/hooks/use-pico-chat" import { cn } from "@/lib/utils" -import { showThoughtsAtom } from "@/store/chat" +import { type ChatAttachment, showThoughtsAtom } from "@/store/chat" interface AssistantMessageProps { content: string + attachments?: ChatAttachment[] isThought?: boolean timestamp?: string | number } export function AssistantMessage({ content, + attachments = [], isThought = false, timestamp = "", }: AssistantMessageProps) { const { t } = useTranslation() const [isCopied, setIsCopied] = useState(false) + const hasText = content.trim().length > 0 + const imageAttachments = attachments.filter( + (attachment) => attachment.type === "image", + ) + const fileAttachments = attachments.filter( + (attachment) => attachment.type !== "image", + ) const [isExpanded, setIsExpanded] = useAtom(showThoughtsAtom) const formattedTimestamp = timestamp !== "" ? formatMessageTime(timestamp) : "" @@ -83,7 +94,7 @@ export function AssistantMessage({ /> )} - {(!isThought || isExpanded) && ( + {(!isThought || isExpanded) && hasText && (
)} - {!isThought && ( + + {(imageAttachments.length > 0 || fileAttachments.length > 0) && ( +
+ {imageAttachments.length > 0 && ( +
+ {imageAttachments.map((attachment, index) => ( + + {attachment.filename + + ))} +
+ )} + + {fileAttachments.length > 0 && ( +
+ {fileAttachments.map((attachment, index) => ( + + + + + {attachment.filename || "Download attachment"} + + + + + ))} +
+ )} +
+ )} + + {!isThought && hasText && (