diff --git a/pkg/agent/context.go b/pkg/agent/context.go index b5c68650a..c2921294b 100644 --- a/pkg/agent/context.go +++ b/pkg/agent/context.go @@ -602,14 +602,16 @@ func (cb *ContextBuilder) BuildMessages( // Add conversation history messages = append(messages, history...) - // Add current user message - if strings.TrimSpace(currentMessage) != "" { + // Add current user message. Media-only turns must still be preserved so + // multimodal providers receive the uploaded image even when the user sends + // no accompanying text. + if strings.TrimSpace(currentMessage) != "" || len(media) > 0 { msg := providers.Message{ Role: "user", Content: currentMessage, } if len(media) > 0 { - msg.Media = media + msg.Media = append([]string(nil), media...) } messages = append(messages, msg) } diff --git a/pkg/agent/context_cache_test.go b/pkg/agent/context_cache_test.go index 81a1534b9..ef5e6c5de 100644 --- a/pkg/agent/context_cache_test.go +++ b/pkg/agent/context_cache_test.go @@ -707,6 +707,38 @@ func TestEmptyWorkspaceBaselineDetectsNewFiles(t *testing.T) { } } +func TestBuildMessages_IncludesMediaOnlyCurrentMessage(t *testing.T) { + tmpDir := setupWorkspace(t, nil) + defer os.RemoveAll(tmpDir) + + cb := NewContextBuilder(tmpDir) + msgs := cb.BuildMessages( + nil, + "", + "", + []string{"data:image/png;base64,abc123"}, + "pico", + "chat-1", + "", + "", + ) + + if len(msgs) != 2 { + t.Fatalf("len(msgs) = %d, want 2", len(msgs)) + } + + userMsg := msgs[1] + if userMsg.Role != "user" { + t.Fatalf("userMsg.Role = %q, want %q", userMsg.Role, "user") + } + if userMsg.Content != "" { + t.Fatalf("userMsg.Content = %q, want empty string", userMsg.Content) + } + if len(userMsg.Media) != 1 || userMsg.Media[0] != "data:image/png;base64,abc123" { + t.Fatalf("userMsg.Media = %#v, want image payload", userMsg.Media) + } +} + // BenchmarkBuildMessagesWithCache measures caching performance. func BenchmarkBuildMessagesWithCache(b *testing.B) { tmpDir, _ := os.MkdirTemp("", "picoclaw-bench-*") diff --git a/pkg/channels/pico/client_test.go b/pkg/channels/pico/client_test.go index 7c5a62801..b40606647 100644 --- a/pkg/channels/pico/client_test.go +++ b/pkg/channels/pico/client_test.go @@ -262,3 +262,57 @@ func TestSend_ClosedConnection(t *testing.T) { ch.Stop(ctx) } + +func TestParseInlineImageMedia_Valid(t *testing.T) { + media, err := parseInlineImageMedia(map[string]any{ + "media": []any{ + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+X2ioAAAAASUVORK5CYII=", + }, + }) + if err != nil { + t.Fatalf("parseInlineImageMedia() error = %v", err) + } + if len(media) != 1 { + t.Fatalf("len(media) = %d, want 1", len(media)) + } +} + +func TestPicoChannel_HandleMessageSend_AllowsMediaOnly(t *testing.T) { + mb := bus.NewMessageBus() + ch, err := NewPicoChannel(config.PicoConfig{ + Token: *config.NewSecureString("test-token"), + }, mb) + if err != nil { + t.Fatalf("NewPicoChannel() error = %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := ch.Start(ctx); err != nil { + t.Fatalf("Start() error = %v", err) + } + defer ch.Stop(ctx) + + pc := &picoConn{id: "conn-1", sessionID: "sess-1"} + ch.handleMessageSend(pc, PicoMessage{ + ID: "msg-1", + Payload: map[string]any{ + "media": []any{ + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+X2ioAAAAASUVORK5CYII=", + }, + }, + }) + + select { + case msg := <-mb.InboundChan(): + if msg.Content != "" { + t.Fatalf("msg.Content = %q, want empty", msg.Content) + } + if len(msg.Media) != 1 || !strings.HasPrefix(msg.Media[0], "data:image/png;base64,") { + t.Fatalf("msg.Media = %#v, want inline image payload", msg.Media) + } + case <-ctx.Done(): + t.Fatal("timed out waiting for inbound media message") + } +} diff --git a/pkg/channels/pico/pico.go b/pkg/channels/pico/pico.go index 0a7bf15a4..e22da1ba1 100644 --- a/pkg/channels/pico/pico.go +++ b/pkg/channels/pico/pico.go @@ -2,6 +2,7 @@ package pico import ( "context" + "encoding/base64" "encoding/json" "fmt" "net/http" @@ -30,6 +31,14 @@ type picoConn struct { cancel context.CancelFunc // cancels per-connection goroutines (e.g. pingLoop) } +var allowedInlineImageMIMETypes = map[string]struct{}{ + "image/jpeg": {}, + "image/png": {}, + "image/gif": {}, + "image/webp": {}, + "image/bmp": {}, +} + // writeJSON sends a JSON message to the connection with write locking. func (pc *picoConn) writeJSON(v any) error { if pc.closed.Load() { @@ -516,6 +525,9 @@ func (c *PicoChannel) handleMessage(pc *picoConn, msg PicoMessage) { case TypeMessageSend: c.handleMessageSend(pc, msg) + case TypeMediaSend: + c.handleMessageSend(pc, msg) + default: errMsg := newError("unknown_type", fmt.Sprintf("unknown message type: %s", msg.Type)) pc.writeJSON(errMsg) @@ -525,8 +537,19 @@ func (c *PicoChannel) handleMessage(pc *picoConn, msg PicoMessage) { // handleMessageSend processes an inbound message.send from a client. func (c *PicoChannel) handleMessageSend(pc *picoConn, msg PicoMessage) { content, _ := msg.Payload["content"].(string) - if strings.TrimSpace(content) == "" { - errMsg := newError("empty_content", "message content is empty") + media, err := parseInlineImageMedia(msg.Payload) + if err != nil { + errMsg := newErrorWithPayload("invalid_media", err.Error(), map[string]any{ + "request_id": msg.ID, + }) + pc.writeJSON(errMsg) + return + } + + if strings.TrimSpace(content) == "" && len(media) == 0 { + errMsg := newErrorWithPayload("empty_content", "message content is empty", map[string]any{ + "request_id": msg.ID, + }) pc.writeJSON(errMsg) return } @@ -550,6 +573,7 @@ func (c *PicoChannel) handleMessageSend(pc *picoConn, msg PicoMessage) { logger.DebugCF("pico", "Received message", map[string]any{ "session_id": sessionID, "preview": truncate(content, 50), + "media": len(media), }) sender := bus.SenderInfo{ @@ -562,7 +586,7 @@ func (c *PicoChannel) handleMessageSend(pc *picoConn, msg PicoMessage) { return } - c.HandleMessage(c.ctx, peer, msg.ID, senderID, chatID, content, nil, metadata, sender) + c.HandleMessage(c.ctx, peer, msg.ID, senderID, chatID, content, media, metadata, sender) } // truncate truncates a string to maxLen runes. @@ -573,3 +597,99 @@ func truncate(s string, maxLen int) string { } return string(runes[:maxLen]) + "..." } + +func parseInlineImageMedia(payload map[string]any) ([]string, error) { + if len(payload) == 0 { + return nil, nil + } + + raw, ok := payload["media"] + if !ok || raw == nil { + return nil, nil + } + + switch values := raw.(type) { + case []any: + media := make([]string, 0, len(values)) + for i, item := range values { + value, err := inlineImageValue(item) + if err != nil { + return nil, fmt.Errorf("media[%d]: %w", i, err) + } + if err := validateInlineImageDataURL(value); err != nil { + return nil, fmt.Errorf("media[%d]: %w", i, err) + } + media = append(media, value) + } + return media, nil + case []string: + media := make([]string, 0, len(values)) + for i, value := range values { + value = strings.TrimSpace(value) + if err := validateInlineImageDataURL(value); err != nil { + return nil, fmt.Errorf("media[%d]: %w", i, err) + } + media = append(media, value) + } + return media, nil + case string: + value := strings.TrimSpace(values) + if err := validateInlineImageDataURL(value); err != nil { + return nil, err + } + return []string{value}, nil + default: + return nil, fmt.Errorf("media must be a string or array of strings") + } +} + +func inlineImageValue(item any) (string, error) { + switch value := item.(type) { + case string: + value = strings.TrimSpace(value) + if value == "" { + return "", fmt.Errorf("image payload is empty") + } + return value, nil + case map[string]any: + for _, key := range []string{"url", "data_url"} { + if raw, ok := value[key].(string); ok && strings.TrimSpace(raw) != "" { + return strings.TrimSpace(raw), nil + } + } + return "", fmt.Errorf("image payload must include url or data_url") + default: + return "", fmt.Errorf("image payload must be a string or object") + } +} + +func validateInlineImageDataURL(mediaURL string) error { + if mediaURL == "" { + return fmt.Errorf("image payload is empty") + } + if !strings.HasPrefix(mediaURL, "data:image/") { + return fmt.Errorf("only inline image data URLs are supported") + } + + header, data, found := strings.Cut(mediaURL, ",") + if !found || strings.TrimSpace(data) == "" { + return fmt.Errorf("image data URL is malformed") + } + if !strings.Contains(header, ";base64") { + return fmt.Errorf("image data URL must be base64 encoded") + } + mimeType, _, _ := strings.Cut(strings.TrimPrefix(header, "data:"), ";") + if _, ok := allowedInlineImageMIMETypes[mimeType]; !ok { + return fmt.Errorf("unsupported image format: %s", mimeType) + } + + data = strings.TrimSpace(data) + if base64.StdEncoding.DecodedLen(len(data)) > config.DefaultMaxMediaSize { + return fmt.Errorf("image exceeds %d byte limit", config.DefaultMaxMediaSize) + } + if _, err := base64.StdEncoding.DecodeString(data); err != nil { + return fmt.Errorf("invalid base64 image data") + } + + return nil +} diff --git a/pkg/channels/pico/protocol.go b/pkg/channels/pico/protocol.go index 192c96164..3f8ba8643 100644 --- a/pkg/channels/pico/protocol.go +++ b/pkg/channels/pico/protocol.go @@ -39,10 +39,18 @@ func newMessage(msgType string, payload map[string]any) PicoMessage { } } -// newError creates an error PicoMessage. -func newError(code, message string) PicoMessage { - return newMessage(TypeError, map[string]any{ +func newErrorWithPayload(code, message string, extra map[string]any) PicoMessage { + payload := map[string]any{ "code": code, "message": message, - }) + } + for key, value := range extra { + payload[key] = value + } + return newMessage(TypeError, payload) +} + +// newError creates an error PicoMessage. +func newError(code, message string) PicoMessage { + return newErrorWithPayload(code, message, nil) } diff --git a/web/backend/api/session.go b/web/backend/api/session.go index 42d451a05..a2e931010 100644 --- a/web/backend/api/session.go +++ b/web/backend/api/session.go @@ -42,6 +42,12 @@ type sessionListItem struct { Updated string `json:"updated"` } +type sessionChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + Media []string `json:"media,omitempty"` +} + type sessionMetaFile struct { Key string `json:"key"` Summary string `json:"summary"` @@ -62,8 +68,12 @@ type sessionMetaFile struct { const ( picoSessionPrefix = "agent:main:pico:direct:pico:" sanitizedPicoSessionPrefix = "agent_main_pico_direct_pico_" - maxSessionJSONLLineSize = 10 * 1024 * 1024 // 10 MB - maxSessionTitleRunes = 60 + // Keep the session API aligned with the shared JSONL store reader limit in + // pkg/memory/jsonl.go so oversized lines fail consistently everywhere. + maxSessionJSONLLineSize = 10 * 1024 * 1024 + maxSessionTitleRunes = 60 + + handledToolResponseSummaryText = "Requested output delivered via tool attachment." ) // extractPicoSessionID extracts the session UUID from a full session key. @@ -195,32 +205,21 @@ func (h *Handler) readJSONLSession(dir, sessionID string) (sessionFile, error) { func buildSessionListItem(sessionID string, sess sessionFile) sessionListItem { preview := "" for _, msg := range sess.Messages { - if msg.Role == "user" && strings.TrimSpace(msg.Content) != "" { - preview = msg.Content + if msg.Role == "user" { + preview = sessionMessagePreview(msg) + } + if preview != "" { break } } - title := strings.TrimSpace(sess.Summary) - if title == "" { - title = preview - } - - title = truncateRunes(title, maxSessionTitleRunes) preview = truncateRunes(preview, maxSessionTitleRunes) if preview == "" { preview = "(empty)" } - if title == "" { - title = preview - } + title := preview - validMessageCount := 0 - for _, msg := range sess.Messages { - if (msg.Role == "user" || msg.Role == "assistant") && strings.TrimSpace(msg.Content) != "" { - validMessageCount++ - } - } + validMessageCount := len(visibleSessionMessages(sess.Messages)) return sessionListItem{ ID: sessionID, @@ -247,6 +246,99 @@ func truncateRunes(s string, maxLen int) string { return string(runes[:maxLen]) + "..." } +func sessionMessageVisible(msg providers.Message) bool { + return strings.TrimSpace(msg.Content) != "" || len(msg.Media) > 0 +} + +func sessionMessagePreview(msg providers.Message) string { + if content := strings.TrimSpace(msg.Content); content != "" { + return content + } + if len(msg.Media) > 0 { + return "[image]" + } + return "" +} + +func visibleSessionMessages(messages []providers.Message) []sessionChatMessage { + transcript := make([]sessionChatMessage, 0, len(messages)) + + for _, msg := range messages { + switch msg.Role { + case "user": + if sessionMessageVisible(msg) { + transcript = append(transcript, sessionChatMessage{ + Role: "user", + Content: msg.Content, + Media: append([]string(nil), msg.Media...), + }) + } + + case "assistant": + visibleToolMessages := visibleAssistantToolMessages(msg.ToolCalls) + if len(visibleToolMessages) > 0 { + transcript = append(transcript, visibleToolMessages...) + } + + // Pico web chat can persist both visible `message` tool output and a + // later plain assistant reply in the same turn. Hide only the fixed + // internal summary that marks handled tool delivery. + if len(visibleToolMessages) > 0 || !sessionMessageVisible(msg) || assistantMessageInternalOnly(msg) { + continue + } + + transcript = append(transcript, sessionChatMessage{ + Role: "assistant", + Content: msg.Content, + Media: append([]string(nil), msg.Media...), + }) + } + } + + return transcript +} + +func assistantMessageInternalOnly(msg providers.Message) bool { + return strings.TrimSpace(msg.Content) == handledToolResponseSummaryText +} + +func visibleAssistantToolMessages(toolCalls []providers.ToolCall) []sessionChatMessage { + if len(toolCalls) == 0 { + return nil + } + + messages := make([]sessionChatMessage, 0, len(toolCalls)) + for _, tc := range toolCalls { + name := tc.Name + argsJSON := "" + if tc.Function != nil { + if name == "" { + name = tc.Function.Name + } + argsJSON = tc.Function.Arguments + } + + switch name { + case "message": + var args struct { + Content string `json:"content"` + } + if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { + continue + } + if strings.TrimSpace(args.Content) == "" { + continue + } + messages = append(messages, sessionChatMessage{ + Role: "assistant", + Content: args.Content, + }) + } + } + + return messages +} + // sessionsDir resolves the path to the gateway's session storage directory. // It reads the workspace from config, falling back to ~/.picoclaw/workspace. func (h *Handler) sessionsDir() (string, error) { @@ -437,22 +529,7 @@ func (h *Handler) handleGetSession(w http.ResponseWriter, r *http.Request) { } } - // Convert to a simpler format for the frontend - type chatMessage struct { - Role string `json:"role"` - Content string `json:"content"` - } - - messages := make([]chatMessage, 0, len(sess.Messages)) - for _, msg := range sess.Messages { - // Only include user and assistant messages that have actual content - if (msg.Role == "user" || msg.Role == "assistant") && strings.TrimSpace(msg.Content) != "" { - messages = append(messages, chatMessage{ - Role: msg.Role, - Content: msg.Content, - }) - } - } + messages := visibleSessionMessages(sess.Messages) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]any{ diff --git a/web/backend/api/session_test.go b/web/backend/api/session_test.go index 21ef5b5b8..9248c11b7 100644 --- a/web/backend/api/session_test.go +++ b/web/backend/api/session_test.go @@ -6,6 +6,7 @@ import ( "net/http/httptest" "os" "path/filepath" + "strings" "testing" "github.com/sipeed/picoclaw/pkg/config" @@ -87,15 +88,19 @@ func TestHandleListSessions_JSONLStorage(t *testing.T) { if items[0].MessageCount != 2 { t.Fatalf("items[0].MessageCount = %d, want 2", items[0].MessageCount) } - if items[0].Title != "JSONL-backed session" { - t.Fatalf("items[0].Title = %q, want %q", items[0].Title, "JSONL-backed session") + if items[0].Title != "Explain why the history API is empty after migration." { + t.Fatalf( + "items[0].Title = %q, want %q", + items[0].Title, + "Explain why the history API is empty after migration.", + ) } if items[0].Preview != "Explain why the history API is empty after migration." { t.Fatalf("items[0].Preview = %q", items[0].Preview) } } -func TestHandleListSessions_TitleUsesTrimmedSummary(t *testing.T) { +func TestHandleListSessions_TitleUsesFirstUserMessage(t *testing.T) { configPath, cleanup := setupOAuthTestEnv(t) defer cleanup() @@ -139,10 +144,7 @@ func TestHandleListSessions_TitleUsesTrimmedSummary(t *testing.T) { if len(items) != 1 { t.Fatalf("len(items) = %d, want 1", len(items)) } - expectedTitle := truncateRunes( - "This summary is intentionally longer than sixty characters so it must be truncated in the history menu.", - maxSessionTitleRunes, - ) + expectedTitle := truncateRunes("fallback preview", maxSessionTitleRunes) if items[0].Title != expectedTitle { t.Fatalf("items[0].Title = %q", items[0].Title) } @@ -215,6 +217,359 @@ func TestHandleGetSession_JSONLStorage(t *testing.T) { } } +func TestHandleGetSession_ReconstructsVisibleMessageToolOutput(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := picoSessionPrefix + "detail-message-tool" + for _, msg := range []providers.Message{ + {Role: "user", Content: "test"}, + { + Role: "assistant", + Content: "", + ToolCalls: []providers.ToolCall{ + { + ID: "call_1", + Type: "function", + Function: &providers.FunctionCall{ + Name: "message", + Arguments: `{"content":"visible tool output"}`, + }, + }, + }, + }, + {Role: "tool", Content: "Message sent to pico:pico:detail-message-tool", ToolCallID: "call_1"}, + {Role: "assistant", Content: handledToolResponseSummaryText}, + } { + if err := store.AddFullMessage(nil, sessionKey, msg); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-message-tool", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var resp struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + if len(resp.Messages) != 2 { + t.Fatalf("len(resp.Messages) = %d, want 2", len(resp.Messages)) + } + if resp.Messages[1].Role != "assistant" || resp.Messages[1].Content != "visible tool output" { + t.Fatalf("assistant message = %#v, want visible tool output", resp.Messages[1]) + } +} + +func TestHandleGetSession_PreservesFinalAssistantReplyAfterMessageToolOutput(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := picoSessionPrefix + "detail-message-tool-final-reply" + for _, msg := range []providers.Message{ + {Role: "user", Content: "test"}, + { + Role: "assistant", + ToolCalls: []providers.ToolCall{ + { + ID: "call_1", + Type: "function", + Function: &providers.FunctionCall{ + Name: "message", + Arguments: `{"content":"visible tool output"}`, + }, + }, + }, + }, + {Role: "tool", Content: "Message sent to pico:pico:detail-message-tool-final-reply", ToolCallID: "call_1"}, + {Role: "assistant", Content: "final assistant reply"}, + } { + if err := store.AddFullMessage(nil, sessionKey, msg); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-message-tool-final-reply", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var resp struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + if len(resp.Messages) != 3 { + t.Fatalf("len(resp.Messages) = %d, want 3", len(resp.Messages)) + } + if resp.Messages[1].Role != "assistant" || resp.Messages[1].Content != "visible tool output" { + t.Fatalf("interim assistant message = %#v, want visible tool output", resp.Messages[1]) + } + if resp.Messages[2].Role != "assistant" || resp.Messages[2].Content != "final assistant reply" { + t.Fatalf("final assistant message = %#v, want final assistant reply", resp.Messages[2]) + } +} + +func TestHandleListSessions_MessageCountUsesVisibleTranscript(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := picoSessionPrefix + "list-visible-count" + for _, msg := range []providers.Message{ + {Role: "user", Content: "test"}, + { + Role: "assistant", + ToolCalls: []providers.ToolCall{ + { + ID: "call_1", + Type: "function", + Function: &providers.FunctionCall{ + Name: "message", + Arguments: `{"content":"visible tool output"}`, + }, + }, + }, + }, + {Role: "tool", Content: "Message sent to pico:pico:list-visible-count", ToolCallID: "call_1"}, + {Role: "assistant", Content: handledToolResponseSummaryText}, + } { + if err := store.AddFullMessage(nil, sessionKey, msg); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var items []sessionListItem + if err := json.Unmarshal(rec.Body.Bytes(), &items); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + if len(items) != 1 { + t.Fatalf("len(items) = %d, want 1", len(items)) + } + if items[0].MessageCount != 2 { + t.Fatalf("items[0].MessageCount = %d, want 2", items[0].MessageCount) + } +} + +func TestHandleGetSession_IncludesMediaOnlyMessages(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := picoSessionPrefix + "detail-media-only" + if err := store.AddFullMessage(nil, sessionKey, providers.Message{ + Role: "user", + Media: []string{"data:image/png;base64,abc123"}, + }); err != nil { + t.Fatalf("AddFullMessage(user) error = %v", err) + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-media-only", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var resp struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + Media []string `json:"media"` + } `json:"messages"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + if len(resp.Messages) != 1 { + t.Fatalf("len(resp.Messages) = %d, want 1", len(resp.Messages)) + } + if resp.Messages[0].Role != "user" || len(resp.Messages[0].Media) != 1 { + t.Fatalf("message = %#v, want user message with media", resp.Messages[0]) + } +} + +func TestHandleSessions_SupportsJSONLMessagesUpToStoreCap(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := picoSessionPrefix + "detail-large-jsonl" + largeContent := strings.Repeat("x", 9*1024*1024) + if err := store.AddFullMessage(nil, sessionKey, providers.Message{ + Role: "user", + Content: largeContent, + }); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + listRec := httptest.NewRecorder() + listReq := httptest.NewRequest(http.MethodGet, "/api/sessions", nil) + mux.ServeHTTP(listRec, listReq) + + if listRec.Code != http.StatusOK { + t.Fatalf("list status = %d, want %d, body=%s", listRec.Code, http.StatusOK, listRec.Body.String()) + } + + var items []sessionListItem + if err := json.Unmarshal(listRec.Body.Bytes(), &items); err != nil { + t.Fatalf("list Unmarshal() error = %v", err) + } + if len(items) != 1 { + t.Fatalf("len(items) = %d, want 1", len(items)) + } + + detailRec := httptest.NewRecorder() + detailReq := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-large-jsonl", nil) + mux.ServeHTTP(detailRec, detailReq) + + if detailRec.Code != http.StatusOK { + t.Fatalf( + "detail status = %d, want %d, body=%s", + detailRec.Code, + http.StatusOK, + detailRec.Body.String(), + ) + } + + var resp struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + } + if err := json.Unmarshal(detailRec.Body.Bytes(), &resp); err != nil { + t.Fatalf("detail Unmarshal() error = %v", err) + } + if len(resp.Messages) != 1 { + t.Fatalf("len(resp.Messages) = %d, want 1", len(resp.Messages)) + } + if resp.Messages[0].Role != "user" { + t.Fatalf("resp.Messages[0].Role = %q, want %q", resp.Messages[0].Role, "user") + } + if got := len(resp.Messages[0].Content); got != len(largeContent) { + t.Fatalf("len(resp.Messages[0].Content) = %d, want %d", got, len(largeContent)) + } +} + +func TestHandleListSessions_UsesImagePreviewForMediaOnlyMessage(t *testing.T) { + configPath, cleanup := setupOAuthTestEnv(t) + defer cleanup() + + dir := sessionsTestDir(t, configPath) + store, err := memory.NewJSONLStore(dir) + if err != nil { + t.Fatalf("NewJSONLStore() error = %v", err) + } + + sessionKey := picoSessionPrefix + "preview-media-only" + if err := store.AddFullMessage(nil, sessionKey, providers.Message{ + Role: "user", + Media: []string{"data:image/png;base64,abc123"}, + }); err != nil { + t.Fatalf("AddFullMessage() error = %v", err) + } + + h := NewHandler(configPath) + mux := http.NewServeMux() + h.RegisterRoutes(mux) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/sessions", nil) + mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + + var items []sessionListItem + if err := json.Unmarshal(rec.Body.Bytes(), &items); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + if len(items) != 1 { + t.Fatalf("len(items) = %d, want 1", len(items)) + } + if items[0].Preview != "[image]" { + t.Fatalf("items[0].Preview = %q, want %q", items[0].Preview, "[image]") + } + if items[0].MessageCount != 1 { + t.Fatalf("items[0].MessageCount = %d, want 1", items[0].MessageCount) + } +} + func TestHandleDeleteSession_JSONLStorage(t *testing.T) { configPath, cleanup := setupOAuthTestEnv(t) defer cleanup() diff --git a/web/frontend/eslint.config.js b/web/frontend/eslint.config.js index bc9c64344..85d380c4f 100644 --- a/web/frontend/eslint.config.js +++ b/web/frontend/eslint.config.js @@ -28,4 +28,12 @@ export default defineConfig([ ], }, }, + { + files: ["src/routes/**/*.{ts,tsx}"], + rules: { + // TanStack Router route modules must export Route objects, so this rule + // produces false positives for framework-managed files. + "react-refresh/only-export-components": "off", + }, + }, ]) diff --git a/web/frontend/src/api/sessions.ts b/web/frontend/src/api/sessions.ts index c91495901..dd0fa1f53 100644 --- a/web/frontend/src/api/sessions.ts +++ b/web/frontend/src/api/sessions.ts @@ -1,5 +1,3 @@ -// Sessions API — list and retrieve chat session history - import { launcherFetch } from "@/api/http" export interface SessionSummary { @@ -13,7 +11,11 @@ export interface SessionSummary { export interface SessionDetail { id: string - messages: { role: "user" | "assistant"; content: string }[] + messages: { + role: "user" | "assistant" + content: string + media?: string[] + }[] summary: string created: string updated: string diff --git a/web/frontend/src/components/chat/assistant-message.tsx b/web/frontend/src/components/chat/assistant-message.tsx index 05da3ceb1..9966226b2 100644 --- a/web/frontend/src/components/chat/assistant-message.tsx +++ b/web/frontend/src/components/chat/assistant-message.tsx @@ -43,7 +43,7 @@ export function AssistantMessage({
-
+
void + onAddImages: () => void + onRemoveAttachment: (index: number) => void onSend: () => void isConnected: boolean hasDefaultModel: boolean + canSend: boolean } export function ChatComposer({ input, + attachments, onInputChange, + onAddImages, + onRemoveAttachment, onSend, isConnected, hasDefaultModel, + canSend, }: ChatComposerProps) { const { t } = useTranslation() const canInput = isConnected && hasDefaultModel @@ -35,6 +44,32 @@ export function ChatComposer({ return (
+ {attachments.length > 0 && ( +
+ {attachments.map((attachment, index) => ( +
+ {attachment.filename + +
+ ))} +
+ )} + onInputChange(e.target.value)} @@ -42,7 +77,7 @@ export function ChatComposer({ placeholder={t("chat.placeholder")} disabled={!canInput} className={cn( - "placeholder:text-muted-foreground max-h-[200px] min-h-[60px] resize-none border-0 bg-transparent px-2 py-1 text-[15px] shadow-none transition-colors focus-visible:ring-0 focus-visible:outline-none dark:bg-transparent", + "placeholder:text-muted-foreground/50 max-h-[200px] min-h-[60px] resize-none border-0 bg-transparent px-2 py-1 text-[15px] shadow-none transition-colors focus-visible:ring-0 focus-visible:outline-none dark:bg-transparent", !canInput && "cursor-not-allowed", )} minRows={1} @@ -50,13 +85,27 @@ export function ChatComposer({ />
-
{/* action buttons */}
+
+ +
diff --git a/web/frontend/src/components/chat/chat-page.tsx b/web/frontend/src/components/chat/chat-page.tsx index ae705ff1b..38a0fc6b1 100644 --- a/web/frontend/src/components/chat/chat-page.tsx +++ b/web/frontend/src/components/chat/chat-page.tsx @@ -1,6 +1,7 @@ import { IconPlus } from "@tabler/icons-react" -import { useEffect, useRef, useState } from "react" +import { type ChangeEvent, useEffect, useRef, useState } from "react" import { useTranslation } from "react-i18next" +import { toast } from "sonner" import { AssistantMessage } from "@/components/chat/assistant-message" import { ChatComposer } from "@/components/chat/chat-composer" @@ -15,13 +16,42 @@ import { useChatModels } from "@/hooks/use-chat-models" import { useGateway } from "@/hooks/use-gateway" import { usePicoChat } from "@/hooks/use-pico-chat" import { useSessionHistory } from "@/hooks/use-session-history" +import type { ChatAttachment } from "@/store/chat" + +const MAX_IMAGE_SIZE_BYTES = 7 * 1024 * 1024 +const MAX_IMAGE_SIZE_LABEL = "7 MB" +const ALLOWED_IMAGE_TYPES = new Set([ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", + "image/bmp", +]) + +function readFileAsDataUrl(file: File): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader() + reader.onload = () => { + if (typeof reader.result === "string") { + resolve(reader.result) + return + } + reject(new Error("Failed to read file")) + } + reader.onerror = () => + reject(reader.error || new Error("Failed to read file")) + reader.readAsDataURL(file) + }) +} export function ChatPage() { const { t } = useTranslation() const scrollRef = useRef(null) + const fileInputRef = useRef(null) const [isAtBottom, setIsAtBottom] = useState(true) const [hasScrolled, setHasScrolled] = useState(false) const [input, setInput] = useState("") + const [attachments, setAttachments] = useState([]) const { messages, @@ -80,18 +110,84 @@ export function ChatPage() { }, [messages, isTyping, isAtBottom]) const handleSend = () => { - if (!input.trim() || !canSend) return - if (sendMessage(input.trim())) { + if ((!input.trim() && attachments.length === 0) || !canSend) return + if ( + sendMessage({ + content: input, + attachments, + }) + ) { setInput("") + setAttachments([]) } } + const handleAddImages = () => { + if (!canSend) return + fileInputRef.current?.click() + } + + const handleRemoveAttachment = (index: number) => { + setAttachments((prev) => prev.filter((_, itemIndex) => itemIndex !== index)) + } + + const handleImageSelection = async (event: ChangeEvent) => { + const files = Array.from(event.target.files ?? []) + event.target.value = "" + + if (files.length === 0) { + return + } + + const nextAttachments: ChatAttachment[] = [] + for (const file of files) { + if (!ALLOWED_IMAGE_TYPES.has(file.type)) { + toast.error( + t("chat.invalidImage", { + name: file.name, + }), + ) + continue + } + + if (file.size > MAX_IMAGE_SIZE_BYTES) { + toast.error( + t("chat.imageTooLarge", { + name: file.name, + size: MAX_IMAGE_SIZE_LABEL, + }), + ) + continue + } + + try { + nextAttachments.push({ + type: "image", + filename: file.name, + url: await readFileAsDataUrl(file), + }) + } catch { + toast.error( + t("chat.imageReadFailed", { + name: file.name, + }), + ) + } + } + + if (nextAttachments.length > 0) { + setAttachments(nextAttachments.slice(0, 1)) + } + } + + const canSubmit = canSend && (Boolean(input.trim()) || attachments.length > 0) + return (
) : ( - + )}
))} @@ -163,12 +262,24 @@ export function ChatPage() {
+ +
) diff --git a/web/frontend/src/components/chat/session-history-menu.tsx b/web/frontend/src/components/chat/session-history-menu.tsx index 009e8fbb9..3ec1a5ed2 100644 --- a/web/frontend/src/components/chat/session-history-menu.tsx +++ b/web/frontend/src/components/chat/session-history-menu.tsx @@ -71,7 +71,7 @@ export function SessionHistoryMenu({ onClick={() => onSwitchSession(session.id)} > - {session.title || session.preview} + {session.title} {t("chat.messagesCount", { diff --git a/web/frontend/src/components/chat/user-message.tsx b/web/frontend/src/components/chat/user-message.tsx index 2baee87f3..96119a534 100644 --- a/web/frontend/src/components/chat/user-message.tsx +++ b/web/frontend/src/components/chat/user-message.tsx @@ -1,13 +1,36 @@ +import type { ChatAttachment } from "@/store/chat" + interface UserMessageProps { content: string + attachments?: ChatAttachment[] } -export function UserMessage({ content }: UserMessageProps) { +export function UserMessage({ content, attachments = [] }: UserMessageProps) { + const hasText = content.trim().length > 0 + const imageAttachments = attachments.filter( + (attachment) => attachment.type === "image", + ) + return (
-
- {content} -
+ {imageAttachments.length > 0 && ( +
+ {imageAttachments.map((attachment, index) => ( + {attachment.filename + ))} +
+ )} + + {hasText && ( +
+ {content} +
+ )}
) } diff --git a/web/frontend/src/features/chat/controller.ts b/web/frontend/src/features/chat/controller.ts index 5e6eb2229..cef8b303f 100644 --- a/web/frontend/src/features/chat/controller.ts +++ b/web/frontend/src/features/chat/controller.ts @@ -18,7 +18,11 @@ import { normalizeWsUrlForBrowser, } from "@/features/chat/websocket" import i18n from "@/i18n" -import { getChatState, updateChatStore } from "@/store/chat" +import { + type ChatAttachment, + getChatState, + updateChatStore, +} from "@/store/chat" import { type GatewayState, gatewayAtom } from "@/store/gateway" const store = getDefaultStore() @@ -324,19 +328,43 @@ export async function hydrateActiveSession() { return hydratePromise } -export function sendChatMessage(content: string) { +interface SendChatMessageInput { + content: string + attachments?: ChatAttachment[] +} + +export function sendChatMessage({ + content, + attachments = [], +}: SendChatMessageInput) { if (!wsRef || wsRef.readyState !== WebSocket.OPEN) { console.warn("WebSocket not connected") return false } + const normalizedContent = content.trim() + const normalizedAttachments = attachments + .filter((attachment) => attachment.type === "image" && attachment.url) + .map((attachment) => ({ ...attachment })) + + if (!normalizedContent && normalizedAttachments.length === 0) { + return false + } + const socket = wsRef const id = `msg-${++msgIdCounter}-${Date.now()}` updateChatStore((prev) => ({ messages: [ ...prev.messages, - { id, role: "user", content, timestamp: Date.now() }, + { + id, + role: "user", + content: normalizedContent, + attachments: + normalizedAttachments.length > 0 ? normalizedAttachments : undefined, + timestamp: Date.now(), + }, ], isTyping: true, })) @@ -346,7 +374,10 @@ export function sendChatMessage(content: string) { JSON.stringify({ type: "message.send", id, - payload: { content }, + payload: { + content: normalizedContent, + media: normalizedAttachments.map((attachment) => attachment.url), + }, }), ) return true diff --git a/web/frontend/src/features/chat/history.ts b/web/frontend/src/features/chat/history.ts index 886148184..850b3319e 100644 --- a/web/frontend/src/features/chat/history.ts +++ b/web/frontend/src/features/chat/history.ts @@ -1,6 +1,18 @@ import { getSessionHistory } from "@/api/sessions" import { normalizeUnixTimestamp } from "@/features/chat/state" -import type { ChatMessage } from "@/store/chat" +import type { ChatAttachment, ChatMessage } from "@/store/chat" + +function toChatAttachments(media?: string[]): ChatAttachment[] | undefined { + if (!media || media.length === 0) { + return undefined + } + + const attachments = media + .filter((item) => item.startsWith("data:image/")) + .map((url) => ({ type: "image" as const, url })) + + return attachments.length > 0 ? attachments : undefined +} export async function loadSessionMessages( sessionId: string, @@ -12,6 +24,7 @@ export async function loadSessionMessages( id: `hist-${index}-${Date.now()}`, role: message.role, content: message.content, + attachments: toChatAttachments(message.media), timestamp: fallbackTime, })) } @@ -31,9 +44,13 @@ function normalizeMessageTimestamp(timestamp: number | string): string { } function messageSignature(message: ChatMessage): string { + const attachmentSignature = (message.attachments ?? []) + .map((attachment) => `${attachment.type}\u0001${attachment.url}`) + .join("\u0002") + return `${message.role}\u0000${message.content}\u0000${normalizeMessageTimestamp( message.timestamp, - )}` + )}\u0000${attachmentSignature}` } function comparableTimestamp(timestamp: number | string): number { diff --git a/web/frontend/src/features/chat/protocol.ts b/web/frontend/src/features/chat/protocol.ts index 5e5220c77..7429aef01 100644 --- a/web/frontend/src/features/chat/protocol.ts +++ b/web/frontend/src/features/chat/protocol.ts @@ -1,3 +1,5 @@ +import { toast } from "sonner" + import { normalizeUnixTimestamp } from "@/features/chat/state" import { updateChatStore } from "@/store/chat" @@ -67,10 +69,24 @@ export function handlePicoMessage( updateChatStore({ isTyping: false }) break - case "error": + case "error": { + const requestId = + typeof payload.request_id === "string" ? payload.request_id : "" + const errorMessage = + typeof payload.message === "string" ? payload.message : "" + console.error("Pico error:", payload) - updateChatStore({ isTyping: false }) + if (errorMessage) { + toast.error(errorMessage) + } + updateChatStore((prev) => ({ + messages: requestId + ? prev.messages.filter((msg) => msg.id !== requestId) + : prev.messages, + isTyping: false, + })) break + } case "pong": break diff --git a/web/frontend/src/hooks/use-session-history.ts b/web/frontend/src/hooks/use-session-history.ts index 790339dba..2673f3562 100644 --- a/web/frontend/src/hooks/use-session-history.ts +++ b/web/frontend/src/hooks/use-session-history.ts @@ -88,8 +88,14 @@ export function useSessionHistory({ const handleDeleteSession = useCallback( async (id: string) => { try { + const deletedLoadedSession = sessions.some( + (session) => session.id === id, + ) await deleteSession(id) setSessions((prev) => prev.filter((s) => s.id !== id)) + if (deletedLoadedSession) { + setOffset((prev) => Math.max(prev - 1, 0)) + } if (id === activeSessionId) { onDeletedActiveSession() } @@ -97,7 +103,7 @@ export function useSessionHistory({ console.error("Failed to delete session:", err) } }, - [activeSessionId, onDeletedActiveSession], + [activeSessionId, onDeletedActiveSession, sessions], ) return { diff --git a/web/frontend/src/i18n/locales/en.json b/web/frontend/src/i18n/locales/en.json index 851b0c8c4..891acae21 100644 --- a/web/frontend/src/i18n/locales/en.json +++ b/web/frontend/src/i18n/locales/en.json @@ -49,6 +49,12 @@ "deleteSession": "Delete session", "messagesCount": "{{count}} messages", "noModel": "Select model", + "attachImage": "Add images", + "removeImage": "Remove image", + "uploadedImage": "Uploaded image", + "invalidImage": "\"{{name}}\" is not a supported image file.", + "imageTooLarge": "\"{{name}}\" exceeds the {{size}} limit.", + "imageReadFailed": "Failed to read \"{{name}}\".", "empty": { "noConfiguredModel": "No Model Configured", "noConfiguredModelDescription": "You need to configure at least one AI model with an API key before you can start chatting.", diff --git a/web/frontend/src/i18n/locales/zh.json b/web/frontend/src/i18n/locales/zh.json index 07538ace9..667996208 100644 --- a/web/frontend/src/i18n/locales/zh.json +++ b/web/frontend/src/i18n/locales/zh.json @@ -49,6 +49,12 @@ "deleteSession": "删除会话", "messagesCount": "{{count}} 条消息", "noModel": "选择模型", + "attachImage": "添加图片", + "removeImage": "移除图片", + "uploadedImage": "已上传图片", + "invalidImage": "“{{name}}”不是支持的图片文件。", + "imageTooLarge": "“{{name}}”超过了 {{size}} 限制。", + "imageReadFailed": "读取“{{name}}”失败。", "empty": { "noConfiguredModel": "尚未配置模型", "noConfiguredModelDescription": "请先配置至少一个带有 API Key 的 AI 模型,才能开始对话。", diff --git a/web/frontend/src/store/chat.ts b/web/frontend/src/store/chat.ts index da5fa6670..21eb5edff 100644 --- a/web/frontend/src/store/chat.ts +++ b/web/frontend/src/store/chat.ts @@ -5,11 +5,18 @@ import { writeStoredSessionId, } from "@/features/chat/state" +export interface ChatAttachment { + type: "image" + url: string + filename?: string +} + export interface ChatMessage { id: string role: "user" | "assistant" content: string timestamp: number | string + attachments?: ChatAttachment[] } export type ConnectionState =