feat(web): support image messages in pico chat (#2299)

This commit is contained in:
wenjie
2026-04-03 14:15:20 +08:00
committed by GitHub
parent f3ad5d9305
commit f2a19ab947
21 changed files with 1009 additions and 79 deletions
+5 -3
View File
@@ -602,14 +602,16 @@ func (cb *ContextBuilder) BuildMessages(
// Add conversation history
messages = append(messages, history...)
// Add current user message
if strings.TrimSpace(currentMessage) != "" {
// Add current user message. Media-only turns must still be preserved so
// multimodal providers receive the uploaded image even when the user sends
// no accompanying text.
if strings.TrimSpace(currentMessage) != "" || len(media) > 0 {
msg := providers.Message{
Role: "user",
Content: currentMessage,
}
if len(media) > 0 {
msg.Media = media
msg.Media = append([]string(nil), media...)
}
messages = append(messages, msg)
}
+32
View File
@@ -707,6 +707,38 @@ func TestEmptyWorkspaceBaselineDetectsNewFiles(t *testing.T) {
}
}
// TestBuildMessages_IncludesMediaOnlyCurrentMessage checks that a turn with
// empty text but one attached image still produces a trailing user message
// that carries the image payload.
func TestBuildMessages_IncludesMediaOnlyCurrentMessage(t *testing.T) {
	const imagePayload = "data:image/png;base64,abc123"

	workspace := setupWorkspace(t, nil)
	defer os.RemoveAll(workspace)

	builder := NewContextBuilder(workspace)
	built := builder.BuildMessages(
		nil,
		"",
		"",
		[]string{imagePayload},
		"pico",
		"chat-1",
		"",
		"",
	)

	if got := len(built); got != 2 {
		t.Fatalf("len(msgs) = %d, want 2", got)
	}

	// The media-only turn is expected in the second slot.
	last := built[1]
	if last.Role != "user" {
		t.Fatalf("userMsg.Role = %q, want %q", last.Role, "user")
	}
	if last.Content != "" {
		t.Fatalf("userMsg.Content = %q, want empty string", last.Content)
	}
	if len(last.Media) != 1 || last.Media[0] != imagePayload {
		t.Fatalf("userMsg.Media = %#v, want image payload", last.Media)
	}
}
// BenchmarkBuildMessagesWithCache measures caching performance.
func BenchmarkBuildMessagesWithCache(b *testing.B) {
tmpDir, _ := os.MkdirTemp("", "picoclaw-bench-*")
+54
View File
@@ -262,3 +262,57 @@ func TestSend_ClosedConnection(t *testing.T) {
ch.Stop(ctx)
}
// TestParseInlineImageMedia_Valid feeds a single well-formed PNG data URL
// through parseInlineImageMedia and expects exactly one accepted entry.
func TestParseInlineImageMedia_Valid(t *testing.T) {
	const pngDataURL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+X2ioAAAAASUVORK5CYII="

	payload := map[string]any{
		"media": []any{pngDataURL},
	}

	parsed, err := parseInlineImageMedia(payload)
	if err != nil {
		t.Fatalf("parseInlineImageMedia() error = %v", err)
	}
	if got := len(parsed); got != 1 {
		t.Fatalf("len(media) = %d, want 1", got)
	}
}
// TestPicoChannel_HandleMessageSend_AllowsMediaOnly sends a payload that has
// an inline image but no text content and expects it to reach the message bus
// with empty Content and the image preserved in Media.
func TestPicoChannel_HandleMessageSend_AllowsMediaOnly(t *testing.T) {
	const pngDataURL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+X2ioAAAAASUVORK5CYII="

	messageBus := bus.NewMessageBus()
	channelCfg := config.PicoConfig{
		Token: *config.NewSecureString("test-token"),
	}
	ch, err := NewPicoChannel(channelCfg, messageBus)
	if err != nil {
		t.Fatalf("NewPicoChannel() error = %v", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	if startErr := ch.Start(ctx); startErr != nil {
		t.Fatalf("Start() error = %v", startErr)
	}
	defer ch.Stop(ctx)

	conn := &picoConn{id: "conn-1", sessionID: "sess-1"}
	outgoing := PicoMessage{
		ID: "msg-1",
		Payload: map[string]any{
			"media": []any{pngDataURL},
		},
	}
	ch.handleMessageSend(conn, outgoing)

	// Either the bus delivers the inbound message or the context deadline
	// fails the test.
	select {
	case inbound := <-messageBus.InboundChan():
		if inbound.Content != "" {
			t.Fatalf("msg.Content = %q, want empty", inbound.Content)
		}
		if len(inbound.Media) != 1 || !strings.HasPrefix(inbound.Media[0], "data:image/png;base64,") {
			t.Fatalf("msg.Media = %#v, want inline image payload", inbound.Media)
		}
	case <-ctx.Done():
		t.Fatal("timed out waiting for inbound media message")
	}
}
+123 -3
View File
@@ -2,6 +2,7 @@ package pico
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"net/http"
@@ -30,6 +31,14 @@ type picoConn struct {
cancel context.CancelFunc // cancels per-connection goroutines (e.g. pingLoop)
}
// allowedInlineImageMIMETypes is the set of image MIME types accepted for
// inline data-URL uploads. Membership is checked by key lookup; the empty
// struct values carry no data.
var allowedInlineImageMIMETypes = map[string]struct{}{
"image/jpeg": {},
"image/png": {},
"image/gif": {},
"image/webp": {},
"image/bmp": {},
}
// writeJSON sends a JSON message to the connection with write locking.
func (pc *picoConn) writeJSON(v any) error {
if pc.closed.Load() {
@@ -516,6 +525,9 @@ func (c *PicoChannel) handleMessage(pc *picoConn, msg PicoMessage) {
case TypeMessageSend:
c.handleMessageSend(pc, msg)
case TypeMediaSend:
c.handleMessageSend(pc, msg)
default:
errMsg := newError("unknown_type", fmt.Sprintf("unknown message type: %s", msg.Type))
pc.writeJSON(errMsg)
@@ -525,8 +537,19 @@ func (c *PicoChannel) handleMessage(pc *picoConn, msg PicoMessage) {
// handleMessageSend processes an inbound message.send from a client.
func (c *PicoChannel) handleMessageSend(pc *picoConn, msg PicoMessage) {
content, _ := msg.Payload["content"].(string)
if strings.TrimSpace(content) == "" {
errMsg := newError("empty_content", "message content is empty")
media, err := parseInlineImageMedia(msg.Payload)
if err != nil {
errMsg := newErrorWithPayload("invalid_media", err.Error(), map[string]any{
"request_id": msg.ID,
})
pc.writeJSON(errMsg)
return
}
if strings.TrimSpace(content) == "" && len(media) == 0 {
errMsg := newErrorWithPayload("empty_content", "message content is empty", map[string]any{
"request_id": msg.ID,
})
pc.writeJSON(errMsg)
return
}
@@ -550,6 +573,7 @@ func (c *PicoChannel) handleMessageSend(pc *picoConn, msg PicoMessage) {
logger.DebugCF("pico", "Received message", map[string]any{
"session_id": sessionID,
"preview": truncate(content, 50),
"media": len(media),
})
sender := bus.SenderInfo{
@@ -562,7 +586,7 @@ func (c *PicoChannel) handleMessageSend(pc *picoConn, msg PicoMessage) {
return
}
c.HandleMessage(c.ctx, peer, msg.ID, senderID, chatID, content, nil, metadata, sender)
c.HandleMessage(c.ctx, peer, msg.ID, senderID, chatID, content, media, metadata, sender)
}
// truncate truncates a string to maxLen runes.
@@ -573,3 +597,99 @@ func truncate(s string, maxLen int) string {
}
return string(runes[:maxLen]) + "..."
}
func parseInlineImageMedia(payload map[string]any) ([]string, error) {
if len(payload) == 0 {
return nil, nil
}
raw, ok := payload["media"]
if !ok || raw == nil {
return nil, nil
}
switch values := raw.(type) {
case []any:
media := make([]string, 0, len(values))
for i, item := range values {
value, err := inlineImageValue(item)
if err != nil {
return nil, fmt.Errorf("media[%d]: %w", i, err)
}
if err := validateInlineImageDataURL(value); err != nil {
return nil, fmt.Errorf("media[%d]: %w", i, err)
}
media = append(media, value)
}
return media, nil
case []string:
media := make([]string, 0, len(values))
for i, value := range values {
value = strings.TrimSpace(value)
if err := validateInlineImageDataURL(value); err != nil {
return nil, fmt.Errorf("media[%d]: %w", i, err)
}
media = append(media, value)
}
return media, nil
case string:
value := strings.TrimSpace(values)
if err := validateInlineImageDataURL(value); err != nil {
return nil, err
}
return []string{value}, nil
default:
return nil, fmt.Errorf("media must be a string or array of strings")
}
}
// inlineImageValue normalizes one element of a "media" array into a trimmed
// string.
//
// A string item is returned trimmed and must not be blank. An object item
// contributes the first non-blank string found under "url", then "data_url".
// Any other item type is rejected.
func inlineImageValue(item any) (string, error) {
	if text, isText := item.(string); isText {
		if trimmed := strings.TrimSpace(text); trimmed != "" {
			return trimmed, nil
		}
		return "", fmt.Errorf("image payload is empty")
	}

	fields, isObject := item.(map[string]any)
	if !isObject {
		return "", fmt.Errorf("image payload must be a string or object")
	}

	// Key order matters: "url" wins over "data_url" when both are usable.
	for _, key := range [...]string{"url", "data_url"} {
		candidate, isText := fields[key].(string)
		if !isText {
			continue
		}
		if trimmed := strings.TrimSpace(candidate); trimmed != "" {
			return trimmed, nil
		}
	}
	return "", fmt.Errorf("image payload must include url or data_url")
}
// validateInlineImageDataURL checks that mediaURL is an acceptable inline
// image: a "data:image/..." URL whose header ends in ";base64", whose MIME
// type is listed in allowedInlineImageMIMETypes, and whose payload is valid
// standard base64 that decodes to at most config.DefaultMaxMediaSize bytes.
//
// It returns nil when the URL is acceptable and a descriptive error
// otherwise.
func validateInlineImageDataURL(mediaURL string) error {
	if mediaURL == "" {
		return fmt.Errorf("image payload is empty")
	}
	if !strings.HasPrefix(mediaURL, "data:image/") {
		return fmt.Errorf("only inline image data URLs are supported")
	}

	header, data, found := strings.Cut(mediaURL, ",")
	if !found || strings.TrimSpace(data) == "" {
		return fmt.Errorf("image data URL is malformed")
	}
	// RFC 2397 places ";base64" immediately before the comma. A substring
	// match would also accept malformed headers such as
	// "data:image/png;base64x", so require it as the final header token.
	if !strings.HasSuffix(header, ";base64") {
		return fmt.Errorf("image data URL must be base64 encoded")
	}

	mimeType, _, _ := strings.Cut(strings.TrimPrefix(header, "data:"), ";")
	if _, ok := allowedInlineImageMIMETypes[mimeType]; !ok {
		return fmt.Errorf("unsupported image format: %s", mimeType)
	}

	data = strings.TrimSpace(data)
	// Reject oversized payloads from the encoded length alone, before
	// allocating a buffer for the full decode below.
	if base64.StdEncoding.DecodedLen(len(data)) > config.DefaultMaxMediaSize {
		return fmt.Errorf("image exceeds %d byte limit", config.DefaultMaxMediaSize)
	}
	if _, err := base64.StdEncoding.DecodeString(data); err != nil {
		return fmt.Errorf("invalid base64 image data")
	}

	return nil
}
+12 -4
View File
@@ -39,10 +39,18 @@ func newMessage(msgType string, payload map[string]any) PicoMessage {
}
}
// newError creates an error PicoMessage.
func newError(code, message string) PicoMessage {
return newMessage(TypeError, map[string]any{
func newErrorWithPayload(code, message string, extra map[string]any) PicoMessage {
payload := map[string]any{
"code": code,
"message": message,
})
}
for key, value := range extra {
payload[key] = value
}
return newMessage(TypeError, payload)
}
// newError builds an error PicoMessage that carries only a code and a
// human-readable message, with no extra payload fields.
func newError(code, message string) PicoMessage {
	var noExtra map[string]any
	return newErrorWithPayload(code, message, noExtra)
}
+112 -35
View File
@@ -42,6 +42,12 @@ type sessionListItem struct {
Updated string `json:"updated"`
}
// sessionChatMessage is the simplified message shape serialized to the
// frontend for session history: a role, its text, and optional media.
type sessionChatMessage struct {
// Role is "user" or "assistant" for entries built in this file.
Role string `json:"role"`
// Content is the message text; it may be empty for media-only messages.
Content string `json:"content"`
// Media holds the attached media strings and is omitted from JSON when empty.
Media []string `json:"media,omitempty"`
}
type sessionMetaFile struct {
Key string `json:"key"`
Summary string `json:"summary"`
@@ -62,8 +68,12 @@ type sessionMetaFile struct {
const (
picoSessionPrefix = "agent:main:pico:direct:pico:"
sanitizedPicoSessionPrefix = "agent_main_pico_direct_pico_"
maxSessionJSONLLineSize = 10 * 1024 * 1024 // 10 MB
maxSessionTitleRunes = 60
// Keep the session API aligned with the shared JSONL store reader limit in
// pkg/memory/jsonl.go so oversized lines fail consistently everywhere.
maxSessionJSONLLineSize = 10 * 1024 * 1024
maxSessionTitleRunes = 60
handledToolResponseSummaryText = "Requested output delivered via tool attachment."
)
// extractPicoSessionID extracts the session UUID from a full session key.
@@ -195,32 +205,21 @@ func (h *Handler) readJSONLSession(dir, sessionID string) (sessionFile, error) {
func buildSessionListItem(sessionID string, sess sessionFile) sessionListItem {
preview := ""
for _, msg := range sess.Messages {
if msg.Role == "user" && strings.TrimSpace(msg.Content) != "" {
preview = msg.Content
if msg.Role == "user" {
preview = sessionMessagePreview(msg)
}
if preview != "" {
break
}
}
title := strings.TrimSpace(sess.Summary)
if title == "" {
title = preview
}
title = truncateRunes(title, maxSessionTitleRunes)
preview = truncateRunes(preview, maxSessionTitleRunes)
if preview == "" {
preview = "(empty)"
}
if title == "" {
title = preview
}
title := preview
validMessageCount := 0
for _, msg := range sess.Messages {
if (msg.Role == "user" || msg.Role == "assistant") && strings.TrimSpace(msg.Content) != "" {
validMessageCount++
}
}
validMessageCount := len(visibleSessionMessages(sess.Messages))
return sessionListItem{
ID: sessionID,
@@ -247,6 +246,99 @@ func truncateRunes(s string, maxLen int) string {
return string(runes[:maxLen]) + "..."
}
// sessionMessageVisible reports whether msg has anything to show: at least
// one media attachment or non-blank text content.
func sessionMessageVisible(msg providers.Message) bool {
	if len(msg.Media) > 0 {
		return true
	}
	return strings.TrimSpace(msg.Content) != ""
}
// sessionMessagePreview returns the short text used to preview msg: its
// trimmed content when non-blank, "[image]" for a media-only message, and the
// empty string when the message has neither.
func sessionMessagePreview(msg providers.Message) string {
	trimmed := strings.TrimSpace(msg.Content)
	switch {
	case trimmed != "":
		return trimmed
	case len(msg.Media) > 0:
		return "[image]"
	default:
		return ""
	}
}
// visibleSessionMessages converts stored provider messages into the
// transcript shown to the user.
//
// User messages are kept when they have text or media. For assistant
// messages, any visible output carried by tool calls is emitted in place of
// the message itself; otherwise the message is kept unless it is empty or is
// the fixed internal summary that marks handled tool delivery. All other
// roles are dropped. Media slices are copied so the result does not alias the
// input.
func visibleSessionMessages(messages []providers.Message) []sessionChatMessage {
	transcript := make([]sessionChatMessage, 0, len(messages))

	for _, msg := range messages {
		if msg.Role == "user" {
			if !sessionMessageVisible(msg) {
				continue
			}
			transcript = append(transcript, sessionChatMessage{
				Role:    "user",
				Content: msg.Content,
				Media:   append([]string(nil), msg.Media...),
			})
			continue
		}
		if msg.Role != "assistant" {
			continue
		}

		// Visible tool output replaces the assistant message that issued the
		// tool calls.
		toolOutput := visibleAssistantToolMessages(msg.ToolCalls)
		if len(toolOutput) > 0 {
			transcript = append(transcript, toolOutput...)
			continue
		}

		// A turn may persist both visible `message` tool output and a later
		// plain assistant reply. Only the fixed internal summary text is
		// hidden; genuine replies stay in the transcript.
		if !sessionMessageVisible(msg) || assistantMessageInternalOnly(msg) {
			continue
		}
		transcript = append(transcript, sessionChatMessage{
			Role:    "assistant",
			Content: msg.Content,
			Media:   append([]string(nil), msg.Media...),
		})
	}

	return transcript
}
// assistantMessageInternalOnly reports whether msg's trimmed content is
// exactly the fixed handledToolResponseSummaryText marker.
func assistantMessageInternalOnly(msg providers.Message) bool {
	content := strings.TrimSpace(msg.Content)
	return content == handledToolResponseSummaryText
}
// visibleAssistantToolMessages turns `message` tool calls into assistant
// transcript entries.
//
// The tool name comes from the call's Name, falling back to Function.Name
// when Name is empty; arguments come from Function.Arguments. Calls to other
// tools, calls whose arguments are not valid JSON, and calls with blank
// content are skipped. It returns nil when toolCalls is empty.
func visibleAssistantToolMessages(toolCalls []providers.ToolCall) []sessionChatMessage {
	if len(toolCalls) == 0 {
		return nil
	}

	visible := make([]sessionChatMessage, 0, len(toolCalls))
	for _, call := range toolCalls {
		toolName, rawArgs := call.Name, ""
		if fn := call.Function; fn != nil {
			rawArgs = fn.Arguments
			if toolName == "" {
				toolName = fn.Name
			}
		}
		if toolName != "message" {
			continue
		}

		var parsed struct {
			Content string `json:"content"`
		}
		if json.Unmarshal([]byte(rawArgs), &parsed) != nil {
			continue
		}
		if strings.TrimSpace(parsed.Content) == "" {
			continue
		}

		visible = append(visible, sessionChatMessage{
			Role:    "assistant",
			Content: parsed.Content,
		})
	}

	return visible
}
// sessionsDir resolves the path to the gateway's session storage directory.
// It reads the workspace from config, falling back to ~/.picoclaw/workspace.
func (h *Handler) sessionsDir() (string, error) {
@@ -437,22 +529,7 @@ func (h *Handler) handleGetSession(w http.ResponseWriter, r *http.Request) {
}
}
// Convert to a simpler format for the frontend
type chatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
messages := make([]chatMessage, 0, len(sess.Messages))
for _, msg := range sess.Messages {
// Only include user and assistant messages that have actual content
if (msg.Role == "user" || msg.Role == "assistant") && strings.TrimSpace(msg.Content) != "" {
messages = append(messages, chatMessage{
Role: msg.Role,
Content: msg.Content,
})
}
}
messages := visibleSessionMessages(sess.Messages)
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]any{
+362 -7
View File
@@ -6,6 +6,7 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/sipeed/picoclaw/pkg/config"
@@ -87,15 +88,19 @@ func TestHandleListSessions_JSONLStorage(t *testing.T) {
if items[0].MessageCount != 2 {
t.Fatalf("items[0].MessageCount = %d, want 2", items[0].MessageCount)
}
if items[0].Title != "JSONL-backed session" {
t.Fatalf("items[0].Title = %q, want %q", items[0].Title, "JSONL-backed session")
if items[0].Title != "Explain why the history API is empty after migration." {
t.Fatalf(
"items[0].Title = %q, want %q",
items[0].Title,
"Explain why the history API is empty after migration.",
)
}
if items[0].Preview != "Explain why the history API is empty after migration." {
t.Fatalf("items[0].Preview = %q", items[0].Preview)
}
}
func TestHandleListSessions_TitleUsesTrimmedSummary(t *testing.T) {
func TestHandleListSessions_TitleUsesFirstUserMessage(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
@@ -139,10 +144,7 @@ func TestHandleListSessions_TitleUsesTrimmedSummary(t *testing.T) {
if len(items) != 1 {
t.Fatalf("len(items) = %d, want 1", len(items))
}
expectedTitle := truncateRunes(
"This summary is intentionally longer than sixty characters so it must be truncated in the history menu.",
maxSessionTitleRunes,
)
expectedTitle := truncateRunes("fallback preview", maxSessionTitleRunes)
if items[0].Title != expectedTitle {
t.Fatalf("items[0].Title = %q", items[0].Title)
}
@@ -215,6 +217,359 @@ func TestHandleGetSession_JSONLStorage(t *testing.T) {
}
}
// TestHandleGetSession_ReconstructsVisibleMessageToolOutput stores a turn in
// which the assistant replies only through the `message` tool, followed by
// the internal handled-tool summary. The session detail endpoint should
// return the user message plus the tool's content as an assistant message.
func TestHandleGetSession_ReconstructsVisibleMessageToolOutput(t *testing.T) {
	const sessionID = "detail-message-tool"

	configPath, cleanup := setupOAuthTestEnv(t)
	defer cleanup()

	store, err := memory.NewJSONLStore(sessionsTestDir(t, configPath))
	if err != nil {
		t.Fatalf("NewJSONLStore() error = %v", err)
	}

	messageToolCall := providers.ToolCall{
		ID:   "call_1",
		Type: "function",
		Function: &providers.FunctionCall{
			Name:      "message",
			Arguments: `{"content":"visible tool output"}`,
		},
	}
	history := []providers.Message{
		{Role: "user", Content: "test"},
		{Role: "assistant", Content: "", ToolCalls: []providers.ToolCall{messageToolCall}},
		{Role: "tool", Content: "Message sent to pico:pico:" + sessionID, ToolCallID: "call_1"},
		{Role: "assistant", Content: handledToolResponseSummaryText},
	}
	for _, entry := range history {
		if addErr := store.AddFullMessage(nil, picoSessionPrefix+sessionID, entry); addErr != nil {
			t.Fatalf("AddFullMessage() error = %v", addErr)
		}
	}

	mux := http.NewServeMux()
	NewHandler(configPath).RegisterRoutes(mux)

	rec := httptest.NewRecorder()
	mux.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/sessions/"+sessionID, nil))

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var detail struct {
		Messages []struct {
			Role    string `json:"role"`
			Content string `json:"content"`
		} `json:"messages"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &detail); decodeErr != nil {
		t.Fatalf("Unmarshal() error = %v", decodeErr)
	}

	if got := len(detail.Messages); got != 2 {
		t.Fatalf("len(resp.Messages) = %d, want 2", got)
	}
	reply := detail.Messages[1]
	if reply.Role != "assistant" || reply.Content != "visible tool output" {
		t.Fatalf("assistant message = %#v, want visible tool output", reply)
	}
}
// TestHandleGetSession_PreservesFinalAssistantReplyAfterMessageToolOutput
// stores a turn with `message` tool output followed by a genuine plain
// assistant reply, and expects the detail endpoint to return both assistant
// entries after the user message.
func TestHandleGetSession_PreservesFinalAssistantReplyAfterMessageToolOutput(t *testing.T) {
	const sessionID = "detail-message-tool-final-reply"

	configPath, cleanup := setupOAuthTestEnv(t)
	defer cleanup()

	store, err := memory.NewJSONLStore(sessionsTestDir(t, configPath))
	if err != nil {
		t.Fatalf("NewJSONLStore() error = %v", err)
	}

	messageToolCall := providers.ToolCall{
		ID:   "call_1",
		Type: "function",
		Function: &providers.FunctionCall{
			Name:      "message",
			Arguments: `{"content":"visible tool output"}`,
		},
	}
	history := []providers.Message{
		{Role: "user", Content: "test"},
		{Role: "assistant", ToolCalls: []providers.ToolCall{messageToolCall}},
		{Role: "tool", Content: "Message sent to pico:pico:" + sessionID, ToolCallID: "call_1"},
		{Role: "assistant", Content: "final assistant reply"},
	}
	for _, entry := range history {
		if addErr := store.AddFullMessage(nil, picoSessionPrefix+sessionID, entry); addErr != nil {
			t.Fatalf("AddFullMessage() error = %v", addErr)
		}
	}

	mux := http.NewServeMux()
	NewHandler(configPath).RegisterRoutes(mux)

	rec := httptest.NewRecorder()
	mux.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/sessions/"+sessionID, nil))

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var detail struct {
		Messages []struct {
			Role    string `json:"role"`
			Content string `json:"content"`
		} `json:"messages"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &detail); decodeErr != nil {
		t.Fatalf("Unmarshal() error = %v", decodeErr)
	}

	if got := len(detail.Messages); got != 3 {
		t.Fatalf("len(resp.Messages) = %d, want 3", got)
	}
	interim, final := detail.Messages[1], detail.Messages[2]
	if interim.Role != "assistant" || interim.Content != "visible tool output" {
		t.Fatalf("interim assistant message = %#v, want visible tool output", interim)
	}
	if final.Role != "assistant" || final.Content != "final assistant reply" {
		t.Fatalf("final assistant message = %#v, want final assistant reply", final)
	}
}
// TestHandleListSessions_MessageCountUsesVisibleTranscript stores a user
// message, a `message` tool call, its tool result and the internal summary,
// then expects the list endpoint to report a message count of 2.
func TestHandleListSessions_MessageCountUsesVisibleTranscript(t *testing.T) {
	const sessionID = "list-visible-count"

	configPath, cleanup := setupOAuthTestEnv(t)
	defer cleanup()

	store, err := memory.NewJSONLStore(sessionsTestDir(t, configPath))
	if err != nil {
		t.Fatalf("NewJSONLStore() error = %v", err)
	}

	messageToolCall := providers.ToolCall{
		ID:   "call_1",
		Type: "function",
		Function: &providers.FunctionCall{
			Name:      "message",
			Arguments: `{"content":"visible tool output"}`,
		},
	}
	history := []providers.Message{
		{Role: "user", Content: "test"},
		{Role: "assistant", ToolCalls: []providers.ToolCall{messageToolCall}},
		{Role: "tool", Content: "Message sent to pico:pico:" + sessionID, ToolCallID: "call_1"},
		{Role: "assistant", Content: handledToolResponseSummaryText},
	}
	for _, entry := range history {
		if addErr := store.AddFullMessage(nil, picoSessionPrefix+sessionID, entry); addErr != nil {
			t.Fatalf("AddFullMessage() error = %v", addErr)
		}
	}

	mux := http.NewServeMux()
	NewHandler(configPath).RegisterRoutes(mux)

	rec := httptest.NewRecorder()
	mux.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/sessions", nil))

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var listed []sessionListItem
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &listed); decodeErr != nil {
		t.Fatalf("Unmarshal() error = %v", decodeErr)
	}
	if got := len(listed); got != 1 {
		t.Fatalf("len(items) = %d, want 1", got)
	}
	if got := listed[0].MessageCount; got != 2 {
		t.Fatalf("items[0].MessageCount = %d, want 2", got)
	}
}
// TestHandleGetSession_IncludesMediaOnlyMessages stores a user message that
// has an image but no text and expects the detail endpoint to return it with
// its media attached.
func TestHandleGetSession_IncludesMediaOnlyMessages(t *testing.T) {
	const sessionID = "detail-media-only"

	configPath, cleanup := setupOAuthTestEnv(t)
	defer cleanup()

	store, err := memory.NewJSONLStore(sessionsTestDir(t, configPath))
	if err != nil {
		t.Fatalf("NewJSONLStore() error = %v", err)
	}

	imageOnly := providers.Message{
		Role:  "user",
		Media: []string{"data:image/png;base64,abc123"},
	}
	if addErr := store.AddFullMessage(nil, picoSessionPrefix+sessionID, imageOnly); addErr != nil {
		t.Fatalf("AddFullMessage(user) error = %v", addErr)
	}

	mux := http.NewServeMux()
	NewHandler(configPath).RegisterRoutes(mux)

	rec := httptest.NewRecorder()
	mux.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/sessions/"+sessionID, nil))

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var detail struct {
		Messages []struct {
			Role    string   `json:"role"`
			Content string   `json:"content"`
			Media   []string `json:"media"`
		} `json:"messages"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &detail); decodeErr != nil {
		t.Fatalf("Unmarshal() error = %v", decodeErr)
	}

	if got := len(detail.Messages); got != 1 {
		t.Fatalf("len(resp.Messages) = %d, want 1", got)
	}
	first := detail.Messages[0]
	if first.Role != "user" || len(first.Media) != 1 {
		t.Fatalf("message = %#v, want user message with media", first)
	}
}
// TestHandleSessions_SupportsJSONLMessagesUpToStoreCap stores a single 9 MiB
// user message and checks that both the list and detail endpoints can still
// read the session and return the content in full.
func TestHandleSessions_SupportsJSONLMessagesUpToStoreCap(t *testing.T) {
	const sessionID = "detail-large-jsonl"

	configPath, cleanup := setupOAuthTestEnv(t)
	defer cleanup()

	store, err := memory.NewJSONLStore(sessionsTestDir(t, configPath))
	if err != nil {
		t.Fatalf("NewJSONLStore() error = %v", err)
	}

	largeContent := strings.Repeat("x", 9*1024*1024)
	bigMessage := providers.Message{
		Role:    "user",
		Content: largeContent,
	}
	if addErr := store.AddFullMessage(nil, picoSessionPrefix+sessionID, bigMessage); addErr != nil {
		t.Fatalf("AddFullMessage() error = %v", addErr)
	}

	mux := http.NewServeMux()
	NewHandler(configPath).RegisterRoutes(mux)

	// List endpoint: the oversized session must still be enumerated.
	listRec := httptest.NewRecorder()
	mux.ServeHTTP(listRec, httptest.NewRequest(http.MethodGet, "/api/sessions", nil))
	if listRec.Code != http.StatusOK {
		t.Fatalf("list status = %d, want %d, body=%s", listRec.Code, http.StatusOK, listRec.Body.String())
	}

	var listed []sessionListItem
	if decodeErr := json.Unmarshal(listRec.Body.Bytes(), &listed); decodeErr != nil {
		t.Fatalf("list Unmarshal() error = %v", decodeErr)
	}
	if got := len(listed); got != 1 {
		t.Fatalf("len(items) = %d, want 1", got)
	}

	// Detail endpoint: the full content must round-trip.
	detailRec := httptest.NewRecorder()
	mux.ServeHTTP(detailRec, httptest.NewRequest(http.MethodGet, "/api/sessions/"+sessionID, nil))
	if detailRec.Code != http.StatusOK {
		t.Fatalf(
			"detail status = %d, want %d, body=%s",
			detailRec.Code,
			http.StatusOK,
			detailRec.Body.String(),
		)
	}

	var detail struct {
		Messages []struct {
			Role    string `json:"role"`
			Content string `json:"content"`
		} `json:"messages"`
	}
	if decodeErr := json.Unmarshal(detailRec.Body.Bytes(), &detail); decodeErr != nil {
		t.Fatalf("detail Unmarshal() error = %v", decodeErr)
	}

	if got := len(detail.Messages); got != 1 {
		t.Fatalf("len(resp.Messages) = %d, want 1", got)
	}
	first := detail.Messages[0]
	if first.Role != "user" {
		t.Fatalf("resp.Messages[0].Role = %q, want %q", first.Role, "user")
	}
	if got := len(first.Content); got != len(largeContent) {
		t.Fatalf("len(resp.Messages[0].Content) = %d, want %d", got, len(largeContent))
	}
}
// TestHandleListSessions_UsesImagePreviewForMediaOnlyMessage stores one
// image-only user message and expects the list endpoint to show "[image]" as
// the preview and to count the message.
func TestHandleListSessions_UsesImagePreviewForMediaOnlyMessage(t *testing.T) {
	const sessionID = "preview-media-only"

	configPath, cleanup := setupOAuthTestEnv(t)
	defer cleanup()

	store, err := memory.NewJSONLStore(sessionsTestDir(t, configPath))
	if err != nil {
		t.Fatalf("NewJSONLStore() error = %v", err)
	}

	imageOnly := providers.Message{
		Role:  "user",
		Media: []string{"data:image/png;base64,abc123"},
	}
	if addErr := store.AddFullMessage(nil, picoSessionPrefix+sessionID, imageOnly); addErr != nil {
		t.Fatalf("AddFullMessage() error = %v", addErr)
	}

	mux := http.NewServeMux()
	NewHandler(configPath).RegisterRoutes(mux)

	rec := httptest.NewRecorder()
	mux.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/sessions", nil))

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var listed []sessionListItem
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &listed); decodeErr != nil {
		t.Fatalf("Unmarshal() error = %v", decodeErr)
	}
	if got := len(listed); got != 1 {
		t.Fatalf("len(items) = %d, want 1", got)
	}

	entry := listed[0]
	if entry.Preview != "[image]" {
		t.Fatalf("items[0].Preview = %q, want %q", entry.Preview, "[image]")
	}
	if entry.MessageCount != 1 {
		t.Fatalf("items[0].MessageCount = %d, want 1", entry.MessageCount)
	}
}
func TestHandleDeleteSession_JSONLStorage(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
+8
View File
@@ -28,4 +28,12 @@ export default defineConfig([
],
},
},
{
files: ["src/routes/**/*.{ts,tsx}"],
rules: {
// TanStack Router route modules must export Route objects, so this rule
// produces false positives for framework-managed files.
"react-refresh/only-export-components": "off",
},
},
])
+5 -3
View File
@@ -1,5 +1,3 @@
// Sessions API — list and retrieve chat session history
import { launcherFetch } from "@/api/http"
export interface SessionSummary {
@@ -13,7 +11,11 @@ export interface SessionSummary {
export interface SessionDetail {
id: string
messages: { role: "user" | "assistant"; content: string }[]
messages: {
role: "user" | "assistant"
content: string
media?: string[]
}[]
summary: string
created: string
updated: string
@@ -43,7 +43,7 @@ export function AssistantMessage({
</div>
<div className="bg-card text-card-foreground relative overflow-hidden rounded-xl border">
<div className="prose dark:prose-invert prose-p:my-2 prose-pre:my-2 prose-pre:rounded-lg prose-pre:border prose-pre:bg-zinc-950 prose-pre:p-3 max-w-none p-4 text-[15px] leading-relaxed">
<div className="prose dark:prose-invert prose-p:my-2 prose-pre:my-2 prose-pre:overflow-x-auto prose-pre:rounded-lg prose-pre:border prose-pre:bg-zinc-950 prose-pre:p-3 max-w-none p-4 text-[15px] leading-relaxed [overflow-wrap:anywhere] break-words">
<ReactMarkdown
remarkPlugins={[remarkGfm]}
rehypePlugins={[rehypeRaw, rehypeSanitize]}
@@ -1,25 +1,34 @@
import { IconArrowUp } from "@tabler/icons-react"
import { IconArrowUp, IconPhotoPlus, IconX } from "@tabler/icons-react"
import type { KeyboardEvent } from "react"
import { useTranslation } from "react-i18next"
import TextareaAutosize from "react-textarea-autosize"
import { Button } from "@/components/ui/button"
import { cn } from "@/lib/utils"
import type { ChatAttachment } from "@/store/chat"
// Props for the ChatComposer: the text input, pending image attachments, and
// the callbacks the composer invokes for each user action.
interface ChatComposerProps {
// Current text in the message box.
input: string
// Images queued for the next message; rendered as removable thumbnails.
attachments: ChatAttachment[]
// Called with the new text whenever the message box changes.
onInputChange: (value: string) => void
// Called when the attach-image button is clicked.
onAddImages: () => void
// Called with the thumbnail's index when its remove button is clicked.
onRemoveAttachment: (index: number) => void
// Called when the send button is clicked.
onSend: () => void
// Together with hasDefaultModel, gates whether the input and attach button are enabled.
isConnected: boolean
hasDefaultModel: boolean
// When false, the send button is disabled.
canSend: boolean
}
export function ChatComposer({
input,
attachments,
onInputChange,
onAddImages,
onRemoveAttachment,
onSend,
isConnected,
hasDefaultModel,
canSend,
}: ChatComposerProps) {
const { t } = useTranslation()
const canInput = isConnected && hasDefaultModel
@@ -35,6 +44,32 @@ export function ChatComposer({
return (
<div className="bg-background shrink-0 px-4 pt-4 pb-[calc(1rem+env(safe-area-inset-bottom))] md:px-8 md:pb-8 lg:px-24 xl:px-48">
<div className="bg-card border-border/80 mx-auto flex max-w-[1000px] flex-col rounded-2xl border p-3 shadow-md">
{attachments.length > 0 && (
<div className="mb-3 flex flex-wrap gap-2 px-2">
{attachments.map((attachment, index) => (
<div
key={`${attachment.url}-${index}`}
className="bg-background relative h-20 w-20 overflow-hidden rounded-xl border"
>
<img
src={attachment.url}
alt={attachment.filename || t("chat.uploadedImage")}
className="h-full w-full object-cover"
/>
<button
type="button"
onClick={() => onRemoveAttachment(index)}
className="bg-background/85 text-foreground absolute top-1 right-1 inline-flex h-6 w-6 items-center justify-center rounded-full border shadow-sm transition hover:bg-white"
aria-label={t("chat.removeImage")}
title={t("chat.removeImage")}
>
<IconX className="h-3.5 w-3.5" />
</button>
</div>
))}
</div>
)}
<TextareaAutosize
value={input}
onChange={(e) => onInputChange(e.target.value)}
@@ -42,7 +77,7 @@ export function ChatComposer({
placeholder={t("chat.placeholder")}
disabled={!canInput}
className={cn(
"placeholder:text-muted-foreground max-h-[200px] min-h-[60px] resize-none border-0 bg-transparent px-2 py-1 text-[15px] shadow-none transition-colors focus-visible:ring-0 focus-visible:outline-none dark:bg-transparent",
"placeholder:text-muted-foreground/50 max-h-[200px] min-h-[60px] resize-none border-0 bg-transparent px-2 py-1 text-[15px] shadow-none transition-colors focus-visible:ring-0 focus-visible:outline-none dark:bg-transparent",
!canInput && "cursor-not-allowed",
)}
minRows={1}
@@ -50,13 +85,27 @@ export function ChatComposer({
/>
<div className="mt-2 flex items-center justify-between px-1">
<div className="flex items-center gap-1">{/* action buttons */}</div>
<div className="flex items-center gap-1">
<Button
type="button"
variant="ghost"
size="icon"
className="text-muted-foreground hover:text-foreground h-8 w-8 rounded-full"
onClick={onAddImages}
disabled={!canInput}
aria-label={t("chat.attachImage")}
title={t("chat.attachImage")}
>
<IconPhotoPlus className="size-4" />
</Button>
</div>
<Button
type="button"
size="icon"
className="size-8 rounded-full bg-violet-500 text-white transition-transform hover:bg-violet-600 active:scale-95"
onClick={onSend}
disabled={!input.trim() || !canInput}
disabled={!canSend}
>
<IconArrowUp className="size-4" />
</Button>
+116 -5
View File
@@ -1,6 +1,7 @@
import { IconPlus } from "@tabler/icons-react"
import { useEffect, useRef, useState } from "react"
import { type ChangeEvent, useEffect, useRef, useState } from "react"
import { useTranslation } from "react-i18next"
import { toast } from "sonner"
import { AssistantMessage } from "@/components/chat/assistant-message"
import { ChatComposer } from "@/components/chat/chat-composer"
@@ -15,13 +16,42 @@ import { useChatModels } from "@/hooks/use-chat-models"
import { useGateway } from "@/hooks/use-gateway"
import { usePicoChat } from "@/hooks/use-pico-chat"
import { useSessionHistory } from "@/hooks/use-session-history"
import type { ChatAttachment } from "@/store/chat"
// Client-side upload limits for chat images. Files larger than
// MAX_IMAGE_SIZE_BYTES are rejected before being read.
const MAX_IMAGE_SIZE_BYTES = 7 * 1024 * 1024
// Human-readable form of the limit above, shown in the "too large" toast;
// keep the two in sync.
const MAX_IMAGE_SIZE_LABEL = "7 MB"
// MIME types a selected file must have to be accepted as an attachment.
const ALLOWED_IMAGE_TYPES = new Set([
"image/jpeg",
"image/png",
"image/gif",
"image/webp",
"image/bmp",
])
// Reads `file` with a FileReader and resolves with its contents as a data
// URL string. Rejects with the reader's error, or a generic "Failed to read
// file" error when the reader reports none or yields a non-string result.
function readFileAsDataUrl(file: File): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const fileReader = new FileReader()

    fileReader.onerror = () => {
      reject(fileReader.error || new Error("Failed to read file"))
    }

    fileReader.onload = () => {
      const { result } = fileReader
      if (typeof result !== "string") {
        reject(new Error("Failed to read file"))
        return
      }
      resolve(result)
    }

    fileReader.readAsDataURL(file)
  })
}
export function ChatPage() {
const { t } = useTranslation()
const scrollRef = useRef<HTMLDivElement>(null)
const fileInputRef = useRef<HTMLInputElement>(null)
const [isAtBottom, setIsAtBottom] = useState(true)
const [hasScrolled, setHasScrolled] = useState(false)
const [input, setInput] = useState("")
const [attachments, setAttachments] = useState<ChatAttachment[]>([])
const {
messages,
@@ -80,18 +110,84 @@ export function ChatPage() {
}, [messages, isTyping, isAtBottom])
const handleSend = () => {
if (!input.trim() || !canSend) return
if (sendMessage(input.trim())) {
if ((!input.trim() && attachments.length === 0) || !canSend) return
if (
sendMessage({
content: input,
attachments,
})
) {
setInput("")
setAttachments([])
}
}
// Triggers a click on the hidden file input referenced by fileInputRef.
// Does nothing while `canSend` is false.
const handleAddImages = () => {
  if (canSend) {
    fileInputRef.current?.click()
  }
}
// Removes the pending attachment at position `index`, leaving the others in
// their original order.
const handleRemoveAttachment = (index: number) => {
  setAttachments((current) => {
    return current.filter((_attachment, position) => position !== index)
  })
}
// Change handler for the hidden file input. Validates each picked file,
// converts the accepted ones to data URLs, and stores the result as the
// pending attachment list.
//
// A file is skipped with an error toast when its MIME type is not in
// ALLOWED_IMAGE_TYPES, when it is larger than MAX_IMAGE_SIZE_BYTES, or when
// reading it fails. Only the first accepted image is kept, and it replaces
// whatever was attached before.
const handleImageSelection = async (event: ChangeEvent<HTMLInputElement>) => {
  // Snapshot the selection before the input is reset on the next statement.
  const pickedFiles = Array.from(event.target.files ?? [])
  // NOTE(review): presumably cleared so that picking the same file again
  // still fires a change event; confirm.
  event.target.value = ""
  if (pickedFiles.length === 0) {
    return
  }

  const accepted: ChatAttachment[] = []
  for (const picked of pickedFiles) {
    const name = picked.name

    if (!ALLOWED_IMAGE_TYPES.has(picked.type)) {
      toast.error(t("chat.invalidImage", { name }))
      continue
    }

    if (picked.size > MAX_IMAGE_SIZE_BYTES) {
      toast.error(
        t("chat.imageTooLarge", { name, size: MAX_IMAGE_SIZE_LABEL }),
      )
      continue
    }

    try {
      const url = await readFileAsDataUrl(picked)
      accepted.push({ type: "image", filename: name, url })
    } catch {
      toast.error(t("chat.imageReadFailed", { name }))
    }
  }

  // Keep a single image per message: the first one that passed validation.
  const [firstAccepted] = accepted
  if (firstAccepted) {
    setAttachments([firstAccepted])
  }
}
// The composer may submit only when sending is allowed and there is either
// non-blank text or at least one pending attachment.
const canSubmit = canSend && (Boolean(input.trim()) || attachments.length > 0)
return (
<div className="bg-background/95 flex h-full flex-col">
<PageHeader
title={t("navigation.chat")}
className={`transition-shadow ${
hasScrolled ? "shadow-sm" : "shadow-none"
hasScrolled ? "shadow-xs" : "shadow-none"
}`}
titleExtra={
hasAvailableModels && (
@@ -154,7 +250,10 @@ export function ChatPage() {
timestamp={msg.timestamp}
/>
) : (
<UserMessage content={msg.content} />
<UserMessage
content={msg.content}
attachments={msg.attachments}
/>
)}
</div>
))}
@@ -163,12 +262,24 @@ export function ChatPage() {
</div>
</div>
<input
ref={fileInputRef}
type="file"
accept="image/jpeg,image/png,image/gif,image/webp,image/bmp"
className="hidden"
onChange={handleImageSelection}
/>
<ChatComposer
input={input}
attachments={attachments}
onInputChange={setInput}
onAddImages={handleAddImages}
onRemoveAttachment={handleRemoveAttachment}
onSend={handleSend}
isConnected={isChatConnected}
hasDefaultModel={Boolean(defaultModelName)}
canSend={canSubmit}
/>
</div>
)
@@ -71,7 +71,7 @@ export function SessionHistoryMenu({
onClick={() => onSwitchSession(session.id)}
>
<span className="line-clamp-1 text-sm font-medium">
{session.title || session.preview}
{session.title}
</span>
<span className="text-muted-foreground text-xs">
{t("chat.messagesCount", {
@@ -1,13 +1,36 @@
import type { ChatAttachment } from "@/store/chat"
interface UserMessageProps {
content: string
attachments?: ChatAttachment[]
}
export function UserMessage({ content }: UserMessageProps) {
export function UserMessage({ content, attachments = [] }: UserMessageProps) {
const hasText = content.trim().length > 0
const imageAttachments = attachments.filter(
(attachment) => attachment.type === "image",
)
return (
<div className="flex w-full flex-col items-end gap-1.5">
<div className="max-w-[70%] rounded-2xl rounded-tr-sm bg-violet-500 px-5 py-3 text-[15px] leading-relaxed whitespace-pre-wrap text-white shadow-sm break-words">
{content}
</div>
{imageAttachments.length > 0 && (
<div className="flex max-w-[70%] flex-wrap justify-end gap-2">
{imageAttachments.map((attachment, index) => (
<img
key={`${attachment.url}-${index}`}
src={attachment.url}
alt={attachment.filename || "Uploaded image"}
className="max-h-72 max-w-full object-cover"
/>
))}
</div>
)}
{hasText && (
<div className="max-w-[70%] rounded-2xl rounded-tr-sm bg-violet-500 px-5 py-3 text-[15px] leading-relaxed wrap-break-word whitespace-pre-wrap text-white shadow-sm">
{content}
</div>
)}
</div>
)
}
+35 -4
View File
@@ -18,7 +18,11 @@ import {
normalizeWsUrlForBrowser,
} from "@/features/chat/websocket"
import i18n from "@/i18n"
import { getChatState, updateChatStore } from "@/store/chat"
import {
type ChatAttachment,
getChatState,
updateChatStore,
} from "@/store/chat"
import { type GatewayState, gatewayAtom } from "@/store/gateway"
const store = getDefaultStore()
@@ -324,19 +328,43 @@ export async function hydrateActiveSession() {
return hydratePromise
}
export function sendChatMessage(content: string) {
/** Arguments accepted by sendChatMessage. */
interface SendChatMessageInput {
// Message text as typed; sendChatMessage trims it before storing and sending.
content: string
// Attachments to send with the message; treated as an empty list when omitted.
attachments?: ChatAttachment[]
}
export function sendChatMessage({
content,
attachments = [],
}: SendChatMessageInput) {
if (!wsRef || wsRef.readyState !== WebSocket.OPEN) {
console.warn("WebSocket not connected")
return false
}
const normalizedContent = content.trim()
const normalizedAttachments = attachments
.filter((attachment) => attachment.type === "image" && attachment.url)
.map((attachment) => ({ ...attachment }))
if (!normalizedContent && normalizedAttachments.length === 0) {
return false
}
const socket = wsRef
const id = `msg-${++msgIdCounter}-${Date.now()}`
updateChatStore((prev) => ({
messages: [
...prev.messages,
{ id, role: "user", content, timestamp: Date.now() },
{
id,
role: "user",
content: normalizedContent,
attachments:
normalizedAttachments.length > 0 ? normalizedAttachments : undefined,
timestamp: Date.now(),
},
],
isTyping: true,
}))
@@ -346,7 +374,10 @@ export function sendChatMessage(content: string) {
JSON.stringify({
type: "message.send",
id,
payload: { content },
payload: {
content: normalizedContent,
media: normalizedAttachments.map((attachment) => attachment.url),
},
}),
)
return true
+19 -2
View File
@@ -1,6 +1,18 @@
import { getSessionHistory } from "@/api/sessions"
import { normalizeUnixTimestamp } from "@/features/chat/state"
import type { ChatMessage } from "@/store/chat"
import type { ChatAttachment, ChatMessage } from "@/store/chat"
/**
 * Converts the raw `media` entries of a session-history message into image
 * attachments.
 *
 * Only entries that start with `data:image/` are kept; every other entry is
 * dropped. Returns `undefined` when `media` is missing or empty, or when no
 * entry qualifies.
 */
function toChatAttachments(media?: string[]): ChatAttachment[] | undefined {
  if (!media?.length) {
    return undefined
  }

  const images: ChatAttachment[] = []
  for (const entry of media) {
    if (entry.startsWith("data:image/")) {
      images.push({ type: "image", url: entry })
    }
  }

  return images.length === 0 ? undefined : images
}
export async function loadSessionMessages(
sessionId: string,
@@ -12,6 +24,7 @@ export async function loadSessionMessages(
id: `hist-${index}-${Date.now()}`,
role: message.role,
content: message.content,
attachments: toChatAttachments(message.media),
timestamp: fallbackTime,
}))
}
@@ -31,9 +44,13 @@ function normalizeMessageTimestamp(timestamp: number | string): string {
}
function messageSignature(message: ChatMessage): string {
const attachmentSignature = (message.attachments ?? [])
.map((attachment) => `${attachment.type}\u0001${attachment.url}`)
.join("\u0002")
return `${message.role}\u0000${message.content}\u0000${normalizeMessageTimestamp(
message.timestamp,
)}`
)}\u0000${attachmentSignature}`
}
function comparableTimestamp(timestamp: number | string): number {
+18 -2
View File
@@ -1,3 +1,5 @@
import { toast } from "sonner"
import { normalizeUnixTimestamp } from "@/features/chat/state"
import { updateChatStore } from "@/store/chat"
@@ -67,10 +69,24 @@ export function handlePicoMessage(
updateChatStore({ isTyping: false })
break
case "error":
case "error": {
const requestId =
typeof payload.request_id === "string" ? payload.request_id : ""
const errorMessage =
typeof payload.message === "string" ? payload.message : ""
console.error("Pico error:", payload)
updateChatStore({ isTyping: false })
if (errorMessage) {
toast.error(errorMessage)
}
updateChatStore((prev) => ({
messages: requestId
? prev.messages.filter((msg) => msg.id !== requestId)
: prev.messages,
isTyping: false,
}))
break
}
case "pong":
break
@@ -88,8 +88,14 @@ export function useSessionHistory({
const handleDeleteSession = useCallback(
async (id: string) => {
try {
const deletedLoadedSession = sessions.some(
(session) => session.id === id,
)
await deleteSession(id)
setSessions((prev) => prev.filter((s) => s.id !== id))
if (deletedLoadedSession) {
setOffset((prev) => Math.max(prev - 1, 0))
}
if (id === activeSessionId) {
onDeletedActiveSession()
}
@@ -97,7 +103,7 @@ export function useSessionHistory({
console.error("Failed to delete session:", err)
}
},
[activeSessionId, onDeletedActiveSession],
[activeSessionId, onDeletedActiveSession, sessions],
)
return {
+6
View File
@@ -49,6 +49,12 @@
"deleteSession": "Delete session",
"messagesCount": "{{count}} messages",
"noModel": "Select model",
"attachImage": "Add images",
"removeImage": "Remove image",
"uploadedImage": "Uploaded image",
"invalidImage": "\"{{name}}\" is not a supported image file.",
"imageTooLarge": "\"{{name}}\" exceeds the {{size}} limit.",
"imageReadFailed": "Failed to read \"{{name}}\".",
"empty": {
"noConfiguredModel": "No Model Configured",
"noConfiguredModelDescription": "You need to configure at least one AI model with an API key before you can start chatting.",
+6
View File
@@ -49,6 +49,12 @@
"deleteSession": "删除会话",
"messagesCount": "{{count}} 条消息",
"noModel": "选择模型",
"attachImage": "添加图片",
"removeImage": "移除图片",
"uploadedImage": "已上传图片",
"invalidImage": "“{{name}}”不是支持的图片文件。",
"imageTooLarge": "“{{name}}”超过了 {{size}} 限制。",
"imageReadFailed": "读取“{{name}}”失败。",
"empty": {
"noConfiguredModel": "尚未配置模型",
"noConfiguredModelDescription": "请先配置至少一个带有 API Key 的 AI 模型,才能开始对话。",
+7
View File
@@ -5,11 +5,18 @@ import {
writeStoredSessionId,
} from "@/features/chat/state"
/** A file attached to a chat message. Only images are supported. */
export interface ChatAttachment {
// Attachment kind; "image" is the only allowed value.
type: "image"
// Location of the image content.
// NOTE(review): callers in this change set pass data URLs; confirm whether other URL schemes are expected.
url: string
// Original file name, when known.
filename?: string
}
/** A single message held in the chat store. */
export interface ChatMessage {
// Identifier of the message within the store.
id: string
// Who authored the message.
role: "user" | "assistant"
// Text body of the message; may be empty when the message only has attachments.
content: string
// Time of the message, either numeric or as a string.
timestamp: number | string
// Attachments sent with the message; omitted when there are none.
attachments?: ChatAttachment[]
}
export type ConnectionState =