feat(web): support image messages in pico chat (#2299)

This commit is contained in:
wenjie
2026-04-03 14:15:20 +08:00
committed by GitHub
parent f3ad5d9305
commit f2a19ab947
21 changed files with 1009 additions and 79 deletions
+112 -35
View File
@@ -42,6 +42,12 @@ type sessionListItem struct {
Updated string `json:"updated"`
}
// sessionChatMessage is the simplified transcript entry that the session API
// serializes for the frontend.
type sessionChatMessage struct {
	// Role names the speaker of the entry.
	Role string `json:"role"`

	// Content is the text shown for the entry.
	Content string `json:"content"`

	// Media holds the entry's media references; the key is dropped from the
	// JSON output when the slice is empty.
	Media []string `json:"media,omitempty"`
}
type sessionMetaFile struct {
Key string `json:"key"`
Summary string `json:"summary"`
@@ -62,8 +68,12 @@ type sessionMetaFile struct {
const (
// picoSessionPrefix is the leading part of a full pico session key; the
// tests in this change build keys as picoSessionPrefix + <session id>.
picoSessionPrefix = "agent:main:pico:direct:pico:"
// sanitizedPicoSessionPrefix is presumably the filename-safe spelling of
// picoSessionPrefix (":" replaced by "_") — TODO confirm against the store.
sanitizedPicoSessionPrefix = "agent_main_pico_direct_pico_"
// NOTE(review): maxSessionJSONLLineSize and maxSessionTitleRunes appear twice
// in this view (pre- and post-change diff lines); only one pair can exist in
// the compiled file.
maxSessionJSONLLineSize = 10 * 1024 * 1024 // 10 MB
maxSessionTitleRunes = 60
// Keep the session API aligned with the shared JSONL store reader limit in
// pkg/memory/jsonl.go so oversized lines fail consistently everywhere.
maxSessionJSONLLineSize = 10 * 1024 * 1024
// maxSessionTitleRunes is the rune limit passed to truncateRunes for session
// titles and previews.
maxSessionTitleRunes = 60
// handledToolResponseSummaryText is the fixed assistant content that
// assistantMessageInternalOnly matches so it can be hidden from transcripts.
handledToolResponseSummaryText = "Requested output delivered via tool attachment."
)
// extractPicoSessionID extracts the session UUID from a full session key.
@@ -195,32 +205,21 @@ func (h *Handler) readJSONLSession(dir, sessionID string) (sessionFile, error) {
func buildSessionListItem(sessionID string, sess sessionFile) sessionListItem {
preview := ""
for _, msg := range sess.Messages {
if msg.Role == "user" && strings.TrimSpace(msg.Content) != "" {
preview = msg.Content
if msg.Role == "user" {
preview = sessionMessagePreview(msg)
}
if preview != "" {
break
}
}
title := strings.TrimSpace(sess.Summary)
if title == "" {
title = preview
}
title = truncateRunes(title, maxSessionTitleRunes)
preview = truncateRunes(preview, maxSessionTitleRunes)
if preview == "" {
preview = "(empty)"
}
if title == "" {
title = preview
}
title := preview
validMessageCount := 0
for _, msg := range sess.Messages {
if (msg.Role == "user" || msg.Role == "assistant") && strings.TrimSpace(msg.Content) != "" {
validMessageCount++
}
}
validMessageCount := len(visibleSessionMessages(sess.Messages))
return sessionListItem{
ID: sessionID,
@@ -247,6 +246,99 @@ func truncateRunes(s string, maxLen int) string {
return string(runes[:maxLen]) + "..."
}
// sessionMessageVisible reports whether msg has anything to show in a
// transcript: at least one media entry, or content that is not blank after
// trimming whitespace.
func sessionMessageVisible(msg providers.Message) bool {
	if len(msg.Media) > 0 {
		return true
	}
	return strings.TrimSpace(msg.Content) != ""
}
// sessionMessagePreview returns the text used to preview msg in the session
// list. Trimmed content wins when it is non-empty; a message that only carries
// media is previewed as "[image]"; anything else yields the empty string.
func sessionMessagePreview(msg providers.Message) string {
	text := strings.TrimSpace(msg.Content)
	switch {
	case text != "":
		return text
	case len(msg.Media) > 0:
		return "[image]"
	default:
		return ""
	}
}
// visibleSessionMessages converts stored provider messages into the transcript
// entries shown to the user.
//
// Only "user" and "assistant" roles are considered. User messages are kept
// when sessionMessageVisible accepts them. For assistant messages, visible
// output reconstructed from tool calls takes precedence; otherwise the message
// itself is kept unless it is empty or is the internal handled-tool summary.
// The result is always a non-nil slice.
func visibleSessionMessages(messages []providers.Message) []sessionChatMessage {
	// entry builds a transcript item with its own copy of the media slice so
	// the result does not alias the stored message.
	entry := func(role string, src providers.Message) sessionChatMessage {
		return sessionChatMessage{
			Role:    role,
			Content: src.Content,
			Media:   append([]string(nil), src.Media...),
		}
	}

	transcript := make([]sessionChatMessage, 0, len(messages))
	for _, msg := range messages {
		if msg.Role == "user" {
			if sessionMessageVisible(msg) {
				transcript = append(transcript, entry("user", msg))
			}
			continue
		}
		if msg.Role != "assistant" {
			continue
		}

		// When the assistant delivered text through tool calls, that output
		// stands in for the message itself.
		if toolOutput := visibleAssistantToolMessages(msg.ToolCalls); len(toolOutput) > 0 {
			transcript = append(transcript, toolOutput...)
			continue
		}

		// A turn may store both visible `message` tool output and a later plain
		// assistant reply. Keep the plain reply; drop only empty messages and
		// the fixed internal summary that marks handled tool delivery.
		if !sessionMessageVisible(msg) || assistantMessageInternalOnly(msg) {
			continue
		}
		transcript = append(transcript, entry("assistant", msg))
	}
	return transcript
}
// assistantMessageInternalOnly reports whether msg's content, ignoring
// surrounding whitespace, is exactly handledToolResponseSummaryText.
func assistantMessageInternalOnly(msg providers.Message) bool {
	trimmed := strings.TrimSpace(msg.Content)
	return trimmed == handledToolResponseSummaryText
}
// visibleAssistantToolMessages rebuilds the assistant-visible text carried by
// tool calls.
//
// Only calls to the "message" tool are considered. The tool name is taken from
// the call itself, falling back to the function name when the call's own name
// is empty. The function's JSON arguments must decode and contain a non-blank
// "content" string; calls that fail either check are skipped. It returns nil
// when toolCalls is empty, and a (possibly empty) non-nil slice otherwise.
func visibleAssistantToolMessages(toolCalls []providers.ToolCall) []sessionChatMessage {
	if len(toolCalls) == 0 {
		return nil
	}

	visible := make([]sessionChatMessage, 0, len(toolCalls))
	for _, call := range toolCalls {
		toolName, rawArgs := call.Name, ""
		if fn := call.Function; fn != nil {
			rawArgs = fn.Arguments
			if toolName == "" {
				toolName = fn.Name
			}
		}
		if toolName != "message" {
			continue
		}

		var payload struct {
			Content string `json:"content"`
		}
		// Arguments that do not parse are skipped rather than surfaced.
		if json.Unmarshal([]byte(rawArgs), &payload) != nil {
			continue
		}
		if strings.TrimSpace(payload.Content) == "" {
			continue
		}

		visible = append(visible, sessionChatMessage{
			Role:    "assistant",
			Content: payload.Content,
		})
	}
	return visible
}
// sessionsDir resolves the path to the gateway's session storage directory.
// It reads the workspace from config, falling back to ~/.picoclaw/workspace.
func (h *Handler) sessionsDir() (string, error) {
@@ -437,22 +529,7 @@ func (h *Handler) handleGetSession(w http.ResponseWriter, r *http.Request) {
}
}
// Convert to a simpler format for the frontend
type chatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
messages := make([]chatMessage, 0, len(sess.Messages))
for _, msg := range sess.Messages {
// Only include user and assistant messages that have actual content
if (msg.Role == "user" || msg.Role == "assistant") && strings.TrimSpace(msg.Content) != "" {
messages = append(messages, chatMessage{
Role: msg.Role,
Content: msg.Content,
})
}
}
messages := visibleSessionMessages(sess.Messages)
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]any{
+362 -7
View File
@@ -6,6 +6,7 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/sipeed/picoclaw/pkg/config"
@@ -87,15 +88,19 @@ func TestHandleListSessions_JSONLStorage(t *testing.T) {
if items[0].MessageCount != 2 {
t.Fatalf("items[0].MessageCount = %d, want 2", items[0].MessageCount)
}
if items[0].Title != "JSONL-backed session" {
t.Fatalf("items[0].Title = %q, want %q", items[0].Title, "JSONL-backed session")
if items[0].Title != "Explain why the history API is empty after migration." {
t.Fatalf(
"items[0].Title = %q, want %q",
items[0].Title,
"Explain why the history API is empty after migration.",
)
}
if items[0].Preview != "Explain why the history API is empty after migration." {
t.Fatalf("items[0].Preview = %q", items[0].Preview)
}
}
func TestHandleListSessions_TitleUsesTrimmedSummary(t *testing.T) {
func TestHandleListSessions_TitleUsesFirstUserMessage(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
@@ -139,10 +144,7 @@ func TestHandleListSessions_TitleUsesTrimmedSummary(t *testing.T) {
if len(items) != 1 {
t.Fatalf("len(items) = %d, want 1", len(items))
}
expectedTitle := truncateRunes(
"This summary is intentionally longer than sixty characters so it must be truncated in the history menu.",
maxSessionTitleRunes,
)
expectedTitle := truncateRunes("fallback preview", maxSessionTitleRunes)
if items[0].Title != expectedTitle {
t.Fatalf("items[0].Title = %q", items[0].Title)
}
@@ -215,6 +217,359 @@ func TestHandleGetSession_JSONLStorage(t *testing.T) {
}
}
// TestHandleGetSession_ReconstructsVisibleMessageToolOutput stores a turn in
// which the assistant replies only through a `message` tool call, followed by
// the tool result and the handled-tool summary, and checks that the session
// detail endpoint returns exactly two messages: the user message and an
// assistant message carrying the tool call's content.
func TestHandleGetSession_ReconstructsVisibleMessageToolOutput(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
dir := sessionsTestDir(t, configPath)
store, err := memory.NewJSONLStore(dir)
if err != nil {
t.Fatalf("NewJSONLStore() error = %v", err)
}
sessionKey := picoSessionPrefix + "detail-message-tool"
// Persisted history: user text, assistant tool call with empty content,
// tool result, then the fixed handled-tool summary.
for _, msg := range []providers.Message{
{Role: "user", Content: "test"},
{
Role: "assistant",
Content: "",
ToolCalls: []providers.ToolCall{
{
ID: "call_1",
Type: "function",
Function: &providers.FunctionCall{
Name: "message",
Arguments: `{"content":"visible tool output"}`,
},
},
},
},
{Role: "tool", Content: "Message sent to pico:pico:detail-message-tool", ToolCallID: "call_1"},
{Role: "assistant", Content: handledToolResponseSummaryText},
} {
if err := store.AddFullMessage(nil, sessionKey, msg); err != nil {
t.Fatalf("AddFullMessage() error = %v", err)
}
}
h := NewHandler(configPath)
mux := http.NewServeMux()
h.RegisterRoutes(mux)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-message-tool", nil)
mux.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
}
var resp struct {
Messages []struct {
Role string `json:"role"`
Content string `json:"content"`
} `json:"messages"`
}
if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
t.Fatalf("Unmarshal() error = %v", err)
}
// Two entries expected: the tool-role message and the summary are not part
// of the returned transcript.
if len(resp.Messages) != 2 {
t.Fatalf("len(resp.Messages) = %d, want 2", len(resp.Messages))
}
if resp.Messages[1].Role != "assistant" || resp.Messages[1].Content != "visible tool output" {
t.Fatalf("assistant message = %#v, want visible tool output", resp.Messages[1])
}
}
// TestHandleGetSession_PreservesFinalAssistantReplyAfterMessageToolOutput
// stores a `message` tool call followed by a plain assistant reply and checks
// that the session detail endpoint returns three messages: the user message,
// the tool call's content, and the final assistant reply.
func TestHandleGetSession_PreservesFinalAssistantReplyAfterMessageToolOutput(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
dir := sessionsTestDir(t, configPath)
store, err := memory.NewJSONLStore(dir)
if err != nil {
t.Fatalf("NewJSONLStore() error = %v", err)
}
sessionKey := picoSessionPrefix + "detail-message-tool-final-reply"
// The last stored message is ordinary assistant text, not the handled-tool
// summary, so it is expected in the response.
for _, msg := range []providers.Message{
{Role: "user", Content: "test"},
{
Role: "assistant",
ToolCalls: []providers.ToolCall{
{
ID: "call_1",
Type: "function",
Function: &providers.FunctionCall{
Name: "message",
Arguments: `{"content":"visible tool output"}`,
},
},
},
},
{Role: "tool", Content: "Message sent to pico:pico:detail-message-tool-final-reply", ToolCallID: "call_1"},
{Role: "assistant", Content: "final assistant reply"},
} {
if err := store.AddFullMessage(nil, sessionKey, msg); err != nil {
t.Fatalf("AddFullMessage() error = %v", err)
}
}
h := NewHandler(configPath)
mux := http.NewServeMux()
h.RegisterRoutes(mux)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-message-tool-final-reply", nil)
mux.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
}
var resp struct {
Messages []struct {
Role string `json:"role"`
Content string `json:"content"`
} `json:"messages"`
}
if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
t.Fatalf("Unmarshal() error = %v", err)
}
if len(resp.Messages) != 3 {
t.Fatalf("len(resp.Messages) = %d, want 3", len(resp.Messages))
}
if resp.Messages[1].Role != "assistant" || resp.Messages[1].Content != "visible tool output" {
t.Fatalf("interim assistant message = %#v, want visible tool output", resp.Messages[1])
}
if resp.Messages[2].Role != "assistant" || resp.Messages[2].Content != "final assistant reply" {
t.Fatalf("final assistant message = %#v, want final assistant reply", resp.Messages[2])
}
}
// TestHandleListSessions_MessageCountUsesVisibleTranscript stores a user
// message, a `message` tool call, its tool result, and the handled-tool
// summary, then checks that the session list reports a MessageCount of 2 for
// that session.
func TestHandleListSessions_MessageCountUsesVisibleTranscript(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
dir := sessionsTestDir(t, configPath)
store, err := memory.NewJSONLStore(dir)
if err != nil {
t.Fatalf("NewJSONLStore() error = %v", err)
}
sessionKey := picoSessionPrefix + "list-visible-count"
for _, msg := range []providers.Message{
{Role: "user", Content: "test"},
{
Role: "assistant",
ToolCalls: []providers.ToolCall{
{
ID: "call_1",
Type: "function",
Function: &providers.FunctionCall{
Name: "message",
Arguments: `{"content":"visible tool output"}`,
},
},
},
},
{Role: "tool", Content: "Message sent to pico:pico:list-visible-count", ToolCallID: "call_1"},
{Role: "assistant", Content: handledToolResponseSummaryText},
} {
if err := store.AddFullMessage(nil, sessionKey, msg); err != nil {
t.Fatalf("AddFullMessage() error = %v", err)
}
}
h := NewHandler(configPath)
mux := http.NewServeMux()
h.RegisterRoutes(mux)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/sessions", nil)
mux.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
}
var items []sessionListItem
if err := json.Unmarshal(rec.Body.Bytes(), &items); err != nil {
t.Fatalf("Unmarshal() error = %v", err)
}
if len(items) != 1 {
t.Fatalf("len(items) = %d, want 1", len(items))
}
// Four messages were stored; the expected count is 2.
if items[0].MessageCount != 2 {
t.Fatalf("items[0].MessageCount = %d, want 2", items[0].MessageCount)
}
}
// TestHandleGetSession_IncludesMediaOnlyMessages stores a single user message
// that has one media entry and no text, and checks that the session detail
// endpoint still returns it with its media attached.
func TestHandleGetSession_IncludesMediaOnlyMessages(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
dir := sessionsTestDir(t, configPath)
store, err := memory.NewJSONLStore(dir)
if err != nil {
t.Fatalf("NewJSONLStore() error = %v", err)
}
sessionKey := picoSessionPrefix + "detail-media-only"
// No Content is set: the message consists of media alone.
if err := store.AddFullMessage(nil, sessionKey, providers.Message{
Role: "user",
Media: []string{"data:image/png;base64,abc123"},
}); err != nil {
t.Fatalf("AddFullMessage(user) error = %v", err)
}
h := NewHandler(configPath)
mux := http.NewServeMux()
h.RegisterRoutes(mux)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-media-only", nil)
mux.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
}
var resp struct {
Messages []struct {
Role string `json:"role"`
Content string `json:"content"`
Media []string `json:"media"`
} `json:"messages"`
}
if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
t.Fatalf("Unmarshal() error = %v", err)
}
if len(resp.Messages) != 1 {
t.Fatalf("len(resp.Messages) = %d, want 1", len(resp.Messages))
}
if resp.Messages[0].Role != "user" || len(resp.Messages[0].Media) != 1 {
t.Fatalf("message = %#v, want user message with media", resp.Messages[0])
}
}
// TestHandleSessions_SupportsJSONLMessagesUpToStoreCap stores one user message
// with 9 MiB of content and checks that both the session list and the session
// detail endpoints succeed, and that the detail response returns the content
// at its full length.
func TestHandleSessions_SupportsJSONLMessagesUpToStoreCap(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
dir := sessionsTestDir(t, configPath)
store, err := memory.NewJSONLStore(dir)
if err != nil {
t.Fatalf("NewJSONLStore() error = %v", err)
}
sessionKey := picoSessionPrefix + "detail-large-jsonl"
// 9 MiB of payload in a single message.
largeContent := strings.Repeat("x", 9*1024*1024)
if err := store.AddFullMessage(nil, sessionKey, providers.Message{
Role: "user",
Content: largeContent,
}); err != nil {
t.Fatalf("AddFullMessage() error = %v", err)
}
h := NewHandler(configPath)
mux := http.NewServeMux()
h.RegisterRoutes(mux)
// First the list endpoint must be able to read the session.
listRec := httptest.NewRecorder()
listReq := httptest.NewRequest(http.MethodGet, "/api/sessions", nil)
mux.ServeHTTP(listRec, listReq)
if listRec.Code != http.StatusOK {
t.Fatalf("list status = %d, want %d, body=%s", listRec.Code, http.StatusOK, listRec.Body.String())
}
var items []sessionListItem
if err := json.Unmarshal(listRec.Body.Bytes(), &items); err != nil {
t.Fatalf("list Unmarshal() error = %v", err)
}
if len(items) != 1 {
t.Fatalf("len(items) = %d, want 1", len(items))
}
// Then the detail endpoint must return the message untruncated.
detailRec := httptest.NewRecorder()
detailReq := httptest.NewRequest(http.MethodGet, "/api/sessions/detail-large-jsonl", nil)
mux.ServeHTTP(detailRec, detailReq)
if detailRec.Code != http.StatusOK {
t.Fatalf(
"detail status = %d, want %d, body=%s",
detailRec.Code,
http.StatusOK,
detailRec.Body.String(),
)
}
var resp struct {
Messages []struct {
Role string `json:"role"`
Content string `json:"content"`
} `json:"messages"`
}
if err := json.Unmarshal(detailRec.Body.Bytes(), &resp); err != nil {
t.Fatalf("detail Unmarshal() error = %v", err)
}
if len(resp.Messages) != 1 {
t.Fatalf("len(resp.Messages) = %d, want 1", len(resp.Messages))
}
if resp.Messages[0].Role != "user" {
t.Fatalf("resp.Messages[0].Role = %q, want %q", resp.Messages[0].Role, "user")
}
if got := len(resp.Messages[0].Content); got != len(largeContent) {
t.Fatalf("len(resp.Messages[0].Content) = %d, want %d", got, len(largeContent))
}
}
// TestHandleListSessions_UsesImagePreviewForMediaOnlyMessage stores a single
// user message that has one media entry and no text, and checks that the
// session list shows "[image]" as its preview and counts it as one message.
func TestHandleListSessions_UsesImagePreviewForMediaOnlyMessage(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()
dir := sessionsTestDir(t, configPath)
store, err := memory.NewJSONLStore(dir)
if err != nil {
t.Fatalf("NewJSONLStore() error = %v", err)
}
sessionKey := picoSessionPrefix + "preview-media-only"
if err := store.AddFullMessage(nil, sessionKey, providers.Message{
Role: "user",
Media: []string{"data:image/png;base64,abc123"},
}); err != nil {
t.Fatalf("AddFullMessage() error = %v", err)
}
h := NewHandler(configPath)
mux := http.NewServeMux()
h.RegisterRoutes(mux)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/sessions", nil)
mux.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status = %d, want %d, body=%s", rec.Code, http.StatusOK, rec.Body.String())
}
var items []sessionListItem
if err := json.Unmarshal(rec.Body.Bytes(), &items); err != nil {
t.Fatalf("Unmarshal() error = %v", err)
}
if len(items) != 1 {
t.Fatalf("len(items) = %d, want 1", len(items))
}
// With no text to show, the preview is the fixed image placeholder.
if items[0].Preview != "[image]" {
t.Fatalf("items[0].Preview = %q, want %q", items[0].Preview, "[image]")
}
if items[0].MessageCount != 1 {
t.Fatalf("items[0].MessageCount = %d, want 1", items[0].MessageCount)
}
}
func TestHandleDeleteSession_JSONLStorage(t *testing.T) {
configPath, cleanup := setupOAuthTestEnv(t)
defer cleanup()