From 009a8d702bcf58b2b1f78ac0fb8b20f0c44a0da2 Mon Sep 17 00:00:00 2001 From: ywj <138745068+yangwenjie1231@users.noreply.github.com> Date: Fri, 20 Mar 2026 04:59:43 +0000 Subject: [PATCH] Feat/feishu card parsing (#1534) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(feishu): add interactive card message parsing Add support for parsing inbound Feishu interactive card messages. When a user sends a card message, the text content is now extracted and passed to the LLM for processing. - Add extractCardText() to recursively extract text from card JSON - Support both JSON 1.0 (legacy) and JSON 2.0 schema formats - Handle nested elements: header, body, actions, columns - Extract text from markdown, lark_md, and plain_text elements - Add comprehensive unit tests for card parsing Fixes # 💘 Generated with Crush Assisted-by: GLM-5 via Crush * feat(feishu): extract and download images from interactive cards When receiving interactive card messages, extract embedded images (img_key, src, icon_key) and download them for LLM processing. - Add extractCardImageKeys() to recursively extract image keys from card JSON - Support img elements (img_key, src) and icon elements (icon_key) - Update downloadInboundMedia() to handle MsgTypeInteractive - Add comprehensive unit tests for image extraction Images are downloaded and stored via MediaStore, then appended to the message content as [image: photo] tags for LLM visibility. 💘 Generated with Crush Assisted-by: GLM-5 via Crush * fix(feishu): simplify card parsing - pass raw JSON, only extract images Address review feedback: text extraction cannot exhaustively handle all card formats (i18n_elements, div.fields, etc.). Pass raw JSON to LLM instead - same approach as MsgTypePost. Only image extraction remains as images must be downloaded for LLM to process. - Remove extractCardText() and helper functions - extractContent() now returns raw JSON for MsgTypeInteractive - Keep extractCardImageKeys() for downloading embedded images - Update tests to expect raw JSON for interactive cards * fix(feishu): don't append media tags to interactive card JSON Appending media tags like "[attachment]" to raw JSON content produces invalid JSON format. For interactive cards, the JSON already contains image information and media refs are downloaded separately. - Skip appendMediaTags for MsgTypeInteractive to preserve valid JSON - Add test case for interactive card with images * fix(feishu): filter out external URLs from card image extraction Only Feishu-hosted image keys (img_xxx, icon_xxx) can be downloaded via the Feishu API. External URLs in src field (https://...) should be filtered out to avoid download failures. - Add isFeishuImageKey() to detect Feishu-hosted keys vs external URLs - Update extractImageKeysRecursive to skip external URLs in src field - Add tests for external URL filtering and mixed scenarios * feat(feishu): support downloading external images from interactive cards Previously only Feishu-hosted images (img_key, icon_key) could be downloaded. Now external URLs in src field are also downloaded via HTTP and made available to the LLM. - extractCardImageKeys now returns two slices: Feishu keys and external URLs - Add downloadExternalImage to download images from HTTP URLs - Update downloadInboundMedia to handle both Feishu API and HTTP downloads - Update tests for new function signature * fix(feishu): use HTTP client with timeout for external image downloads Replaced http.DefaultClient with a client that has a 30-second timeout to prevent hanging on unresponsive external URLs. Generated with Crush Assisted-by: GLM-5 via Crush * fix(feishu): resolve lint errors for shadow and formatting - Rename err variables to avoid shadowing in downloadExternalImage - Fix struct field alignment in TestExtractCardImageKeys Generated with Crush Assisted-by: GLM-5 via Crush * refactor(feishu): pass external image URLs to LLM instead of downloading Instead of downloading external images from interactive cards, pass the URLs directly to LLM. This reduces network overhead and lets vision-capable models fetch images as needed. - Remove downloadExternalImage function - Append external URLs to card content for LLM processing - Only download Feishu-hosted images via API 💘 Generated with Crush Assisted-by: GLM-5 via Crush * fix(feishu): add blank line between functions for gci formatting * fix(feishu): keep interactive card content as valid JSON --- pkg/channels/feishu/common.go | 61 ++++++++++++++ pkg/channels/feishu/common_test.go | 116 ++++++++++++++++++++++++++ pkg/channels/feishu/feishu_64.go | 32 +++++++ pkg/channels/feishu/feishu_64_test.go | 25 ++++++ 4 files changed, 234 insertions(+) diff --git a/pkg/channels/feishu/common.go b/pkg/channels/feishu/common.go index fbe085b73..4952394b7 100644 --- a/pkg/channels/feishu/common.go +++ b/pkg/channels/feishu/common.go @@ -84,3 +84,64 @@ func stripMentionPlaceholders(content string, mentions []*larkim.MentionEvent) s content = mentionPlaceholderRegex.ReplaceAllString(content, "") return strings.TrimSpace(content) } + +// extractCardImageKeys recursively extracts all image keys from a Feishu interactive card. +// Image keys are used to download images from Feishu API. +// Returns two slices: Feishu-hosted keys and external URLs. +func extractCardImageKeys(rawContent string) (feishuKeys []string, externalURLs []string) { + if rawContent == "" { + return nil, nil + } + + var card map[string]any + if err := json.Unmarshal([]byte(rawContent), &card); err != nil { + return nil, nil + } + + extractImageKeysRecursive(card, &feishuKeys, &externalURLs) + return feishuKeys, externalURLs +} + +// isExternalURL returns true if the string is an external HTTP/HTTPS URL. +func isExternalURL(s string) bool { + return strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://") +} + +// extractImageKeysRecursive traverses card structure to find all image keys. +// Collects both Feishu-hosted keys and external URLs separately. +func extractImageKeysRecursive(v any, feishuKeys, externalURLs *[]string) { + switch val := v.(type) { + case map[string]any: + // Check if this is an img element + if tag, ok := val["tag"].(string); ok { + switch tag { + case "img": + // Try img_key first (always Feishu-hosted) + if imgKey, ok := val["img_key"].(string); ok && imgKey != "" { + *feishuKeys = append(*feishuKeys, imgKey) + } + // Check src - could be Feishu key or external URL + if src, ok := val["src"].(string); ok && src != "" { + if isExternalURL(src) { + *externalURLs = append(*externalURLs, src) + } else { + *feishuKeys = append(*feishuKeys, src) + } + } + case "icon": + // Icon elements use icon_key + if iconKey, ok := val["icon_key"].(string); ok && iconKey != "" { + *feishuKeys = append(*feishuKeys, iconKey) + } + } + } + // Recurse into all nested structures + for _, child := range val { + extractImageKeysRecursive(child, feishuKeys, externalURLs) + } + case []any: + for _, item := range val { + extractImageKeysRecursive(item, feishuKeys, externalURLs) + } + } +} diff --git a/pkg/channels/feishu/common_test.go b/pkg/channels/feishu/common_test.go index fefc9f7c1..ff4af0148 100644 --- a/pkg/channels/feishu/common_test.go +++ b/pkg/channels/feishu/common_test.go @@ -290,3 +290,119 @@ func TestStripMentionPlaceholders(t *testing.T) { }) } } + +func TestExtractCardImageKeys(t *testing.T) { + tests := []struct { + name string + content string + wantFeishuKeys []string + wantExternalURLs []string + }{ + { + name: "empty content", + content: "", + wantFeishuKeys: nil, + wantExternalURLs: nil, + }, + { + name: "invalid JSON", + content: "not json", + wantFeishuKeys: nil, + wantExternalURLs: nil, + }, + { + name: "card with no images", + content: `{"schema":"2.0","body":{"elements":[{"tag":"markdown","content":"text"}]}}`, + wantFeishuKeys: nil, + wantExternalURLs: nil, + }, + { + name: "single image with img_key", + content: `{"elements":[{"tag":"img","img_key":"img_abc123"}]}`, + wantFeishuKeys: []string{"img_abc123"}, + wantExternalURLs: nil, + }, + { + name: "single image with src as Feishu key", + content: `{"elements":[{"tag":"img","src":"img_xyz789"}]}`, + wantFeishuKeys: []string{"img_xyz789"}, + wantExternalURLs: nil, + }, + { + name: "multiple images", + content: `{"elements":[{"tag":"img","img_key":"img_1"},{"tag":"div","text":{"content":"text"}},{"tag":"img","img_key":"img_2"}]}`, + wantFeishuKeys: []string{"img_1", "img_2"}, + wantExternalURLs: nil, + }, + { + name: "nested image in columns", + content: `{"elements":[{"tag":"div","columns":[{"tag":"img","img_key":"img_col1"},{"tag":"img","img_key":"img_col2"}]}]}`, + wantFeishuKeys: []string{"img_col1", "img_col2"}, + wantExternalURLs: nil, + }, + { + name: "image in action", + content: `{"elements":[{"tag":"action","actions":[{"tag":"img","img_key":"img_action"}]}]}`, + wantFeishuKeys: []string{"img_action"}, + wantExternalURLs: nil, + }, + { + name: "icon element", + content: `{"elements":[{"tag":"icon","icon_key":"icon_123"}]}`, + wantFeishuKeys: []string{"icon_123"}, + wantExternalURLs: nil, + }, + { + name: "complex card with text and images", + content: `{"header":{"title":{"content":"Title"}},"elements":[{"tag":"div","text":{"content":"Description"}},{"tag":"img","img_key":"img_main"}]}`, + wantFeishuKeys: []string{"img_main"}, + wantExternalURLs: nil, + }, + { + name: "external URL in src", + content: `{"elements":[{"tag":"img","src":"https://example.com/image.png"}]}`, + wantFeishuKeys: nil, + wantExternalURLs: []string{"https://example.com/image.png"}, + }, + { + name: "mixed Feishu keys and external URLs", + content: `{"elements":[{"tag":"img","img_key":"img_feishu"},{"tag":"img","src":"https://cdn.example.com/external.jpg"},{"tag":"img","src":"img_another"}]}`, + wantFeishuKeys: []string{"img_feishu", "img_another"}, + wantExternalURLs: []string{"https://cdn.example.com/external.jpg"}, + }, + { + name: "multiple external URLs", + content: `{"elements":[{"tag":"img","src":"https://a.com/1.png"},{"tag":"img","src":"http://b.com/2.jpg"}]}`, + wantFeishuKeys: nil, + wantExternalURLs: []string{"https://a.com/1.png", "http://b.com/2.jpg"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotFeishuKeys, gotExternalURLs := extractCardImageKeys(tt.content) + + // Compare Feishu keys + if len(gotFeishuKeys) != len(tt.wantFeishuKeys) { + t.Errorf("extractCardImageKeys() feishuKeys = %v, want %v", gotFeishuKeys, tt.wantFeishuKeys) + return + } + for i, v := range gotFeishuKeys { + if v != tt.wantFeishuKeys[i] { + t.Errorf("extractCardImageKeys() feishuKeys[%d] = %q, want %q", i, v, tt.wantFeishuKeys[i]) + } + } + + // Compare external URLs + if len(gotExternalURLs) != len(tt.wantExternalURLs) { + t.Errorf("extractCardImageKeys() externalURLs = %v, want %v", gotExternalURLs, tt.wantExternalURLs) + return + } + for i, v := range gotExternalURLs { + if v != tt.wantExternalURLs[i] { + t.Errorf("extractCardImageKeys() externalURLs[%d] = %q, want %q", i, v, tt.wantExternalURLs[i]) + } + } + }) + } +} diff --git a/pkg/channels/feishu/feishu_64.go b/pkg/channels/feishu/feishu_64.go index 0341efc70..37a74718a 100644 --- a/pkg/channels/feishu/feishu_64.go +++ b/pkg/channels/feishu/feishu_64.go @@ -424,6 +424,15 @@ func (c *FeishuChannel) handleMessageReceive(ctx context.Context, event *larkim. mediaRefs = c.downloadInboundMedia(ctx, chatID, messageID, messageType, rawContent, store) } + // For interactive cards, pass external image URLs via media refs. + // Keep content as valid raw JSON for downstream parsing. + if messageType == larkim.MsgTypeInteractive { + _, externalURLs := extractCardImageKeys(rawContent) + if len(externalURLs) > 0 { + mediaRefs = append(mediaRefs, externalURLs...) + } + } + // Append media tags to content (like Telegram does) content = appendMediaTags(content, messageType, mediaRefs) @@ -559,6 +568,10 @@ func extractContent(messageType, rawContent string) string { // Pass raw JSON to LLM — structured rich text is more informative than flattened plain text return rawContent + case larkim.MsgTypeInteractive: + // Pass raw JSON to LLM — structured card is more informative than flattened text + return rawContent + case larkim.MsgTypeImage: // Image messages don't have text content return "" @@ -596,6 +609,18 @@ func (c *FeishuChannel) downloadInboundMedia( refs = append(refs, ref) } + case larkim.MsgTypeInteractive: + // Extract and download images embedded in interactive cards + feishuKeys, _ := extractCardImageKeys(rawContent) + // Download Feishu-hosted images via API + for _, imageKey := range feishuKeys { + ref := c.downloadResource(ctx, messageID, imageKey, "image", ".jpg", store, scope) + if ref != "" { + refs = append(refs, ref) + } + } + // External URLs are passed directly to LLM, not downloaded + case larkim.MsgTypeFile, larkim.MsgTypeAudio, larkim.MsgTypeMedia: fileKey := extractFileKey(rawContent) if fileKey == "" { @@ -716,11 +741,18 @@ func (c *FeishuChannel) downloadResource( } // appendMediaTags appends media type tags to content (like Telegram's "[image: photo]"). +// For interactive cards, media tags are not appended because content is raw JSON +// and appending would produce invalid JSON format. func appendMediaTags(content, messageType string, mediaRefs []string) string { if len(mediaRefs) == 0 { return content } + // Don't append tags to JSON content (interactive cards) - would produce invalid JSON + if messageType == larkim.MsgTypeInteractive { + return content + } + var tag string switch messageType { case larkim.MsgTypeImage: diff --git a/pkg/channels/feishu/feishu_64_test.go b/pkg/channels/feishu/feishu_64_test.go index dc3eab2e7..9010abf69 100644 --- a/pkg/channels/feishu/feishu_64_test.go +++ b/pkg/channels/feishu/feishu_64_test.go @@ -75,6 +75,24 @@ func TestExtractContent(t *testing.T) { rawContent: "", want: "", }, + { + name: "interactive card returns raw JSON", + messageType: "interactive", + rawContent: `{"schema":"2.0","body":{"elements":[{"tag":"markdown","content":"Hello from card"}]}}`, + want: `{"schema":"2.0","body":{"elements":[{"tag":"markdown","content":"Hello from card"}]}}`, + }, + { + name: "interactive card with complex structure returns raw JSON", + messageType: "interactive", + rawContent: `{"header":{"title":{"tag":"plain_text","content":"Title"}},"elements":[{"tag":"div","text":{"tag":"lark_md","content":"Card content"}}]}`, + want: `{"header":{"title":{"tag":"plain_text","content":"Title"}},"elements":[{"tag":"div","text":{"tag":"lark_md","content":"Card content"}}]}`, + }, + { + name: "interactive card invalid JSON returns as-is", + messageType: "interactive", + rawContent: `not valid json`, + want: `not valid json`, + }, } for _, tt := range tests { @@ -151,6 +169,13 @@ func TestAppendMediaTags(t *testing.T) { mediaRefs: []string{"ref1"}, want: "something [attachment]", }, + { + name: "interactive card with images returns content unchanged", + content: `{"schema":"2.0","body":{"elements":[{"tag":"img","img_key":"img_123"}]}}`, + messageType: "interactive", + mediaRefs: []string{"ref1"}, + want: `{"schema":"2.0","body":{"elements":[{"tag":"img","img_key":"img_123"}]}}`, + }, } for _, tt := range tests {