feat(message): support media attachments in outbound tool

This commit is contained in:
Anton Bogdanovich
2026-05-11 16:04:26 -07:00
parent f09a7d67f7
commit 5a4e42d1b6
8 changed files with 836 additions and 27 deletions
+17
View File
@@ -497,10 +497,18 @@ func (c *FeishuChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMess
return nil, fmt.Errorf("no media store available: %w", channels.ErrSendFailed)
}
caption := firstMediaCaption(msg.Parts)
sentAny := false
for _, part := range msg.Parts {
if err := c.sendMediaPart(ctx, msg.ChatID, part, store); err != nil {
return nil, err
}
sentAny = true
}
if sentAny && caption != "" {
if _, err := c.sendText(ctx, msg.ChatID, caption); err != nil {
return nil, err
}
}
if hasTrackedMsg {
@@ -557,6 +565,15 @@ func (c *FeishuChannel) sendMediaPart(
return nil
}
// firstMediaCaption scans parts in order and returns the first caption that
// is non-empty after trimming surrounding whitespace. It returns "" when no
// part carries such a caption.
func firstMediaCaption(parts []bus.MediaPart) string {
	for i := range parts {
		trimmed := strings.TrimSpace(parts[i].Caption)
		if trimmed == "" {
			continue
		}
		return trimmed
	}
	return ""
}
// --- Inbound message handling ---
func (c *FeishuChannel) handleMessageReceive(ctx context.Context, event *larkim.P2MessageReceiveV1) error {
+22
View File
@@ -171,6 +171,8 @@ func (c *SlackChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMessa
return nil, fmt.Errorf("no media store available: %w", channels.ErrSendFailed)
}
caption := slackFirstMediaCaption(msg.Parts)
sentAny := false
for _, part := range msg.Parts {
localPath, err := store.Resolve(part.Ref)
if err != nil {
@@ -205,6 +207,17 @@ func (c *SlackChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMessa
})
return nil, fmt.Errorf("slack send media: %w", channels.ErrTemporary)
}
sentAny = true
}
if sentAny && caption != "" {
opts := []slack.MsgOption{slack.MsgOptionText(caption, false)}
if threadTS != "" {
opts = append(opts, slack.MsgOptionTS(threadTS))
}
if _, _, err := c.api.PostMessageContext(ctx, channelID, opts...); err != nil {
return nil, fmt.Errorf("slack send media caption fallback: %w", channels.ErrTemporary)
}
}
// UploadFile does not expose the posted message timestamp in its
@@ -212,6 +225,15 @@ func (c *SlackChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMessa
return nil, nil
}
// slackFirstMediaCaption returns the whitespace-trimmed caption of the first
// part whose caption is not blank, or "" if every caption is blank.
func slackFirstMediaCaption(parts []bus.MediaPart) string {
	found := ""
	for idx := 0; idx < len(parts) && found == ""; idx++ {
		found = strings.TrimSpace(parts[idx].Caption)
	}
	return found
}
// ReactToMessage implements channels.ReactionCapable.
// It adds an "eyes" (👀) reaction to the inbound message and returns an undo function
// that removes the reaction.
+177
View File
@@ -45,6 +45,7 @@ var (
)
const defaultMediaGroupDelay = 500 * time.Millisecond
// telegramCaptionLimit is the caption length, counted in runes, above which
// SendMedia sends the leading caption as separate text message(s) and clears
// the captions on the media parts instead of attaching it to the media.
const telegramCaptionLimit = 1024
type TelegramChannel struct {
*channels.BaseChannel
@@ -639,6 +640,34 @@ func (c *TelegramChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMe
}
var messageIDs []string
leadingCaption := telegramLeadingCaption(msg.Parts)
if len([]rune(leadingCaption)) > telegramCaptionLimit {
leadingIDs, leadingErr := c.sendCaptionText(ctx, chatID, threadID, leadingCaption)
if leadingErr != nil {
return nil, leadingErr
}
messageIDs = append(messageIDs, leadingIDs...)
msg = telegramClearMediaCaptions(msg)
}
if len(msg.Parts) > 1 && telegramCanSendMediaGroup(msg.Parts) {
groupIDs, err := c.sendImageMediaGroups(ctx, chatID, threadID, store, msg.Parts)
if err != nil {
logger.ErrorCF("telegram", "Failed to send media group", map[string]any{
"count": len(msg.Parts),
"error": err.Error(),
})
return nil, fmt.Errorf("telegram send media group: %w", channels.ErrTemporary)
}
if len(groupIDs) > 0 {
messageIDs = append(messageIDs, groupIDs...)
if hasTrackedMsg {
c.dismissTrackedToolFeedbackMessage(ctx, trackedChatID, trackedMsgID)
}
return messageIDs, nil
}
}
for _, part := range msg.Parts {
localPath, err := store.Resolve(part.Ref)
if err != nil {
@@ -742,6 +771,154 @@ func (c *TelegramChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMe
return messageIDs, nil
}
// telegramCanSendMediaGroup reports whether parts qualify for the media-group
// path: there must be at least two parts and every part must have Type
// "image".
func telegramCanSendMediaGroup(parts []bus.MediaPart) bool {
	eligible := len(parts) >= 2
	for idx := 0; eligible && idx < len(parts); idx++ {
		eligible = parts[idx].Type == "image"
	}
	return eligible
}
// sendImageMediaGroups sends parts as a sequence of Telegram media groups
// (albums) and returns the IDs of all posted messages in send order.
//
// Parts are sent in consecutive chunks of at most maxGroupSize items. The
// Telegram Bot API only accepts media groups of 2-10 items, so when plain
// chunking would leave a single trailing item (e.g. 11 images -> 10 + 1) the
// chunk before it is shortened by one so the last two chunks become 9 + 2.
// Without this the final sendMediaGroup request is rejected after the earlier
// albums were already delivered.
//
// On the first failing chunk the error is returned and the IDs collected so
// far are discarded; chunks already sent are not rolled back.
func (c *TelegramChannel) sendImageMediaGroups(
	ctx context.Context,
	chatID int64,
	threadID int,
	store media.MediaStore,
	parts []bus.MediaPart,
) ([]string, error) {
	const maxGroupSize = 10

	messageIDs := make([]string, 0, len(parts))
	for start := 0; start < len(parts); {
		end := start + maxGroupSize
		if end > len(parts) {
			end = len(parts)
		}
		// Never leave exactly one part for the next request: move one item
		// from this chunk into the final one.
		if len(parts)-end == 1 {
			end--
		}

		groupIDs, err := c.sendSingleImageMediaGroup(ctx, chatID, threadID, store, parts[start:end])
		if err != nil {
			return nil, err
		}
		messageIDs = append(messageIDs, groupIDs...)
		start = end
	}

	return messageIDs, nil
}
// sendSingleImageMediaGroup uploads parts as one Telegram photo album via a
// single SendMediaGroup call and returns the resulting message IDs as decimal
// strings.
//
// Each part's ref is resolved through store and the file is opened from
// disk; every opened file stays open until the function returns so the
// upload can read from it. Only the first part's caption is attached to the
// album. Resolve and open failures are logged and returned unchanged.
func (c *TelegramChannel) sendSingleImageMediaGroup(
	ctx context.Context,
	chatID int64,
	threadID int,
	store media.MediaStore,
	parts []bus.MediaPart,
) ([]string, error) {
	handles := make([]*os.File, 0, len(parts))
	defer func() {
		for idx := range handles {
			handles[idx].Close()
		}
	}()

	album := make([]telego.InputMedia, 0, len(parts))
	for idx, part := range parts {
		resolved, resolveErr := store.Resolve(part.Ref)
		if resolveErr != nil {
			logger.ErrorCF("telegram", "Failed to resolve media ref for media group", map[string]any{
				"ref":   part.Ref,
				"error": resolveErr.Error(),
			})
			return nil, resolveErr
		}

		handle, openErr := os.Open(resolved)
		if openErr != nil {
			logger.ErrorCF("telegram", "Failed to open media file for media group", map[string]any{
				"path":  resolved,
				"error": openErr.Error(),
			})
			return nil, openErr
		}
		handles = append(handles, handle)

		// Only the album's first item carries a caption.
		caption := ""
		if idx == 0 {
			caption = part.Caption
		}
		album = append(album, &telego.InputMediaPhoto{
			Type:    telego.MediaTypePhoto,
			Media:   telego.InputFile{File: handle},
			Caption: caption,
		})
	}

	params := &telego.SendMediaGroupParams{
		ChatID:          tu.ID(chatID),
		MessageThreadID: threadID,
		Media:           album,
	}
	sent, err := c.bot.SendMediaGroup(ctx, params)
	if err != nil {
		return nil, err
	}

	ids := make([]string, len(sent))
	for idx := range sent {
		ids[idx] = strconv.Itoa(sent[idx].MessageID)
	}
	return ids, nil
}
// sendCaptionText posts text as one or more plain (non-MarkdownV2) messages
// and returns their message IDs in send order.
//
// The text is trimmed first; blank text sends nothing and yields (nil, nil).
// Otherwise it is split with channels.SplitMessage using the channel's
// maximum message length, and each non-blank chunk is sent through sendChunk.
// The first send error aborts the loop and is returned with a nil ID slice.
func (c *TelegramChannel) sendCaptionText(
	ctx context.Context,
	chatID int64,
	threadID int,
	text string,
) ([]string, error) {
	trimmed := strings.TrimSpace(text)
	if trimmed == "" {
		return nil, nil
	}

	pieces := channels.SplitMessage(trimmed, c.MaxMessageLength())
	sentIDs := make([]string, 0, len(pieces))
	for idx := range pieces {
		body := strings.TrimSpace(pieces[idx])
		if body != "" {
			id, err := c.sendChunk(ctx, sendChunkParams{
				chatID:        chatID,
				threadID:      threadID,
				content:       body,
				mdFallback:    body,
				useMarkdownV2: false,
			})
			if err != nil {
				return nil, err
			}
			sentIDs = append(sentIDs, id)
		}
	}
	return sentIDs, nil
}
// telegramLeadingCaption returns the first part's caption with surrounding
// whitespace removed, or "" when parts is empty.
func telegramLeadingCaption(parts []bus.MediaPart) string {
	if len(parts) > 0 {
		return strings.TrimSpace(parts[0].Caption)
	}
	return ""
}
// telegramClearMediaCaptions returns a copy of msg in which every part has an
// empty Caption. The parts slice is duplicated before it is modified, so the
// slice held by the caller's message is left untouched. A message without
// parts is returned as is.
func telegramClearMediaCaptions(msg bus.OutboundMediaMessage) bus.OutboundMediaMessage {
	count := len(msg.Parts)
	if count == 0 {
		return msg
	}

	stripped := make([]bus.MediaPart, count)
	copy(stripped, msg.Parts)
	for idx := range stripped {
		stripped[idx].Caption = ""
	}

	// msg is a by-value copy, so swapping its Parts field does not affect the
	// caller's message.
	msg.Parts = stripped
	return msg
}
func (c *TelegramChannel) handleMessage(ctx context.Context, message *telego.Message) error {
if message != nil && strings.TrimSpace(message.MediaGroupID) != "" {
return c.bufferMediaGroupMessage(ctx, message)
+265
View File
@@ -110,6 +110,17 @@ func successResponseWithMessageID(t *testing.T, messageID int) *ta.Response {
return &ta.Response{Ok: true, Result: b}
}
// successMediaGroupResponse builds a successful API response whose Result is
// a JSON array of messages, one per given message ID and in the same order.
// It fails the test if the messages cannot be marshaled.
func successMediaGroupResponse(t *testing.T, messageIDs ...int) *ta.Response {
	t.Helper()

	payload := make([]telego.Message, len(messageIDs))
	for idx, id := range messageIDs {
		payload[idx] = telego.Message{MessageID: id}
	}

	encoded, err := json.Marshal(payload)
	require.NoError(t, err)

	return &ta.Response{Ok: true, Result: encoded}
}
func successUserResponse(t *testing.T, user *telego.User) *ta.Response {
t.Helper()
b, err := json.Marshal(user)
@@ -237,6 +248,260 @@ func TestSendMedia_ImageNonDimensionErrorDoesNotFallback(t *testing.T) {
assert.NotContains(t, caller.calls[0].URL, "sendDocument")
}
// TestSendMedia_MultipleImagesUseMediaGroup verifies that two image parts are
// delivered with a single sendMediaGroup call, that the returned IDs are the
// album's message IDs, and that only the first album item carries the caption.
func TestSendMedia_MultipleImagesUseMediaGroup(t *testing.T) {
constructor := &multipartRecordingConstructor{}
// The stub answers sendMediaGroup with message IDs 101 and 102 and fails the
// test on any other API method.
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
if strings.Contains(url, "sendMediaGroup") {
return successMediaGroupResponse(t, 101, 102), nil
}
t.Fatalf("unexpected API call: %s", url)
return nil, nil
},
}
ch := newTestChannelWithConstructor(t, caller, constructor)
store := media.NewFileMediaStore()
ch.SetMediaStore(store)
// Write two image files to a temp dir and register them with the store.
tmpDir := t.TempDir()
firstPath := filepath.Join(tmpDir, "first.png")
secondPath := filepath.Join(tmpDir, "second.png")
require.NoError(t, os.WriteFile(firstPath, []byte("first-image"), 0o644))
require.NoError(t, os.WriteFile(secondPath, []byte("second-image"), 0o644))
firstRef, err := store.Store(firstPath, media.MediaMeta{Filename: "first.png", ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
secondRef, err := store.Store(secondPath, media.MediaMeta{Filename: "second.png", ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
ids, err := ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
ChatID: "12345",
Parts: []bus.MediaPart{
{Type: "image", Ref: firstRef, Caption: "album caption"},
{Type: "image", Ref: secondRef},
},
})
require.NoError(t, err)
assert.Equal(t, []string{"101", "102"}, ids)
// Exactly one API call, and one recorded multipart request with two files.
require.Len(t, caller.calls, 1)
assert.Contains(t, caller.calls[0].URL, "sendMediaGroup")
require.Len(t, constructor.calls, 1)
require.Len(t, constructor.calls[0].FileSizes, 2)
// Decode the "media" parameter to inspect the per-item captions.
var mediaPayload []map[string]any
require.NoError(t, json.Unmarshal([]byte(constructor.calls[0].Parameters["media"]), &mediaPayload))
require.Len(t, mediaPayload, 2)
assert.Equal(t, "album caption", mediaPayload[0]["caption"])
_, hasSecondCaption := mediaPayload[1]["caption"]
assert.False(t, hasSecondCaption)
}
// TestSendMedia_MoreThanTenImagesSplitIntoMediaGroups verifies that fifteen
// image parts are delivered with two sendMediaGroup calls and that the
// returned IDs are the concatenation of both responses in call order.
func TestSendMedia_MoreThanTenImagesSplitIntoMediaGroups(t *testing.T) {
constructor := &multipartRecordingConstructor{}
// callIndex counts sendMediaGroup calls so each call gets its own response.
callIndex := 0
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
if !strings.Contains(url, "sendMediaGroup") {
t.Fatalf("unexpected API call: %s", url)
}
callIndex++
// First call answers with ten message IDs, the second with five; a third
// call fails the test.
if callIndex == 1 {
return successMediaGroupResponse(t, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010), nil
}
if callIndex == 2 {
return successMediaGroupResponse(t, 1011, 1012, 1013, 1014, 1015), nil
}
t.Fatalf("unexpected sendMediaGroup call #%d", callIndex)
return nil, nil
},
}
ch := newTestChannelWithConstructor(t, caller, constructor)
store := media.NewFileMediaStore()
ch.SetMediaStore(store)
tmpDir := t.TempDir()
// Build fifteen stored image parts; only the first one has a caption.
parts := make([]bus.MediaPart, 0, 15)
for i := 0; i < 15; i++ {
path := filepath.Join(tmpDir, "image-"+strconv.Itoa(i)+".png")
require.NoError(t, os.WriteFile(path, []byte("img-"+strconv.Itoa(i)), 0o644))
ref, err := store.Store(path, media.MediaMeta{Filename: filepath.Base(path), ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
part := bus.MediaPart{Type: "image", Ref: ref}
if i == 0 {
part.Caption = "long album caption"
}
parts = append(parts, part)
}
ids, err := ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
ChatID: "12345",
Parts: parts,
})
require.NoError(t, err)
assert.Equal(t, []string{
"1001", "1002", "1003", "1004", "1005",
"1006", "1007", "1008", "1009", "1010",
"1011", "1012", "1013", "1014", "1015",
}, ids)
// Two API calls and two recorded multipart requests: one per media group.
require.Len(t, caller.calls, 2)
require.Len(t, constructor.calls, 2)
}
// TestSendMedia_SingleImageLongCaptionSendsTextFirst verifies that a single
// image whose caption exceeds telegramCaptionLimit results in a sendMessage
// call followed by a sendPhoto call, and that the first recorded multipart
// request has an empty "caption" parameter.
func TestSendMedia_SingleImageLongCaptionSendsTextFirst(t *testing.T) {
constructor := &multipartRecordingConstructor{}
// telegramCaptionLimit characters plus a suffix, so the caption is over the limit.
longCaption := strings.Repeat("a", telegramCaptionLimit) + " tail overflow"
// The stub answers sendMessage with ID 201 and sendPhoto with ID 202; any
// other API method fails the test.
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
switch {
case strings.Contains(url, "sendMessage"):
return successResponseWithMessageID(t, 201), nil
case strings.Contains(url, "sendPhoto"):
return successResponseWithMessageID(t, 202), nil
default:
t.Fatalf("unexpected API call: %s", url)
return nil, nil
}
},
}
ch := newTestChannelWithConstructor(t, caller, constructor)
store := media.NewFileMediaStore()
ch.SetMediaStore(store)
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "image.png")
require.NoError(t, os.WriteFile(path, []byte("img"), 0o644))
ref, err := store.Store(path, media.MediaMeta{Filename: "image.png", ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
ids, err := ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
ChatID: "12345",
Parts: []bus.MediaPart{{
Type: "image",
Ref: ref,
Caption: longCaption,
}},
})
require.NoError(t, err)
// The text message ID comes first, then the photo message ID.
assert.Equal(t, []string{"201", "202"}, ids)
require.Len(t, caller.calls, 2)
assert.Contains(t, caller.calls[0].URL, "sendMessage")
assert.Contains(t, caller.calls[1].URL, "sendPhoto")
assert.Equal(t, "", constructor.calls[0].Parameters["caption"])
}
// TestSendMedia_MediaGroupLongCaptionSendsTextFirst verifies that two images
// whose leading caption exceeds telegramCaptionLimit result in a sendMessage
// call followed by a single sendMediaGroup call, with the text message ID
// listed before the album message IDs.
func TestSendMedia_MediaGroupLongCaptionSendsTextFirst(t *testing.T) {
constructor := &multipartRecordingConstructor{}
// telegramCaptionLimit characters plus a suffix, so the caption is over the limit.
longCaption := strings.Repeat("b", telegramCaptionLimit) + " trailing explanation"
// The stub answers sendMessage with ID 301 and sendMediaGroup with IDs 302
// and 303; any other API method fails the test.
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
switch {
case strings.Contains(url, "sendMessage"):
return successResponseWithMessageID(t, 301), nil
case strings.Contains(url, "sendMediaGroup"):
return successMediaGroupResponse(t, 302, 303), nil
default:
t.Fatalf("unexpected API call: %s", url)
return nil, nil
}
},
}
ch := newTestChannelWithConstructor(t, caller, constructor)
store := media.NewFileMediaStore()
ch.SetMediaStore(store)
tmpDir := t.TempDir()
firstPath := filepath.Join(tmpDir, "first.png")
secondPath := filepath.Join(tmpDir, "second.png")
require.NoError(t, os.WriteFile(firstPath, []byte("first-image"), 0o644))
require.NoError(t, os.WriteFile(secondPath, []byte("second-image"), 0o644))
firstRef, err := store.Store(firstPath, media.MediaMeta{Filename: "first.png", ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
secondRef, err := store.Store(secondPath, media.MediaMeta{Filename: "second.png", ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
ids, err := ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
ChatID: "12345",
Parts: []bus.MediaPart{
{Type: "image", Ref: firstRef, Caption: longCaption},
{Type: "image", Ref: secondRef},
},
})
require.NoError(t, err)
assert.Equal(t, []string{"301", "302", "303"}, ids)
// Call order: the caption text first, then the album.
require.Len(t, caller.calls, 2)
assert.Contains(t, caller.calls[0].URL, "sendMessage")
assert.Contains(t, caller.calls[1].URL, "sendMediaGroup")
}
// TestSendMedia_MultiGroupLongCaptionSendsTextBeforeGroups verifies that
// fifteen images whose leading caption exceeds telegramCaptionLimit result in
// one sendMessage call followed by two sendMediaGroup calls, and that the
// returned IDs follow that same order.
func TestSendMedia_MultiGroupLongCaptionSendsTextBeforeGroups(t *testing.T) {
constructor := &multipartRecordingConstructor{}
// telegramCaptionLimit characters plus a suffix, so the caption is over the limit.
longCaption := strings.Repeat("c", telegramCaptionLimit) + " overflow before second album"
// callOrder records the kind of each API call in the order it was made.
callOrder := make([]string, 0, 3)
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
switch {
case strings.Contains(url, "sendMessage"):
callOrder = append(callOrder, "text")
return successResponseWithMessageID(t, 499), nil
case strings.Contains(url, "sendMediaGroup"):
callOrder = append(callOrder, "group")
// The response is chosen by overall call position: a group call made as the
// second call gets ten IDs, as the third call five IDs; any other position
// fails the test.
if len(callOrder) == 2 {
return successMediaGroupResponse(t, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410), nil
}
if len(callOrder) == 3 {
return successMediaGroupResponse(t, 411, 412, 413, 414, 415), nil
}
t.Fatalf("unexpected sendMediaGroup order: %v", callOrder)
return nil, nil
default:
t.Fatalf("unexpected API call: %s", url)
return nil, nil
}
},
}
ch := newTestChannelWithConstructor(t, caller, constructor)
store := media.NewFileMediaStore()
ch.SetMediaStore(store)
tmpDir := t.TempDir()
// Build fifteen stored image parts; only the first one has the long caption.
parts := make([]bus.MediaPart, 0, 15)
for i := 0; i < 15; i++ {
path := filepath.Join(tmpDir, "image-"+strconv.Itoa(i)+".png")
require.NoError(t, os.WriteFile(path, []byte("img-"+strconv.Itoa(i)), 0o644))
ref, err := store.Store(path, media.MediaMeta{Filename: filepath.Base(path), ContentType: "image/png"}, "scope-1")
require.NoError(t, err)
part := bus.MediaPart{Type: "image", Ref: ref}
if i == 0 {
part.Caption = longCaption
}
parts = append(parts, part)
}
ids, err := ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
ChatID: "12345",
Parts: parts,
})
require.NoError(t, err)
// The text message ID leads, followed by both albums' IDs in send order.
assert.Equal(t, []string{
"499",
"401", "402", "403", "404", "405",
"406", "407", "408", "409", "410",
"411", "412", "413", "414", "415",
}, ids)
assert.Equal(t, []string{"text", "group", "group"}, callOrder)
}
func TestSend_EmptyContent(t *testing.T) {
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {