From a91de8546c74dbeddadbe39dffe0eecffe05c37e Mon Sep 17 00:00:00 2001 From: Hoshina Date: Sun, 22 Feb 2026 22:46:29 +0800 Subject: [PATCH] refactor(channels): unify message splitting and add per-channel worker queues Move message splitting from individual channels (Discord) to the Manager layer via per-channel worker goroutines. Each channel now declares its max message length through BaseChannelOption/MessageLengthProvider, and the Manager automatically splits oversized outbound messages before dispatch. This prevents one slow channel from blocking all others. - Add WithMaxMessageLength option and MessageLengthProvider interface - Set platform-specific limits (Discord 2000, Telegram 4096, Slack 40000, etc.) - Convert SplitMessage to rune-aware counting for correct Unicode handling - Replace single dispatcher goroutine with per-channel buffered worker queues - Remove Discord's internal SplitMessage call (now handled centrally) --- pkg/channels/base.go | 50 ++++++++++--- pkg/channels/dingtalk/dingtalk.go | 2 +- pkg/channels/discord/discord.go | 15 +--- pkg/channels/line/line.go | 2 +- pkg/channels/manager.go | 112 ++++++++++++++++++++++++++--- pkg/channels/slack/slack.go | 2 +- pkg/channels/telegram/telegram.go | 8 ++- pkg/channels/wecom/app.go | 2 +- pkg/channels/wecom/bot.go | 2 +- pkg/channels/whatsapp/whatsapp.go | 2 +- pkg/utils/message.go | 114 ++++++++++++++++++------------ pkg/utils/message_test.go | 60 +++++++++++----- 12 files changed, 272 insertions(+), 99 deletions(-) diff --git a/pkg/channels/base.go b/pkg/channels/base.go index 5e603f0d4..f70145981 100644 --- a/pkg/channels/base.go +++ b/pkg/channels/base.go @@ -17,21 +17,55 @@ type Channel interface { IsAllowed(senderID string) bool } -type BaseChannel struct { - config any - bus *bus.MessageBus - running atomic.Bool - name string - allowList []string +// BaseChannelOption is a functional option for configuring a BaseChannel. +type BaseChannelOption func(*BaseChannel) + +// WithMaxMessageLength sets the maximum message length (in runes) for a channel. +// Messages exceeding this limit will be automatically split by the Manager. +// A value of 0 means no limit. +func WithMaxMessageLength(n int) BaseChannelOption { + return func(c *BaseChannel) { c.maxMessageLength = n } } -func NewBaseChannel(name string, config any, bus *bus.MessageBus, allowList []string) *BaseChannel { - return &BaseChannel{ +// MessageLengthProvider is an opt-in interface that channels implement +// to advertise their maximum message length. The Manager uses this via +// type assertion to decide whether to split outbound messages. +type MessageLengthProvider interface { + MaxMessageLength() int +} + +type BaseChannel struct { + config any + bus *bus.MessageBus + running atomic.Bool + name string + allowList []string + maxMessageLength int +} + +func NewBaseChannel( + name string, + config any, + bus *bus.MessageBus, + allowList []string, + opts ...BaseChannelOption, +) *BaseChannel { + bc := &BaseChannel{ config: config, bus: bus, name: name, allowList: allowList, } + for _, opt := range opts { + opt(bc) + } + return bc +} + +// MaxMessageLength returns the maximum message length (in runes) for this channel. +// A value of 0 means no limit. +func (c *BaseChannel) MaxMessageLength() int { + return c.maxMessageLength } func (c *BaseChannel) Name() string { diff --git a/pkg/channels/dingtalk/dingtalk.go b/pkg/channels/dingtalk/dingtalk.go index a8aee65d6..e051add1f 100644 --- a/pkg/channels/dingtalk/dingtalk.go +++ b/pkg/channels/dingtalk/dingtalk.go @@ -38,7 +38,7 @@ func NewDingTalkChannel(cfg config.DingTalkConfig, messageBus *bus.MessageBus) ( return nil, fmt.Errorf("dingtalk client_id and client_secret are required") } - base := channels.NewBaseChannel("dingtalk", cfg, messageBus, cfg.AllowFrom) + base := channels.NewBaseChannel("dingtalk", cfg, messageBus, cfg.AllowFrom, channels.WithMaxMessageLength(20000)) return &DingTalkChannel{ BaseChannel: base, diff --git a/pkg/channels/discord/discord.go b/pkg/channels/discord/discord.go index faf1e1358..623bc9f48 100644 --- a/pkg/channels/discord/discord.go +++ b/pkg/channels/discord/discord.go @@ -41,7 +41,7 @@ func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordC return nil, fmt.Errorf("failed to create discord session: %w", err) } - base := channels.NewBaseChannel("discord", cfg, bus, cfg.AllowFrom) + base := channels.NewBaseChannel("discord", cfg, bus, cfg.AllowFrom, channels.WithMaxMessageLength(2000)) return &DiscordChannel{ BaseChannel: base, @@ -121,20 +121,11 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return fmt.Errorf("channel ID is empty") } - runes := []rune(msg.Content) - if len(runes) == 0 { + if len([]rune(msg.Content)) == 0 { return nil } - chunks := utils.SplitMessage(msg.Content, 2000) // Split messages into chunks, Discord length limit: 2000 chars - - for _, chunk := range chunks { - if err := c.sendChunk(ctx, channelID, chunk); err != nil { - return err - } - } - - return nil + return c.sendChunk(ctx, channelID, msg.Content) } func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error { diff --git a/pkg/channels/line/line.go b/pkg/channels/line/line.go index 96297e2cd..9744e1848 100644 --- a/pkg/channels/line/line.go +++ b/pkg/channels/line/line.go @@ -60,7 +60,7 @@ func NewLINEChannel(cfg config.LINEConfig, messageBus *bus.MessageBus) (*LINECha return nil, fmt.Errorf("line channel_secret and channel_access_token are required") } - base := channels.NewBaseChannel("line", cfg, messageBus, cfg.AllowFrom) + base := channels.NewBaseChannel("line", cfg, messageBus, cfg.AllowFrom, channels.WithMaxMessageLength(5000)) return &LINEChannel{ BaseChannel: base, diff --git a/pkg/channels/manager.go b/pkg/channels/manager.go index 7baef058c..081d616da 100644 --- a/pkg/channels/manager.go +++ b/pkg/channels/manager.go @@ -15,10 +15,20 @@ import ( "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/constants" "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/utils" ) +const defaultChannelQueueSize = 100 + +type channelWorker struct { + ch Channel + queue chan bus.OutboundMessage + done chan struct{} +} + type Manager struct { channels map[string]Channel + workers map[string]*channelWorker bus *bus.MessageBus config *config.Config dispatchTask *asyncTask @@ -32,6 +42,7 @@ type asyncTask struct { func NewManager(cfg *config.Config, messageBus *bus.MessageBus) (*Manager, error) { m := &Manager{ channels: make(map[string]Channel), + workers: make(map[string]*channelWorker), bus: messageBus, config: cfg, } @@ -63,6 +74,11 @@ func (m *Manager) initChannel(name, displayName string) { }) } else { m.channels[name] = ch + m.workers[name] = &channelWorker{ + ch: ch, + queue: make(chan bus.OutboundMessage, defaultChannelQueueSize), + done: make(chan struct{}), + } logger.InfoCF("channels", "Channel enabled successfully", map[string]any{ "channel": displayName, }) @@ -141,8 +157,6 @@ func (m *Manager) StartAll(ctx context.Context) error { dispatchCtx, cancel := context.WithCancel(ctx) m.dispatchTask = &asyncTask{cancel: cancel} - go m.dispatchOutbound(dispatchCtx) - for name, channel := range m.channels { logger.InfoCF("channels", "Starting channel", map[string]any{ "channel": name, @@ -155,6 +169,14 @@ func (m *Manager) StartAll(ctx context.Context) error { } } + // Start per-channel workers + for name, w := range m.workers { + go m.runWorker(dispatchCtx, name, w) + } + + // Start the dispatcher that reads from the bus and routes to workers + go m.dispatchOutbound(dispatchCtx) + logger.InfoC("channels", "All channels started") return nil } @@ -165,11 +187,21 @@ func (m *Manager) StopAll(ctx context.Context) error { logger.InfoC("channels", "Stopping all channels") + // Cancel dispatcher first if m.dispatchTask != nil { m.dispatchTask.cancel() m.dispatchTask = nil } + // Close all worker queues and wait for them to drain + for _, w := range m.workers { + close(w.queue) + } + for _, w := range m.workers { + <-w.done + } + + // Stop all channels for name, channel := range m.channels { logger.InfoCF("channels", "Stopping channel", map[string]any{ "channel": name, @@ -186,6 +218,44 @@ func (m *Manager) StopAll(ctx context.Context) error { return nil } +// runWorker processes outbound messages for a single channel, splitting +// messages that exceed the channel's maximum message length. +func (m *Manager) runWorker(ctx context.Context, name string, w *channelWorker) { + defer close(w.done) + for { + select { + case msg, ok := <-w.queue: + if !ok { + return + } + maxLen := 0 + if mlp, ok := w.ch.(MessageLengthProvider); ok { + maxLen = mlp.MaxMessageLength() + } + if maxLen > 0 && len([]rune(msg.Content)) > maxLen { + chunks := utils.SplitMessage(msg.Content, maxLen) + for _, chunk := range chunks { + chunkMsg := msg + chunkMsg.Content = chunk + if err := w.ch.Send(ctx, chunkMsg); err != nil { + logger.ErrorCF("channels", "Error sending chunk", map[string]any{ + "channel": name, "error": err.Error(), + }) + } + } + } else { + if err := w.ch.Send(ctx, msg); err != nil { + logger.ErrorCF("channels", "Error sending message", map[string]any{ + "channel": name, "error": err.Error(), + }) + } + } + case <-ctx.Done(): + return + } + } +} + func (m *Manager) dispatchOutbound(ctx context.Context) { logger.InfoC("channels", "Outbound dispatcher started") @@ -206,7 +276,8 @@ func (m *Manager) dispatchOutbound(ctx context.Context) { } m.mu.RLock() - channel, exists := m.channels[msg.Channel] + _, exists := m.channels[msg.Channel] + w, wExists := m.workers[msg.Channel] m.mu.RUnlock() if !exists { @@ -216,11 +287,12 @@ func (m *Manager) dispatchOutbound(ctx context.Context) { continue } - if err := channel.Send(ctx, msg); err != nil { - logger.ErrorCF("channels", "Error sending message to channel", map[string]any{ - "channel": msg.Channel, - "error": err.Error(), - }) + if wExists { + select { + case w.queue <- msg: + case <-ctx.Done(): + return + } } } } @@ -262,17 +334,28 @@ func (m *Manager) RegisterChannel(name string, channel Channel) { m.mu.Lock() defer m.mu.Unlock() m.channels[name] = channel + m.workers[name] = &channelWorker{ + ch: channel, + queue: make(chan bus.OutboundMessage, defaultChannelQueueSize), + done: make(chan struct{}), + } } func (m *Manager) UnregisterChannel(name string) { m.mu.Lock() defer m.mu.Unlock() + if w, ok := m.workers[name]; ok { + close(w.queue) + <-w.done + } + delete(m.workers, name) delete(m.channels, name) } func (m *Manager) SendToChannel(ctx context.Context, channelName, chatID, content string) error { m.mu.RLock() - channel, exists := m.channels[channelName] + _, exists := m.channels[channelName] + w, wExists := m.workers[channelName] m.mu.RUnlock() if !exists { @@ -285,5 +368,16 @@ func (m *Manager) SendToChannel(ctx context.Context, channelName, chatID, conten Content: content, } + if wExists { + select { + case w.queue <- msg: + return nil + case <-ctx.Done(): + return ctx.Err() + } + } + + // Fallback: direct send (should not happen) + channel, _ := m.channels[channelName] return channel.Send(ctx, msg) } diff --git a/pkg/channels/slack/slack.go b/pkg/channels/slack/slack.go index b459a7140..fc0bee505 100644 --- a/pkg/channels/slack/slack.go +++ b/pkg/channels/slack/slack.go @@ -50,7 +50,7 @@ func NewSlackChannel(cfg config.SlackConfig, messageBus *bus.MessageBus) (*Slack socketClient := socketmode.New(api) - base := channels.NewBaseChannel("slack", cfg, messageBus, cfg.AllowFrom) + base := channels.NewBaseChannel("slack", cfg, messageBus, cfg.AllowFrom, channels.WithMaxMessageLength(40000)) return &SlackChannel{ BaseChannel: base, diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index af825ddc9..578e3c51e 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -76,7 +76,13 @@ func NewTelegramChannel(cfg *config.Config, bus *bus.MessageBus) (*TelegramChann return nil, fmt.Errorf("failed to create telegram bot: %w", err) } - base := channels.NewBaseChannel("telegram", telegramCfg, bus, telegramCfg.AllowFrom) + base := channels.NewBaseChannel( + "telegram", + telegramCfg, + bus, + telegramCfg.AllowFrom, + channels.WithMaxMessageLength(4096), + ) return &TelegramChannel{ BaseChannel: base, diff --git a/pkg/channels/wecom/app.go b/pkg/channels/wecom/app.go index 873431d3c..eb1711d75 100644 --- a/pkg/channels/wecom/app.go +++ b/pkg/channels/wecom/app.go @@ -120,7 +120,7 @@ func NewWeComAppChannel(cfg config.WeComAppConfig, messageBus *bus.MessageBus) ( return nil, fmt.Errorf("wecom_app corp_id, corp_secret and agent_id are required") } - base := channels.NewBaseChannel("wecom_app", cfg, messageBus, cfg.AllowFrom) + base := channels.NewBaseChannel("wecom_app", cfg, messageBus, cfg.AllowFrom, channels.WithMaxMessageLength(2048)) return &WeComAppChannel{ BaseChannel: base, diff --git a/pkg/channels/wecom/bot.go b/pkg/channels/wecom/bot.go index 3a8a16c43..bbac8611a 100644 --- a/pkg/channels/wecom/bot.go +++ b/pkg/channels/wecom/bot.go @@ -87,7 +87,7 @@ func NewWeComBotChannel(cfg config.WeComConfig, messageBus *bus.MessageBus) (*We return nil, fmt.Errorf("wecom token and webhook_url are required") } - base := channels.NewBaseChannel("wecom", cfg, messageBus, cfg.AllowFrom) + base := channels.NewBaseChannel("wecom", cfg, messageBus, cfg.AllowFrom, channels.WithMaxMessageLength(2048)) return &WeComBotChannel{ BaseChannel: base, diff --git a/pkg/channels/whatsapp/whatsapp.go b/pkg/channels/whatsapp/whatsapp.go index cbc82fd09..b5f3e99d7 100644 --- a/pkg/channels/whatsapp/whatsapp.go +++ b/pkg/channels/whatsapp/whatsapp.go @@ -28,7 +28,7 @@ type WhatsAppChannel struct { } func NewWhatsAppChannel(cfg config.WhatsAppConfig, bus *bus.MessageBus) (*WhatsAppChannel, error) { - base := channels.NewBaseChannel("whatsapp", cfg, bus, cfg.AllowFrom) + base := channels.NewBaseChannel("whatsapp", cfg, bus, cfg.AllowFrom, channels.WithMaxMessageLength(65536)) return &WhatsAppChannel{ BaseChannel: base, diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 1d05950d9..52a967f4c 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -5,11 +5,20 @@ import ( ) // SplitMessage splits long messages into chunks, preserving code block integrity. +// The maxLen parameter is measured in runes (Unicode characters), not bytes. // The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks, // but may extend to maxLen when needed. // Call SplitMessage with the full text content and the maximum allowed length of a single message; // it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. func SplitMessage(content string, maxLen int) []string { + if maxLen <= 0 { + if content == "" { + return nil + } + return []string{content} + } + + runes := []rune(content) var messages []string // Dynamic buffer: 10% of maxLen, but at least 50 chars if possible @@ -21,9 +30,9 @@ func SplitMessage(content string, maxLen int) []string { codeBlockBuffer = maxLen / 2 } - for len(content) > 0 { - if len(content) <= maxLen { - messages = append(messages, content) + for len(runes) > 0 { + if len(runes) <= maxLen { + messages = append(messages, string(runes)) break } @@ -34,56 +43,66 @@ func SplitMessage(content string, maxLen int) []string { } // Find natural split point within the effective limit - msgEnd := findLastNewline(content[:effectiveLimit], 200) + msgEnd := findLastNewlineRunes(runes[:effectiveLimit], 200) if msgEnd <= 0 { - msgEnd = findLastSpace(content[:effectiveLimit], 100) + msgEnd = findLastSpaceRunes(runes[:effectiveLimit], 100) } if msgEnd <= 0 { msgEnd = effectiveLimit } // Check if this would end with an incomplete code block - candidate := content[:msgEnd] - unclosedIdx := findLastUnclosedCodeBlock(candidate) + candidate := runes[:msgEnd] + unclosedIdx := findLastUnclosedCodeBlockRunes(candidate) if unclosedIdx >= 0 { // Message would end with incomplete code block // Try to extend up to maxLen to include the closing ``` - if len(content) > msgEnd { - closingIdx := findNextClosingCodeBlock(content, msgEnd) + if len(runes) > msgEnd { + closingIdx := findNextClosingCodeBlockRunes(runes, msgEnd) if closingIdx > 0 && closingIdx <= maxLen { // Extend to include the closing ``` msgEnd = closingIdx } else { // Code block is too long to fit in one chunk or missing closing fence. // Try to split inside by injecting closing and reopening fences. - headerEnd := strings.Index(content[unclosedIdx:], "\n") + candidateStr := string(candidate) + unclosedStr := string(runes[unclosedIdx:]) + headerEnd := strings.Index(unclosedStr, "\n") + var header string if headerEnd == -1 { - headerEnd = unclosedIdx + 3 + header = strings.TrimSpace(string(runes[unclosedIdx : unclosedIdx+3])) } else { - headerEnd += unclosedIdx + header = strings.TrimSpace(string(runes[unclosedIdx : unclosedIdx+headerEnd])) } - header := strings.TrimSpace(content[unclosedIdx:headerEnd]) + headerEndIdx := unclosedIdx + len([]rune(header)) + if headerEnd != -1 { + headerEndIdx = unclosedIdx + headerEnd + } + + _ = candidateStr // used above for context // If we have a reasonable amount of content after the header, split inside - if msgEnd > headerEnd+20 { + if msgEnd > headerEndIdx+20 { // Find a better split point closer to maxLen innerLimit := maxLen - 5 // Leave room for "\n```" - betterEnd := findLastNewline(content[:innerLimit], 200) - if betterEnd > headerEnd { + betterEnd := findLastNewlineRunes(runes[:innerLimit], 200) + if betterEnd > headerEndIdx { msgEnd = betterEnd } else { msgEnd = innerLimit } - messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") - content = strings.TrimSpace(header + "\n" + content[msgEnd:]) + chunk := strings.TrimRight(string(runes[:msgEnd]), " \t\n\r") + "\n```" + messages = append(messages, chunk) + remaining := strings.TrimSpace(header + "\n" + string(runes[msgEnd:])) + runes = []rune(remaining) continue } // Otherwise, try to split before the code block starts - newEnd := findLastNewline(content[:unclosedIdx], 200) + newEnd := findLastNewlineRunes(runes[:unclosedIdx], 200) if newEnd <= 0 { - newEnd = findLastSpace(content[:unclosedIdx], 100) + newEnd = findLastSpaceRunes(runes[:unclosedIdx], 100) } if newEnd > 0 { msgEnd = newEnd @@ -93,8 +112,10 @@ func SplitMessage(content string, maxLen int) []string { msgEnd = unclosedIdx } else { msgEnd = maxLen - 5 - messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") - content = strings.TrimSpace(header + "\n" + content[msgEnd:]) + chunk := strings.TrimRight(string(runes[:msgEnd]), " \t\n\r") + "\n```" + messages = append(messages, chunk) + remaining := strings.TrimSpace(header + "\n" + string(runes[msgEnd:])) + runes = []rune(remaining) continue } } @@ -106,21 +127,22 @@ func SplitMessage(content string, maxLen int) []string { msgEnd = effectiveLimit } - messages = append(messages, content[:msgEnd]) - content = strings.TrimSpace(content[msgEnd:]) + messages = append(messages, string(runes[:msgEnd])) + remaining := strings.TrimSpace(string(runes[msgEnd:])) + runes = []rune(remaining) } return messages } -// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` -// Returns the position of the opening ``` or -1 if all code blocks are complete -func findLastUnclosedCodeBlock(text string) int { +// findLastUnclosedCodeBlockRunes finds the last opening ``` that doesn't have a closing ``` +// Returns the rune position of the opening ``` or -1 if all code blocks are complete +func findLastUnclosedCodeBlockRunes(runes []rune) int { inCodeBlock := false lastOpenIdx := -1 - for i := 0; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { + for i := 0; i < len(runes); i++ { + if i+2 < len(runes) && runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' { // Toggle code block state on each fence if !inCodeBlock { // Entering a code block: record this opening fence @@ -137,41 +159,41 @@ func findLastUnclosedCodeBlock(text string) int { return -1 } -// findNextClosingCodeBlock finds the next closing ``` starting from a position -// Returns the position after the closing ``` or -1 if not found -func findNextClosingCodeBlock(text string, startIdx int) int { - for i := startIdx; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { +// findNextClosingCodeBlockRunes finds the next closing ``` starting from a rune position +// Returns the rune position after the closing ``` or -1 if not found +func findNextClosingCodeBlockRunes(runes []rune, startIdx int) int { + for i := startIdx; i < len(runes); i++ { + if i+2 < len(runes) && runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' { return i + 3 } } return -1 } -// findLastNewline finds the last newline character within the last N characters -// Returns the position of the newline or -1 if not found -func findLastNewline(s string, searchWindow int) int { - searchStart := len(s) - searchWindow +// findLastNewlineRunes finds the last newline character within the last N runes +// Returns the rune position of the newline or -1 if not found +func findLastNewlineRunes(runes []rune, searchWindow int) int { + searchStart := len(runes) - searchWindow if searchStart < 0 { searchStart = 0 } - for i := len(s) - 1; i >= searchStart; i-- { - if s[i] == '\n' { + for i := len(runes) - 1; i >= searchStart; i-- { + if runes[i] == '\n' { return i } } return -1 } -// findLastSpace finds the last space character within the last N characters -// Returns the position of the space or -1 if not found -func findLastSpace(s string, searchWindow int) int { - searchStart := len(s) - searchWindow +// findLastSpaceRunes finds the last space character within the last N runes +// Returns the rune position of the space or -1 if not found +func findLastSpaceRunes(runes []rune, searchWindow int) int { + searchStart := len(runes) - searchWindow if searchStart < 0 { searchStart = 0 } - for i := len(s) - 1; i >= searchStart; i-- { - if s[i] == ' ' || s[i] == '\t' { + for i := len(runes) - 1; i >= searchStart; i-- { + if runes[i] == ' ' || runes[i] == '\t' { return i } } diff --git a/pkg/utils/message_test.go b/pkg/utils/message_test.go index 338509437..78e1e2b40 100644 --- a/pkg/utils/message_test.go +++ b/pkg/utils/message_test.go @@ -34,11 +34,15 @@ func TestSplitMessage(t *testing.T) { maxLen: 2000, expectChunks: 2, checkContent: func(t *testing.T, chunks []string) { - if len(chunks[0]) > 2000 { - t.Errorf("Chunk 0 too large: %d", len(chunks[0])) + if len([]rune(chunks[0])) > 2000 { + t.Errorf("Chunk 0 too large: %d runes", len([]rune(chunks[0]))) } - if len(chunks[0])+len(chunks[1]) != len(longText) { - t.Errorf("Total length mismatch. Got %d, want %d", len(chunks[0])+len(chunks[1]), len(longText)) + if len([]rune(chunks[0]))+len([]rune(chunks[1])) != len([]rune(longText)) { + t.Errorf( + "Total rune length mismatch. Got %d, want %d", + len([]rune(chunks[0]))+len([]rune(chunks[1])), + len([]rune(longText)), + ) } }, }, @@ -53,11 +57,11 @@ func TestSplitMessage(t *testing.T) { maxLen: 2000, expectChunks: 2, checkContent: func(t *testing.T, chunks []string) { - if len(chunks[0]) != 1750 { - t.Errorf("Expected chunk 0 to be 1750 length (split at newline), got %d", len(chunks[0])) + if len([]rune(chunks[0])) != 1750 { + t.Errorf("Expected chunk 0 to be 1750 runes (split at newline), got %d", len([]rune(chunks[0]))) } if chunks[1] != strings.Repeat("b", 300) { - t.Errorf("Chunk 1 content mismatch. Len: %d", len(chunks[1])) + t.Errorf("Chunk 1 content mismatch. Len: %d", len([]rune(chunks[1]))) } }, }, @@ -78,17 +82,39 @@ func TestSplitMessage(t *testing.T) { }, }, { - name: "Preserve Unicode characters", - content: strings.Repeat("\u4e16", 1000), // 3000 bytes + name: "Preserve Unicode characters (rune-aware)", + content: strings.Repeat("\u4e16", 2500), // 2500 runes, 7500 bytes maxLen: 2000, expectChunks: 2, checkContent: func(t *testing.T, chunks []string) { - // Just verify we didn't panic and got valid strings. - // Go strings are UTF-8, if we split mid-rune it would be bad, - // but standard slicing might do that. - // Let's assume standard behavior is acceptable or check if it produces invalid rune? - if !strings.Contains(chunks[0], "\u4e16") { - t.Error("Chunk should contain unicode characters") + // Verify chunks contain valid unicode and don't split mid-rune + for i, chunk := range chunks { + runeCount := len([]rune(chunk)) + if runeCount > 2000 { + t.Errorf("Chunk %d has %d runes, exceeds maxLen 2000", i, runeCount) + } + if !strings.Contains(chunk, "\u4e16") { + t.Errorf("Chunk %d should contain unicode characters", i) + } + } + // Verify total rune count is preserved + totalRunes := 0 + for _, chunk := range chunks { + totalRunes += len([]rune(chunk)) + } + if totalRunes != 2500 { + t.Errorf("Total rune count mismatch. Got %d, want 2500", totalRunes) + } + }, + }, + { + name: "Zero maxLen returns single chunk", + content: "Hello world", + maxLen: 0, + expectChunks: 1, + checkContent: func(t *testing.T, chunks []string) { + if chunks[0] != "Hello world" { + t.Errorf("Expected original content, got %q", chunks[0]) } }, }, @@ -145,7 +171,7 @@ func TestSplitMessage_CodeBlockIntegrity(t *testing.T) { } // First chunk should contain meaningful content - if len(chunks[0]) > 40 { - t.Errorf("First chunk exceeded maxLen: length %d", len(chunks[0])) + if len([]rune(chunks[0])) > 40 { + t.Errorf("First chunk exceeded maxLen: length %d runes", len([]rune(chunks[0]))) } }