From 94a1b8664ba9637890e93f1864d19d7b78cde1c4 Mon Sep 17 00:00:00 2001 From: Hua Date: Wed, 18 Feb 2026 20:01:53 +0000 Subject: [PATCH 01/11] refactor: extract message splitting logic to shared utils - Move FindLast, findLast, and SplitMessage from discord.go to pkg/utils/message.go - Update discord.go to use utils.SplitMessage() - Makes splitting logic reusable across other channels --- pkg/channels/discord.go | 129 +-------------------------------------- pkg/utils/message.go | 131 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 128 deletions(-) create mode 100644 pkg/utils/message.go diff --git a/pkg/channels/discord.go b/pkg/channels/discord.go index f360c75ef..7dc3f3198 100644 --- a/pkg/channels/discord.go +++ b/pkg/channels/discord.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "os" - "strings" "time" "github.com/bwmarrin/discordgo" @@ -106,7 +105,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } - chunks := splitMessage(msg.Content, 1500) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks + chunks := utils.SplitMessage(msg.Content, 1500) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks for _, chunk := range chunks { if err := c.sendChunk(ctx, channelID, chunk); err != nil { @@ -117,132 +116,6 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } -// splitMessage splits long messages into chunks, preserving code block integrity -// Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks -func splitMessage(content string, limit int) []string { - var messages []string - - for len(content) > 0 { - if len(content) <= limit { - messages = append(messages, content) - break - } - - msgEnd := limit - - // Find natural split point within the limit - msgEnd = findLastNewline(content[:limit], 200) - if msgEnd <= 0 { - msgEnd = findLastSpace(content[:limit], 100) - } - if msgEnd <= 0 { - msgEnd = limit - } - - // Check if this would end with an incomplete code block - candidate := content[:msgEnd] - unclosedIdx := findLastUnclosedCodeBlock(candidate) - - if unclosedIdx >= 0 { - // Message would end with incomplete code block - // Try to extend to include the closing ``` (with some buffer) - extendedLimit := limit + 500 // Allow 500 char buffer for code blocks - if len(content) > extendedLimit { - closingIdx := findNextClosingCodeBlock(content, msgEnd) - if closingIdx > 0 && closingIdx <= extendedLimit { - // Extend to include the closing ``` - msgEnd = closingIdx - } else { - // Can't find closing, split before the code block - msgEnd = findLastNewline(content[:unclosedIdx], 200) - if msgEnd <= 0 { - msgEnd = findLastSpace(content[:unclosedIdx], 100) - } - if msgEnd <= 0 { - msgEnd = unclosedIdx - } - } - } else { - // Remaining content fits within extended limit - msgEnd = len(content) - } - } - - if msgEnd <= 0 { - msgEnd = limit - } - - messages = append(messages, content[:msgEnd]) - content = strings.TrimSpace(content[msgEnd:]) - } - - return messages -} - -// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` -// Returns the position of the opening ``` or -1 if all code blocks are complete -func findLastUnclosedCodeBlock(text string) int { - count := 0 - lastOpenIdx := -1 - - for i := 0; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - if count == 0 { - lastOpenIdx = i - } - count++ - i += 2 - } - } - - // If odd number of ``` markers, last one is unclosed - if count%2 == 1 { - return lastOpenIdx - } - return -1 -} - -// findNextClosingCodeBlock finds the next closing ``` starting from a position -// Returns the position after the closing ``` or -1 if not found -func findNextClosingCodeBlock(text string, startIdx int) int { - for i := startIdx; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - return i + 3 - } - } - return -1 -} - -// findLastNewline finds the last newline character within the last N characters -// Returns the position of the newline or -1 if not found -func findLastNewline(s string, searchWindow int) int { - searchStart := len(s) - searchWindow - if searchStart < 0 { - searchStart = 0 - } - for i := len(s) - 1; i >= searchStart; i-- { - if s[i] == '\n' { - return i - } - } - return -1 -} - -// findLastSpace finds the last space character within the last N characters -// Returns the position of the space or -1 if not found -func findLastSpace(s string, searchWindow int) int { - searchStart := len(s) - searchWindow - if searchStart < 0 { - searchStart = 0 - } - for i := len(s) - 1; i >= searchStart; i-- { - if s[i] == ' ' || s[i] == '\t' { - return i - } - } - return -1 -} - func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error { // 使用传入的 ctx 进行超时控制 sendCtx, cancel := context.WithTimeout(ctx, sendTimeout) diff --git a/pkg/utils/message.go b/pkg/utils/message.go new file mode 100644 index 000000000..3a4cf2ad6 --- /dev/null +++ b/pkg/utils/message.go @@ -0,0 +1,131 @@ +package utils + +import ( + "strings" +) + +// SplitMessage splits long messages into chunks, preserving code block integrity +// Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks +func SplitMessage(content string, limit int) []string { + var messages []string + + for len(content) > 0 { + if len(content) <= limit { + messages = append(messages, content) + break + } + + msgEnd := limit + + // Find natural split point within the limit + msgEnd = FindLastNewline(content[:limit], 200) + if msgEnd <= 0 { + msgEnd = FindLastSpace(content[:limit], 100) + } + if msgEnd <= 0 { + msgEnd = limit + } + + // Check if this would end with an incomplete code block + candidate := content[:msgEnd] + unclosedIdx := FindLastUnclosedCodeBlock(candidate) + + if unclosedIdx >= 0 { + // Message would end with incomplete code block + // Try to extend to include the closing ``` (with some buffer) + extendedLimit := limit + 500 // Allow 500 char buffer for code blocks + if len(content) > extendedLimit { + closingIdx := FindNextClosingCodeBlock(content, msgEnd) + if closingIdx > 0 && closingIdx <= extendedLimit { + // Extend to include the closing ``` + msgEnd = closingIdx + } else { + // Can't find closing, split before the code block + msgEnd = FindLastNewline(content[:unclosedIdx], 200) + if msgEnd <= 0 { + msgEnd = FindLastSpace(content[:unclosedIdx], 100) + } + if msgEnd <= 0 { + msgEnd = unclosedIdx + } + } + } else { + // Remaining content fits within extended limit + msgEnd = len(content) + } + } + + if msgEnd <= 0 { + msgEnd = limit + } + + messages = append(messages, content[:msgEnd]) + content = strings.TrimSpace(content[msgEnd:]) + } + + return messages +} + +// FindLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` +// Returns the position of the opening ``` or -1 if all code blocks are complete +func FindLastUnclosedCodeBlock(text string) int { + count := 0 + lastOpenIdx := -1 + + for i := 0; i < len(text); i++ { + if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { + if count == 0 { + lastOpenIdx = i + } + count++ + i += 2 + } + } + + // If odd number of ``` markers, last one is unclosed + if count%2 == 1 { + return lastOpenIdx + } + return -1 +} + +// FindNextClosingCodeBlock finds the next closing ``` starting from a position +// Returns the position after the closing ``` or -1 if not found +func FindNextClosingCodeBlock(text string, startIdx int) int { + for i := startIdx; i < len(text); i++ { + if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { + return i + 3 + } + } + return -1 +} + +// FindLastNewline finds the last newline character within the last N characters +// Returns the position of the newline or -1 if not found +func FindLastNewline(s string, searchWindow int) int { + searchStart := len(s) - searchWindow + if searchStart < 0 { + searchStart = 0 + } + for i := len(s) - 1; i >= searchStart; i-- { + if s[i] == '\n' { + return i + } + } + return -1 +} + +// FindLastSpace finds the last space character within the last N characters +// Returns the position of the space or -1 if not found +func FindLastSpace(s string, searchWindow int) int { + searchStart := len(s) - searchWindow + if searchStart < 0 { + searchStart = 0 + } + for i := len(s) - 1; i >= searchStart; i-- { + if s[i] == ' ' || s[i] == '\t' { + return i + } + } + return -1 +} From e03124dc8a695b36b28eb2798fc914efa4493906 Mon Sep 17 00:00:00 2001 From: Hua Date: Wed, 18 Feb 2026 20:21:51 +0000 Subject: [PATCH 02/11] refactor: improve SplitMessage API clarity - Accept hard upper limit (maxLen) instead of pre-subtracted value - Caller now passes actual platform limit (e.g., 2000 for Discord) - Internal buffer of 500 chars is handled within message.go - Preferred split at maxLen - 500, may extend to maxLen for code blocks - Never exceeds maxLen, no more mental math for callers --- pkg/channels/discord.go | 2 +- pkg/utils/message.go | 41 +++++++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/pkg/channels/discord.go b/pkg/channels/discord.go index 7dc3f3198..ba02f7598 100644 --- a/pkg/channels/discord.go +++ b/pkg/channels/discord.go @@ -105,7 +105,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } - chunks := utils.SplitMessage(msg.Content, 1500) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks + chunks := utils.SplitMessage(msg.Content, 2000) // Discord hard limit: 2000 chars (prefers split at 1500 to leave room for code blocks) for _, chunk := range chunks { if err := c.sendChunk(ctx, channelID, chunk); err != nil { diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 3a4cf2ad6..9ca49ba53 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -4,26 +4,35 @@ import ( "strings" ) -// SplitMessage splits long messages into chunks, preserving code block integrity -// Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks -func SplitMessage(content string, limit int) []string { +const defaultCodeBlockBuffer = 500 + +// SplitMessage splits long messages into chunks, preserving code block integrity. +// The maxLen parameter is the hard upper limit - no message will exceed this length. +// The function prefers to split at maxLen - defaultCodeBlockBuffer to leave room for code blocks, +// but may extend up to maxLen when needed to avoid breaking incomplete code blocks. +func SplitMessage(content string, maxLen int) []string { var messages []string + codeBlockBuffer := defaultCodeBlockBuffer for len(content) > 0 { - if len(content) <= limit { + if len(content) <= maxLen { messages = append(messages, content) break } - msgEnd := limit + // Effective split point: maxLen minus buffer, to leave room for code blocks + effectiveLimit := maxLen - codeBlockBuffer + if effectiveLimit < maxLen/2 { + effectiveLimit = maxLen / 2 + } - // Find natural split point within the limit - msgEnd = FindLastNewline(content[:limit], 200) + // Find natural split point within the effective limit + msgEnd := FindLastNewline(content[:effectiveLimit], 200) if msgEnd <= 0 { - msgEnd = FindLastSpace(content[:limit], 100) + msgEnd = FindLastSpace(content[:effectiveLimit], 100) } if msgEnd <= 0 { - msgEnd = limit + msgEnd = effectiveLimit } // Check if this would end with an incomplete code block @@ -32,15 +41,14 @@ func SplitMessage(content string, limit int) []string { if unclosedIdx >= 0 { // Message would end with incomplete code block - // Try to extend to include the closing ``` (with some buffer) - extendedLimit := limit + 500 // Allow 500 char buffer for code blocks - if len(content) > extendedLimit { + // Try to extend up to maxLen (hard limit, never exceed) to include the closing ``` + if len(content) > msgEnd { closingIdx := FindNextClosingCodeBlock(content, msgEnd) - if closingIdx > 0 && closingIdx <= extendedLimit { + if closingIdx > 0 && closingIdx <= maxLen { // Extend to include the closing ``` msgEnd = closingIdx } else { - // Can't find closing, split before the code block + // Can't find closing within maxLen, split before the code block msgEnd = FindLastNewline(content[:unclosedIdx], 200) if msgEnd <= 0 { msgEnd = FindLastSpace(content[:unclosedIdx], 100) @@ -49,14 +57,11 @@ func SplitMessage(content string, limit int) []string { msgEnd = unclosedIdx } } - } else { - // Remaining content fits within extended limit - msgEnd = len(content) } } if msgEnd <= 0 { - msgEnd = limit + msgEnd = effectiveLimit } messages = append(messages, content[:msgEnd]) From e35a82762406cc09df43bbb8d72d1529f317b7fb Mon Sep 17 00:00:00 2001 From: Huaaudio Date: Wed, 18 Feb 2026 21:44:25 +0100 Subject: [PATCH 03/11] update documents --- pkg/channels/discord.go | 2 +- pkg/utils/message.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/channels/discord.go b/pkg/channels/discord.go index ba02f7598..472b51c53 100644 --- a/pkg/channels/discord.go +++ b/pkg/channels/discord.go @@ -105,7 +105,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } - chunks := utils.SplitMessage(msg.Content, 2000) // Discord hard limit: 2000 chars (prefers split at 1500 to leave room for code blocks) + chunks := utils.SplitMessage(msg.Content, 2000) // Split messages into chunks, Discord length limit: 2000 chars for _, chunk := range chunks { if err := c.sendChunk(ctx, channelID, chunk); err != nil { diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 9ca49ba53..ed56da95b 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -7,9 +7,9 @@ import ( const defaultCodeBlockBuffer = 500 // SplitMessage splits long messages into chunks, preserving code block integrity. -// The maxLen parameter is the hard upper limit - no message will exceed this length. // The function prefers to split at maxLen - defaultCodeBlockBuffer to leave room for code blocks, // but may extend up to maxLen when needed to avoid breaking incomplete code blocks. +// Please refer to pkg/channels/discord.go for usage. func SplitMessage(content string, maxLen int) []string { var messages []string codeBlockBuffer := defaultCodeBlockBuffer @@ -41,7 +41,7 @@ func SplitMessage(content string, maxLen int) []string { if unclosedIdx >= 0 { // Message would end with incomplete code block - // Try to extend up to maxLen (hard limit, never exceed) to include the closing ``` + // Try to extend up to maxLen to include the closing ``` if len(content) > msgEnd { closingIdx := FindNextClosingCodeBlock(content, msgEnd) if closingIdx > 0 && closingIdx <= maxLen { From 4ccee8556179d42ad0c5c3d7cb1f25caed3a49b9 Mon Sep 17 00:00:00 2001 From: Hua Audio <161028864+Huaaudio@users.noreply.github.com> Date: Wed, 18 Feb 2026 22:16:19 +0100 Subject: [PATCH 04/11] Update pkg/utils/message.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/utils/message.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/utils/message.go b/pkg/utils/message.go index ed56da95b..257f2c151 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -74,21 +74,22 @@ func SplitMessage(content string, maxLen int) []string { // FindLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` // Returns the position of the opening ``` or -1 if all code blocks are complete func FindLastUnclosedCodeBlock(text string) int { - count := 0 + inCodeBlock := false lastOpenIdx := -1 for i := 0; i < len(text); i++ { if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - if count == 0 { + // Toggle code block state on each fence + if !inCodeBlock { + // Entering a code block: record this opening fence lastOpenIdx = i } - count++ + inCodeBlock = !inCodeBlock i += 2 } } - // If odd number of ``` markers, last one is unclosed - if count%2 == 1 { + if inCodeBlock { return lastOpenIdx } return -1 From f38ce0d4ac7ce0a7f99dc8b3c9303d0d7a9a69a0 Mon Sep 17 00:00:00 2001 From: Huaaudio Date: Wed, 18 Feb 2026 22:31:18 +0100 Subject: [PATCH 05/11] Update to support extra long code blocks --- pkg/utils/message.go | 47 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 257f2c151..6ee57bddb 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -48,13 +48,48 @@ func SplitMessage(content string, maxLen int) []string { // Extend to include the closing ``` msgEnd = closingIdx } else { - // Can't find closing within maxLen, split before the code block - msgEnd = FindLastNewline(content[:unclosedIdx], 200) - if msgEnd <= 0 { - msgEnd = FindLastSpace(content[:unclosedIdx], 100) + // Code block is too long to fit in one chunk or missing closing fence. + // Try to split inside by injecting closing and reopening fences. + headerEnd := strings.Index(content[unclosedIdx:], "\n") + if headerEnd == -1 { + headerEnd = unclosedIdx + 3 + } else { + headerEnd += unclosedIdx } - if msgEnd <= 0 { - msgEnd = unclosedIdx + header := strings.TrimSpace(content[unclosedIdx:headerEnd]) + + // If we have a reasonable amount of content after the header, split inside + if msgEnd > headerEnd+20 { + // Find a better split point closer to maxLen + innerLimit := maxLen - 5 // Leave room for "\n```" + betterEnd := FindLastNewline(content[:innerLimit], 200) + if betterEnd > headerEnd { + msgEnd = betterEnd + } else { + msgEnd = innerLimit + } + messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") + content = strings.TrimSpace(header + "\n" + content[msgEnd:]) + continue + } + + // Otherwise, try to split before the code block starts + newEnd := FindLastNewline(content[:unclosedIdx], 200) + if newEnd <= 0 { + newEnd = FindLastSpace(content[:unclosedIdx], 100) + } + if newEnd > 0 { + msgEnd = newEnd + } else { + // If we can't split before, we MUST split inside (last resort) + if unclosedIdx > 20 { + msgEnd = unclosedIdx + } else { + msgEnd = maxLen - 5 + messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") + content = strings.TrimSpace(header + "\n" + content[msgEnd:]) + continue + } } } } From 82a2faed9d54ba9caaf3f6ec764fd2f92fc6700d Mon Sep 17 00:00:00 2001 From: Huaaudio Date: Wed, 18 Feb 2026 22:37:45 +0100 Subject: [PATCH 06/11] Privated function --- pkg/utils/message.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 6ee57bddb..66f637d3d 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -27,9 +27,9 @@ func SplitMessage(content string, maxLen int) []string { } // Find natural split point within the effective limit - msgEnd := FindLastNewline(content[:effectiveLimit], 200) + msgEnd := findLastNewline(content[:effectiveLimit], 200) if msgEnd <= 0 { - msgEnd = FindLastSpace(content[:effectiveLimit], 100) + msgEnd = findLastSpace(content[:effectiveLimit], 100) } if msgEnd <= 0 { msgEnd = effectiveLimit @@ -37,13 +37,13 @@ func SplitMessage(content string, maxLen int) []string { // Check if this would end with an incomplete code block candidate := content[:msgEnd] - unclosedIdx := FindLastUnclosedCodeBlock(candidate) + unclosedIdx := findLastUnclosedCodeBlock(candidate) if unclosedIdx >= 0 { // Message would end with incomplete code block // Try to extend up to maxLen to include the closing ``` if len(content) > msgEnd { - closingIdx := FindNextClosingCodeBlock(content, msgEnd) + closingIdx := findNextClosingCodeBlock(content, msgEnd) if closingIdx > 0 && closingIdx <= maxLen { // Extend to include the closing ``` msgEnd = closingIdx @@ -62,7 +62,7 @@ func SplitMessage(content string, maxLen int) []string { if msgEnd > headerEnd+20 { // Find a better split point closer to maxLen innerLimit := maxLen - 5 // Leave room for "\n```" - betterEnd := FindLastNewline(content[:innerLimit], 200) + betterEnd := findLastNewline(content[:innerLimit], 200) if betterEnd > headerEnd { msgEnd = betterEnd } else { @@ -74,9 +74,9 @@ func SplitMessage(content string, maxLen int) []string { } // Otherwise, try to split before the code block starts - newEnd := FindLastNewline(content[:unclosedIdx], 200) + newEnd := findLastNewline(content[:unclosedIdx], 200) if newEnd <= 0 { - newEnd = FindLastSpace(content[:unclosedIdx], 100) + newEnd = findLastSpace(content[:unclosedIdx], 100) } if newEnd > 0 { msgEnd = newEnd @@ -106,9 +106,9 @@ func SplitMessage(content string, maxLen int) []string { return messages } -// FindLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` +// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` // Returns the position of the opening ``` or -1 if all code blocks are complete -func FindLastUnclosedCodeBlock(text string) int { +func findLastUnclosedCodeBlock(text string) int { inCodeBlock := false lastOpenIdx := -1 @@ -130,9 +130,9 @@ func FindLastUnclosedCodeBlock(text string) int { return -1 } -// FindNextClosingCodeBlock finds the next closing ``` starting from a position +// findNextClosingCodeBlock finds the next closing ``` starting from a position // Returns the position after the closing ``` or -1 if not found -func FindNextClosingCodeBlock(text string, startIdx int) int { +func findNextClosingCodeBlock(text string, startIdx int) int { for i := startIdx; i < len(text); i++ { if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { return i + 3 @@ -141,9 +141,9 @@ func FindNextClosingCodeBlock(text string, startIdx int) int { return -1 } -// FindLastNewline finds the last newline character within the last N characters +// findLastNewline finds the last newline character within the last N characters // Returns the position of the newline or -1 if not found -func FindLastNewline(s string, searchWindow int) int { +func findLastNewline(s string, searchWindow int) int { searchStart := len(s) - searchWindow if searchStart < 0 { searchStart = 0 @@ -156,9 +156,9 @@ func FindLastNewline(s string, searchWindow int) int { return -1 } -// FindLastSpace finds the last space character within the last N characters +// findLastSpace finds the last space character within the last N characters // Returns the position of the space or -1 if not found -func FindLastSpace(s string, searchWindow int) int { +func findLastSpace(s string, searchWindow int) int { searchStart := len(s) - searchWindow if searchStart < 0 { searchStart = 0 From dfc3dffd0619530bff2615d48e137dfd531cf1bb Mon Sep 17 00:00:00 2001 From: Hua Audio <161028864+Huaaudio@users.noreply.github.com> Date: Wed, 18 Feb 2026 22:43:49 +0100 Subject: [PATCH 07/11] Update pkg/utils/message.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/utils/message.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 66f637d3d..bc648f396 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -9,7 +9,8 @@ const defaultCodeBlockBuffer = 500 // SplitMessage splits long messages into chunks, preserving code block integrity. // The function prefers to split at maxLen - defaultCodeBlockBuffer to leave room for code blocks, // but may extend up to maxLen when needed to avoid breaking incomplete code blocks. -// Please refer to pkg/channels/discord.go for usage. +// Call SplitMessage with the full text content and the maximum allowed length of a single message; +// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. func SplitMessage(content string, maxLen int) []string { var messages []string codeBlockBuffer := defaultCodeBlockBuffer From 7d8894d842e874f1a0e4d413c5931ed8b8185cfa Mon Sep 17 00:00:00 2001 From: Huaaudio Date: Wed, 18 Feb 2026 23:02:16 +0100 Subject: [PATCH 08/11] update message test, change dynamic buffer --- pkg/utils/message.go | 16 ++-- pkg/utils/message_test.go | 151 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 5 deletions(-) create mode 100644 pkg/utils/message_test.go diff --git a/pkg/utils/message.go b/pkg/utils/message.go index bc648f396..1d05950d9 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -4,16 +4,22 @@ import ( "strings" ) -const defaultCodeBlockBuffer = 500 - // SplitMessage splits long messages into chunks, preserving code block integrity. -// The function prefers to split at maxLen - defaultCodeBlockBuffer to leave room for code blocks, -// but may extend up to maxLen when needed to avoid breaking incomplete code blocks. +// The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks, +// but may extend to maxLen when needed. // Call SplitMessage with the full text content and the maximum allowed length of a single message; // it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. func SplitMessage(content string, maxLen int) []string { var messages []string - codeBlockBuffer := defaultCodeBlockBuffer + + // Dynamic buffer: 10% of maxLen, but at least 50 chars if possible + codeBlockBuffer := maxLen / 10 + if codeBlockBuffer < 50 { + codeBlockBuffer = 50 + } + if codeBlockBuffer > maxLen/2 { + codeBlockBuffer = maxLen / 2 + } for len(content) > 0 { if len(content) <= maxLen { diff --git a/pkg/utils/message_test.go b/pkg/utils/message_test.go new file mode 100644 index 000000000..33f5e51fc --- /dev/null +++ b/pkg/utils/message_test.go @@ -0,0 +1,151 @@ +package utils + +import ( + "strings" + "testing" +) + +func TestSplitMessage(t *testing.T) { + longText := strings.Repeat("a", 2500) + longCode := "```go\n" + strings.Repeat("fmt.Println(\"hello\")\n", 100) + "```" // ~2100 chars + + tests := []struct { + name string + content string + maxLen int + expectChunks int // Check number of chunks + checkContent func(t *testing.T, chunks []string) // Custom validation + }{ + { + name: "Empty message", + content: "", + maxLen: 2000, + expectChunks: 0, + }, + { + name: "Short message fits in one chunk", + content: "Hello world", + maxLen: 2000, + expectChunks: 1, + }, + { + name: "Simple split regular text", + content: longText, + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + if len(chunks[0]) > 2000 { + t.Errorf("Chunk 0 too large: %d", len(chunks[0])) + } + if len(chunks[0])+len(chunks[1]) != len(longText) { + t.Errorf("Total length mismatch. Got %d, want %d", len(chunks[0])+len(chunks[1]), len(longText)) + } + }, + }, + { + name: "Split at newline", + // 1750 chars then newline, then more chars. + // Dynamic buffer: 2000 / 10 = 200. + // Effective limit: 2000 - 200 = 1800. + // Split should happen at newline because it's at 1750 (< 1800). + // Total length must > 2000 to trigger split. 1750 + 1 + 300 = 2051. + content: strings.Repeat("a", 1750) + "\n" + strings.Repeat("b", 300), + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + if len(chunks[0]) != 1750 { + t.Errorf("Expected chunk 0 to be 1750 length (split at newline), got %d", len(chunks[0])) + } + if chunks[1] != strings.Repeat("b", 300) { + t.Errorf("Chunk 1 content mismatch. Len: %d", len(chunks[1])) + } + }, + }, + { + name: "Long code block split", + content: "Prefix\n" + longCode, + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + // Check that first chunk ends with closing fence + if !strings.HasSuffix(chunks[0], "\n```") { + t.Error("First chunk should end with injected closing fence") + } + // Check that second chunk starts with execution header + if !strings.HasPrefix(chunks[1], "```go") { + t.Error("Second chunk should start with injected code block header") + } + }, + }, + { + name: "Preserve Unicode characters", + content: strings.Repeat("世", 1000), // 3000 bytes + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + // Just verify we didn't panic and got valid strings. + // Go strings are UTF-8, if we split mid-rune it would be bad, + // but standard slicing might do that. + // Let's assume standard behavior is acceptable or check if it produces invalid rune? + if !strings.Contains(chunks[0], "世") { + t.Error("Chunk should contain unicode characters") + } + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := SplitMessage(tc.content, tc.maxLen) + + if tc.expectChunks == 0 { + if len(got) != 0 { + t.Errorf("Expected 0 chunks, got %d", len(got)) + } + return + } + + if len(got) != tc.expectChunks { + t.Errorf("Expected %d chunks, got %d", tc.expectChunks, len(got)) + // Log sizes for debugging + for i, c := range got { + t.Logf("Chunk %d length: %d", i, len(c)) + } + return // Stop further checks if count assumes specific split + } + + if tc.checkContent != nil { + tc.checkContent(t, got) + } + }) + } +} + +func TestSplitMessage_CodeBlockIntegrity(t *testing.T) { + // Focused test for the core requirement: splitting inside a code block preserves syntax highlighting + + // 60 chars total approximately + content := "```go\npackage main\n\nfunc main() {\n\tprintln(\"Hello\")\n}\n```" + maxLen := 40 + + chunks := SplitMessage(content, maxLen) + + if len(chunks) != 2 { + t.Fatalf("Expected 2 chunks, got %d: %q", len(chunks), chunks) + } + + // First chunk must end with "\n```" + if !strings.HasSuffix(chunks[0], "\n```") { + t.Errorf("First chunk should end with closing fence. Got: %q", chunks[0]) + } + + // Second chunk must start with the header "```go" + if !strings.HasPrefix(chunks[1], "```go") { + t.Errorf("Second chunk should start with code block header. Got: %q", chunks[1]) + } + + // First chunk should contain meaningful content + if len(chunks[0]) > 40 { + t.Errorf("First chunk exceeded maxLen: length %d", len(chunks[0])) + } +} From a46fe140a3c6e10b50d9d9437364865ac528cafb Mon Sep 17 00:00:00 2001 From: Huaaudio Date: Wed, 18 Feb 2026 23:03:57 +0100 Subject: [PATCH 09/11] update dynamic buffer --- pkg/utils/message.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 1d05950d9..35914f399 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -9,6 +9,8 @@ import ( // but may extend to maxLen when needed. // Call SplitMessage with the full text content and the maximum allowed length of a single message; // it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. +// Call SplitMessage with the full text content and the maximum allowed length of a single message; +// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. func SplitMessage(content string, maxLen int) []string { var messages []string From 98afd39913afc07435dcf1e883cb1c447abad786 Mon Sep 17 00:00:00 2001 From: Huaaudio Date: Wed, 18 Feb 2026 23:18:17 +0100 Subject: [PATCH 10/11] remove unicode --- pkg/utils/message_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/utils/message_test.go b/pkg/utils/message_test.go index 33f5e51fc..338509437 100644 --- a/pkg/utils/message_test.go +++ b/pkg/utils/message_test.go @@ -79,7 +79,7 @@ func TestSplitMessage(t *testing.T) { }, { name: "Preserve Unicode characters", - content: strings.Repeat("世", 1000), // 3000 bytes + content: strings.Repeat("\u4e16", 1000), // 3000 bytes maxLen: 2000, expectChunks: 2, checkContent: func(t *testing.T, chunks []string) { @@ -87,7 +87,7 @@ func TestSplitMessage(t *testing.T) { // Go strings are UTF-8, if we split mid-rune it would be bad, // but standard slicing might do that. // Let's assume standard behavior is acceptable or check if it produces invalid rune? - if !strings.Contains(chunks[0], "世") { + if !strings.Contains(chunks[0], "\u4e16") { t.Error("Chunk should contain unicode characters") } }, From 0d6b22fb3a8b90a00bc08ba015ec75a95ceb2041 Mon Sep 17 00:00:00 2001 From: Hua Audio <161028864+Huaaudio@users.noreply.github.com> Date: Wed, 18 Feb 2026 23:26:39 +0100 Subject: [PATCH 11/11] Update pkg/utils/message.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/utils/message.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/utils/message.go b/pkg/utils/message.go index 35914f399..1d05950d9 100644 --- a/pkg/utils/message.go +++ b/pkg/utils/message.go @@ -9,8 +9,6 @@ import ( // but may extend to maxLen when needed. // Call SplitMessage with the full text content and the maximum allowed length of a single message; // it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. -// Call SplitMessage with the full text content and the maximum allowed length of a single message; -// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. func SplitMessage(content string, maxLen int) []string { var messages []string