refactor(channels): unify message splitting and add per-channel worker queues

Move message splitting from individual channels (Discord) to the Manager layer via per-channel worker goroutines. Each channel now declares its max message length through BaseChannelOption/MessageLengthProvider, and the Manager automatically splits oversized outbound messages before dispatch. This prevents one slow channel from blocking all others. - Add WithMaxMessageLength option and MessageLengthProvider interface - Set platform-specific limits (Discord 2000, Telegram 4096, Slack 40000, etc.) - Convert SplitMessage to rune-aware counting for correct Unicode handling - Replace single dispatcher goroutine with per-channel buffered worker queues - Remove Discord's internal SplitMessage call (now handled centrally)
2026-06-12 18:08:54 +00:00 · 2026-02-22 22:46:29 +08:00
parent c669784216
commit a91de8546c
12 changed files with 272 additions and 99 deletions
@@ -5,11 +5,20 @@ import (
 )

 // SplitMessage splits long messages into chunks, preserving code block integrity.
+// The maxLen parameter is measured in runes (Unicode characters), not bytes.
 // The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks,
 // but may extend to maxLen when needed.
 // Call SplitMessage with the full text content and the maximum allowed length of a single message;
 // it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks.
 func SplitMessage(content string, maxLen int) []string {
+	if maxLen <= 0 {
+		if content == "" {
+			return nil
+		}
+		return []string{content}
+	}
+
+	runes := []rune(content)
 	var messages []string

 	// Dynamic buffer: 10% of maxLen, but at least 50 chars if possible
@@ -21,9 +30,9 @@ func SplitMessage(content string, maxLen int) []string {
 		codeBlockBuffer = maxLen / 2
 	}

-	for len(content) > 0 {
-		if len(content) <= maxLen {
-			messages = append(messages, content)
+	for len(runes) > 0 {
+		if len(runes) <= maxLen {
+			messages = append(messages, string(runes))
 			break
 		}

@@ -34,56 +43,66 @@ func SplitMessage(content string, maxLen int) []string {
 		}

 		// Find natural split point within the effective limit
-		msgEnd := findLastNewline(content[:effectiveLimit], 200)
+		msgEnd := findLastNewlineRunes(runes[:effectiveLimit], 200)
 		if msgEnd <= 0 {
-			msgEnd = findLastSpace(content[:effectiveLimit], 100)
+			msgEnd = findLastSpaceRunes(runes[:effectiveLimit], 100)
 		}
 		if msgEnd <= 0 {
 			msgEnd = effectiveLimit
 		}

 		// Check if this would end with an incomplete code block
-		candidate := content[:msgEnd]
-		unclosedIdx := findLastUnclosedCodeBlock(candidate)
+		candidate := runes[:msgEnd]
+		unclosedIdx := findLastUnclosedCodeBlockRunes(candidate)

 		if unclosedIdx >= 0 {
 			// Message would end with incomplete code block
 			// Try to extend up to maxLen to include the closing ```
-			if len(content) > msgEnd {
-				closingIdx := findNextClosingCodeBlock(content, msgEnd)
+			if len(runes) > msgEnd {
+				closingIdx := findNextClosingCodeBlockRunes(runes, msgEnd)
 				if closingIdx > 0 && closingIdx <= maxLen {
 					// Extend to include the closing ```
 					msgEnd = closingIdx
 				} else {
 					// Code block is too long to fit in one chunk or missing closing fence.
 					// Try to split inside by injecting closing and reopening fences.
-					headerEnd := strings.Index(content[unclosedIdx:], "\n")
+					candidateStr := string(candidate)
+					unclosedStr := string(runes[unclosedIdx:])
+					headerEnd := strings.Index(unclosedStr, "\n")
+					var header string
 					if headerEnd == -1 {
-						headerEnd = unclosedIdx + 3
+						header = strings.TrimSpace(string(runes[unclosedIdx : unclosedIdx+3]))
 					} else {
-						headerEnd += unclosedIdx
+						header = strings.TrimSpace(string(runes[unclosedIdx : unclosedIdx+headerEnd]))
 					}
-					header := strings.TrimSpace(content[unclosedIdx:headerEnd])
+					headerEndIdx := unclosedIdx + len([]rune(header))
+					if headerEnd != -1 {
+						headerEndIdx = unclosedIdx + headerEnd
+					}
+
+					_ = candidateStr // used above for context

 					// If we have a reasonable amount of content after the header, split inside
-					if msgEnd > headerEnd+20 {
+					if msgEnd > headerEndIdx+20 {
 						// Find a better split point closer to maxLen
 						innerLimit := maxLen - 5 // Leave room for "\n```"
-						betterEnd := findLastNewline(content[:innerLimit], 200)
-						if betterEnd > headerEnd {
+						betterEnd := findLastNewlineRunes(runes[:innerLimit], 200)
+						if betterEnd > headerEndIdx {
 							msgEnd = betterEnd
 						} else {
 							msgEnd = innerLimit
 						}
-						messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
-						content = strings.TrimSpace(header + "\n" + content[msgEnd:])
+						chunk := strings.TrimRight(string(runes[:msgEnd]), " \t\n\r") + "\n```"
+						messages = append(messages, chunk)
+						remaining := strings.TrimSpace(header + "\n" + string(runes[msgEnd:]))
+						runes = []rune(remaining)
 						continue
 					}

 					// Otherwise, try to split before the code block starts
-					newEnd := findLastNewline(content[:unclosedIdx], 200)
+					newEnd := findLastNewlineRunes(runes[:unclosedIdx], 200)
 					if newEnd <= 0 {
-						newEnd = findLastSpace(content[:unclosedIdx], 100)
+						newEnd = findLastSpaceRunes(runes[:unclosedIdx], 100)
 					}
 					if newEnd > 0 {
 						msgEnd = newEnd
@@ -93,8 +112,10 @@ func SplitMessage(content string, maxLen int) []string {
 							msgEnd = unclosedIdx
 						} else {
 							msgEnd = maxLen - 5
-							messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
-							content = strings.TrimSpace(header + "\n" + content[msgEnd:])
+							chunk := strings.TrimRight(string(runes[:msgEnd]), " \t\n\r") + "\n```"
+							messages = append(messages, chunk)
+							remaining := strings.TrimSpace(header + "\n" + string(runes[msgEnd:]))
+							runes = []rune(remaining)
 							continue
 						}
 					}
@@ -106,21 +127,22 @@ func SplitMessage(content string, maxLen int) []string {
 			msgEnd = effectiveLimit
 		}

-		messages = append(messages, content[:msgEnd])
-		content = strings.TrimSpace(content[msgEnd:])
+		messages = append(messages, string(runes[:msgEnd]))
+		remaining := strings.TrimSpace(string(runes[msgEnd:]))
+		runes = []rune(remaining)
 	}

 	return messages
 }

-// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ```
-// Returns the position of the opening ``` or -1 if all code blocks are complete
-func findLastUnclosedCodeBlock(text string) int {
+// findLastUnclosedCodeBlockRunes finds the last opening ``` that doesn't have a closing ```
+// Returns the rune position of the opening ``` or -1 if all code blocks are complete
+func findLastUnclosedCodeBlockRunes(runes []rune) int {
 	inCodeBlock := false
 	lastOpenIdx := -1

-	for i := 0; i < len(text); i++ {
-		if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
+	for i := 0; i < len(runes); i++ {
+		if i+2 < len(runes) && runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' {
 			// Toggle code block state on each fence
 			if !inCodeBlock {
 				// Entering a code block: record this opening fence
@@ -137,41 +159,41 @@ func findLastUnclosedCodeBlock(text string) int {
 	return -1
 }

-// findNextClosingCodeBlock finds the next closing ``` starting from a position
-// Returns the position after the closing ``` or -1 if not found
-func findNextClosingCodeBlock(text string, startIdx int) int {
-	for i := startIdx; i < len(text); i++ {
-		if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
+// findNextClosingCodeBlockRunes finds the next closing ``` starting from a rune position
+// Returns the rune position after the closing ``` or -1 if not found
+func findNextClosingCodeBlockRunes(runes []rune, startIdx int) int {
+	for i := startIdx; i < len(runes); i++ {
+		if i+2 < len(runes) && runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' {
 			return i + 3
 		}
 	}
 	return -1
 }

-// findLastNewline finds the last newline character within the last N characters
-// Returns the position of the newline or -1 if not found
-func findLastNewline(s string, searchWindow int) int {
-	searchStart := len(s) - searchWindow
+// findLastNewlineRunes finds the last newline character within the last N runes
+// Returns the rune position of the newline or -1 if not found
+func findLastNewlineRunes(runes []rune, searchWindow int) int {
+	searchStart := len(runes) - searchWindow
 	if searchStart < 0 {
 		searchStart = 0
 	}
-	for i := len(s) - 1; i >= searchStart; i-- {
-		if s[i] == '\n' {
+	for i := len(runes) - 1; i >= searchStart; i-- {
+		if runes[i] == '\n' {
 			return i
 		}
 	}
 	return -1
 }

-// findLastSpace finds the last space character within the last N characters
-// Returns the position of the space or -1 if not found
-func findLastSpace(s string, searchWindow int) int {
-	searchStart := len(s) - searchWindow
+// findLastSpaceRunes finds the last space character within the last N runes
+// Returns the rune position of the space or -1 if not found
+func findLastSpaceRunes(runes []rune, searchWindow int) int {
+	searchStart := len(runes) - searchWindow
 	if searchStart < 0 {
 		searchStart = 0
 	}
-	for i := len(s) - 1; i >= searchStart; i-- {
-		if s[i] == ' ' || s[i] == '\t' {
+	for i := len(runes) - 1; i >= searchStart; i-- {
+		if runes[i] == ' ' || runes[i] == '\t' {
 			return i
 		}
 	}
@@ -34,11 +34,15 @@ func TestSplitMessage(t *testing.T) {
 			maxLen:       2000,
 			expectChunks: 2,
 			checkContent: func(t *testing.T, chunks []string) {
-				if len(chunks[0]) > 2000 {
-					t.Errorf("Chunk 0 too large: %d", len(chunks[0]))
+				if len([]rune(chunks[0])) > 2000 {
+					t.Errorf("Chunk 0 too large: %d runes", len([]rune(chunks[0])))
 				}
-				if len(chunks[0])+len(chunks[1]) != len(longText) {
-					t.Errorf("Total length mismatch. Got %d, want %d", len(chunks[0])+len(chunks[1]), len(longText))
+				if len([]rune(chunks[0]))+len([]rune(chunks[1])) != len([]rune(longText)) {
+					t.Errorf(
+						"Total rune length mismatch. Got %d, want %d",
+						len([]rune(chunks[0]))+len([]rune(chunks[1])),
+						len([]rune(longText)),
+					)
 				}
 			},
 		},
@@ -53,11 +57,11 @@ func TestSplitMessage(t *testing.T) {
 			maxLen:       2000,
 			expectChunks: 2,
 			checkContent: func(t *testing.T, chunks []string) {
-				if len(chunks[0]) != 1750 {
-					t.Errorf("Expected chunk 0 to be 1750 length (split at newline), got %d", len(chunks[0]))
+				if len([]rune(chunks[0])) != 1750 {
+					t.Errorf("Expected chunk 0 to be 1750 runes (split at newline), got %d", len([]rune(chunks[0])))
 				}
 				if chunks[1] != strings.Repeat("b", 300) {
-					t.Errorf("Chunk 1 content mismatch. Len: %d", len(chunks[1]))
+					t.Errorf("Chunk 1 content mismatch. Len: %d", len([]rune(chunks[1])))
 				}
 			},
 		},
@@ -78,17 +82,39 @@ func TestSplitMessage(t *testing.T) {
 			},
 		},
 		{
-			name:         "Preserve Unicode characters",
-			content:      strings.Repeat("\u4e16", 1000), // 3000 bytes
+			name:         "Preserve Unicode characters (rune-aware)",
+			content:      strings.Repeat("\u4e16", 2500), // 2500 runes, 7500 bytes
 			maxLen:       2000,
 			expectChunks: 2,
 			checkContent: func(t *testing.T, chunks []string) {
-				// Just verify we didn't panic and got valid strings.
-				// Go strings are UTF-8, if we split mid-rune it would be bad,
-				// but standard slicing might do that.
-				// Let's assume standard behavior is acceptable or check if it produces invalid rune?
-				if !strings.Contains(chunks[0], "\u4e16") {
-					t.Error("Chunk should contain unicode characters")
+				// Verify chunks contain valid unicode and don't split mid-rune
+				for i, chunk := range chunks {
+					runeCount := len([]rune(chunk))
+					if runeCount > 2000 {
+						t.Errorf("Chunk %d has %d runes, exceeds maxLen 2000", i, runeCount)
+					}
+					if !strings.Contains(chunk, "\u4e16") {
+						t.Errorf("Chunk %d should contain unicode characters", i)
+					}
+				}
+				// Verify total rune count is preserved
+				totalRunes := 0
+				for _, chunk := range chunks {
+					totalRunes += len([]rune(chunk))
+				}
+				if totalRunes != 2500 {
+					t.Errorf("Total rune count mismatch. Got %d, want 2500", totalRunes)
+				}
+			},
+		},
+		{
+			name:         "Zero maxLen returns single chunk",
+			content:      "Hello world",
+			maxLen:       0,
+			expectChunks: 1,
+			checkContent: func(t *testing.T, chunks []string) {
+				if chunks[0] != "Hello world" {
+					t.Errorf("Expected original content, got %q", chunks[0])
 				}
 			},
 		},
@@ -145,7 +171,7 @@ func TestSplitMessage_CodeBlockIntegrity(t *testing.T) {
 	}

 	// First chunk should contain meaningful content
-	if len(chunks[0]) > 40 {
-		t.Errorf("First chunk exceeded maxLen: length %d", len(chunks[0]))
+	if len([]rune(chunks[0])) > 40 {
+		t.Errorf("First chunk exceeded maxLen: length %d runes", len([]rune(chunks[0])))
 	}
 }