Merge remote-tracking branch 'origin/main' into feat_discord_proxy

2026-06-12 18:08:54 +00:00 · 2026-03-02 18:17:51 +08:00
parent 9be6fb1a7d 26d1b8e374
commit 69b1ae48d5
181 changed files with 22003 additions and 3595 deletions
@@ -37,6 +37,9 @@ func DoRequestWithRetry(client *http.Client, req *http.Request) (*http.Response,

 		if i < maxRetries-1 {
 			if err = sleepWithCtx(req.Context(), retryDelayUnit*time.Duration(i+1)); err != nil {
+				if resp != nil {
+					resp.Body.Close()
+				}
 				return nil, fmt.Errorf("failed to sleep: %w", err)
 			}
 		}
@@ -1,8 +1,11 @@
 package utils

 import (
+	"context"
+	"io"
 	"net/http"
 	"net/http/httptest"
+	"strings"
 	"testing"
 	"time"

@@ -77,6 +80,91 @@ func TestDoRequestWithRetry(t *testing.T) {
 	}
 }

+func TestDoRequestWithRetry_ContextCancel(t *testing.T) { // checks that canceling the request context yields an error, a nil response, and a closed response body
+	// Use a long retry delay so that a cancel arriving during sleepWithCtx is what ends the call.
+	retryDelayUnit = 10 * time.Second
+	t.Cleanup(func() { retryDelayUnit = time.Second }) // NOTE(review): restores a hard-coded time.Second, not the value read before the override
+
+	bodyClosed := false // flipped to true by the onClose hook installed on the transport below
+	firstRoundTripDone := make(chan struct{}, 1)
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // every request is answered with a 500 and a small body
+		w.WriteHeader(http.StatusInternalServerError)
+		w.Write([]byte("error"))
+	}))
+	defer server.Close()
+
+	client := server.Client()
+	client.Timeout = 30 * time.Second
+	client.Transport = &bodyCloseTracker{
+		rt:      client.Transport,
+		onClose: func() { bodyClosed = true },
+		// Signal that a response came back from the wrapped transport; the send is non-blocking so RoundTrip never stalls on it.
+		onRoundTrip: func() {
+			select {
+			case firstRoundTripDone <- struct{}{}:
+			default:
+			}
+		},
+		trackURL: server.URL,
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Cancel the context as soon as the first round-trip is signaled. The signal is sent
+	// from inside RoundTrip, i.e. just before client.Do hands the response back; the intent
+	// is for the cancel to be detected by sleepWithCtx in the retry loop.
+	go func() {
+		<-firstRoundTripDone
+		cancel()
+	}()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, server.URL, nil)
+	require.NoError(t, err)
+
+	resp, err := DoRequestWithRetry(client, req)
+	if resp != nil { // defensive: release the body if a response is unexpectedly returned
+		resp.Body.Close()
+	}
+	require.Error(t, err, "expected error from context cancellation")
+	assert.Nil(t, resp, "expected nil response when context is canceled")
+	assert.True(t, bodyClosed, "expected resp.Body to be closed on context cancellation")
+}
+
+// bodyCloseTracker wraps an http.RoundTripper and records when response bodies are closed; only requests whose URL starts with trackURL are instrumented.
+type bodyCloseTracker struct {
+	rt          http.RoundTripper // underlying transport that actually performs the request
+	onClose     func()            // invoked each time a tracked response body is closed; called without a nil check
+	onRoundTrip func()            // optional; called after each successful round-trip to a tracked URL
+	trackURL    string            // URL prefix that selects which requests are tracked
+}
+
+func (t *bodyCloseTracker) RoundTrip(req *http.Request) (*http.Response, error) { // RoundTrip delegates to t.rt and instruments responses for tracked URLs.
+	resp, err := t.rt.RoundTrip(req)
+	if err != nil {
+		return resp, err // failed round-trips are passed through untouched and never signaled
+	}
+	if strings.HasPrefix(req.URL.String(), t.trackURL) {
+		resp.Body = &closeNotifier{ReadCloser: resp.Body, onClose: t.onClose} // reads pass through; Close additionally fires onClose
+		if t.onRoundTrip != nil {
+			t.onRoundTrip()
+		}
+	}
+	return resp, nil
+}
+
+// closeNotifier wraps an io.ReadCloser to detect Close calls; Read is inherited from the embedded value.
+type closeNotifier struct {
+	io.ReadCloser
+	onClose func() // callback run by Close before the wrapped body is closed
+}
+
+func (c *closeNotifier) Close() error { // Close fires the callback, then closes the wrapped body and returns its error.
+	c.onClose() // NOTE(review): no nil check here, so a nil onClose would panic; runs on every Close call
+	return c.ReadCloser.Close()
+}
+
 func TestDoRequestWithRetry_Delay(t *testing.T) {
 	retryDelayUnit = time.Millisecond
 	t.Cleanup(func() { retryDelayUnit = time.Second })
@@ -1,179 +0,0 @@
-package utils
-
-import (
-	"strings"
-)
-
-// SplitMessage splits long messages into chunks, preserving code block integrity.
-// It first looks for a split point below maxLen minus a reserve (10% of maxLen, at least 50, at most
-// maxLen/2), and may extend the chunk up to maxLen to include a closing code fence.
-// content is the full text and maxLen the maximum size of a single chunk; sizes are measured with len, i.e. in bytes.
-// When a fenced block cannot fit, the chunk is closed with "\n```" and the fence header line is repeated at the start of the next chunk.
-func SplitMessage(content string, maxLen int) []string {
-	var messages []string
-
-	// Dynamic buffer: 10% of maxLen, raised to at least 50 and then capped at maxLen/2
-	codeBlockBuffer := maxLen / 10
-	if codeBlockBuffer < 50 {
-		codeBlockBuffer = 50
-	}
-	if codeBlockBuffer > maxLen/2 {
-		codeBlockBuffer = maxLen / 2
-	}
-
-	for len(content) > 0 {
-		if len(content) <= maxLen { // the remainder fits: emit it as the final chunk
-			messages = append(messages, content)
-			break
-		}
-
-		// Effective split point: maxLen minus buffer, to leave room for code blocks
-		effectiveLimit := maxLen - codeBlockBuffer
-		if effectiveLimit < maxLen/2 {
-			effectiveLimit = maxLen / 2
-		}
-
-		// Find a natural split point: last newline in the final 200 bytes, else last space/tab in the final 100
-		msgEnd := findLastNewline(content[:effectiveLimit], 200) // NOTE(review): byte-offset slicing; a multi-byte rune can be cut
-		if msgEnd <= 0 {
-			msgEnd = findLastSpace(content[:effectiveLimit], 100)
-		}
-		if msgEnd <= 0 { // index 0 is treated like "not found": fall back to a hard cut
-			msgEnd = effectiveLimit
-		}
-
-		// Check if this would end with an incomplete code block
-		candidate := content[:msgEnd]
-		unclosedIdx := findLastUnclosedCodeBlock(candidate)
-
-		if unclosedIdx >= 0 {
-			// Message would end with incomplete code block
-			// Try to extend up to maxLen to include the closing ```
-			if len(content) > msgEnd {
-				closingIdx := findNextClosingCodeBlock(content, msgEnd)
-				if closingIdx > 0 && closingIdx <= maxLen {
-					// Extend to include the closing ```
-					msgEnd = closingIdx
-				} else {
-					// Code block is too long to fit in one chunk or missing closing fence.
-					// Try to split inside by injecting closing and reopening fences.
-					headerEnd := strings.Index(content[unclosedIdx:], "\n")
-					if headerEnd == -1 {
-						headerEnd = unclosedIdx + 3 // no newline after the fence: the header is just the three backticks
-					} else {
-						headerEnd += unclosedIdx // convert the relative index into an offset within content
-					}
-					header := strings.TrimSpace(content[unclosedIdx:headerEnd])
-
-					// If the candidate chunk holds more than 20 bytes past the header, split inside the block
-					if msgEnd > headerEnd+20 {
-						// Find a better split point closer to maxLen
-						innerLimit := maxLen - 5 // Leave room for "\n```"; NOTE(review): negative when maxLen < 5 - confirm callers never pass that
-						betterEnd := findLastNewline(content[:innerLimit], 200)
-						if betterEnd > headerEnd {
-							msgEnd = betterEnd
-						} else {
-							msgEnd = innerLimit
-						}
-						messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
-						content = strings.TrimSpace(header + "\n" + content[msgEnd:]) // re-open the fence for the next chunk
-						continue
-					}
-
-					// Otherwise, try to split before the code block starts
-					newEnd := findLastNewline(content[:unclosedIdx], 200)
-					if newEnd <= 0 {
-						newEnd = findLastSpace(content[:unclosedIdx], 100)
-					}
-					if newEnd > 0 {
-						msgEnd = newEnd
-					} else {
-						// No newline/space before the fence: cut at the fence itself, or inside the block as a last resort
-						if unclosedIdx > 20 {
-							msgEnd = unclosedIdx // more than 20 bytes precede the fence: end the chunk right at its opening
-						} else {
-							msgEnd = maxLen - 5 // hard cut inside the block, leaving room for the injected "\n```"
-							messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
-							content = strings.TrimSpace(header + "\n" + content[msgEnd:])
-							continue
-						}
-					}
-				}
-			}
-		}
-
-		if msgEnd <= 0 {
-			msgEnd = effectiveLimit
-		}
-
-		messages = append(messages, content[:msgEnd])
-		content = strings.TrimSpace(content[msgEnd:]) // whitespace around the cut is dropped, not carried into the next chunk
-	}
-
-	return messages
-}
-
-// findLastUnclosedCodeBlock scans text for ``` fences, treating each one as toggling the in-block state.
-// It returns the byte index of the opening ``` left unclosed at the end of text, or -1 if every fence is paired.
-func findLastUnclosedCodeBlock(text string) int {
-	inCodeBlock := false
-	lastOpenIdx := -1
-
-	for i := 0; i < len(text); i++ {
-		if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
-			// Toggle code block state on each fence
-			if !inCodeBlock {
-				// Entering a code block: record this opening fence
-				lastOpenIdx = i
-			}
-			inCodeBlock = !inCodeBlock
-			i += 2 // skip the rest of the fence; the loop increment consumes the third backtick
-		}
-	}
-
-	if inCodeBlock {
-		return lastOpenIdx
-	}
-	return -1
-}
-
-// findNextClosingCodeBlock finds the first ``` at or after startIdx; it does not check whether that fence opens or closes a block.
-// Returns the byte index just past that ``` or -1 if not found
-func findNextClosingCodeBlock(text string, startIdx int) int {
-	for i := startIdx; i < len(text); i++ {
-		if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
-			return i + 3
-		}
-	}
-	return -1
-}
-
-// findLastNewline finds the last '\n' within the final searchWindow bytes of s
-// Returns the byte index of that newline in s, or -1 if not found
-func findLastNewline(s string, searchWindow int) int {
-	searchStart := len(s) - searchWindow
-	if searchStart < 0 { // window longer than s: search the whole string
-		searchStart = 0
-	}
-	for i := len(s) - 1; i >= searchStart; i-- {
-		if s[i] == '\n' {
-			return i
-		}
-	}
-	return -1
-}
-
-// findLastSpace finds the last space or tab within the final searchWindow bytes of s
-// Returns the byte index of that character in s, or -1 if not found
-func findLastSpace(s string, searchWindow int) int {
-	searchStart := len(s) - searchWindow
-	if searchStart < 0 { // window longer than s: search the whole string
-		searchStart = 0
-	}
-	for i := len(s) - 1; i >= searchStart; i-- {
-		if s[i] == ' ' || s[i] == '\t' {
-			return i
-		}
-	}
-	return -1
-}
@@ -1,151 +0,0 @@
-package utils
-
-import (
-	"strings"
-	"testing"
-)
-
-func TestSplitMessage(t *testing.T) { // table-driven checks of chunk counts and selected chunk contents
-	longText := strings.Repeat("a", 2500)
-	longCode := "```go\n" + strings.Repeat("fmt.Println(\"hello\")\n", 100) + "```" // 2109 bytes: 6-byte header, 100 lines of 21 bytes, 3-byte fence
-
-	tests := []struct {
-		name         string
-		content      string
-		maxLen       int
-		expectChunks int                                 // Expected number of chunks; 0 means "expect no chunks at all"
-		checkContent func(t *testing.T, chunks []string) // Optional extra validation, run only when the count matches
-	}{
-		{
-			name:         "Empty message",
-			content:      "",
-			maxLen:       2000,
-			expectChunks: 0,
-		},
-		{
-			name:         "Short message fits in one chunk",
-			content:      "Hello world",
-			maxLen:       2000,
-			expectChunks: 1,
-		},
-		{
-			name:         "Simple split regular text",
-			content:      longText,
-			maxLen:       2000,
-			expectChunks: 2,
-			checkContent: func(t *testing.T, chunks []string) {
-				if len(chunks[0]) > 2000 {
-					t.Errorf("Chunk 0 too large: %d", len(chunks[0]))
-				}
-				if len(chunks[0])+len(chunks[1]) != len(longText) { // no bytes may be lost when there is no whitespace to trim
-					t.Errorf("Total length mismatch. Got %d, want %d", len(chunks[0])+len(chunks[1]), len(longText))
-				}
-			},
-		},
-		{
-			name: "Split at newline",
-			// 1750 chars, then a newline, then 300 more chars.
-			// Dynamic buffer: 2000 / 10 = 200.
-			// Effective limit: 2000 - 200 = 1800.
-			// The split should land on the newline because it sits at index 1750 (< 1800).
-			// Total length must exceed 2000 to trigger a split: 1750 + 1 + 300 = 2051.
-			content:      strings.Repeat("a", 1750) + "\n" + strings.Repeat("b", 300),
-			maxLen:       2000,
-			expectChunks: 2,
-			checkContent: func(t *testing.T, chunks []string) {
-				if len(chunks[0]) != 1750 {
-					t.Errorf("Expected chunk 0 to be 1750 length (split at newline), got %d", len(chunks[0]))
-				}
-				if chunks[1] != strings.Repeat("b", 300) {
-					t.Errorf("Chunk 1 content mismatch. Len: %d", len(chunks[1]))
-				}
-			},
-		},
-		{
-			name:         "Long code block split",
-			content:      "Prefix\n" + longCode,
-			maxLen:       2000,
-			expectChunks: 2,
-			checkContent: func(t *testing.T, chunks []string) {
-				// Check that first chunk ends with closing fence
-				if !strings.HasSuffix(chunks[0], "\n```") {
-					t.Error("First chunk should end with injected closing fence")
-				}
-				// Check that the second chunk starts with the re-opened fence header
-				if !strings.HasPrefix(chunks[1], "```go") {
-					t.Error("Second chunk should start with injected code block header")
-				}
-			},
-		},
-		{
-			name:         "Preserve Unicode characters",
-			content:      strings.Repeat("\u4e16", 1000), // 3000 bytes
-			maxLen:       2000,
-			expectChunks: 2,
-			checkContent: func(t *testing.T, chunks []string) {
-				// Only verify that splitting does not panic and that chunk 0 still holds the rune.
-				// SplitMessage cuts at byte offsets, so a cut inside a multi-byte rune is possible
-				// in general; this test does not assert that the chunks are valid UTF-8.
-				// NOTE(review): with these inputs the cut falls at byte 1800, a multiple of the 3-byte rune width.
-				if !strings.Contains(chunks[0], "\u4e16") {
-					t.Error("Chunk should contain unicode characters")
-				}
-			},
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			got := SplitMessage(tc.content, tc.maxLen)
-
-			if tc.expectChunks == 0 {
-				if len(got) != 0 {
-					t.Errorf("Expected 0 chunks, got %d", len(got))
-				}
-				return
-			}
-
-			if len(got) != tc.expectChunks {
-				t.Errorf("Expected %d chunks, got %d", tc.expectChunks, len(got))
-				// Log sizes for debugging
-				for i, c := range got {
-					t.Logf("Chunk %d length: %d", i, len(c))
-				}
-				return // Skip content checks: they index chunks by position and assume the expected count
-			}
-
-			if tc.checkContent != nil {
-				tc.checkContent(t, got)
-			}
-		})
-	}
-}
-
-func TestSplitMessage_CodeBlockIntegrity(t *testing.T) {
-	// Focused test for the core requirement: a split inside a code block closes the fence and re-opens it with the same header
-
-	// 57 bytes in total, so it cannot fit in a single 40-byte chunk
-	content := "```go\npackage main\n\nfunc main() {\n\tprintln(\"Hello\")\n}\n```"
-	maxLen := 40
-
-	chunks := SplitMessage(content, maxLen)
-
-	if len(chunks) != 2 {
-		t.Fatalf("Expected 2 chunks, got %d: %q", len(chunks), chunks)
-	}
-
-	// First chunk must end with "\n```"
-	if !strings.HasSuffix(chunks[0], "\n```") {
-		t.Errorf("First chunk should end with closing fence. Got: %q", chunks[0])
-	}
-
-	// Second chunk must start with the header "```go"
-	if !strings.HasPrefix(chunks[1], "```go") {
-		t.Errorf("Second chunk should start with code block header. Got: %q", chunks[1])
-	}
-
-	// First chunk, including the injected closing fence, must stay within maxLen
-	if len(chunks[0]) > 40 {
-		t.Errorf("First chunk exceeded maxLen: length %d", len(chunks[0]))
-	}
-}
@@ -1,5 +1,31 @@
 package utils

+import (
+	"strings"
+	"unicode"
+)
+
+// SanitizeMessageContent returns input with every rune dropped that is not a Unicode graphic
+// character (e.g. control characters, RTL overrides, zero-width characters), except that
+// '\n', '\r', and '\t' are kept. The stated goal is to avoid confusing an LLM or the agent UI.
+func SanitizeMessageContent(input string) string {
+	var sb strings.Builder
+	// Reserve len(input) bytes up front; for valid UTF-8 the output is never longer than the input
+	sb.Grow(len(input))
+
+	for _, r := range input { // iterates by rune; NOTE(review): invalid UTF-8 bytes decode to U+FFFD here - confirm that keeping them as U+FFFD is intended
+		// unicode.IsGraphic returns true if the rune is a Unicode graphic character.
+		// This includes letters, marks, numbers, punctuation, symbols, and spaces (category Zs).
+		// It excludes control characters (Cc), format characters (Cf),
+		// surrogates (Cs), and private use (Co); newline, CR, and tab are Cc, hence the explicit exceptions.
+		if unicode.IsGraphic(r) || r == '\n' || r == '\r' || r == '\t' {
+			sb.WriteRune(r)
+		}
+	}
+
+	return sb.String()
+}
+
 // Truncate returns a truncated version of s with at most maxLen runes.
 // Handles multi-byte Unicode characters properly.
 // If the string is truncated, "..." is appended to indicate truncation.
@@ -104,3 +104,27 @@ func TestTruncate(t *testing.T) {
 		})
 	}
 }
+
+func TestSanitizeMessageContent(t *testing.T) { // table-driven: each case compares the sanitized input against an exact expected string
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{"empty", "", ""},
+		{"plain text unchanged", "Hello world", "Hello world"},
+		{"strip ZWSP", "Hello\u200bworld", "Helloworld"}, // U+200B zero width space
+		{"strip RTL override", "Hi\u202eevil", "Hievil"}, // U+202E right-to-left override
+		{"strip BOM", "\uFEFFcontent", "content"}, // U+FEFF byte order mark
+		{"strip multiple", "a\u200c\u202ab\u202cc", "abc"}, // U+200C, U+202A, U+202C
+		{"unicode letters preserved", "café \u65e5\u672c\u8a9e", "café \u65e5\u672c\u8a9e"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := SanitizeMessageContent(tt.input)
+			if got != tt.want {
+				t.Errorf("SanitizeMessageContent(%q) = %q, want %q", tt.input, got, tt.want)
+			}
+		})
+	}
+}