mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
Merge remote-tracking branch 'origin/main' into feat_discord_proxy
This commit is contained in:
@@ -37,6 +37,9 @@ func DoRequestWithRetry(client *http.Client, req *http.Request) (*http.Response,
|
||||
|
||||
if i < maxRetries-1 {
|
||||
if err = sleepWithCtx(req.Context(), retryDelayUnit*time.Duration(i+1)); err != nil {
|
||||
if resp != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
return nil, fmt.Errorf("failed to sleep: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -77,6 +80,91 @@ func TestDoRequestWithRetry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequestWithRetry_ContextCancel(t *testing.T) {
|
||||
// Use a long retry delay so cancellation always hits during sleepWithCtx.
|
||||
retryDelayUnit = 10 * time.Second
|
||||
t.Cleanup(func() { retryDelayUnit = time.Second })
|
||||
|
||||
bodyClosed := false
|
||||
firstRoundTripDone := make(chan struct{}, 1)
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte("error"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := server.Client()
|
||||
client.Timeout = 30 * time.Second
|
||||
client.Transport = &bodyCloseTracker{
|
||||
rt: client.Transport,
|
||||
onClose: func() { bodyClosed = true },
|
||||
// Signal after the first round-trip response is fully constructed on the client side.
|
||||
onRoundTrip: func() {
|
||||
select {
|
||||
case firstRoundTripDone <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
},
|
||||
trackURL: server.URL,
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// Cancel the context after the first round-trip completes on the client side.
|
||||
// This ensures client.Do has returned a valid resp (with body) and the retry
|
||||
// loop is about to enter sleepWithCtx, where the cancel will be detected.
|
||||
go func() {
|
||||
<-firstRoundTripDone
|
||||
cancel()
|
||||
}()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, server.URL, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
resp, err := DoRequestWithRetry(client, req)
|
||||
if resp != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
require.Error(t, err, "expected error from context cancellation")
|
||||
assert.Nil(t, resp, "expected nil response when context is canceled")
|
||||
assert.True(t, bodyClosed, "expected resp.Body to be closed on context cancellation")
|
||||
}
|
||||
|
||||
// bodyCloseTracker wraps an http.RoundTripper and records when response bodies are closed.
|
||||
type bodyCloseTracker struct {
|
||||
rt http.RoundTripper
|
||||
onClose func()
|
||||
onRoundTrip func() // called after each successful round-trip
|
||||
trackURL string
|
||||
}
|
||||
|
||||
func (t *bodyCloseTracker) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
resp, err := t.rt.RoundTrip(req)
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
if strings.HasPrefix(req.URL.String(), t.trackURL) {
|
||||
resp.Body = &closeNotifier{ReadCloser: resp.Body, onClose: t.onClose}
|
||||
if t.onRoundTrip != nil {
|
||||
t.onRoundTrip()
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// closeNotifier wraps an io.ReadCloser so that tests can observe Close calls.
type closeNotifier struct {
	io.ReadCloser

	// onClose is invoked on every Close call before the wrapped body is closed.
	// It may be nil, in which case Close only delegates.
	onClose func()
}

// Close runs the onClose callback (if one was provided) and then closes the
// wrapped ReadCloser, returning its error.
func (c *closeNotifier) Close() error {
	// Guard against a nil callback so a tracker configured without onClose
	// does not panic when the body is closed.
	if c.onClose != nil {
		c.onClose()
	}
	return c.ReadCloser.Close()
}
|
||||
|
||||
func TestDoRequestWithRetry_Delay(t *testing.T) {
|
||||
retryDelayUnit = time.Millisecond
|
||||
t.Cleanup(func() { retryDelayUnit = time.Second })
|
||||
|
||||
@@ -1,179 +0,0 @@
|
||||
package utils
|
||||
|
||||
import (
	"strings"
	"unicode/utf8"
)
|
||||
|
||||
// SplitMessage splits long messages into chunks, preserving code block integrity.
|
||||
// The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks,
|
||||
// but may extend to maxLen when needed.
|
||||
// Call SplitMessage with the full text content and the maximum allowed length of a single message;
|
||||
// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks.
|
||||
func SplitMessage(content string, maxLen int) []string {
|
||||
var messages []string
|
||||
|
||||
// Dynamic buffer: 10% of maxLen, but at least 50 chars if possible
|
||||
codeBlockBuffer := maxLen / 10
|
||||
if codeBlockBuffer < 50 {
|
||||
codeBlockBuffer = 50
|
||||
}
|
||||
if codeBlockBuffer > maxLen/2 {
|
||||
codeBlockBuffer = maxLen / 2
|
||||
}
|
||||
|
||||
for len(content) > 0 {
|
||||
if len(content) <= maxLen {
|
||||
messages = append(messages, content)
|
||||
break
|
||||
}
|
||||
|
||||
// Effective split point: maxLen minus buffer, to leave room for code blocks
|
||||
effectiveLimit := maxLen - codeBlockBuffer
|
||||
if effectiveLimit < maxLen/2 {
|
||||
effectiveLimit = maxLen / 2
|
||||
}
|
||||
|
||||
// Find natural split point within the effective limit
|
||||
msgEnd := findLastNewline(content[:effectiveLimit], 200)
|
||||
if msgEnd <= 0 {
|
||||
msgEnd = findLastSpace(content[:effectiveLimit], 100)
|
||||
}
|
||||
if msgEnd <= 0 {
|
||||
msgEnd = effectiveLimit
|
||||
}
|
||||
|
||||
// Check if this would end with an incomplete code block
|
||||
candidate := content[:msgEnd]
|
||||
unclosedIdx := findLastUnclosedCodeBlock(candidate)
|
||||
|
||||
if unclosedIdx >= 0 {
|
||||
// Message would end with incomplete code block
|
||||
// Try to extend up to maxLen to include the closing ```
|
||||
if len(content) > msgEnd {
|
||||
closingIdx := findNextClosingCodeBlock(content, msgEnd)
|
||||
if closingIdx > 0 && closingIdx <= maxLen {
|
||||
// Extend to include the closing ```
|
||||
msgEnd = closingIdx
|
||||
} else {
|
||||
// Code block is too long to fit in one chunk or missing closing fence.
|
||||
// Try to split inside by injecting closing and reopening fences.
|
||||
headerEnd := strings.Index(content[unclosedIdx:], "\n")
|
||||
if headerEnd == -1 {
|
||||
headerEnd = unclosedIdx + 3
|
||||
} else {
|
||||
headerEnd += unclosedIdx
|
||||
}
|
||||
header := strings.TrimSpace(content[unclosedIdx:headerEnd])
|
||||
|
||||
// If we have a reasonable amount of content after the header, split inside
|
||||
if msgEnd > headerEnd+20 {
|
||||
// Find a better split point closer to maxLen
|
||||
innerLimit := maxLen - 5 // Leave room for "\n```"
|
||||
betterEnd := findLastNewline(content[:innerLimit], 200)
|
||||
if betterEnd > headerEnd {
|
||||
msgEnd = betterEnd
|
||||
} else {
|
||||
msgEnd = innerLimit
|
||||
}
|
||||
messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
|
||||
content = strings.TrimSpace(header + "\n" + content[msgEnd:])
|
||||
continue
|
||||
}
|
||||
|
||||
// Otherwise, try to split before the code block starts
|
||||
newEnd := findLastNewline(content[:unclosedIdx], 200)
|
||||
if newEnd <= 0 {
|
||||
newEnd = findLastSpace(content[:unclosedIdx], 100)
|
||||
}
|
||||
if newEnd > 0 {
|
||||
msgEnd = newEnd
|
||||
} else {
|
||||
// If we can't split before, we MUST split inside (last resort)
|
||||
if unclosedIdx > 20 {
|
||||
msgEnd = unclosedIdx
|
||||
} else {
|
||||
msgEnd = maxLen - 5
|
||||
messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
|
||||
content = strings.TrimSpace(header + "\n" + content[msgEnd:])
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if msgEnd <= 0 {
|
||||
msgEnd = effectiveLimit
|
||||
}
|
||||
|
||||
messages = append(messages, content[:msgEnd])
|
||||
content = strings.TrimSpace(content[msgEnd:])
|
||||
}
|
||||
|
||||
return messages
|
||||
}
|
||||
|
||||
// findLastUnclosedCodeBlock scans text for ``` fences, treating each one as a
// toggle between "outside" and "inside" a code block. It returns the byte
// index of the opening fence that is still open at the end of text, or -1 when
// every fence has been closed.
func findLastUnclosedCodeBlock(text string) int {
	const fence = "```"

	open := false
	openAt := -1

	for pos := 0; pos+len(fence) <= len(text); {
		if text[pos:pos+len(fence)] != fence {
			pos++
			continue
		}
		// A fence seen while outside a block opens one; remember where.
		if !open {
			openAt = pos
		}
		open = !open
		pos += len(fence) // skip the whole fence so its backticks are not re-matched
	}

	if !open {
		return -1
	}
	return openAt
}
|
||||
|
||||
// findNextClosingCodeBlock looks for the first ``` fence in text at or after
// startIdx. It returns the byte index just past that fence, or -1 when no
// fence is found.
func findNextClosingCodeBlock(text string, startIdx int) int {
	const fence = "```"

	for pos := startIdx; pos+len(fence) <= len(text); pos++ {
		if text[pos:pos+len(fence)] == fence {
			return pos + len(fence)
		}
	}
	return -1
}
|
||||
|
||||
// findLastNewline returns the byte index of the last '\n' found within the
// final searchWindow bytes of s, or -1 if that tail contains no newline.
func findLastNewline(s string, searchWindow int) int {
	from := len(s) - searchWindow
	if from < 0 {
		from = 0
	}
	// An empty tail (empty s, or a non-positive window) has nothing to search.
	if from >= len(s) {
		return -1
	}
	if rel := strings.LastIndexByte(s[from:], '\n'); rel >= 0 {
		return from + rel
	}
	return -1
}
|
||||
|
||||
// findLastSpace returns the byte index of the last space or tab found within
// the final searchWindow bytes of s, or -1 if that tail contains neither.
func findLastSpace(s string, searchWindow int) int {
	from := len(s) - searchWindow
	if from < 0 {
		from = 0
	}
	// An empty tail (empty s, or a non-positive window) has nothing to search.
	if from >= len(s) {
		return -1
	}
	if rel := strings.LastIndexAny(s[from:], " \t"); rel >= 0 {
		return from + rel
	}
	return -1
}
|
||||
@@ -1,151 +0,0 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSplitMessage(t *testing.T) {
|
||||
longText := strings.Repeat("a", 2500)
|
||||
longCode := "```go\n" + strings.Repeat("fmt.Println(\"hello\")\n", 100) + "```" // ~2100 chars
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
maxLen int
|
||||
expectChunks int // Check number of chunks
|
||||
checkContent func(t *testing.T, chunks []string) // Custom validation
|
||||
}{
|
||||
{
|
||||
name: "Empty message",
|
||||
content: "",
|
||||
maxLen: 2000,
|
||||
expectChunks: 0,
|
||||
},
|
||||
{
|
||||
name: "Short message fits in one chunk",
|
||||
content: "Hello world",
|
||||
maxLen: 2000,
|
||||
expectChunks: 1,
|
||||
},
|
||||
{
|
||||
name: "Simple split regular text",
|
||||
content: longText,
|
||||
maxLen: 2000,
|
||||
expectChunks: 2,
|
||||
checkContent: func(t *testing.T, chunks []string) {
|
||||
if len(chunks[0]) > 2000 {
|
||||
t.Errorf("Chunk 0 too large: %d", len(chunks[0]))
|
||||
}
|
||||
if len(chunks[0])+len(chunks[1]) != len(longText) {
|
||||
t.Errorf("Total length mismatch. Got %d, want %d", len(chunks[0])+len(chunks[1]), len(longText))
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Split at newline",
|
||||
// 1750 chars then newline, then more chars.
|
||||
// Dynamic buffer: 2000 / 10 = 200.
|
||||
// Effective limit: 2000 - 200 = 1800.
|
||||
// Split should happen at newline because it's at 1750 (< 1800).
|
||||
// Total length must > 2000 to trigger split. 1750 + 1 + 300 = 2051.
|
||||
content: strings.Repeat("a", 1750) + "\n" + strings.Repeat("b", 300),
|
||||
maxLen: 2000,
|
||||
expectChunks: 2,
|
||||
checkContent: func(t *testing.T, chunks []string) {
|
||||
if len(chunks[0]) != 1750 {
|
||||
t.Errorf("Expected chunk 0 to be 1750 length (split at newline), got %d", len(chunks[0]))
|
||||
}
|
||||
if chunks[1] != strings.Repeat("b", 300) {
|
||||
t.Errorf("Chunk 1 content mismatch. Len: %d", len(chunks[1]))
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Long code block split",
|
||||
content: "Prefix\n" + longCode,
|
||||
maxLen: 2000,
|
||||
expectChunks: 2,
|
||||
checkContent: func(t *testing.T, chunks []string) {
|
||||
// Check that first chunk ends with closing fence
|
||||
if !strings.HasSuffix(chunks[0], "\n```") {
|
||||
t.Error("First chunk should end with injected closing fence")
|
||||
}
|
||||
// Check that second chunk starts with execution header
|
||||
if !strings.HasPrefix(chunks[1], "```go") {
|
||||
t.Error("Second chunk should start with injected code block header")
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Preserve Unicode characters",
|
||||
content: strings.Repeat("\u4e16", 1000), // 3000 bytes
|
||||
maxLen: 2000,
|
||||
expectChunks: 2,
|
||||
checkContent: func(t *testing.T, chunks []string) {
|
||||
// Just verify we didn't panic and got valid strings.
|
||||
// Go strings are UTF-8, if we split mid-rune it would be bad,
|
||||
// but standard slicing might do that.
|
||||
// Let's assume standard behavior is acceptable or check if it produces invalid rune?
|
||||
if !strings.Contains(chunks[0], "\u4e16") {
|
||||
t.Error("Chunk should contain unicode characters")
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := SplitMessage(tc.content, tc.maxLen)
|
||||
|
||||
if tc.expectChunks == 0 {
|
||||
if len(got) != 0 {
|
||||
t.Errorf("Expected 0 chunks, got %d", len(got))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if len(got) != tc.expectChunks {
|
||||
t.Errorf("Expected %d chunks, got %d", tc.expectChunks, len(got))
|
||||
// Log sizes for debugging
|
||||
for i, c := range got {
|
||||
t.Logf("Chunk %d length: %d", i, len(c))
|
||||
}
|
||||
return // Stop further checks if count assumes specific split
|
||||
}
|
||||
|
||||
if tc.checkContent != nil {
|
||||
tc.checkContent(t, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitMessage_CodeBlockIntegrity(t *testing.T) {
|
||||
// Focused test for the core requirement: splitting inside a code block preserves syntax highlighting
|
||||
|
||||
// 60 chars total approximately
|
||||
content := "```go\npackage main\n\nfunc main() {\n\tprintln(\"Hello\")\n}\n```"
|
||||
maxLen := 40
|
||||
|
||||
chunks := SplitMessage(content, maxLen)
|
||||
|
||||
if len(chunks) != 2 {
|
||||
t.Fatalf("Expected 2 chunks, got %d: %q", len(chunks), chunks)
|
||||
}
|
||||
|
||||
// First chunk must end with "\n```"
|
||||
if !strings.HasSuffix(chunks[0], "\n```") {
|
||||
t.Errorf("First chunk should end with closing fence. Got: %q", chunks[0])
|
||||
}
|
||||
|
||||
// Second chunk must start with the header "```go"
|
||||
if !strings.HasPrefix(chunks[1], "```go") {
|
||||
t.Errorf("Second chunk should start with code block header. Got: %q", chunks[1])
|
||||
}
|
||||
|
||||
// First chunk should contain meaningful content
|
||||
if len(chunks[0]) > 40 {
|
||||
t.Errorf("First chunk exceeded maxLen: length %d", len(chunks[0]))
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,31 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// SanitizeMessageContent returns input with every rune dropped that is neither
// a Unicode graphic character nor one of newline, carriage return, or tab.
// This strips control characters, format characters (RTL overrides,
// zero-width characters, BOM) and other non-graphic runes that could confuse
// an LLM or cause display issues in the agent UI.
func SanitizeMessageContent(input string) string {
	// keep maps a rune to itself when it should survive, or to -1 to drop it.
	keep := func(r rune) rune {
		switch {
		case r == '\n', r == '\r', r == '\t':
			// Layout whitespace is a control character but is kept on purpose.
			return r
		case unicode.IsGraphic(r):
			// Letters, marks, numbers, punctuation, symbols, and spaces.
			return r
		default:
			return -1
		}
	}
	return strings.Map(keep, input)
}
|
||||
|
||||
// Truncate returns a truncated version of s with at most maxLen runes.
|
||||
// Handles multi-byte Unicode characters properly.
|
||||
// If the string is truncated, "..." is appended to indicate truncation.
|
||||
|
||||
@@ -104,3 +104,27 @@ func TestTruncate(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeMessageContent(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{"empty", "", ""},
|
||||
{"plain text unchanged", "Hello world", "Hello world"},
|
||||
{"strip ZWSP", "Hello\u200bworld", "Helloworld"},
|
||||
{"strip RTL override", "Hi\u202eevil", "Hievil"},
|
||||
{"strip BOM", "\uFEFFcontent", "content"},
|
||||
{"strip multiple", "a\u200c\u202ab\u202cc", "abc"},
|
||||
{"unicode letters preserved", "café \u65e5\u672c\u8a9e", "café \u65e5\u672c\u8a9e"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := SanitizeMessageContent(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("SanitizeMessageContent(%q) = %q, want %q", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user