mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
Sanitize WhatsApp messages and remove extra log messages.
This commit is contained in:
@@ -1,5 +1,31 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// SanitizeMessageContent returns input with Unicode control characters, format
// characters (such as RTL overrides and zero-width characters), and other
// non-graphic characters removed, since these could confuse an LLM or cause
// display issues in the agent UI.
//
// A rune is kept when unicode.IsGraphic reports true for it, which covers
// letters, marks, numbers, punctuation, symbols, and spaces. Newline, carriage
// return, and tab are control characters but are kept as well so that the
// layout of multi-line messages survives. Everything else (control Cc, format
// Cf, surrogate Cs, private use Co, line/paragraph separators) is dropped.
//
// Bytes that are not valid UTF-8 decode to U+FFFD, which is itself graphic, so
// each such byte appears in the result as the Unicode replacement character.
func SanitizeMessageContent(input string) string {
	// strings.Map drops a rune when the mapping returns a negative value, and
	// it hands back the input without allocating when no rune is changed.
	keep := func(r rune) rune {
		if unicode.IsGraphic(r) || r == '\n' || r == '\r' || r == '\t' {
			return r
		}
		return -1
	}
	return strings.Map(keep, input)
}
|
||||
|
||||
// Truncate returns a truncated version of s with at most maxLen runes.
|
||||
// Handles multi-byte Unicode characters properly.
|
||||
// If the string is truncated, "..." is appended to indicate truncation.
|
||||
|
||||
Reference in New Issue
Block a user