Sanitize WhatsApp messages and remove extra log messages.

This commit is contained in:
Aditya Kalro
2026-02-22 18:20:24 -08:00
parent c1ed163e77
commit 81234f7e54
3 changed files with 53 additions and 0 deletions
+26
View File
@@ -1,5 +1,31 @@
package utils
import (
"strings"
"unicode"
)
// SanitizeMessageContent returns input with every non-graphic rune removed,
// except for newline, carriage return, and tab, which are always preserved.
//
// A rune is kept when unicode.IsGraphic reports true for it: letters, marks,
// numbers, punctuation, symbols, and spaces. Everything else is dropped,
// including control characters (Cc) other than the three listed above, format
// characters (Cf) such as RTL overrides and zero-width characters, surrogates
// (Cs), and private-use runes (Co). These could confuse an LLM or cause
// display issues in the agent UI.
//
// Invalid UTF-8 bytes decode to U+FFFD, which is a graphic rune, so each such
// byte appears in the result as the replacement character.
//
// When nothing needs to be removed, the input string is returned as-is
// without allocating.
func SanitizeMessageContent(input string) string {
	return strings.Map(func(r rune) rune {
		switch r {
		case '\n', '\r', '\t':
			// Whitespace controls that carry message layout; keep them even
			// though unicode.IsGraphic rejects them.
			return r
		}
		if unicode.IsGraphic(r) {
			return r
		}
		// A negative result tells strings.Map to drop the rune.
		return -1
	}, input)
}
// Truncate returns a truncated version of s with at most maxLen runes.
// Handles multi-byte Unicode characters properly.
// If the string is truncated, "..." is appended to indicate truncation.