Sanitize WhatsApp messages and remove extra log messages.

This commit is contained in:
Aditya Kalro
2026-02-22 18:20:24 -08:00
parent c1ed163e77
commit 81234f7e54
3 changed files with 53 additions and 0 deletions
+3
View File
@@ -174,6 +174,9 @@ func (c *WhatsAppNativeChannel) handleIncoming(evt *events.Message) {
if content == "" && evt.Message.ExtendedTextMessage != nil {
content = evt.Message.ExtendedTextMessage.GetText()
}
content = utils.SanitizeMessageContent(content)
if content == "" { return } // ignore empty messages
var mediaPaths []string
// Optional: resolve media to local paths if needed; for now we only forward text to the bus.
+26
View File
@@ -1,5 +1,31 @@
package utils
import (
"strings"
"unicode"
)
// SanitizeMessageContent returns input with every rune that is not a Unicode
// graphic character removed, except for newline, carriage return, and tab,
// which are kept so multi-line and indented messages survive intact.
//
// unicode.IsGraphic accepts letters, marks, numbers, punctuation, symbols, and
// spaces. The filter therefore drops control characters, format characters
// (zero-width characters, bidirectional overrides, the byte-order mark), and
// other non-graphic runes that could confuse an LLM or cause display issues in
// the agent UI. Bytes that are not valid UTF-8 decode to U+FFFD, which is
// itself graphic, so they come out as the replacement character.
func SanitizeMessageContent(input string) string {
	// strings.Map drops any rune for which the mapping returns a negative
	// value, so the whole filter is a single keep-or-drop decision per rune.
	return strings.Map(func(r rune) rune {
		switch {
		case r == '\n', r == '\r', r == '\t':
			// Whitespace controls are Cc, not graphic, but carry layout.
			return r
		case unicode.IsGraphic(r):
			return r
		default:
			return -1
		}
	}, input)
}
// Truncate returns a truncated version of s with at most maxLen runes.
// Handles multi-byte Unicode characters properly.
// If the string is truncated, "..." is appended to indicate truncation.
+24
View File
@@ -104,3 +104,27 @@ func TestTruncate(t *testing.T) {
})
}
}
// TestSanitizeMessageContent runs SanitizeMessageContent over a table of
// inputs covering three groups: text that must pass through untouched,
// invisible format and control characters that must be stripped, and the
// newline, carriage return, and tab characters that are deliberately kept.
func TestSanitizeMessageContent(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  string
	}{
		{name: "empty", input: "", want: ""},
		{name: "plain text unchanged", input: "Hello world", want: "Hello world"},
		{name: "strip ZWSP", input: "Hello\u200bworld", want: "Helloworld"},
		{name: "strip RTL override", input: "Hi\u202eevil", want: "Hievil"},
		{name: "strip BOM", input: "\uFEFFcontent", want: "content"},
		{name: "strip multiple", input: "a\u200c\u202ab\u202cc", want: "abc"},
		{name: "unicode letters preserved", input: "café 日本語", want: "café 日本語"},
		// The function explicitly keeps these three control characters.
		{name: "newline CR and tab preserved", input: "line1\nline2\r\n\tindented", want: "line1\nline2\r\n\tindented"},
		// Other control characters (NUL, ESC, DEL) are not graphic and must go.
		{name: "strip control characters", input: "a\x00b\x1bc\x7f", want: "abc"},
		// Callers treat an empty result as "nothing to forward".
		{name: "only invisible characters yields empty", input: "\u200b\u200d", want: ""},
		{name: "emoji preserved", input: "ok 👍", want: "ok 👍"},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := SanitizeMessageContent(tt.input); got != tt.want {
				t.Errorf("SanitizeMessageContent(%q) = %q, want %q", tt.input, got, tt.want)
			}
		})
	}
}