Files
picoclaw/pkg/utils/string.go
T
2026-02-27 20:20:25 +08:00

55 lines
1.5 KiB
Go

package utils
import (
"strings"
"unicode"
)
// SanitizeMessageContent returns a copy of input that keeps only Unicode
// graphic characters plus the layout whitespace '\n', '\r' and '\t'.
//
// Everything else is dropped: control characters (NUL, ESC, DEL, ...), format
// characters (zero-width spaces, bidirectional overrides, byte-order marks),
// line/paragraph separators, surrogates and private-use code points. The
// intent is to strip invisible content that could confuse an LLM or render
// badly in the agent UI.
func SanitizeMessageContent(input string) string {
	var cleaned strings.Builder
	// Reserve the input's byte length up front so the builder rarely has to
	// grow while runes are appended.
	cleaned.Grow(len(input))

	for _, ch := range input {
		switch ch {
		case '\n', '\r', '\t':
			// Kept on purpose: these are control characters, but they carry
			// the message's line and indentation structure.
		default:
			// unicode.IsGraphic accepts letters, marks, numbers, punctuation,
			// symbols and spaces; anything else is filtered out.
			if !unicode.IsGraphic(ch) {
				continue
			}
		}
		cleaned.WriteRune(ch)
	}
	return cleaned.String()
}
// Truncate returns s shortened to at most maxLen runes.
//
// Length is measured in runes, not bytes, so multi-byte characters are never
// split. The rules are:
//   - maxLen <= 0 yields the empty string.
//   - If s already has at most maxLen runes it is returned unchanged.
//   - If maxLen > 3 and s is too long, the first maxLen-3 runes are kept and
//     "..." is appended, so the result is exactly maxLen runes.
//   - If maxLen <= 3 and s is too long there is no room for an ellipsis, so
//     the first maxLen runes are returned without any marker.
//
// The input is scanned only as far as needed to decide where to cut, and no
// intermediate rune slice is allocated. Bytes that are not valid UTF-8 each
// count as one rune and are copied through as-is.
func Truncate(s string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}
	// A string never has more runes than bytes, so a short byte length
	// settles the common case without decoding anything.
	if len(s) <= maxLen {
		return s
	}

	const ellipsis = "..."
	keep, suffix := maxLen, ""
	if maxLen > len(ellipsis) {
		// Reserve room for the ellipsis inside the maxLen budget.
		keep, suffix = maxLen-len(ellipsis), ellipsis
	}

	cut := 0  // byte offset just past the first keep runes
	seen := 0 // runes visited so far
	for i := range s {
		if seen == keep {
			cut = i
		}
		seen++
		// Seeing rune number maxLen+1 proves s is too long; stop scanning.
		if seen > maxLen {
			return s[:cut] + suffix
		}
	}
	// Ran out of input first: s fits within maxLen runes.
	return s
}
// DerefStr returns the string that s points to, or fallback when s is nil.
// A non-nil pointer to an empty string yields "", not fallback.
func DerefStr(s *string, fallback string) string {
	if s != nil {
		return *s
	}
	return fallback
}