feat(security): add sensitive data filtering for tool results sent to LLM

Prevent the LLM from seeing its own credentials (API keys, tokens, secrets)
by filtering sensitive values from tool call results before sending them to
the model. Values are collected from .security.yml and replaced with
[FILTERED] using an efficient strings.Replacer (O(n+m)).

- Add FilterSensitiveData and FilterMinLength to ToolsConfig
- Implement SensitiveDataReplacer() with sync.Once caching in
  SecurityConfig
- Use reflection to collect all sensitive values (Model API keys, channel
  tokens, web tool API keys, skills tokens)
- Apply filtering in agent loop at 4 tool result locations
- Add comprehensive tests covering all token types
This commit is contained in:
uiyzzi
2026-03-23 20:55:41 +08:00
parent 96e312680d
commit 16d23d8cdc
12 changed files with 599 additions and 5 deletions
+12 -2
View File
@@ -1733,7 +1733,8 @@ turnLoop:
select {
case result, ok := <-ts.pendingResults:
if ok && result != nil && result.ForLLM != "" {
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", result.ForLLM)}
content := al.cfg.FilterSensitiveData(result.ForLLM)
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", content)}
pendingMessages = append(pendingMessages, msg)
}
default:
@@ -2336,6 +2337,9 @@ turnLoop:
return
}
// Filter sensitive data before publishing
content = al.cfg.FilterSensitiveData(content)
logger.InfoCF("agent", "Async tool completed, publishing result",
map[string]any{
"tool": asyncToolName,
@@ -2451,6 +2455,11 @@ turnLoop:
contentForLLM = toolResult.Err.Error()
}
// Filter sensitive data (API keys, tokens, secrets) before sending to LLM
if al.cfg.Tools.IsFilterSensitiveDataEnabled() {
contentForLLM = al.cfg.FilterSensitiveData(contentForLLM)
}
toolResultMsg := providers.Message{
Role: "tool",
Content: contentForLLM,
@@ -2528,7 +2537,8 @@ turnLoop:
select {
case result, ok := <-ts.pendingResults:
if ok && result != nil && result.ForLLM != "" {
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", result.ForLLM)}
content := al.cfg.FilterSensitiveData(result.ForLLM)
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", content)}
messages = append(messages, msg)
ts.agent.Sessions.AddFullMessage(ts.sessionKey, msg)
}
+42 -2
View File
@@ -114,6 +114,25 @@ func (c *Config) WithSecurity(sec *SecurityConfig) *Config {
return c
}
// FilterSensitiveData returns content with every configured sensitive value
// replaced by the cached replacer of the attached SecurityConfig, so that
// credentials do not reach the LLM.
//
// The content is returned unchanged when any of the following holds:
//   - no SecurityConfig is attached, or content is empty;
//   - filtering is disabled in the tools configuration;
//   - content is shorter than Tools.GetFilterMinLength() bytes.
func (c *Config) FilterSensitiveData(content string) string {
	switch {
	case c.security == nil, content == "":
		// Nothing to filter with, or nothing to filter.
		return content
	case !c.Tools.IsFilterSensitiveDataEnabled():
		// Filtering switched off by configuration.
		return content
	case len(content) < c.Tools.GetFilterMinLength():
		// Fast path: short content skips the replacer entirely.
		return content
	}
	return c.security.SensitiveDataReplacer().Replace(content)
}
type HooksConfig struct {
Enabled bool `json:"enabled"`
Defaults HookDefaultsConfig `json:"defaults,omitempty"`
@@ -1201,8 +1220,16 @@ type ReadFileToolConfig struct {
}
type ToolsConfig struct {
AllowReadPaths []string `json:"allow_read_paths" env:"PICOCLAW_TOOLS_ALLOW_READ_PATHS"`
AllowWritePaths []string `json:"allow_write_paths" env:"PICOCLAW_TOOLS_ALLOW_WRITE_PATHS"`
AllowReadPaths []string `json:"allow_read_paths" env:"PICOCLAW_TOOLS_ALLOW_READ_PATHS"`
AllowWritePaths []string `json:"allow_write_paths" env:"PICOCLAW_TOOLS_ALLOW_WRITE_PATHS"`
// FilterSensitiveData controls whether to filter sensitive values (API keys,
// tokens, secrets) from tool results before sending to the LLM.
// Default: true (enabled)
FilterSensitiveData bool `json:"filter_sensitive_data" env:"PICOCLAW_TOOLS_FILTER_SENSITIVE_DATA"`
// FilterMinLength is the minimum content length required for filtering.
// Content shorter than this will be returned unchanged for performance.
// Default: 8
FilterMinLength int `json:"filter_min_length" env:"PICOCLAW_TOOLS_FILTER_MIN_LENGTH"`
Web WebToolsConfig `json:"web"`
Cron CronToolsConfig `json:"cron"`
Exec ExecConfig `json:"exec"`
@@ -1226,6 +1253,19 @@ type ToolsConfig struct {
WriteFile ToolConfig `json:"write_file" envPrefix:"PICOCLAW_TOOLS_WRITE_FILE_"`
}
// IsFilterSensitiveDataEnabled reports whether sensitive data filtering is
// enabled. It returns the FilterSensitiveData field as-is, so a zero-value
// ToolsConfig reports false; the "enabled by default" behavior comes from the
// value set in DefaultConfig, not from this method.
func (c *ToolsConfig) IsFilterSensitiveDataEnabled() bool {
return c.FilterSensitiveData
}
// GetFilterMinLength returns the minimum content length, in bytes, at which
// filtering is applied. A configured FilterMinLength that is zero or negative
// falls back to the default of 8.
func (c *ToolsConfig) GetFilterMinLength() int {
	if configured := c.FilterMinLength; configured > 0 {
		return configured
	}
	return 8
}
type SearchCacheConfig struct {
MaxSize int `json:"max_size" env:"PICOCLAW_SKILLS_SEARCH_CACHE_MAX_SIZE"`
TTLSeconds int `json:"ttl_seconds" env:"PICOCLAW_SKILLS_SEARCH_CACHE_TTL_SECONDS"`
+210
View File
@@ -436,6 +436,40 @@ func TestDefaultConfig_ExecAllowRemoteEnabled(t *testing.T) {
}
}
// TestDefaultConfig_FilterSensitiveDataEnabled checks that DefaultConfig turns
// sensitive data filtering on.
func TestDefaultConfig_FilterSensitiveDataEnabled(t *testing.T) {
	if enabled := DefaultConfig().Tools.FilterSensitiveData; !enabled {
		t.Fatal("DefaultConfig().Tools.FilterSensitiveData should be true")
	}
}
// TestDefaultConfig_FilterMinLength checks that DefaultConfig sets the
// minimum filter length to 8.
func TestDefaultConfig_FilterMinLength(t *testing.T) {
	const want = 8
	got := DefaultConfig().Tools.FilterMinLength
	if got != want {
		t.Fatalf("DefaultConfig().Tools.FilterMinLength = %d, want 8", got)
	}
}
// TestToolsConfig_GetFilterMinLength checks the fallback to 8 for non-positive
// values and the pass-through of positive values.
func TestToolsConfig_GetFilterMinLength(t *testing.T) {
	type testCase struct {
		name     string
		minLen   int
		expected int
	}
	cases := []testCase{
		{name: "zero returns default", minLen: 0, expected: 8},
		{name: "negative returns default", minLen: -1, expected: 8},
		{name: "positive returns value", minLen: 16, expected: 16},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			tools := &ToolsConfig{FilterMinLength: tc.minLen}
			got := tools.GetFilterMinLength()
			if got != tc.expected {
				t.Errorf("GetFilterMinLength() = %v, want %v", got, tc.expected)
			}
		})
	}
}
func TestDefaultConfig_CronAllowCommandEnabled(t *testing.T) {
cfg := DefaultConfig()
if !cfg.Tools.Cron.AllowCommand {
@@ -1252,3 +1286,179 @@ func TestDefaultConfig_MinimaxExtraBody(t *testing.T) {
t.Fatalf("Minimax ExtraBody[reasoning_split] = %v, want true", got)
}
}
// TestFilterSensitiveData walks one Config through the pass-through paths
// (no security config, empty content, short content, filtering disabled) and
// the actual replacement of a configured API key.
//
// The steps are order-dependent: the replacer is cached the first time content
// is really filtered, so the API key must be registered before that happens.
func TestFilterSensitiveData(t *testing.T) {
	cfg := &Config{}

	// No security config attached: content passes through untouched.
	const plain = "hello sk-key123 world"
	if got := cfg.FilterSensitiveData(plain); got != plain {
		t.Errorf("nil security: got %q, want original", got)
	}

	// Empty content is returned as-is.
	cfg.security = &SecurityConfig{}
	if got := cfg.FilterSensitiveData(""); got != "" {
		t.Errorf("empty content: got %q, want empty", got)
	}

	// Register one API key and enable filtering with a minimum length of 8.
	cfg.security.ModelList = map[string]ModelSecurityEntry{
		"test": {APIKeys: []string{"sk-long-key-12345"}},
	}
	cfg.Tools.FilterSensitiveData = true
	cfg.Tools.FilterMinLength = 8

	// Debug aid: show what the reflection walk collected.
	collected := cfg.security.collectSensitiveValues()
	t.Logf("collected %d sensitive values: %v", len(collected), collected)

	// Content shorter than FilterMinLength skips filtering.
	if got := cfg.FilterSensitiveData("sk-key"); got != "sk-key" {
		t.Errorf("short content should not be filtered: got %q", got)
	}

	// Only the registered key is replaced; "abc123" is not a configured value.
	const (
		content  = "Your API key is sk-long-key-12345 and token abc123"
		expected = "Your API key is [FILTERED] and token abc123"
	)
	if got := cfg.FilterSensitiveData(content); got != expected {
		t.Errorf("filtering failed: got %q, want %q", got, expected)
	}

	// With filtering disabled the content is returned unchanged.
	cfg.Tools.FilterSensitiveData = false
	if got := cfg.FilterSensitiveData(content); got != content {
		t.Errorf("disabled filtering: got %q, want original %q", got, content)
	}
}
// TestFilterSensitiveData_MultipleKeys checks that API keys spread across
// several model entries are all replaced in a single pass.
func TestFilterSensitiveData_MultipleKeys(t *testing.T) {
	cfg := &Config{}
	cfg.Tools.FilterSensitiveData = true
	cfg.Tools.FilterMinLength = 8
	cfg.security = &SecurityConfig{
		ModelList: map[string]ModelSecurityEntry{
			"model1": {APIKeys: []string{"key-one", "key-two"}},
			"model2": {APIKeys: []string{"key-three"}},
		},
	}

	const (
		input = "key-one and key-two and key-three should be filtered"
		want  = "[FILTERED] and [FILTERED] and [FILTERED] should be filtered"
	)
	got := cfg.FilterSensitiveData(input)
	if got != want {
		t.Errorf("multiple keys: got %q, want %q", got, want)
	}
}
// TestFilterSensitiveData_AllTokenTypes configures one secret for every kind
// of credential held by SecurityConfig (model API keys, channel tokens, web
// tool API keys, skills tokens) and checks that each of them is replaced by
// [FILTERED]. Every secret set in the fixture is asserted by at least one
// case, so a credential type that the reflection walk stops reaching makes
// this test fail.
func TestFilterSensitiveData_AllTokenTypes(t *testing.T) {
	cfg := &Config{
		Tools: ToolsConfig{
			FilterSensitiveData: true,
			FilterMinLength:     8,
		},
	}
	cfg.security = &SecurityConfig{
		// Model API keys
		ModelList: map[string]ModelSecurityEntry{
			"test-model": {APIKeys: []string{"sk-model-key-12345"}},
		},
		// Channel tokens
		Channels: ChannelsSecurity{
			Telegram: &TelegramSecurity{Token: "telegram-bot-token-abcdef"},
			Discord:  &DiscordSecurity{Token: "discord-bot-token-xyz789"},
			Slack:    &SlackSecurity{BotToken: "xoxb-slack-bot-token", AppToken: "xapp-slack-app-token"},
			Matrix:   &MatrixSecurity{AccessToken: "matrix-access-token-abc"},
			Feishu:   &FeishuSecurity{AppSecret: "feishu-app-secret-123", EncryptKey: "feishu-encrypt-key"},
			DingTalk: &DingTalkSecurity{ClientSecret: "dingtalk-client-secret"},
			OneBot:   &OneBotSecurity{AccessToken: "onebot-access-token"},
			WeCom:    &WeComSecurity{Token: "wecom-token", EncodingAESKey: "wecom-aes-key"},
			WeComApp: &WeComAppSecurity{CorpSecret: "wecom-app-secret", Token: "wecom-app-token"},
			Pico:     &PicoSecurity{Token: "pico-token-abc123"},
			IRC:      &IRCSecurity{Password: "irc-password", NickServPassword: "nickserv-pass", SASLPassword: "sasl-pass"},
		},
		// Web tool API keys
		Web: WebToolsSecurity{
			Brave:       &BraveSecurity{APIKeys: []string{"brave-api-key"}},
			Tavily:      &TavilySecurity{APIKeys: []string{"tavily-api-key"}},
			Perplexity:  &PerplexitySecurity{APIKeys: []string{"perplexity-api-key"}},
			GLMSearch:   &GLMSearchSecurity{APIKey: "glm-search-key"},
			BaiduSearch: &BaiduSearchSecurity{APIKey: "baidu-search-key"},
		},
		// Skills tokens
		Skills: SkillsSecurity{
			Github:  &GithubSecurity{Token: "github-token-xyz"},
			ClawHub: &ClawHubSecurity{AuthToken: "clawhub-auth-token"},
		},
	}

	tests := []struct {
		name    string
		content string
		want    string
	}{
		{
			name:    "model_api_key",
			content: "Using model with key sk-model-key-12345",
			want:    "Using model with key [FILTERED]",
		},
		{
			name:    "telegram_token",
			content: "Telegram token: telegram-bot-token-abcdef",
			want:    "Telegram token: [FILTERED]",
		},
		{
			name:    "discord_token",
			content: "Discord token: discord-bot-token-xyz789",
			want:    "Discord token: [FILTERED]",
		},
		{
			name:    "slack_tokens",
			content: "Slack bot: xoxb-slack-bot-token, app: xapp-slack-app-token",
			want:    "Slack bot: [FILTERED], app: [FILTERED]",
		},
		{
			name:    "matrix_token",
			content: "Matrix access token: matrix-access-token-abc",
			want:    "Matrix access token: [FILTERED]",
		},
		{
			name:    "feishu_secrets",
			content: "Feishu secret: feishu-app-secret-123, encrypt key: feishu-encrypt-key",
			want:    "Feishu secret: [FILTERED], encrypt key: [FILTERED]",
		},
		{
			name:    "dingtalk_secret",
			content: "DingTalk secret: dingtalk-client-secret",
			want:    "DingTalk secret: [FILTERED]",
		},
		{
			name:    "onebot_token",
			content: "OneBot token: onebot-access-token",
			want:    "OneBot token: [FILTERED]",
		},
		{
			name:    "wecom_secrets",
			content: "WeCom token: wecom-token, aes key: wecom-aes-key",
			want:    "WeCom token: [FILTERED], aes key: [FILTERED]",
		},
		{
			name:    "wecom_app_secrets",
			content: "WeComApp secret: wecom-app-secret, token: wecom-app-token",
			want:    "WeComApp secret: [FILTERED], token: [FILTERED]",
		},
		{
			name:    "pico_token",
			content: "Pico token: pico-token-abc123",
			want:    "Pico token: [FILTERED]",
		},
		{
			name:    "irc_passwords",
			content: "IRC password: irc-password, nickserv: nickserv-pass",
			want:    "IRC password: [FILTERED], nickserv: [FILTERED]",
		},
		{
			name:    "irc_sasl_password",
			content: "IRC SASL password: sasl-pass",
			want:    "IRC SASL password: [FILTERED]",
		},
		{
			name:    "brave_api_key",
			content: "Brave key: brave-api-key",
			want:    "Brave key: [FILTERED]",
		},
		{
			name:    "tavily_api_key",
			content: "Tavily key: tavily-api-key",
			want:    "Tavily key: [FILTERED]",
		},
		{
			name:    "perplexity_api_key",
			content: "Perplexity key: perplexity-api-key",
			want:    "Perplexity key: [FILTERED]",
		},
		{
			name:    "glm_search_key",
			content: "GLM search key: glm-search-key",
			want:    "GLM search key: [FILTERED]",
		},
		{
			name:    "baidu_search_key",
			content: "Baidu search key: baidu-search-key",
			want:    "Baidu search key: [FILTERED]",
		},
		{
			name:    "github_token",
			content: "GitHub token: github-token-xyz",
			want:    "GitHub token: [FILTERED]",
		},
		{
			name:    "clawhub_token",
			content: "ClawHub token: clawhub-auth-token",
			want:    "ClawHub token: [FILTERED]",
		},
		{
			name:    "mixed_content",
			content: "Model key sk-model-key-12345 and Telegram token telegram-bot-token-abcdef",
			want:    "Model key [FILTERED] and Telegram token [FILTERED]",
		},
		{
			// The content is long enough to be filtered; "abc" survives only
			// because it is not one of the configured secrets.
			name:    "unknown_value_not_filtered",
			content: "Value abc is not a configured secret and stays as is",
			want:    "Value abc is not a configured secret and stays as is",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := cfg.FilterSensitiveData(tt.content); got != tt.want {
				t.Errorf("got %q, want %q", got, tt.want)
			}
		})
	}
}
+2
View File
@@ -378,6 +378,8 @@ func DefaultConfig() *Config {
LogLevel: "fatal",
},
Tools: ToolsConfig{
FilterSensitiveData: true,
FilterMinLength: 8,
MediaCleanup: MediaCleanupConfig{
ToolConfig: ToolConfig{
Enabled: true,
+94
View File
@@ -10,6 +10,9 @@ import (
"fmt"
"os"
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
"github.com/caarlos0/env/v11"
"github.com/tencent-connect/botgo/log"
@@ -35,6 +38,9 @@ type SecurityConfig struct {
Web WebToolsSecurity `yaml:"web,omitempty"`
Skills SkillsSecurity `yaml:"skills,omitempty"`
// cache for the strings.Replacer built from the sensitive values (computed once)
sensitiveCache *SensitiveDataCache
}
// ModelSecurityEntry stores security data for a model
@@ -218,3 +224,91 @@ func saveSecurityConfig(securityPath string, sec *SecurityConfig) error {
}
return fileutil.WriteFileAtomic(securityPath, buf.Bytes(), 0o600)
}
// SensitiveDataCache caches the strings.Replacer for filtering sensitive data.
// Computed once on first access via sync.Once.
type SensitiveDataCache struct {
// replacer is the cached Replacer; it is assigned inside once.Do by
// SecurityConfig.initSensitiveCache.
replacer *strings.Replacer
// once guards the one-time construction of replacer.
once sync.Once
}
// SensitiveDataReplacer returns the cached strings.Replacer used to filter
// sensitive data. The cache is initialized on the first call through
// initSensitiveCache; later calls return the same Replacer.
func (sec *SecurityConfig) SensitiveDataReplacer() *strings.Replacer {
	sec.initSensitiveCache()
	cache := sec.sensitiveCache
	return cache.replacer
}
// sensitiveCacheInitMu guards the lazy allocation of
// SecurityConfig.sensitiveCache. The sync.Once lives inside the lazily
// allocated cache, so it cannot protect the nil check that creates it.
var sensitiveCacheInitMu sync.Mutex

// initSensitiveCache initializes the sensitive data cache if not already done.
//
// The replacer is built exactly once per SecurityConfig from the values
// returned by collectSensitiveValues:
//   - values of 3 bytes or fewer are skipped (too short to replace without
//     mangling ordinary text);
//   - values are ordered longest first, then lexicographically, and
//     duplicates are dropped. strings.Replacer compares its old strings in
//     argument order, so without this ordering a secret that is a prefix of a
//     longer secret could match first and leave the tail of the longer one
//     visible. The ordering also makes the result independent of map
//     iteration order.
func (sec *SecurityConfig) initSensitiveCache() {
	sensitiveCacheInitMu.Lock()
	if sec.sensitiveCache == nil {
		sec.sensitiveCache = &SensitiveDataCache{}
	}
	cache := sec.sensitiveCache
	sensitiveCacheInitMu.Unlock()

	cache.once.Do(func() {
		values := sec.collectSensitiveValues()
		sort.Slice(values, func(i, j int) bool {
			if len(values[i]) != len(values[j]) {
				return len(values[i]) > len(values[j])
			}
			return values[i] < values[j]
		})

		// Build old/new pairs for strings.Replacer.
		pairs := make([]string, 0, 2*len(values))
		for i, v := range values {
			if len(v) <= 3 {
				// Sorted longest first: everything from here on is too short.
				break
			}
			if i > 0 && v == values[i-1] {
				continue // duplicate of the previous value
			}
			pairs = append(pairs, v, "[FILTERED]")
		}

		// With no pairs this yields a Replacer that returns its input unchanged.
		cache.replacer = strings.NewReplacer(pairs...)
	})
}
// collectSensitiveValues walks sec with reflection, via collectSensitive, and
// returns the strings it finds. The result is nil when nothing was collected.
func (sec *SecurityConfig) collectSensitiveValues() []string {
	var collected []string
	root := reflect.ValueOf(sec)
	collectSensitive(root, &collected)
	return collected
}
// collectSensitive recursively traverses v and appends every non-empty string
// it reaches to *values.
//
// Traversal rules:
//   - pointers and interfaces are dereferenced; a nil one ends that branch;
//   - structs: only exported fields are visited;
//   - slices and arrays: every element is visited, so strings nested in
//     element structs or pointers are collected as well as plain strings;
//   - maps: only the values are visited, keys are ignored;
//   - any other kind (numbers, bools, ...) is ignored.
//
// The order of appended values follows field and element order, except for
// map entries, whose iteration order is unspecified.
func collectSensitive(v reflect.Value, values *[]string) {
	// Dereference pointers/interfaces to get the underlying value.
	for v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface {
		if v.IsNil() {
			return
		}
		v = v.Elem()
	}

	switch v.Kind() {
	case reflect.Struct:
		t := v.Type()
		for i := 0; i < v.NumField(); i++ {
			if !t.Field(i).IsExported() {
				continue
			}
			collectSensitive(v.Field(i), values)
		}
	case reflect.String:
		if s := v.String(); s != "" {
			*values = append(*values, s)
		}
	case reflect.Slice, reflect.Array:
		for i := 0; i < v.Len(); i++ {
			collectSensitive(v.Index(i), values)
		}
	case reflect.Map:
		for iter := v.MapRange(); iter.Next(); {
			collectSensitive(iter.Value(), values)
		}
	}
}
+1 -1
View File
@@ -12,7 +12,7 @@ import (
)
func initPanicFile(panicFile string) io.WriteCloser {
file, err := os.OpenFile(panicFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND, 0600)
file, err := os.OpenFile(panicFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND, 0o600)
if err != nil {
panic(fmt.Sprintf("error in open panic: %v", err))
}