mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
feat(security): add sensitive data filtering for tool results sent to LLM

Prevent the LLM from seeing its own credentials (API keys, tokens, secrets) by filtering sensitive values from tool call results before they are sent to the model. Values are collected from .security.yml and replaced with [FILTERED] using an efficient strings.Replacer (O(n+m)).

- Add FilterSensitiveData and FilterMinLength to ToolsConfig
- Implement SensitiveDataReplacer() with sync.Once caching in SecurityConfig
- Use reflection to collect all sensitive values (model API keys, channel tokens, web tool API keys, skills tokens)
- Apply filtering in the agent loop at 4 tool result locations
- Add comprehensive tests covering all token types
This commit is contained in:
+12
-2
@@ -1733,7 +1733,8 @@ turnLoop:
|
||||
select {
|
||||
case result, ok := <-ts.pendingResults:
|
||||
if ok && result != nil && result.ForLLM != "" {
|
||||
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", result.ForLLM)}
|
||||
content := al.cfg.FilterSensitiveData(result.ForLLM)
|
||||
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", content)}
|
||||
pendingMessages = append(pendingMessages, msg)
|
||||
}
|
||||
default:
|
||||
@@ -2336,6 +2337,9 @@ turnLoop:
|
||||
return
|
||||
}
|
||||
|
||||
// Filter sensitive data before publishing
|
||||
content = al.cfg.FilterSensitiveData(content)
|
||||
|
||||
logger.InfoCF("agent", "Async tool completed, publishing result",
|
||||
map[string]any{
|
||||
"tool": asyncToolName,
|
||||
@@ -2451,6 +2455,11 @@ turnLoop:
|
||||
contentForLLM = toolResult.Err.Error()
|
||||
}
|
||||
|
||||
// Filter sensitive data (API keys, tokens, secrets) before sending to LLM
|
||||
if al.cfg.Tools.IsFilterSensitiveDataEnabled() {
|
||||
contentForLLM = al.cfg.FilterSensitiveData(contentForLLM)
|
||||
}
|
||||
|
||||
toolResultMsg := providers.Message{
|
||||
Role: "tool",
|
||||
Content: contentForLLM,
|
||||
@@ -2528,7 +2537,8 @@ turnLoop:
|
||||
select {
|
||||
case result, ok := <-ts.pendingResults:
|
||||
if ok && result != nil && result.ForLLM != "" {
|
||||
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", result.ForLLM)}
|
||||
content := al.cfg.FilterSensitiveData(result.ForLLM)
|
||||
msg := providers.Message{Role: "user", Content: fmt.Sprintf("[SubTurn Result] %s", content)}
|
||||
messages = append(messages, msg)
|
||||
ts.agent.Sessions.AddFullMessage(ts.sessionKey, msg)
|
||||
}
|
||||
|
||||
+42
-2
@@ -114,6 +114,25 @@ func (c *Config) WithSecurity(sec *SecurityConfig) *Config {
|
||||
return c
|
||||
}
|
||||
|
||||
// FilterSensitiveData filters sensitive values from content before sending to LLM.
|
||||
// This prevents the LLM from seeing its own credentials.
|
||||
// Uses strings.Replacer for O(n+m) performance (computed once per SecurityConfig).
|
||||
// Short content (below FilterMinLength) is returned unchanged for performance.
|
||||
func (c *Config) FilterSensitiveData(content string) string {
|
||||
if c.security == nil || content == "" {
|
||||
return content
|
||||
}
|
||||
// Check if filtering is enabled (default: true)
|
||||
if !c.Tools.IsFilterSensitiveDataEnabled() {
|
||||
return content
|
||||
}
|
||||
// Fast path: skip filtering for short content
|
||||
if len(content) < c.Tools.GetFilterMinLength() {
|
||||
return content
|
||||
}
|
||||
return c.security.SensitiveDataReplacer().Replace(content)
|
||||
}
|
||||
|
||||
type HooksConfig struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
Defaults HookDefaultsConfig `json:"defaults,omitempty"`
|
||||
@@ -1201,8 +1220,16 @@ type ReadFileToolConfig struct {
|
||||
}
|
||||
|
||||
type ToolsConfig struct {
|
||||
AllowReadPaths []string `json:"allow_read_paths" env:"PICOCLAW_TOOLS_ALLOW_READ_PATHS"`
|
||||
AllowWritePaths []string `json:"allow_write_paths" env:"PICOCLAW_TOOLS_ALLOW_WRITE_PATHS"`
|
||||
AllowReadPaths []string `json:"allow_read_paths" env:"PICOCLAW_TOOLS_ALLOW_READ_PATHS"`
|
||||
AllowWritePaths []string `json:"allow_write_paths" env:"PICOCLAW_TOOLS_ALLOW_WRITE_PATHS"`
|
||||
// FilterSensitiveData controls whether to filter sensitive values (API keys,
|
||||
// tokens, secrets) from tool results before sending to the LLM.
|
||||
// Default: true (enabled)
|
||||
FilterSensitiveData bool `json:"filter_sensitive_data" env:"PICOCLAW_TOOLS_FILTER_SENSITIVE_DATA"`
|
||||
// FilterMinLength is the minimum content length required for filtering.
|
||||
// Content shorter than this will be returned unchanged for performance.
|
||||
// Default: 8
|
||||
FilterMinLength int `json:"filter_min_length" env:"PICOCLAW_TOOLS_FILTER_MIN_LENGTH"`
|
||||
Web WebToolsConfig `json:"web"`
|
||||
Cron CronToolsConfig `json:"cron"`
|
||||
Exec ExecConfig `json:"exec"`
|
||||
@@ -1226,6 +1253,19 @@ type ToolsConfig struct {
|
||||
WriteFile ToolConfig `json:"write_file" envPrefix:"PICOCLAW_TOOLS_WRITE_FILE_"`
|
||||
}
|
||||
|
||||
// IsFilterSensitiveDataEnabled returns true if sensitive data filtering is enabled
|
||||
func (c *ToolsConfig) IsFilterSensitiveDataEnabled() bool {
|
||||
return c.FilterSensitiveData
|
||||
}
|
||||
|
||||
// GetFilterMinLength returns the minimum content length for filtering (default: 8)
|
||||
func (c *ToolsConfig) GetFilterMinLength() int {
|
||||
if c.FilterMinLength <= 0 {
|
||||
return 8
|
||||
}
|
||||
return c.FilterMinLength
|
||||
}
|
||||
|
||||
type SearchCacheConfig struct {
|
||||
MaxSize int `json:"max_size" env:"PICOCLAW_SKILLS_SEARCH_CACHE_MAX_SIZE"`
|
||||
TTLSeconds int `json:"ttl_seconds" env:"PICOCLAW_SKILLS_SEARCH_CACHE_TTL_SECONDS"`
|
||||
|
||||
@@ -436,6 +436,40 @@ func TestDefaultConfig_ExecAllowRemoteEnabled(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_FilterSensitiveDataEnabled(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
if !cfg.Tools.FilterSensitiveData {
|
||||
t.Fatal("DefaultConfig().Tools.FilterSensitiveData should be true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_FilterMinLength(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
if cfg.Tools.FilterMinLength != 8 {
|
||||
t.Fatalf("DefaultConfig().Tools.FilterMinLength = %d, want 8", cfg.Tools.FilterMinLength)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolsConfig_GetFilterMinLength(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
minLen int
|
||||
expected int
|
||||
}{
|
||||
{"zero returns default", 0, 8},
|
||||
{"negative returns default", -1, 8},
|
||||
{"positive returns value", 16, 16},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cfg := &ToolsConfig{FilterMinLength: tt.minLen}
|
||||
if got := cfg.GetFilterMinLength(); got != tt.expected {
|
||||
t.Errorf("GetFilterMinLength() = %v, want %v", got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_CronAllowCommandEnabled(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
if !cfg.Tools.Cron.AllowCommand {
|
||||
@@ -1252,3 +1286,179 @@ func TestDefaultConfig_MinimaxExtraBody(t *testing.T) {
|
||||
t.Fatalf("Minimax ExtraBody[reasoning_split] = %v, want true", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterSensitiveData(t *testing.T) {
|
||||
// Test with nil security config
|
||||
cfg := &Config{}
|
||||
if got := cfg.FilterSensitiveData("hello sk-key123 world"); got != "hello sk-key123 world" {
|
||||
t.Errorf("nil security: got %q, want original", got)
|
||||
}
|
||||
|
||||
// Test with empty content
|
||||
cfg.security = &SecurityConfig{}
|
||||
if got := cfg.FilterSensitiveData(""); got != "" {
|
||||
t.Errorf("empty content: got %q, want empty", got)
|
||||
}
|
||||
|
||||
// Test short content (less than FilterMinLength=8, should skip filtering)
|
||||
cfg.security.ModelList = map[string]ModelSecurityEntry{
|
||||
"test": {APIKeys: []string{"sk-long-key-12345"}},
|
||||
}
|
||||
cfg.Tools.FilterSensitiveData = true
|
||||
cfg.Tools.FilterMinLength = 8
|
||||
|
||||
// Debug: check if sensitive values are collected
|
||||
values := cfg.security.collectSensitiveValues()
|
||||
t.Logf("collected %d sensitive values: %v", len(values), values)
|
||||
|
||||
if got := cfg.FilterSensitiveData("sk-key"); got != "sk-key" {
|
||||
t.Errorf("short content should not be filtered: got %q", got)
|
||||
}
|
||||
|
||||
// Test filtering works
|
||||
content := "Your API key is sk-long-key-12345 and token abc123"
|
||||
// abc123 is not in sensitive values, only sk-long-key-12345 should be filtered
|
||||
expected := "Your API key is [FILTERED] and token abc123"
|
||||
if got := cfg.FilterSensitiveData(content); got != expected {
|
||||
t.Errorf("filtering failed: got %q, want %q", got, expected)
|
||||
}
|
||||
|
||||
// Test disabled filtering
|
||||
cfg.Tools.FilterSensitiveData = false
|
||||
if got := cfg.FilterSensitiveData(content); got != content {
|
||||
t.Errorf("disabled filtering: got %q, want original %q", got, content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterSensitiveData_MultipleKeys(t *testing.T) {
|
||||
cfg := &Config{
|
||||
Tools: ToolsConfig{
|
||||
FilterSensitiveData: true,
|
||||
FilterMinLength: 8,
|
||||
},
|
||||
}
|
||||
cfg.security = &SecurityConfig{
|
||||
ModelList: map[string]ModelSecurityEntry{
|
||||
"model1": {APIKeys: []string{"key-one", "key-two"}},
|
||||
"model2": {APIKeys: []string{"key-three"}},
|
||||
},
|
||||
}
|
||||
|
||||
content := "key-one and key-two and key-three should be filtered"
|
||||
expected := "[FILTERED] and [FILTERED] and [FILTERED] should be filtered"
|
||||
if got := cfg.FilterSensitiveData(content); got != expected {
|
||||
t.Errorf("multiple keys: got %q, want %q", got, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterSensitiveData_AllTokenTypes(t *testing.T) {
|
||||
cfg := &Config{
|
||||
Tools: ToolsConfig{
|
||||
FilterSensitiveData: true,
|
||||
FilterMinLength: 8,
|
||||
},
|
||||
}
|
||||
cfg.security = &SecurityConfig{
|
||||
// Model API keys
|
||||
ModelList: map[string]ModelSecurityEntry{
|
||||
"test-model": {APIKeys: []string{"sk-model-key-12345"}},
|
||||
},
|
||||
// Channel tokens
|
||||
Channels: ChannelsSecurity{
|
||||
Telegram: &TelegramSecurity{Token: "telegram-bot-token-abcdef"},
|
||||
Discord: &DiscordSecurity{Token: "discord-bot-token-xyz789"},
|
||||
Slack: &SlackSecurity{BotToken: "xoxb-slack-bot-token", AppToken: "xapp-slack-app-token"},
|
||||
Matrix: &MatrixSecurity{AccessToken: "matrix-access-token-abc"},
|
||||
Feishu: &FeishuSecurity{AppSecret: "feishu-app-secret-123", EncryptKey: "feishu-encrypt-key"},
|
||||
DingTalk: &DingTalkSecurity{ClientSecret: "dingtalk-client-secret"},
|
||||
OneBot: &OneBotSecurity{AccessToken: "onebot-access-token"},
|
||||
WeCom: &WeComSecurity{Token: "wecom-token", EncodingAESKey: "wecom-aes-key"},
|
||||
WeComApp: &WeComAppSecurity{CorpSecret: "wecom-app-secret", Token: "wecom-app-token"},
|
||||
Pico: &PicoSecurity{Token: "pico-token-abc123"},
|
||||
IRC: &IRCSecurity{Password: "irc-password", NickServPassword: "nickserv-pass", SASLPassword: "sasl-pass"},
|
||||
},
|
||||
// Web tool API keys
|
||||
Web: WebToolsSecurity{
|
||||
Brave: &BraveSecurity{APIKeys: []string{"brave-api-key"}},
|
||||
Tavily: &TavilySecurity{APIKeys: []string{"tavily-api-key"}},
|
||||
Perplexity: &PerplexitySecurity{APIKeys: []string{"perplexity-api-key"}},
|
||||
GLMSearch: &GLMSearchSecurity{APIKey: "glm-search-key"},
|
||||
BaiduSearch: &BaiduSearchSecurity{APIKey: "baidu-search-key"},
|
||||
},
|
||||
// Skills tokens
|
||||
Skills: SkillsSecurity{
|
||||
Github: &GithubSecurity{Token: "github-token-xyz"},
|
||||
ClawHub: &ClawHubSecurity{AuthToken: "clawhub-auth-token"},
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "model_api_key",
|
||||
content: "Using model with key sk-model-key-12345",
|
||||
want: "Using model with key [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "telegram_token",
|
||||
content: "Telegram token: telegram-bot-token-abcdef",
|
||||
want: "Telegram token: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "discord_token",
|
||||
content: "Discord token: discord-bot-token-xyz789",
|
||||
want: "Discord token: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "slack_tokens",
|
||||
content: "Slack bot: xoxb-slack-bot-token, app: xapp-slack-app-token",
|
||||
want: "Slack bot: [FILTERED], app: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "matrix_token",
|
||||
content: "Matrix access token: matrix-access-token-abc",
|
||||
want: "Matrix access token: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "brave_api_key",
|
||||
content: "Brave key: brave-api-key",
|
||||
want: "Brave key: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "tavily_api_key",
|
||||
content: "Tavily key: tavily-api-key",
|
||||
want: "Tavily key: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "github_token",
|
||||
content: "GitHub token: github-token-xyz",
|
||||
want: "GitHub token: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "irc_passwords",
|
||||
content: "IRC password: irc-password, nickserv: nickserv-pass",
|
||||
want: "IRC password: [FILTERED], nickserv: [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "mixed_content",
|
||||
content: "Model key sk-model-key-12345 and Telegram token telegram-bot-token-abcdef",
|
||||
want: "Model key [FILTERED] and Telegram token [FILTERED]",
|
||||
},
|
||||
{
|
||||
name: "short_key_not_filtered",
|
||||
content: "Key abc not filtered because length < 8",
|
||||
want: "Key abc not filtered because length < 8",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := cfg.FilterSensitiveData(tt.content); got != tt.want {
|
||||
t.Errorf("got %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -378,6 +378,8 @@ func DefaultConfig() *Config {
|
||||
LogLevel: "fatal",
|
||||
},
|
||||
Tools: ToolsConfig{
|
||||
FilterSensitiveData: true,
|
||||
FilterMinLength: 8,
|
||||
MediaCleanup: MediaCleanupConfig{
|
||||
ToolConfig: ToolConfig{
|
||||
Enabled: true,
|
||||
|
||||
@@ -10,6 +10,9 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
"sort"
"strings"
"sync"
|
||||
|
||||
"github.com/caarlos0/env/v11"
|
||||
"github.com/tencent-connect/botgo/log"
|
||||
@@ -35,6 +38,9 @@ type SecurityConfig struct {
|
||||
|
||||
Web WebToolsSecurity `yaml:"web,omitempty"`
|
||||
Skills SkillsSecurity `yaml:"skills,omitempty"`
|
||||
|
||||
// sensitiveCache lazily holds the strings.Replacer built from the sensitive values (computed once)
|
||||
sensitiveCache *SensitiveDataCache
|
||||
}
|
||||
|
||||
// ModelSecurityEntry stores security data for a model
|
||||
@@ -218,3 +224,91 @@ func saveSecurityConfig(securityPath string, sec *SecurityConfig) error {
|
||||
}
|
||||
return fileutil.WriteFileAtomic(securityPath, buf.Bytes(), 0o600)
|
||||
}
|
||||
|
||||
// SensitiveDataCache holds the strings.Replacer used to mask sensitive data,
// together with the sync.Once under which that replacer is built on first
// access.
type SensitiveDataCache struct {
	// replacer maps each sensitive value to its placeholder.
	replacer *strings.Replacer
	// once ensures the replacer is constructed a single time.
	once sync.Once
}
|
||||
|
||||
// SensitiveDataReplacer returns the strings.Replacer for filtering sensitive data.
|
||||
// It is computed once on first access via sync.Once.
|
||||
func (sec *SecurityConfig) SensitiveDataReplacer() *strings.Replacer {
|
||||
sec.initSensitiveCache()
|
||||
return sec.sensitiveCache.replacer
|
||||
}
|
||||
|
||||
// initSensitiveCache initializes the sensitive data cache if not already done.
|
||||
func (sec *SecurityConfig) initSensitiveCache() {
|
||||
if sec.sensitiveCache == nil {
|
||||
sec.sensitiveCache = &SensitiveDataCache{}
|
||||
}
|
||||
sec.sensitiveCache.once.Do(func() {
|
||||
values := sec.collectSensitiveValues()
|
||||
if len(values) == 0 {
|
||||
sec.sensitiveCache.replacer = strings.NewReplacer()
|
||||
return
|
||||
}
|
||||
|
||||
// Build old/new pairs for strings.Replacer
|
||||
var pairs []string
|
||||
for _, v := range values {
|
||||
if len(v) > 3 {
|
||||
pairs = append(pairs, v, "[FILTERED]")
|
||||
}
|
||||
}
|
||||
if len(pairs) == 0 {
|
||||
sec.sensitiveCache.replacer = strings.NewReplacer()
|
||||
return
|
||||
}
|
||||
sec.sensitiveCache.replacer = strings.NewReplacer(pairs...)
|
||||
})
|
||||
}
|
||||
|
||||
// collectSensitiveValues collects all sensitive strings from SecurityConfig using reflection.
|
||||
func (sec *SecurityConfig) collectSensitiveValues() []string {
|
||||
var values []string
|
||||
collectSensitive(reflect.ValueOf(sec), &values)
|
||||
return values
|
||||
}
|
||||
|
||||
// collectSensitive recursively walks v and appends every non-empty string it
// reaches to *values.
//
// Pointers and interfaces are dereferenced (nil ones end the walk for that
// branch). Structs are traversed through their exported fields only. Slices
// and arrays are traversed element by element, so strings nested inside
// slices of structs or pointers are found as well as plain []string entries.
// Maps are traversed through their values; map keys are not collected. All
// other kinds are ignored.
func collectSensitive(v reflect.Value, values *[]string) {
	// Dereference pointers/interfaces to get the underlying value.
	for v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface {
		if v.IsNil() {
			return
		}
		v = v.Elem()
	}

	switch v.Kind() {
	case reflect.String:
		if s := v.String(); s != "" {
			*values = append(*values, s)
		}
	case reflect.Struct:
		t := v.Type()
		for i := 0; i < v.NumField(); i++ {
			// Unexported fields are skipped.
			if !t.Field(i).IsExported() {
				continue
			}
			collectSensitive(v.Field(i), values)
		}
	case reflect.Slice, reflect.Array:
		// Recurse per element so nested structs/pointers are not missed.
		for i := 0; i < v.Len(); i++ {
			collectSensitive(v.Index(i), values)
		}
	case reflect.Map:
		for _, key := range v.MapKeys() {
			collectSensitive(v.MapIndex(key), values)
		}
	}
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
)
|
||||
|
||||
func initPanicFile(panicFile string) io.WriteCloser {
|
||||
file, err := os.OpenFile(panicFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND, 0600)
|
||||
file, err := os.OpenFile(panicFile, os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_APPEND, 0o600)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("error in open panic: %v", err))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user