Files
picoclaw/pkg/evolution/pattern_clusterer_test.go
T
lxowalle b3a7b7ad64 feat: agent self evolution (#2847)
* feat: add agent self-evolution

* fix ci

* delete unused doc

* fix lint

* fix evolution review issues
2026-05-11 16:13:27 +08:00

403 lines
12 KiB
Go

package evolution_test
import (
"context"
"strings"
"testing"
"time"
"github.com/sipeed/picoclaw/pkg/evolution"
"github.com/sipeed/picoclaw/pkg/providers"
)
// llmClusterTestProvider is a canned LLM provider used by the clusterer tests.
// It answers every Chat call with a fixed payload and remembers the messages
// it was last asked with so tests can inspect the generated prompt.
type llmClusterTestProvider struct {
	// content is returned verbatim as the Content of every Chat response.
	content string

	// defaultModel is the value reported by GetDefaultModel.
	defaultModel string

	// messages holds a copy of the messages passed to the most recent Chat call.
	messages []providers.Message
}
// Chat records the incoming messages on the receiver and replies with the
// canned content. The context, tool definitions, model name, and options are
// ignored, and the returned error is always nil.
func (p *llmClusterTestProvider) Chat(
	_ context.Context,
	messages []providers.Message,
	_ []providers.ToolDefinition,
	_ string,
	_ map[string]any,
) (*providers.LLMResponse, error) {
	// Append onto a nil slice so the recorded messages get their own backing
	// array (a shallow copy) instead of aliasing the caller's slice.
	var recorded []providers.Message
	recorded = append(recorded, messages...)
	p.messages = recorded

	reply := &providers.LLMResponse{Content: p.content}
	return reply, nil
}
// GetDefaultModel reports the model name configured on the test provider.
func (p *llmClusterTestProvider) GetDefaultModel() string {
	model := p.defaultModel
	return model
}
// TestHeuristicPatternClusterer_GroupsChineseSummariesWithoutLLM feeds three
// successful task records, whose Chinese summaries differ only in the trailing
// number, to the heuristic clusterer. It expects exactly one pattern with a
// "task-" prefixed label, the first task's summary, three task record IDs, and
// three clustered IDs.
func TestHeuristicPatternClusterer_GroupsChineseSummariesWithoutLLM(t *testing.T) {
	// Fixed clock so any timestamps produced by the clusterer are deterministic.
	fixedNow := func() time.Time {
		return time.Unix(1700000000, 0).UTC()
	}
	// NOTE(review): the first argument (3) is presumably the minimum number of
	// tasks needed to form a pattern; confirm against the constructor.
	clusterer := evolution.NewHeuristicPatternClusterer(3, fixedNow)

	succeeded := true
	// newTask builds a successful task record in the shared "workspace".
	newTask := func(id, summary, output string) evolution.LearningRecord {
		return evolution.LearningRecord{
			ID:          id,
			Kind:        evolution.RecordKindTask,
			WorkspaceID: "workspace",
			Summary:     summary,
			FinalOutput: output,
			Status:      evolution.RecordStatus("new"),
			Success:     &succeeded,
		}
	}
	tasks := []evolution.LearningRecord{
		newTask("task-1", "调用三一定理计算100", "100 + 31 = 131; 131 + 42 = 173; 173 - 53 = 120"),
		newTask("task-2", "调用三一定理计算200", "200 + 31 = 231; 231 + 42 = 273; 273 - 53 = 220"),
		newTask("task-3", "调用三一定理计算300", "300 + 31 = 331; 331 + 42 = 373; 373 - 53 = 320"),
	}
	// Only the last record carries skill names.
	tasks[2].UsedSkillNames = []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}

	patterns, clusteredIDs, err := clusterer.BuildPatterns(context.Background(), "workspace", tasks, nil)
	if err != nil {
		t.Fatalf("BuildPatterns: %v", err)
	}
	if n := len(patterns); n != 1 {
		t.Fatalf("len(patterns) = %d, want 1: %#v", n, patterns)
	}

	pattern := patterns[0]
	if !strings.HasPrefix(pattern.Label, "task-") {
		t.Fatalf("Label = %q, want task-* fallback label", pattern.Label)
	}
	if pattern.Summary != "调用三一定理计算100" {
		t.Fatalf("Summary = %q, want representative Chinese summary", pattern.Summary)
	}
	if len(pattern.TaskRecordIDs) != 3 {
		t.Fatalf("TaskRecordIDs = %v, want 3 ids", pattern.TaskRecordIDs)
	}
	if len(clusteredIDs) != 3 {
		t.Fatalf("clusteredIDs = %v, want 3 ids", clusteredIDs)
	}
}
// TestLLMPatternClusterer_FallsBackWhenLLMReturnsNoUsableClusters has the
// canned provider return an empty cluster list. It expects the LLM clusterer
// to still yield one pattern covering both tasks, which the test attributes to
// the heuristic fallback it was constructed with.
func TestLLMPatternClusterer_FallsBackWhenLLMReturnsNoUsableClusters(t *testing.T) {
	// Both clusterers share the same fixed instant.
	fixedNow := func() time.Time {
		return time.Unix(1700000000, 0).UTC()
	}
	heuristic := evolution.NewHeuristicPatternClusterer(2, fixedNow)
	emptyAnswer := &llmClusterTestProvider{
		content:      `{"clusters":[]}`,
		defaultModel: "test-model",
	}
	clusterer := evolution.NewLLMPatternClusterer(emptyAnswer, "test-model", heuristic, 2, fixedNow)

	succeeded := true
	// newTask builds a successful task record in the shared "workspace".
	newTask := func(id, summary, output string) evolution.LearningRecord {
		return evolution.LearningRecord{
			ID:          id,
			Kind:        evolution.RecordKindTask,
			WorkspaceID: "workspace",
			Summary:     summary,
			FinalOutput: output,
			Status:      evolution.RecordStatus("new"),
			Success:     &succeeded,
		}
	}
	tasks := []evolution.LearningRecord{
		newTask("task-1", "调用三一定理计算100", "100 + 31 = 131"),
		newTask("task-2", "调用三一定理计算200", "200 + 31 = 231"),
	}

	patterns, clusteredIDs, err := clusterer.BuildPatterns(context.Background(), "workspace", tasks, nil)
	if err != nil {
		t.Fatalf("BuildPatterns: %v", err)
	}
	if n := len(patterns); n != 1 {
		t.Fatalf("len(patterns) = %d, want fallback pattern: %#v", n, patterns)
	}
	if len(clusteredIDs) != 2 {
		t.Fatalf("clusteredIDs = %v, want 2 task IDs", clusteredIDs)
	}
}
// TestLLMPatternClusterer_PromptFiltersExistingPatternsByWorkspace passes
// existing pattern records from two workspaces and builds patterns for
// "workspace-a". It then inspects the second message sent to the provider: the
// prompt must mention the workspace-a pattern and must not contain the label
// or summary of the workspace-b pattern.
func TestLLMPatternClusterer_PromptFiltersExistingPatternsByWorkspace(t *testing.T) {
	provider := &llmClusterTestProvider{
		content:      `{"clusters":[{"label":"current-weather-path","summary":"current summary","task_record_ids":["task-1"],"cluster_reason":"same goal"}]}`,
		defaultModel: "test-model",
	}
	fixedNow := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	fallback := evolution.NewHeuristicPatternClusterer(1, nil)
	clusterer := evolution.NewLLMPatternClusterer(provider, "test-model", fallback, 1, fixedNow)

	succeeded := true
	tasks := []evolution.LearningRecord{{
		ID:          "task-1",
		Kind:        evolution.RecordKindTask,
		WorkspaceID: "workspace-a",
		Summary:     "weather lookup",
		FinalOutput: "sunny",
		Status:      evolution.RecordStatus("new"),
		Success:     &succeeded,
	}}

	// One pattern belongs to the workspace under test, the other to a foreign one.
	ownPattern := evolution.LearningRecord{
		ID:          "rule-a",
		Kind:        evolution.RecordKindPattern,
		WorkspaceID: "workspace-a",
		Label:       "current-weather-path",
		Summary:     "current workspace pattern",
	}
	foreignPattern := evolution.LearningRecord{
		ID:          "rule-b",
		Kind:        evolution.RecordKindPattern,
		WorkspaceID: "workspace-b",
		Label:       "other-workspace-secret-path",
		Summary:     "other workspace pattern",
	}
	existing := []evolution.LearningRecord{ownPattern, foreignPattern}

	_, _, err := clusterer.BuildPatterns(context.Background(), "workspace-a", tasks, existing)
	if err != nil {
		t.Fatalf("BuildPatterns: %v", err)
	}
	if got := len(provider.messages); got != 2 {
		t.Fatalf("len(messages) = %d, want 2", got)
	}

	prompt := provider.messages[1].Content
	if !strings.Contains(prompt, "current-weather-path") {
		t.Fatalf("prompt = %q, want current workspace pattern", prompt)
	}
	// Neither the foreign label nor the foreign summary may appear in the prompt.
	for _, forbidden := range []string{"other-workspace-secret-path", "other workspace pattern"} {
		if strings.Contains(prompt, forbidden) {
			t.Fatalf("prompt leaked other workspace pattern: %s", prompt)
		}
	}
}
// TestLLMPatternClusterer_RejectsClusterBelowEvidenceSuccessRatio has the
// canned provider group one successful and one failed task into a single
// cluster, then calls BuildPatternsWithEvidence with 0.8 as the final argument.
// It expects no patterns and no clustered IDs, and checks that the prompt sent
// to the provider contains both `"success": true` and `"success": false`.
//
// NOTE(review): 0.8 is presumably the minimum success ratio a cluster's
// evidence must reach (1 of 2 succeeded here); confirm against the clusterer.
func TestLLMPatternClusterer_RejectsClusterBelowEvidenceSuccessRatio(t *testing.T) {
	provider := &llmClusterTestProvider{
		content:      `{"clusters":[{"label":"weather-lookup","summary":"lookup weather","task_record_ids":["task-success","task-failed"],"cluster_reason":"same weather lookup goal"}]}`,
		defaultModel: "test-model",
	}
	clusterer := evolution.NewLLMPatternClusterer(
		provider,
		"test-model",
		evolution.NewHeuristicPatternClusterer(1, nil),
		1,
		func() time.Time { return time.Unix(1700000000, 0).UTC() },
	)
	success := true
	failed := false
	successfulTasks := []evolution.LearningRecord{
		{
			ID:          "task-success",
			Kind:        evolution.RecordKindTask,
			WorkspaceID: "workspace-a",
			Summary:     "weather lookup shanghai",
			FinalOutput: "sunny",
			Status:      evolution.RecordStatus("new"),
			Success:     &success,
		},
	}
	// The evidence set is the successful task plus one failed task.
	evidenceTasks := []evolution.LearningRecord{
		successfulTasks[0],
		{
			ID:          "task-failed",
			Kind:        evolution.RecordKindTask,
			WorkspaceID: "workspace-a",
			Summary:     "forecast for shanghai",
			FinalOutput: "could not complete",
			Status:      evolution.RecordStatus("new"),
			Success:     &failed,
		},
	}
	patterns, clusteredIDs, err := clusterer.BuildPatternsWithEvidence(
		context.Background(),
		"workspace-a",
		successfulTasks,
		evidenceTasks,
		nil,
		0.8,
	)
	if err != nil {
		t.Fatalf("BuildPatternsWithEvidence: %v", err)
	}
	if len(patterns) != 0 {
		t.Fatalf("len(patterns) = %d, want 0: %#v", len(patterns), patterns)
	}
	if len(clusteredIDs) != 0 {
		t.Fatalf("clusteredIDs = %v, want none", clusteredIDs)
	}
	// Guard the index below: without this, a provider that was never called
	// (or was sent a single message) would panic with index-out-of-range
	// instead of producing a readable test failure.
	if len(provider.messages) < 2 {
		t.Fatalf("len(messages) = %d, want at least 2", len(provider.messages))
	}
	prompt := provider.messages[1].Content
	if !strings.Contains(prompt, `"success": true`) || !strings.Contains(prompt, `"success": false`) {
		t.Fatalf("prompt should include success and failure evidence:\n%s", prompt)
	}
}
// TestLLMPatternClusterer_RejectsIncompleteEvidenceAssignment has the canned
// provider return a cluster that names only "task-success" while the evidence
// set also contains "task-failed". With 0.8 as the final argument to
// BuildPatternsWithEvidence, it expects no patterns and no clustered IDs.
func TestLLMPatternClusterer_RejectsIncompleteEvidenceAssignment(t *testing.T) {
	provider := &llmClusterTestProvider{
		content:      `{"clusters":[{"label":"weather-lookup","summary":"lookup weather","task_record_ids":["task-success"],"cluster_reason":"same weather lookup goal"}]}`,
		defaultModel: "test-model",
	}
	fixedNow := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	fallback := evolution.NewHeuristicPatternClusterer(1, nil)
	clusterer := evolution.NewLLMPatternClusterer(provider, "test-model", fallback, 1, fixedNow)

	succeeded, failed := true, false
	successRecord := evolution.LearningRecord{
		ID:          "task-success",
		Kind:        evolution.RecordKindTask,
		WorkspaceID: "workspace-a",
		Summary:     "weather lookup shanghai",
		FinalOutput: "sunny",
		Status:      evolution.RecordStatus("new"),
		Success:     &succeeded,
	}
	failedRecord := evolution.LearningRecord{
		ID:          "task-failed",
		Kind:        evolution.RecordKindTask,
		WorkspaceID: "workspace-a",
		Summary:     "forecast for shanghai",
		FinalOutput: "could not complete",
		Status:      evolution.RecordStatus("new"),
		Success:     &failed,
	}
	successfulTasks := []evolution.LearningRecord{successRecord}
	// Evidence is the successful task followed by the failed one.
	evidenceTasks := []evolution.LearningRecord{successRecord, failedRecord}

	patterns, clusteredIDs, err := clusterer.BuildPatternsWithEvidence(
		context.Background(),
		"workspace-a",
		successfulTasks,
		evidenceTasks,
		nil,
		0.8,
	)
	if err != nil {
		t.Fatalf("BuildPatternsWithEvidence: %v", err)
	}
	if n := len(patterns); n != 0 {
		t.Fatalf("len(patterns) = %d, want 0: %#v", n, patterns)
	}
	if len(clusteredIDs) != 0 {
		t.Fatalf("clusteredIDs = %v, want none", clusteredIDs)
	}
}
func TestLLMPatternClusterer_MarksAllAcceptedEvidenceClusteredButStoresSuccessfulTaskIDs(t *testing.T) {
provider := &llmClusterTestProvider{
content: `{"clusters":[{"label":"weather-lookup","summary":"lookup weather","task_record_ids":["task-success","task-failed"],"cluster_reason":"same weather lookup goal"}]}`,
defaultModel: "test-model",
}
assertClustererMarksAllAcceptedEvidenceClustered(
t,
provider,
"weather lookup shanghai",
"forecast for shanghai",
"could not complete",
"1",
)
}
func TestLLMPatternClusterer_FallbackMarksAllAcceptedEvidenceClustered(t *testing.T) {
provider := &llmClusterTestProvider{
content: `not-json`,
defaultModel: "test-model",
}
assertClustererMarksAllAcceptedEvidenceClustered(
t,
provider,
"weather lookup 100",
"weather lookup 200",
"partial result",
"fallback pattern",
)
}
// assertClustererMarksAllAcceptedEvidenceClustered builds an LLM clusterer
// around provider, with a heuristic fallback, and calls
// BuildPatternsWithEvidence with one successful task and an evidence set of
// that task plus one failed task, passing 0.5 as the final argument. It fails
// the test unless exactly one pattern is returned, that pattern lists only
// "task-success", and the clustered IDs are "task-success" then "task-failed".
//
// successSummary is the Summary of the successful task. failedSummary and
// failedOutput are the Summary and FinalOutput of the failed task.
// wantPatternDescription is only used in the failure message for the pattern
// count check.
func assertClustererMarksAllAcceptedEvidenceClustered(
	t *testing.T,
	provider *llmClusterTestProvider,
	successSummary string,
	failedSummary string,
	failedOutput string,
	wantPatternDescription string,
) {
	t.Helper()

	fixedNow := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	fallback := evolution.NewHeuristicPatternClusterer(1, nil)
	clusterer := evolution.NewLLMPatternClusterer(provider, "test-model", fallback, 1, fixedNow)

	succeeded, failed := true, false
	successRecord := evolution.LearningRecord{
		ID:          "task-success",
		Kind:        evolution.RecordKindTask,
		WorkspaceID: "workspace-a",
		Summary:     successSummary,
		FinalOutput: "sunny",
		Status:      evolution.RecordStatus("new"),
		Success:     &succeeded,
	}
	failedRecord := evolution.LearningRecord{
		ID:          "task-failed",
		Kind:        evolution.RecordKindTask,
		WorkspaceID: "workspace-a",
		Summary:     failedSummary,
		FinalOutput: failedOutput,
		Status:      evolution.RecordStatus("new"),
		Success:     &failed,
	}
	successfulTasks := []evolution.LearningRecord{successRecord}
	// Evidence is the successful task followed by the failed one.
	evidenceTasks := []evolution.LearningRecord{successRecord, failedRecord}

	patterns, clusteredIDs, err := clusterer.BuildPatternsWithEvidence(
		context.Background(),
		"workspace-a",
		successfulTasks,
		evidenceTasks,
		nil,
		0.5,
	)
	if err != nil {
		t.Fatalf("BuildPatternsWithEvidence: %v", err)
	}
	if n := len(patterns); n != 1 {
		t.Fatalf("len(patterns) = %d, want %s: %#v", n, wantPatternDescription, patterns)
	}

	// The stored pattern must reference only the successful task...
	if joined := strings.Join(patterns[0].TaskRecordIDs, ","); joined != "task-success" {
		t.Fatalf("pattern TaskRecordIDs = %v, want only successful task", patterns[0].TaskRecordIDs)
	}
	// ...while every evidence record is reported as clustered, in order.
	if joined := strings.Join(clusteredIDs, ","); joined != "task-success,task-failed" {
		t.Fatalf("clusteredIDs = %v, want all accepted evidence IDs", clusteredIDs)
	}
}