Files
picoclaw/pkg/evolution/runtime_cold_path_test.go
T
lxowalle b3a7b7ad64 feat: agent self evolution (#2847)
* feat: add agent self-evolution

* fix ci

* delete unused doc

* fix lint

* fix evolution review issues
2026-05-11 16:13:27 +08:00

1286 lines
40 KiB
Go

package evolution_test
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/evolution"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/skills"
)
// stubDraftGenerator is a test double: its GenerateDraft method returns the
// draft and err fields verbatim, ignoring all arguments.
type stubDraftGenerator struct {
// draft is returned as the first result of GenerateDraft.
draft evolution.SkillDraft
// err is returned as the second result of GenerateDraft.
err error
}
// GenerateDraft disregards the context, record and skill list and simply
// hands back the draft/error pair stored on the stub.
func (g stubDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	draft, err := g.draft, g.err
	return draft, err
}
// sequenceDraftGenerator is a test double that replays scripted results, one
// entry per GenerateDraft call.
type sequenceDraftGenerator struct {
// results holds the scripted outcomes, consumed in slice order.
results []draftGenerationResult
// index is the position in results that the next GenerateDraft call returns.
index int
}
// draftGenerationResult is one scripted outcome for sequenceDraftGenerator:
// the draft and error returned together from a single GenerateDraft call.
type draftGenerationResult struct {
// draft is the draft value to return.
draft evolution.SkillDraft
// err is the error value to return alongside draft.
err error
}
// evidenceCaptureDraftGenerator is a test double that remembers the evidence
// passed to GenerateDraftWithEvidence so a test can inspect it afterwards.
type evidenceCaptureDraftGenerator struct {
// evidence is overwritten on every GenerateDraftWithEvidence call.
evidence evolution.DraftEvidence
}
// GenerateDraft is the evidence-free entry point; it ignores its arguments
// and always yields a zero-value draft with a nil error.
func (g *evidenceCaptureDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	var empty evolution.SkillDraft
	return empty, nil
}
// GenerateDraftWithEvidence stores the supplied evidence on the receiver and
// returns a fixed "create weather skill" shortcut draft with a nil error.
func (g *evidenceCaptureDraftGenerator) GenerateDraftWithEvidence(
	_ context.Context,
	_ evolution.LearningRecord,
	_ []skills.SkillInfo,
	evidence evolution.DraftEvidence,
) (evolution.SkillDraft, error) {
	// Capture first so the test can assert on what the runtime passed in.
	g.evidence = evidence

	var draft evolution.SkillDraft
	draft.ID = "draft-evidence"
	draft.TargetSkillName = "weather"
	draft.DraftType = evolution.DraftTypeShortcut
	draft.ChangeKind = evolution.ChangeKindCreate
	draft.HumanSummary = "weather helper"
	draft.BodyOrPatch = "---\nname: weather\ndescription: weather helper\n---\n# Weather\nUse current workspace evidence.\n"
	return draft, nil
}
// stubSuccessJudge is a test double for task-success judging: it logs every
// record ID it is asked about and answers from a lookup table.
type stubSuccessJudge struct {
// decisions maps a record ID to the decision JudgeTaskRecord returns for it;
// IDs without an entry receive a successful default decision.
decisions map[string]evolution.TaskSuccessDecision
// calls lists the record IDs passed to JudgeTaskRecord, in call order.
calls []string
}
// JudgeTaskRecord appends the record's ID to j.calls and returns the scripted
// decision for that ID. When no decision is scripted it returns a successful
// decision with the reason "default success". The error is always nil.
func (j *stubSuccessJudge) JudgeTaskRecord(
	_ context.Context,
	record evolution.LearningRecord,
) (evolution.TaskSuccessDecision, error) {
	j.calls = append(j.calls, record.ID)

	// Reading from a nil decisions map is fine: it just reports "not scripted".
	decision, scripted := j.decisions[record.ID]
	if !scripted {
		decision = evolution.TaskSuccessDecision{Success: true, Reason: "default success"}
	}
	return decision, nil
}
// GenerateDraft returns the next scripted result and advances the cursor.
// Once every scripted result has been consumed it returns a zero-value draft
// and a nil error.
func (g *sequenceDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	if g.index < len(g.results) {
		next := g.results[g.index]
		g.index++
		return next.draft, next.err
	}
	return evolution.SkillDraft{}, nil
}
// TestRuntime_RunColdPathOnce_GeneratesCandidateDraft seeds a single rule
// record in "ready" status, runs the cold path once with a stub generator,
// and expects exactly one stored draft whose status is DraftStatusCandidate.
func TestRuntime_RunColdPathOnce_GeneratesCandidateDraft(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Draft the stub generator hands to the runtime.
	stubbed := evolution.SkillDraft{
		ID:              "draft-1",
		WorkspaceID:     workspace,
		SourceRecordID:  "rule-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer native-name path first",
		BodyOrPatch:     "## Start Here\nUse native-name query first.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Now:            func() time.Time { return time.Unix(1700001000, 0).UTC() },
		DraftGenerator: stubDraftGenerator{draft: stubbed},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}
	if got, want := drafts[0].Status, evolution.DraftStatusCandidate; got != want {
		t.Fatalf("Status = %q, want %q", got, want)
	}
}
// TestRuntime_RunColdPathOnce_AdmitsOnlyRecordsApprovedBySuccessJudge stores
// three task records: one recorded as failed, one recorded as successful but
// rejected by the stub judge, and one the judge accepts. After one cold-path
// run it asserts that the judge was called for exactly "task-rejected" then
// "task-admitted", that the first pattern record references only
// "task-admitted" and has a label, and that the single draft's
// SourceRecordID equals that pattern's ID.
func TestRuntime_RunColdPathOnce_AdmitsOnlyRecordsApprovedBySuccessJudge(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	succeeded, failed := true, false
	tasks := []evolution.LearningRecord{
		{
			ID:             "task-failed",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000000, 0).UTC(),
			Summary:        "failed weather attempt",
			UserGoal:       "check weather in shanghai",
			FinalOutput:    "tool failed",
			Status:         evolution.RecordStatus("new"),
			Success:        &failed,
			UsedSkillNames: []string{"weather", "native-name"},
			ToolKinds:      []string{"read_file"},
		},
		{
			ID:             "task-rejected",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000100, 0).UTC(),
			Summary:        "partial weather answer",
			UserGoal:       "check weather in shanghai",
			FinalOutput:    "I will check it next",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather", "native-name"},
			ToolKinds:      []string{"read_file"},
			ToolExecutions: []evolution.ToolExecutionRecord{
				{Name: "read_file", Success: true},
				{Name: "read_file", Success: true},
			},
		},
		{
			ID:              "task-admitted",
			Kind:            evolution.RecordKindTask,
			WorkspaceID:     workspace,
			CreatedAt:       time.Unix(1700000200, 0).UTC(),
			Summary:         "weather answer delivered",
			UserGoal:        "check weather in shanghai",
			FinalOutput:     "sunny, 26C",
			Status:          evolution.RecordStatus("new"),
			Success:         &succeeded,
			UsedSkillNames:  []string{"weather", "native-name"},
			AddedSkillNames: []string{"native-name"},
			ToolKinds:       []string{"read_file"},
			ToolExecutions: []evolution.ToolExecutionRecord{
				{Name: "read_file", Success: true},
				{Name: "read_file", Success: true},
			},
			AttemptTrail: &evolution.AttemptTrail{
				AttemptedSkills:     []string{"weather"},
				FinalSuccessfulPath: []string{"weather"},
			},
		},
	}
	if err := store.AppendLearningRecords(tasks); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{decisions: map[string]evolution.TaskSuccessDecision{
		"task-rejected": {Success: false, Reason: "only partial reasoning"},
		"task-admitted": {Success: true, Reason: "goal achieved"},
	}}
	stubbed := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1},
		Store:          store,
		SuccessJudge:   judge,
		Organizer:      evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: stubbed},
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	// The record already marked as failed must never reach the judge.
	calls := judge.calls
	inOrder := len(calls) == 2 && calls[0] == "task-rejected" && calls[1] == "task-admitted"
	if !inOrder {
		t.Fatalf("judge calls = %v, want [task-rejected task-admitted]", judge.calls)
	}

	allRecords, err := store.LoadLearningRecords()
	if err != nil {
		t.Fatalf("LoadLearningRecords: %v", err)
	}

	// Pick the first pattern record, if any.
	patternAt := -1
	for i := range allRecords {
		if allRecords[i].Kind == evolution.RecordKindPattern {
			patternAt = i
			break
		}
	}
	if patternAt < 0 {
		t.Fatal("expected generated pattern record")
	}
	pattern := allRecords[patternAt]

	ids := pattern.TaskRecordIDs
	if onlyAdmitted := len(ids) == 1 && ids[0] == "task-admitted"; !onlyAdmitted {
		t.Fatalf("TaskRecordIDs = %v, want [task-admitted]", pattern.TaskRecordIDs)
	}
	if pattern.Label == "" {
		t.Fatal("pattern Label should not be empty")
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}
	if got, want := drafts[0].SourceRecordID, pattern.ID; got != want {
		t.Fatalf("draft SourceRecordID = %q, want %q", got, want)
	}
}
// TestRuntime_RunColdPathOnce_RejectsClusterBelowMinSuccessRatio stores one
// successful and two failed "weather" task records, configures the runtime
// with MinSuccessRatio 0.8, and expects a cold-path run to leave neither
// pattern records nor drafts behind.
func TestRuntime_RunColdPathOnce_RejectsClusterBelowMinSuccessRatio(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	succeeded, failed := true, false
	// weatherTask builds one "new" task record that used only the weather skill.
	weatherTask := func(id string, unix int64, summary, output string, success *bool) evolution.LearningRecord {
		return evolution.LearningRecord{
			ID:             id,
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(unix, 0).UTC(),
			Summary:        summary,
			FinalOutput:    output,
			Status:         evolution.RecordStatus("new"),
			Success:        success,
			UsedSkillNames: []string{"weather"},
		}
	}
	tasks := []evolution.LearningRecord{
		weatherTask("task-success", 1700000200, "weather lookup 100", "sunny", &succeeded),
		weatherTask("task-failed-1", 1700000100, "weather lookup 200", "failed", &failed),
		weatherTask("task-failed-2", 1700000000, "weather lookup 300", "failed", &failed),
	}
	if err := store.AppendLearningRecords(tasks); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	stubbed := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8},
		Store:          store,
		SuccessJudge:   &stubSuccessJudge{},
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: stubbed},
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	patterns, err := store.LoadPatternRecords()
	if err != nil {
		t.Fatalf("LoadPatternRecords: %v", err)
	}
	if got := len(patterns); got != 0 {
		t.Fatalf("len(patterns) = %d, want 0", got)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 0 {
		t.Fatalf("len(drafts) = %d, want 0", got)
	}
}
// TestRuntime_RunColdPathOnce_FallbackUsesJudgeAdjustedSuccessRatio stores two
// task records that are both recorded as successful, but scripts the judge to
// reject one of them. The LLM clusterer is backed by a provider that answers
// with non-JSON content and is given a heuristic clusterer as its third
// argument (presumably the fallback the test name refers to). With
// MinSuccessRatio 0.8 the run must produce no pattern records and no drafts.
func TestRuntime_RunColdPathOnce_FallbackUsesJudgeAdjustedSuccessRatio(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	succeeded := true
	// weatherTask builds one "new", recorded-as-successful weather task.
	weatherTask := func(id string, unix int64, summary, output string) evolution.LearningRecord {
		return evolution.LearningRecord{
			ID:             id,
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(unix, 0).UTC(),
			Summary:        summary,
			FinalOutput:    output,
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		}
	}
	tasks := []evolution.LearningRecord{
		weatherTask("task-success", 1700000200, "weather lookup 100", "sunny"),
		weatherTask("task-judge-rejected", 1700000100, "weather lookup 200", "partial answer"),
	}
	if err := store.AppendLearningRecords(tasks); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{decisions: map[string]evolution.TaskSuccessDecision{
		"task-success":        {Success: true, Reason: "goal achieved"},
		"task-judge-rejected": {Success: false, Reason: "partial result"},
	}}

	llm := &llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"}
	heuristic := evolution.NewHeuristicPatternClusterer(1, nil)
	clock := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	clusterer := evolution.NewLLMPatternClusterer(llm, "test-model", heuristic, 1, clock)

	stubbed := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	opts := evolution.RuntimeOptions{
		Config:           config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8},
		Store:            store,
		PatternClusterer: clusterer,
		SuccessJudge:     judge,
		SkillsRecaller:   evolution.NewSkillsRecaller(workspace),
		DraftGenerator:   stubDraftGenerator{draft: stubbed},
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	patterns, err := store.LoadPatternRecords()
	if err != nil {
		t.Fatalf("LoadPatternRecords: %v", err)
	}
	if got := len(patterns); got != 0 {
		t.Fatalf("len(patterns) = %d, want 0", got)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 0 {
		t.Fatalf("len(drafts) = %d, want 0", got)
	}
}
// TestRuntime_RunColdPathOnce_FallbackMarksAcceptedFailureEvidenceClustered
// uses the same fixture as the judge-adjusted-ratio test (two tasks recorded
// as successful, one rejected by the judge, LLM clusterer fed non-JSON) but
// with MinSuccessRatio 0.5. It expects one pattern that references only
// "task-success", and expects both task records to end up with status
// "clustered".
func TestRuntime_RunColdPathOnce_FallbackMarksAcceptedFailureEvidenceClustered(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	succeeded := true
	// weatherTask builds one "new", recorded-as-successful weather task.
	weatherTask := func(id string, unix int64, summary, output string) evolution.LearningRecord {
		return evolution.LearningRecord{
			ID:             id,
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(unix, 0).UTC(),
			Summary:        summary,
			FinalOutput:    output,
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		}
	}
	tasks := []evolution.LearningRecord{
		weatherTask("task-success", 1700000200, "weather lookup 100", "sunny"),
		weatherTask("task-judge-rejected", 1700000100, "weather lookup 200", "partial answer"),
	}
	if err := store.AppendLearningRecords(tasks); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{decisions: map[string]evolution.TaskSuccessDecision{
		"task-success":        {Success: true, Reason: "goal achieved"},
		"task-judge-rejected": {Success: false, Reason: "partial result"},
	}}

	llm := &llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"}
	heuristic := evolution.NewHeuristicPatternClusterer(1, nil)
	clock := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	clusterer := evolution.NewLLMPatternClusterer(llm, "test-model", heuristic, 1, clock)

	stubbed := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	opts := evolution.RuntimeOptions{
		Config:           config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.5},
		Store:            store,
		PatternClusterer: clusterer,
		SuccessJudge:     judge,
		SkillsRecaller:   evolution.NewSkillsRecaller(workspace),
		DraftGenerator:   stubDraftGenerator{draft: stubbed},
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	patterns, err := store.LoadPatternRecords()
	if err != nil {
		t.Fatalf("LoadPatternRecords: %v", err)
	}
	if got := len(patterns); got != 1 {
		t.Fatalf("len(patterns) = %d, want 1", got)
	}
	ids := patterns[0].TaskRecordIDs
	if strings.Join(ids, ",") != "task-success" {
		t.Fatalf("pattern TaskRecordIDs = %v, want only successful task", ids)
	}

	taskRecords, err := store.LoadTaskRecords()
	if err != nil {
		t.Fatalf("LoadTaskRecords: %v", err)
	}
	statusByID := map[string]evolution.RecordStatus{}
	for i := range taskRecords {
		statusByID[taskRecords[i].ID] = taskRecords[i].Status
	}

	// Both the admitted task and the judge-rejected one must be marked.
	wantStatus := evolution.RecordStatus("clustered")
	for _, id := range [...]string{"task-success", "task-judge-rejected"} {
		if got := statusByID[id]; got != wantStatus {
			t.Fatalf("statusByID[%s] = %q, want clustered", id, got)
		}
	}
}
// TestRuntime_RunColdPathOnce_DraftEvidenceDoesNotCrossWorkspaceWithDuplicateTaskID
// writes two task records that share the ID "main-turn-1" but belong to
// different workspaces into one shared state directory, plus a ready pattern
// for workspace A that references that ID. After a cold-path run for
// workspace A, the evidence captured by the generator must contain exactly
// one task record, and it must be workspace A's.
func TestRuntime_RunColdPathOnce_DraftEvidenceDoesNotCrossWorkspaceWithDuplicateTaskID(t *testing.T) {
	sharedState := t.TempDir()
	workspaceA := t.TempDir()
	workspaceB := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState))

	succeeded := true
	duplicates := []evolution.LearningRecord{
		{
			ID:             "main-turn-1",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspaceB,
			CreatedAt:      time.Unix(1700000000, 0).UTC(),
			Summary:        "other workspace weather",
			FinalOutput:    "foreign workspace output",
			Status:         evolution.RecordStatus("clustered"),
			Success:        &succeeded,
			UsedSkillNames: []string{"foreign-skill"},
		},
		{
			ID:             "main-turn-1",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspaceA,
			CreatedAt:      time.Unix(1700000001, 0).UTC(),
			Summary:        "current workspace weather",
			FinalOutput:    "current workspace output",
			Status:         evolution.RecordStatus("clustered"),
			Success:        &succeeded,
			UsedSkillNames: []string{"current-skill"},
		},
	}
	if err := store.AppendTaskRecords(context.Background(), duplicates); err != nil {
		t.Fatalf("AppendTaskRecords: %v", err)
	}

	pattern := evolution.LearningRecord{
		ID:            "pattern-workspace-a",
		Kind:          evolution.RecordKindPattern,
		WorkspaceID:   workspaceA,
		CreatedAt:     time.Unix(1700000002, 0).UTC(),
		Summary:       "current workspace weather",
		Status:        evolution.RecordStatus("ready"),
		TaskRecordIDs: []string{"main-turn-1"},
	}
	if err := store.AppendPatternRecords([]evolution.LearningRecord{pattern}); err != nil {
		t.Fatalf("AppendPatternRecords: %v", err)
	}

	generator := &evidenceCaptureDraftGenerator{}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft", StateDir: sharedState},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspaceA),
		DraftGenerator: generator,
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspaceA)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	captured := generator.evidence.TaskRecords
	if len(captured) != 1 {
		t.Fatalf(
			"evidence task count = %d, want 1: %#v",
			len(captured),
			captured,
		)
	}

	task := captured[0]
	if got := task.WorkspaceID; got != workspaceA {
		t.Fatalf("evidence workspace = %q, want %q", got, workspaceA)
	}
	if got := task.FinalOutput; got != "current workspace output" {
		t.Fatalf("evidence FinalOutput = %q, want current workspace output", got)
	}
	used := task.UsedSkillNames
	if onlyCurrent := len(used) == 1 && used[0] == "current-skill"; !onlyCurrent {
		t.Fatalf("evidence UsedSkillNames = %v, want [current-skill]", used)
	}
}
// TestRuntime_RunColdPathOnce_AdmitsSingleSkillTaskButWaitsForMinTaskCount
// stores a single successful one-skill task and runs the cold path with a
// config that leaves MinTaskCount unset. It asserts the judge was consulted
// for that task exactly once and that no draft was stored.
func TestRuntime_RunColdPathOnce_AdmitsSingleSkillTaskButWaitsForMinTaskCount(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	succeeded := true
	tasks := []evolution.LearningRecord{{
		ID:              "task-simple",
		Kind:            evolution.RecordKindTask,
		WorkspaceID:     workspace,
		CreatedAt:       time.Unix(1700000250, 0).UTC(),
		Summary:         "simple weather lookup",
		UserGoal:        "check weather",
		FinalOutput:     "sunny",
		Status:          evolution.RecordStatus("new"),
		Success:         &succeeded,
		UsedSkillNames:  []string{"weather"},
		AddedSkillNames: []string{"weather"},
		ToolKinds:       []string{"read_file"},
		ToolExecutions: []evolution.ToolExecutionRecord{
			{Name: "read_file", Success: true, SkillNames: []string{"weather"}},
		},
		AttemptTrail: &evolution.AttemptTrail{
			AttemptedSkills:     []string{"weather"},
			FinalSuccessfulPath: []string{"weather"},
		},
	}}
	if err := store.AppendLearningRecords(tasks); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{}
	stubbed := evolution.SkillDraft{
		ID:              "draft-simple",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "simple draft",
		BodyOrPatch:     "## Start Here\nUse weather.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		SuccessJudge:   judge,
		Organizer:      evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: stubbed},
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	calls := judge.calls
	if judgedOnce := len(calls) == 1 && calls[0] == "task-simple"; !judgedOnce {
		t.Fatalf("judge calls = %v, want [task-simple]", calls)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 0 {
		t.Fatalf("len(drafts) = %d, want 0", got)
	}
}
// TestRuntime_RunColdPathOnce_RejectsTaskWhenSuccessJudgeRejects stores one
// richly-detailed task that is recorded as successful, scripts the judge to
// reject it, and asserts that the cold-path run creates no pattern record.
func TestRuntime_RunColdPathOnce_RejectsTaskWhenSuccessJudgeRejects(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	succeeded := true
	tasks := []evolution.LearningRecord{{
		ID:              "task-detailed-path",
		Kind:            evolution.RecordKindTask,
		WorkspaceID:     workspace,
		CreatedAt:       time.Unix(1700000300, 0).UTC(),
		Summary:         "computed theorem chain",
		UserGoal:        "调用三一定理计算100",
		FinalOutput:     "最终结果:100 通过三一定理计算得到 120",
		Status:          evolution.RecordStatus("new"),
		Success:         &succeeded,
		UsedSkillNames:  []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
		AddedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
		ToolKinds:       []string{"read_file"},
		ToolExecutions: []evolution.ToolExecutionRecord{
			{Name: "read_file", Success: true, SkillNames: []string{"three-one-theorem"}},
			{Name: "read_file", Success: true, SkillNames: []string{"four-two-theorem"}},
			{Name: "read_file", Success: true, SkillNames: []string{"five-three-theorem"}},
		},
		AttemptTrail: &evolution.AttemptTrail{
			AttemptedSkills:     []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
			FinalSuccessfulPath: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
		},
	}}
	if err := store.AppendLearningRecords(tasks); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{decisions: map[string]evolution.TaskSuccessDecision{
		"task-detailed-path": {Success: false, Reason: "llm false negative"},
	}}
	stubbed := evolution.SkillDraft{
		ID:              "draft-detailed-path",
		TargetSkillName: "three-one-theorem",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the full theorem chain",
		BodyOrPatch:     "## Start Here\nUse the full three-one, four-two, five-three theorem chain.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		SuccessJudge:   judge,
		Organizer:      evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: stubbed},
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	allRecords, err := store.LoadLearningRecords()
	if err != nil {
		t.Fatalf("LoadLearningRecords: %v", err)
	}
	// Any pattern record at all means the rejected task leaked through.
	for i := range allRecords {
		if allRecords[i].Kind == evolution.RecordKindPattern {
			t.Fatal("unexpected pattern record for rejected task")
		}
	}
}
// TestRuntime_RunColdPathOnce_QuarantinesInvalidDraft has the stub generator
// return a draft with an empty target skill name and an empty body. It
// expects the run to succeed and to store that one draft with status
// DraftStatusQuarantined and at least one scan finding.
func TestRuntime_RunColdPathOnce_QuarantinesInvalidDraft(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "release path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Deliberately broken: no target skill and no body.
	broken := evolution.SkillDraft{
		ID:              "draft-1",
		WorkspaceID:     workspace,
		SourceRecordID:  "rule-1",
		TargetSkillName: "",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "broken",
		BodyOrPatch:     "",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		DraftGenerator: stubDraftGenerator{draft: broken},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}

	stored := drafts[0]
	if got, want := stored.Status, evolution.DraftStatusQuarantined; got != want {
		t.Fatalf("Status = %q, want %q", got, want)
	}
	if len(stored.ScanFindings) == 0 {
		t.Fatal("expected scan findings for invalid draft")
	}
}
// TestRuntime_RunColdPathOnce_DoesNotWriteSkillFile creates a weather
// SKILL.md in the workspace, runs the cold path with Mode "apply" and a draft
// targeting that skill, and asserts the file's bytes are unchanged afterwards.
func TestRuntime_RunColdPathOnce_DoesNotWriteSkillFile(t *testing.T) {
	workspace := t.TempDir()
	skillDir := filepath.Join(workspace, "skills", "weather")
	skillPath := filepath.Join(skillDir, "SKILL.md")
	if err := os.MkdirAll(skillDir, 0o755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}
	skillBody := []byte("---\nname: weather\ndescription: test\n---\n# Weather")
	if err := os.WriteFile(skillPath, skillBody, 0o644); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Snapshot the skill file immediately before the runtime is involved.
	before, err := os.ReadFile(skillPath)
	if err != nil {
		t.Fatalf("ReadFile(original): %v", err)
	}

	stubbed := evolution.SkillDraft{
		ID:              "draft-1",
		WorkspaceID:     workspace,
		SourceRecordID:  "rule-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer native-name path first",
		BodyOrPatch:     "## Start Here\nUse native-name query first.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "apply"},
		DraftGenerator: stubDraftGenerator{draft: stubbed},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	after, err := os.ReadFile(skillPath)
	if err != nil {
		t.Fatalf("ReadFile(after): %v", err)
	}
	if unchanged := string(after) == string(before); !unchanged {
		t.Fatalf("skill file changed unexpectedly:\n%s", string(after))
	}
}
// TestRuntime_RunColdPathOnce_UsesDefaultDraftGenerator builds a runtime with
// only Config and Store set (no DraftGenerator). For a ready rule whose
// winning path is "weather" it expects one candidate draft that targets
// "weather" and has a non-empty body.
func TestRuntime_RunColdPathOnce_UsesDefaultDraftGenerator(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	opts := evolution.RuntimeOptions{
		Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:  store,
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}

	generated := drafts[0]
	if got := generated.TargetSkillName; got != "weather" {
		t.Fatalf("TargetSkillName = %q, want weather", got)
	}
	if got, want := generated.Status, evolution.DraftStatusCandidate; got != want {
		t.Fatalf("Status = %q, want %q", got, want)
	}
	if generated.BodyOrPatch == "" {
		t.Fatal("expected generated draft body")
	}
}
// TestRuntime_RunColdPathOnce_UsesLLMDraftGeneratorWhenProviderAvailable wires
// a generator built from a fake provider that replies with a JSON draft. It
// expects one stored draft, exactly one provider call, and the human summary
// taken from the provider's reply.
func TestRuntime_RunColdPathOnce_UsesLLMDraftGeneratorWhenProviderAvailable(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Canned provider reply; the \n stays a JSON escape inside the raw string.
	const llmReply = `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`
	provider := &llmDraftRuntimeProvider{
		response: &providers.LLMResponse{Content: llmReply},
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: evolution.NewDraftGeneratorForWorkspace(workspace, provider, "runtime-explicit-model"),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}
	if got := provider.calls; got != 1 {
		t.Fatalf("provider.calls = %d, want 1", got)
	}
	if got, want := drafts[0].HumanSummary, "Prefer native-name path first"; got != want {
		t.Fatalf("HumanSummary = %q, want %q", got, want)
	}
}
// TestRuntime_RunColdPathOnce_UsesDefaultDraftGeneratorWhenFactoryHasNoProvider
// builds the generator via NewDraftGeneratorForWorkspace with a nil provider
// and an empty model name. It expects one stored draft that targets "weather"
// and has a non-empty body.
func TestRuntime_RunColdPathOnce_UsesDefaultDraftGeneratorWhenFactoryHasNoProvider(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: evolution.NewDraftGeneratorForWorkspace(workspace, nil, ""),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), workspace)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}

	generated := drafts[0]
	if got := generated.TargetSkillName; got != "weather" {
		t.Fatalf("TargetSkillName = %q, want weather", got)
	}
	if generated.BodyOrPatch == "" {
		t.Fatal("expected generated draft body")
	}
}
// TestRuntime_RunColdPathOnce_UsesGeneratorFactoryWorkspaceForFallback places
// an existing weather SKILL.md in the workspace and supplies a
// GeneratorFactory whose provider replies with non-JSON content. It expects
// one stored draft with ChangeKindAppend whose body contains a
// "## Learned Evolution" section.
func TestRuntime_RunColdPathOnce_UsesGeneratorFactoryWorkspaceForFallback(t *testing.T) {
	root := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(root, ""))

	skillDir := filepath.Join(root, "skills", "weather")
	if err := os.MkdirAll(skillDir, 0o755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}
	skillBody := "---\nname: weather\ndescription: workspace weather helper\n---\n# Weather\n## Start Here\nUse the workspace-specific path.\n"
	if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(skillBody), 0o644); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: root,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// The provider's reply cannot be parsed as a draft.
	provider := &llmDraftRuntimeProvider{
		response:     &providers.LLMResponse{Content: `not-json`},
		defaultModel: "runtime-test-model",
	}
	factory := func(dir string) evolution.DraftGenerator {
		return evolution.NewDraftGeneratorForWorkspace(dir, provider, "runtime-explicit-model")
	}
	opts := evolution.RuntimeOptions{
		Config:           config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:            store,
		GeneratorFactory: factory,
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), root)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if got := len(drafts); got != 1 {
		t.Fatalf("len(drafts) = %d, want 1", got)
	}

	generated := drafts[0]
	if got, want := generated.ChangeKind, evolution.ChangeKindAppend; got != want {
		t.Fatalf("ChangeKind = %q, want %q", got, want)
	}
	if body := generated.BodyOrPatch; !strings.Contains(body, "## Learned Evolution") {
		t.Fatalf("BodyOrPatch = %q, want appended learned evolution section", body)
	}
}
// TestRuntime_RunColdPathOnce_PersistsEarlierDraftWhenLaterRuleFails stores
// two ready rules and runs the cold path with a sequenceDraftGenerator that
// is configured with one successful draft result followed by a
// context.DeadlineExceeded error result.
//
// It expects RunColdPathOnce to return an error matching
// context.DeadlineExceeded while the store still holds exactly one draft,
// whose SourceRecordID is "rule-1".
// NOTE(review): presumably the generator hands out its results in order, one
// per call — confirm against sequenceDraftGenerator.GenerateDraft.
func TestRuntime_RunColdPathOnce_PersistsEarlierDraftWhenLaterRuleFails(t *testing.T) {
	wsRoot := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(wsRoot, ""))

	weatherRule := evolution.LearningRecord{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: wsRoot,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}
	releaseRule := evolution.LearningRecord{
		ID:          "rule-2",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: wsRoot,
		CreatedAt:   time.Unix(1700000100, 0).UTC(),
		Summary:     "release path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}
	if err := store.AppendLearningRecords([]evolution.LearningRecord{weatherRule, releaseRule}); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// First scripted result is a usable draft, the second is a timeout.
	okDraft := evolution.SkillDraft{
		ID:              "draft-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer native-name path first",
		BodyOrPatch:     "## Start Here\nUse native-name query first.",
	}
	generator := &sequenceDraftGenerator{
		results: []draftGenerationResult{
			{draft: okDraft},
			{err: context.DeadlineExceeded},
		},
	}

	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: generator,
		SkillsRecaller: evolution.NewSkillsRecaller(wsRoot),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	runErr := rt.RunColdPathOnce(context.Background(), wsRoot)
	if !errors.Is(runErr, context.DeadlineExceeded) {
		t.Fatalf("RunColdPathOnce error = %v, want %v", runErr, context.DeadlineExceeded)
	}

	stored, loadErr := store.LoadDrafts()
	if loadErr != nil {
		t.Fatalf("LoadDrafts: %v", loadErr)
	}
	if n := len(stored); n != 1 {
		t.Fatalf("len(drafts) = %d, want 1", n)
	}
	if got := stored[0].SourceRecordID; got != "rule-1" {
		t.Fatalf("SourceRecordID = %q, want rule-1", got)
	}
}
// TestRuntime_RunColdPathOnce_RegeneratesAfterQuarantinedDraft stores one
// ready rule together with an already-saved draft for that rule whose status
// is DraftStatusQuarantined, then runs the cold path with a stub generator
// that always returns the draft "draft-new".
//
// It expects the store to hold two drafts afterwards, with "draft-new" at
// index 1.
func TestRuntime_RunColdPathOnce_RegeneratesAfterQuarantinedDraft(t *testing.T) {
	wsRoot := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(wsRoot, ""))

	records := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: wsRoot,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(records); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Pre-existing quarantined draft that points at the same rule.
	quarantined := evolution.SkillDraft{
		ID:              "draft-old",
		WorkspaceID:     wsRoot,
		CreatedAt:       time.Unix(1700000100, 0).UTC(),
		SourceRecordID:  "rule-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "broken attempt",
		BodyOrPatch:     "## Start Here\nBroken content.",
		Status:          evolution.DraftStatusQuarantined,
		ScanFindings:    []string{"apply failed"},
	}
	if err := store.SaveDrafts([]evolution.SkillDraft{quarantined}); err != nil {
		t.Fatalf("SaveDrafts: %v", err)
	}

	replacement := evolution.SkillDraft{
		ID:              "draft-new",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "fixed attempt",
		BodyOrPatch:     "## Start Here\nUse native-name query first.",
	}
	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: stubDraftGenerator{draft: replacement},
		SkillsRecaller: evolution.NewSkillsRecaller(wsRoot),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	err = rt.RunColdPathOnce(context.Background(), wsRoot)
	if err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	stored, loadErr := store.LoadDrafts()
	if loadErr != nil {
		t.Fatalf("LoadDrafts: %v", loadErr)
	}
	if n := len(stored); n != 2 {
		t.Fatalf("len(drafts) = %d, want 2", n)
	}
	if got := stored[1].ID; got != "draft-new" {
		t.Fatalf("drafts[1].ID = %q, want draft-new", got)
	}
}
// llmDraftRuntimeProvider is a test double whose Chat method returns a canned
// response/error pair and counts how many times it was invoked.
type llmDraftRuntimeProvider struct {
	// response is the value Chat returns as its first result.
	response *providers.LLMResponse
	// err is the value Chat returns as its second result.
	err error
	// calls is incremented on every Chat invocation.
	calls int
	// defaultModel, when non-empty, is what GetDefaultModel reports.
	defaultModel string
}
// Chat ignores all of its arguments, records the invocation in p.calls, and
// returns the response and error configured on the provider.
func (p *llmDraftRuntimeProvider) Chat(
	_ context.Context,
	_ []providers.Message,
	_ []providers.ToolDefinition,
	_ string,
	_ map[string]any,
) (*providers.LLMResponse, error) {
	p.calls++
	resp, err := p.response, p.err
	return resp, err
}
// GetDefaultModel returns the configured defaultModel, or
// "runtime-test-model" when none was set.
func (p *llmDraftRuntimeProvider) GetDefaultModel() string {
	const fallbackModel = "runtime-test-model"
	if p.defaultModel == "" {
		return fallbackModel
	}
	return p.defaultModel
}