mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
b3a7b7ad64
* feat: add agent self-evolution * fix ci * delete unused doc * fix lint * fix evolution review issues
1286 lines
40 KiB
Go
1286 lines
40 KiB
Go
package evolution_test
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/config"
|
|
"github.com/sipeed/picoclaw/pkg/evolution"
|
|
"github.com/sipeed/picoclaw/pkg/providers"
|
|
"github.com/sipeed/picoclaw/pkg/skills"
|
|
)
|
|
|
|
type stubDraftGenerator struct {
|
|
draft evolution.SkillDraft
|
|
err error
|
|
}
|
|
|
|
func (g stubDraftGenerator) GenerateDraft(
|
|
_ context.Context,
|
|
_ evolution.LearningRecord,
|
|
_ []skills.SkillInfo,
|
|
) (evolution.SkillDraft, error) {
|
|
return g.draft, g.err
|
|
}
|
|
|
|
type sequenceDraftGenerator struct {
|
|
results []draftGenerationResult
|
|
index int
|
|
}
|
|
|
|
type draftGenerationResult struct {
|
|
draft evolution.SkillDraft
|
|
err error
|
|
}
|
|
|
|
type evidenceCaptureDraftGenerator struct {
|
|
evidence evolution.DraftEvidence
|
|
}
|
|
|
|
func (g *evidenceCaptureDraftGenerator) GenerateDraft(
|
|
_ context.Context,
|
|
_ evolution.LearningRecord,
|
|
_ []skills.SkillInfo,
|
|
) (evolution.SkillDraft, error) {
|
|
return evolution.SkillDraft{}, nil
|
|
}
|
|
|
|
func (g *evidenceCaptureDraftGenerator) GenerateDraftWithEvidence(
|
|
_ context.Context,
|
|
_ evolution.LearningRecord,
|
|
_ []skills.SkillInfo,
|
|
evidence evolution.DraftEvidence,
|
|
) (evolution.SkillDraft, error) {
|
|
g.evidence = evidence
|
|
return evolution.SkillDraft{
|
|
ID: "draft-evidence",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindCreate,
|
|
HumanSummary: "weather helper",
|
|
BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\nUse current workspace evidence.\n",
|
|
}, nil
|
|
}
|
|
|
|
type stubSuccessJudge struct {
|
|
decisions map[string]evolution.TaskSuccessDecision
|
|
calls []string
|
|
}
|
|
|
|
func (j *stubSuccessJudge) JudgeTaskRecord(
|
|
_ context.Context,
|
|
record evolution.LearningRecord,
|
|
) (evolution.TaskSuccessDecision, error) {
|
|
j.calls = append(j.calls, record.ID)
|
|
if decision, ok := j.decisions[record.ID]; ok {
|
|
return decision, nil
|
|
}
|
|
return evolution.TaskSuccessDecision{Success: true, Reason: "default success"}, nil
|
|
}
|
|
|
|
func (g *sequenceDraftGenerator) GenerateDraft(
|
|
_ context.Context,
|
|
_ evolution.LearningRecord,
|
|
_ []skills.SkillInfo,
|
|
) (evolution.SkillDraft, error) {
|
|
if g.index >= len(g.results) {
|
|
return evolution.SkillDraft{}, nil
|
|
}
|
|
result := g.results[g.index]
|
|
g.index++
|
|
return result.draft, result.err
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_GeneratesCandidateDraft(t *testing.T) {
|
|
root := t.TempDir()
|
|
paths := evolution.NewPaths(root, "")
|
|
store := evolution.NewStore(paths)
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Now: func() time.Time { return time.Unix(1700001000, 0).UTC() },
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-1",
|
|
WorkspaceID: root,
|
|
SourceRecordID: "rule-1",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer native-name path first",
|
|
BodyOrPatch: "## Start Here\nUse native-name query first.",
|
|
},
|
|
},
|
|
Store: store,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].Status != evolution.DraftStatusCandidate {
|
|
t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate)
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_AdmitsOnlyRecordsApprovedBySuccessJudge(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
ok := true
|
|
failed := false
|
|
|
|
records := []evolution.LearningRecord{
|
|
{
|
|
ID: "task-failed",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "failed weather attempt",
|
|
UserGoal: "check weather in shanghai",
|
|
FinalOutput: "tool failed",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &failed,
|
|
UsedSkillNames: []string{"weather", "native-name"},
|
|
ToolKinds: []string{"read_file"},
|
|
},
|
|
{
|
|
ID: "task-rejected",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000100, 0).UTC(),
|
|
Summary: "partial weather answer",
|
|
UserGoal: "check weather in shanghai",
|
|
FinalOutput: "I will check it next",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather", "native-name"},
|
|
ToolKinds: []string{"read_file"},
|
|
ToolExecutions: []evolution.ToolExecutionRecord{
|
|
{Name: "read_file", Success: true},
|
|
{Name: "read_file", Success: true},
|
|
},
|
|
},
|
|
{
|
|
ID: "task-admitted",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000200, 0).UTC(),
|
|
Summary: "weather answer delivered",
|
|
UserGoal: "check weather in shanghai",
|
|
FinalOutput: "sunny, 26C",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather", "native-name"},
|
|
AddedSkillNames: []string{"native-name"},
|
|
ToolKinds: []string{"read_file"},
|
|
ToolExecutions: []evolution.ToolExecutionRecord{
|
|
{Name: "read_file", Success: true},
|
|
{Name: "read_file", Success: true},
|
|
},
|
|
AttemptTrail: &evolution.AttemptTrail{
|
|
AttemptedSkills: []string{"weather"},
|
|
FinalSuccessfulPath: []string{"weather"},
|
|
},
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords(records); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
judge := &stubSuccessJudge{
|
|
decisions: map[string]evolution.TaskSuccessDecision{
|
|
"task-rejected": {Success: false, Reason: "only partial reasoning"},
|
|
"task-admitted": {Success: true, Reason: "goal achieved"},
|
|
},
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1},
|
|
Store: store,
|
|
SuccessJudge: judge,
|
|
Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-weather",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer the proven weather path",
|
|
BodyOrPatch: "## Start Here\nUse the weather path directly.",
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
if len(judge.calls) != 2 || judge.calls[0] != "task-rejected" || judge.calls[1] != "task-admitted" {
|
|
t.Fatalf("judge calls = %v, want [task-rejected task-admitted]", judge.calls)
|
|
}
|
|
|
|
allRecords, err := store.LoadLearningRecords()
|
|
if err != nil {
|
|
t.Fatalf("LoadLearningRecords: %v", err)
|
|
}
|
|
|
|
var pattern evolution.LearningRecord
|
|
foundPattern := false
|
|
for _, record := range allRecords {
|
|
if record.Kind != evolution.RecordKindPattern {
|
|
continue
|
|
}
|
|
pattern = record
|
|
foundPattern = true
|
|
break
|
|
}
|
|
if !foundPattern {
|
|
t.Fatal("expected generated pattern record")
|
|
}
|
|
if len(pattern.TaskRecordIDs) != 1 || pattern.TaskRecordIDs[0] != "task-admitted" {
|
|
t.Fatalf("TaskRecordIDs = %v, want [task-admitted]", pattern.TaskRecordIDs)
|
|
}
|
|
if pattern.Label == "" {
|
|
t.Fatal("pattern Label should not be empty")
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].SourceRecordID != pattern.ID {
|
|
t.Fatalf("draft SourceRecordID = %q, want %q", drafts[0].SourceRecordID, pattern.ID)
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_RejectsClusterBelowMinSuccessRatio(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
ok := true
|
|
failed := false
|
|
|
|
records := []evolution.LearningRecord{
|
|
{
|
|
ID: "task-success",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000200, 0).UTC(),
|
|
Summary: "weather lookup 100",
|
|
FinalOutput: "sunny",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
{
|
|
ID: "task-failed-1",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000100, 0).UTC(),
|
|
Summary: "weather lookup 200",
|
|
FinalOutput: "failed",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &failed,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
{
|
|
ID: "task-failed-2",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather lookup 300",
|
|
FinalOutput: "failed",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &failed,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords(records); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8},
|
|
Store: store,
|
|
SuccessJudge: &stubSuccessJudge{},
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-weather",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer the proven weather path",
|
|
BodyOrPatch: "## Start Here\nUse the weather path directly.",
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
patterns, err := store.LoadPatternRecords()
|
|
if err != nil {
|
|
t.Fatalf("LoadPatternRecords: %v", err)
|
|
}
|
|
if len(patterns) != 0 {
|
|
t.Fatalf("len(patterns) = %d, want 0", len(patterns))
|
|
}
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 0 {
|
|
t.Fatalf("len(drafts) = %d, want 0", len(drafts))
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_FallbackUsesJudgeAdjustedSuccessRatio(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
ok := true
|
|
|
|
records := []evolution.LearningRecord{
|
|
{
|
|
ID: "task-success",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000200, 0).UTC(),
|
|
Summary: "weather lookup 100",
|
|
FinalOutput: "sunny",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
{
|
|
ID: "task-judge-rejected",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000100, 0).UTC(),
|
|
Summary: "weather lookup 200",
|
|
FinalOutput: "partial answer",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords(records); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
judge := &stubSuccessJudge{
|
|
decisions: map[string]evolution.TaskSuccessDecision{
|
|
"task-success": {Success: true, Reason: "goal achieved"},
|
|
"task-judge-rejected": {Success: false, Reason: "partial result"},
|
|
},
|
|
}
|
|
clusterer := evolution.NewLLMPatternClusterer(
|
|
&llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"},
|
|
"test-model",
|
|
evolution.NewHeuristicPatternClusterer(1, nil),
|
|
1,
|
|
func() time.Time { return time.Unix(1700000000, 0).UTC() },
|
|
)
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8},
|
|
Store: store,
|
|
PatternClusterer: clusterer,
|
|
SuccessJudge: judge,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-weather",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer the proven weather path",
|
|
BodyOrPatch: "## Start Here\nUse the weather path directly.",
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
patterns, err := store.LoadPatternRecords()
|
|
if err != nil {
|
|
t.Fatalf("LoadPatternRecords: %v", err)
|
|
}
|
|
if len(patterns) != 0 {
|
|
t.Fatalf("len(patterns) = %d, want 0", len(patterns))
|
|
}
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 0 {
|
|
t.Fatalf("len(drafts) = %d, want 0", len(drafts))
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_FallbackMarksAcceptedFailureEvidenceClustered(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
ok := true
|
|
|
|
records := []evolution.LearningRecord{
|
|
{
|
|
ID: "task-success",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000200, 0).UTC(),
|
|
Summary: "weather lookup 100",
|
|
FinalOutput: "sunny",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
{
|
|
ID: "task-judge-rejected",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000100, 0).UTC(),
|
|
Summary: "weather lookup 200",
|
|
FinalOutput: "partial answer",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather"},
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords(records); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
judge := &stubSuccessJudge{
|
|
decisions: map[string]evolution.TaskSuccessDecision{
|
|
"task-success": {Success: true, Reason: "goal achieved"},
|
|
"task-judge-rejected": {Success: false, Reason: "partial result"},
|
|
},
|
|
}
|
|
clusterer := evolution.NewLLMPatternClusterer(
|
|
&llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"},
|
|
"test-model",
|
|
evolution.NewHeuristicPatternClusterer(1, nil),
|
|
1,
|
|
func() time.Time { return time.Unix(1700000000, 0).UTC() },
|
|
)
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.5},
|
|
Store: store,
|
|
PatternClusterer: clusterer,
|
|
SuccessJudge: judge,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-weather",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer the proven weather path",
|
|
BodyOrPatch: "## Start Here\nUse the weather path directly.",
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
patterns, err := store.LoadPatternRecords()
|
|
if err != nil {
|
|
t.Fatalf("LoadPatternRecords: %v", err)
|
|
}
|
|
if len(patterns) != 1 {
|
|
t.Fatalf("len(patterns) = %d, want 1", len(patterns))
|
|
}
|
|
if got := strings.Join(patterns[0].TaskRecordIDs, ","); got != "task-success" {
|
|
t.Fatalf("pattern TaskRecordIDs = %v, want only successful task", patterns[0].TaskRecordIDs)
|
|
}
|
|
taskRecords, err := store.LoadTaskRecords()
|
|
if err != nil {
|
|
t.Fatalf("LoadTaskRecords: %v", err)
|
|
}
|
|
statusByID := make(map[string]evolution.RecordStatus)
|
|
for _, record := range taskRecords {
|
|
statusByID[record.ID] = record.Status
|
|
}
|
|
for _, id := range []string{"task-success", "task-judge-rejected"} {
|
|
if statusByID[id] != evolution.RecordStatus("clustered") {
|
|
t.Fatalf("statusByID[%s] = %q, want clustered", id, statusByID[id])
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_DraftEvidenceDoesNotCrossWorkspaceWithDuplicateTaskID(t *testing.T) {
|
|
sharedState := t.TempDir()
|
|
workspaceA := t.TempDir()
|
|
workspaceB := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState))
|
|
ok := true
|
|
|
|
if err := store.AppendTaskRecords(context.Background(), []evolution.LearningRecord{
|
|
{
|
|
ID: "main-turn-1",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: workspaceB,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "other workspace weather",
|
|
FinalOutput: "foreign workspace output",
|
|
Status: evolution.RecordStatus("clustered"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"foreign-skill"},
|
|
},
|
|
{
|
|
ID: "main-turn-1",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: workspaceA,
|
|
CreatedAt: time.Unix(1700000001, 0).UTC(),
|
|
Summary: "current workspace weather",
|
|
FinalOutput: "current workspace output",
|
|
Status: evolution.RecordStatus("clustered"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"current-skill"},
|
|
},
|
|
}); err != nil {
|
|
t.Fatalf("AppendTaskRecords: %v", err)
|
|
}
|
|
if err := store.AppendPatternRecords([]evolution.LearningRecord{{
|
|
ID: "pattern-workspace-a",
|
|
Kind: evolution.RecordKindPattern,
|
|
WorkspaceID: workspaceA,
|
|
CreatedAt: time.Unix(1700000002, 0).UTC(),
|
|
Summary: "current workspace weather",
|
|
Status: evolution.RecordStatus("ready"),
|
|
TaskRecordIDs: []string{"main-turn-1"},
|
|
}}); err != nil {
|
|
t.Fatalf("AppendPatternRecords: %v", err)
|
|
}
|
|
|
|
generator := &evidenceCaptureDraftGenerator{}
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft", StateDir: sharedState},
|
|
Store: store,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(workspaceA),
|
|
DraftGenerator: generator,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), workspaceA); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
if len(generator.evidence.TaskRecords) != 1 {
|
|
t.Fatalf(
|
|
"evidence task count = %d, want 1: %#v",
|
|
len(generator.evidence.TaskRecords),
|
|
generator.evidence.TaskRecords,
|
|
)
|
|
}
|
|
task := generator.evidence.TaskRecords[0]
|
|
if task.WorkspaceID != workspaceA {
|
|
t.Fatalf("evidence workspace = %q, want %q", task.WorkspaceID, workspaceA)
|
|
}
|
|
if task.FinalOutput != "current workspace output" {
|
|
t.Fatalf("evidence FinalOutput = %q, want current workspace output", task.FinalOutput)
|
|
}
|
|
if len(task.UsedSkillNames) != 1 || task.UsedSkillNames[0] != "current-skill" {
|
|
t.Fatalf("evidence UsedSkillNames = %v, want [current-skill]", task.UsedSkillNames)
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_AdmitsSingleSkillTaskButWaitsForMinTaskCount(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
ok := true
|
|
|
|
record := evolution.LearningRecord{
|
|
ID: "task-simple",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000250, 0).UTC(),
|
|
Summary: "simple weather lookup",
|
|
UserGoal: "check weather",
|
|
FinalOutput: "sunny",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"weather"},
|
|
AddedSkillNames: []string{"weather"},
|
|
ToolKinds: []string{"read_file"},
|
|
ToolExecutions: []evolution.ToolExecutionRecord{
|
|
{Name: "read_file", Success: true, SkillNames: []string{"weather"}},
|
|
},
|
|
AttemptTrail: &evolution.AttemptTrail{
|
|
AttemptedSkills: []string{"weather"},
|
|
FinalSuccessfulPath: []string{"weather"},
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{record}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
judge := &stubSuccessJudge{}
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
SuccessJudge: judge,
|
|
Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-simple",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "simple draft",
|
|
BodyOrPatch: "## Start Here\nUse weather.",
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
if len(judge.calls) != 1 || judge.calls[0] != "task-simple" {
|
|
t.Fatalf("judge calls = %v, want [task-simple]", judge.calls)
|
|
}
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 0 {
|
|
t.Fatalf("len(drafts) = %d, want 0", len(drafts))
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_RejectsTaskWhenSuccessJudgeRejects(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
ok := true
|
|
|
|
record := evolution.LearningRecord{
|
|
ID: "task-detailed-path",
|
|
Kind: evolution.RecordKindTask,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000300, 0).UTC(),
|
|
Summary: "computed theorem chain",
|
|
UserGoal: "调用三一定理计算100",
|
|
FinalOutput: "最终结果:100 通过三一定理计算得到 120",
|
|
Status: evolution.RecordStatus("new"),
|
|
Success: &ok,
|
|
UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
|
|
AddedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
|
|
ToolKinds: []string{"read_file"},
|
|
ToolExecutions: []evolution.ToolExecutionRecord{
|
|
{Name: "read_file", Success: true, SkillNames: []string{"three-one-theorem"}},
|
|
{Name: "read_file", Success: true, SkillNames: []string{"four-two-theorem"}},
|
|
{Name: "read_file", Success: true, SkillNames: []string{"five-three-theorem"}},
|
|
},
|
|
AttemptTrail: &evolution.AttemptTrail{
|
|
AttemptedSkills: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
|
|
FinalSuccessfulPath: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{record}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
judge := &stubSuccessJudge{
|
|
decisions: map[string]evolution.TaskSuccessDecision{
|
|
"task-detailed-path": {Success: false, Reason: "llm false negative"},
|
|
},
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
SuccessJudge: judge,
|
|
Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-detailed-path",
|
|
TargetSkillName: "three-one-theorem",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer the full theorem chain",
|
|
BodyOrPatch: "## Start Here\nUse the full three-one, four-two, five-three theorem chain.",
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
allRecords, err := store.LoadLearningRecords()
|
|
if err != nil {
|
|
t.Fatalf("LoadLearningRecords: %v", err)
|
|
}
|
|
|
|
foundPattern := false
|
|
for _, record := range allRecords {
|
|
if record.Kind != evolution.RecordKindPattern {
|
|
continue
|
|
}
|
|
foundPattern = true
|
|
break
|
|
}
|
|
if foundPattern {
|
|
t.Fatal("unexpected pattern record for rejected task")
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_QuarantinesInvalidDraft(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "release path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-1",
|
|
WorkspaceID: root,
|
|
SourceRecordID: "rule-1",
|
|
TargetSkillName: "",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "broken",
|
|
BodyOrPatch: "",
|
|
},
|
|
},
|
|
Store: store,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].Status != evolution.DraftStatusQuarantined {
|
|
t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusQuarantined)
|
|
}
|
|
if len(drafts[0].ScanFindings) == 0 {
|
|
t.Fatal("expected scan findings for invalid draft")
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_DoesNotWriteSkillFile(t *testing.T) {
|
|
root := t.TempDir()
|
|
skillPath := filepath.Join(root, "skills", "weather", "SKILL.md")
|
|
if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil {
|
|
t.Fatalf("MkdirAll: %v", err)
|
|
}
|
|
if err := os.WriteFile(
|
|
skillPath,
|
|
[]byte("---\nname: weather\ndescription: test\n---\n# Weather"),
|
|
0o644,
|
|
); err != nil {
|
|
t.Fatalf("WriteFile: %v", err)
|
|
}
|
|
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
original, err := os.ReadFile(skillPath)
|
|
if err != nil {
|
|
t.Fatalf("ReadFile(original): %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "apply"},
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-1",
|
|
WorkspaceID: root,
|
|
SourceRecordID: "rule-1",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer native-name path first",
|
|
BodyOrPatch: "## Start Here\nUse native-name query first.",
|
|
},
|
|
},
|
|
Store: store,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
got, err := os.ReadFile(skillPath)
|
|
if err != nil {
|
|
t.Fatalf("ReadFile(after): %v", err)
|
|
}
|
|
if string(got) != string(original) {
|
|
t.Fatalf("skill file changed unexpectedly:\n%s", string(got))
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_UsesDefaultDraftGenerator(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
SuccessRate: 1,
|
|
WinningPath: []string{"weather"},
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].TargetSkillName != "weather" {
|
|
t.Fatalf("TargetSkillName = %q, want weather", drafts[0].TargetSkillName)
|
|
}
|
|
if drafts[0].Status != evolution.DraftStatusCandidate {
|
|
t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate)
|
|
}
|
|
if drafts[0].BodyOrPatch == "" {
|
|
t.Fatal("expected generated draft body")
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_UsesLLMDraftGeneratorWhenProviderAvailable(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
SuccessRate: 1,
|
|
WinningPath: []string{"weather"},
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
provider := &llmDraftRuntimeProvider{
|
|
response: &providers.LLMResponse{
|
|
Content: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`,
|
|
},
|
|
}
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
DraftGenerator: evolution.NewDraftGeneratorForWorkspace(root, provider, "runtime-explicit-model"),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if provider.calls != 1 {
|
|
t.Fatalf("provider.calls = %d, want 1", provider.calls)
|
|
}
|
|
if drafts[0].HumanSummary != "Prefer native-name path first" {
|
|
t.Fatalf("HumanSummary = %q, want %q", drafts[0].HumanSummary, "Prefer native-name path first")
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_UsesDefaultDraftGeneratorWhenFactoryHasNoProvider(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
SuccessRate: 1,
|
|
WinningPath: []string{"weather"},
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
DraftGenerator: evolution.NewDraftGeneratorForWorkspace(root, nil, ""),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].TargetSkillName != "weather" {
|
|
t.Fatalf("TargetSkillName = %q, want weather", drafts[0].TargetSkillName)
|
|
}
|
|
if drafts[0].BodyOrPatch == "" {
|
|
t.Fatal("expected generated draft body")
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_UsesGeneratorFactoryWorkspaceForFallback(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
if err := os.MkdirAll(filepath.Join(root, "skills", "weather"), 0o755); err != nil {
|
|
t.Fatalf("MkdirAll: %v", err)
|
|
}
|
|
skillBody := "---\nname: weather\ndescription: workspace weather helper\n---\n# Weather\n## Start Here\nUse the workspace-specific path.\n"
|
|
if err := os.WriteFile(filepath.Join(root, "skills", "weather", "SKILL.md"), []byte(skillBody), 0o644); err != nil {
|
|
t.Fatalf("WriteFile: %v", err)
|
|
}
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
SuccessRate: 1,
|
|
WinningPath: []string{"weather"},
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
provider := &llmDraftRuntimeProvider{
|
|
response: &providers.LLMResponse{Content: `not-json`},
|
|
defaultModel: "runtime-test-model",
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
GeneratorFactory: func(workspace string) evolution.DraftGenerator {
|
|
return evolution.NewDraftGeneratorForWorkspace(workspace, provider, "runtime-explicit-model")
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].ChangeKind != evolution.ChangeKindAppend {
|
|
t.Fatalf("ChangeKind = %q, want %q", drafts[0].ChangeKind, evolution.ChangeKindAppend)
|
|
}
|
|
if !strings.Contains(drafts[0].BodyOrPatch, "## Learned Evolution") {
|
|
t.Fatalf("BodyOrPatch = %q, want appended learned evolution section", drafts[0].BodyOrPatch)
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_PersistsEarlierDraftWhenLaterRuleFails(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
rules := []evolution.LearningRecord{
|
|
{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
},
|
|
{
|
|
ID: "rule-2",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000100, 0).UTC(),
|
|
Summary: "release path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
},
|
|
}
|
|
if err := store.AppendLearningRecords(rules); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
|
|
generator := &sequenceDraftGenerator{
|
|
results: []draftGenerationResult{
|
|
{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-1",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "prefer native-name path first",
|
|
BodyOrPatch: "## Start Here\nUse native-name query first.",
|
|
},
|
|
},
|
|
{
|
|
err: context.DeadlineExceeded,
|
|
},
|
|
},
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
DraftGenerator: generator,
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
err = rt.RunColdPathOnce(context.Background(), root)
|
|
if !errors.Is(err, context.DeadlineExceeded) {
|
|
t.Fatalf("RunColdPathOnce error = %v, want %v", err, context.DeadlineExceeded)
|
|
}
|
|
|
|
drafts, loadErr := store.LoadDrafts()
|
|
if loadErr != nil {
|
|
t.Fatalf("LoadDrafts: %v", loadErr)
|
|
}
|
|
if len(drafts) != 1 {
|
|
t.Fatalf("len(drafts) = %d, want 1", len(drafts))
|
|
}
|
|
if drafts[0].SourceRecordID != "rule-1" {
|
|
t.Fatalf("SourceRecordID = %q, want rule-1", drafts[0].SourceRecordID)
|
|
}
|
|
}
|
|
|
|
func TestRuntime_RunColdPathOnce_RegeneratesAfterQuarantinedDraft(t *testing.T) {
|
|
root := t.TempDir()
|
|
store := evolution.NewStore(evolution.NewPaths(root, ""))
|
|
|
|
rule := evolution.LearningRecord{
|
|
ID: "rule-1",
|
|
Kind: evolution.RecordKindRule,
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000000, 0).UTC(),
|
|
Summary: "weather native-name path",
|
|
Status: evolution.RecordStatus("ready"),
|
|
EventCount: 4,
|
|
}
|
|
if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil {
|
|
t.Fatalf("AppendLearningRecords: %v", err)
|
|
}
|
|
if err := store.SaveDrafts([]evolution.SkillDraft{{
|
|
ID: "draft-old",
|
|
WorkspaceID: root,
|
|
CreatedAt: time.Unix(1700000100, 0).UTC(),
|
|
SourceRecordID: "rule-1",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "broken attempt",
|
|
BodyOrPatch: "## Start Here\nBroken content.",
|
|
Status: evolution.DraftStatusQuarantined,
|
|
ScanFindings: []string{"apply failed"},
|
|
}}); err != nil {
|
|
t.Fatalf("SaveDrafts: %v", err)
|
|
}
|
|
|
|
rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
|
|
Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
|
|
Store: store,
|
|
DraftGenerator: stubDraftGenerator{
|
|
draft: evolution.SkillDraft{
|
|
ID: "draft-new",
|
|
TargetSkillName: "weather",
|
|
DraftType: evolution.DraftTypeShortcut,
|
|
ChangeKind: evolution.ChangeKindAppend,
|
|
HumanSummary: "fixed attempt",
|
|
BodyOrPatch: "## Start Here\nUse native-name query first.",
|
|
},
|
|
},
|
|
SkillsRecaller: evolution.NewSkillsRecaller(root),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("NewRuntime: %v", err)
|
|
}
|
|
|
|
if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil {
|
|
t.Fatalf("RunColdPathOnce: %v", runErr)
|
|
}
|
|
|
|
drafts, err := store.LoadDrafts()
|
|
if err != nil {
|
|
t.Fatalf("LoadDrafts: %v", err)
|
|
}
|
|
if len(drafts) != 2 {
|
|
t.Fatalf("len(drafts) = %d, want 2", len(drafts))
|
|
}
|
|
if drafts[1].ID != "draft-new" {
|
|
t.Fatalf("drafts[1].ID = %q, want draft-new", drafts[1].ID)
|
|
}
|
|
}
|
|
|
|
type llmDraftRuntimeProvider struct {
|
|
response *providers.LLMResponse
|
|
err error
|
|
calls int
|
|
defaultModel string
|
|
}
|
|
|
|
func (p *llmDraftRuntimeProvider) Chat(
|
|
_ context.Context,
|
|
_ []providers.Message,
|
|
_ []providers.ToolDefinition,
|
|
_ string,
|
|
_ map[string]any,
|
|
) (*providers.LLMResponse, error) {
|
|
p.calls++
|
|
return p.response, p.err
|
|
}
|
|
|
|
func (p *llmDraftRuntimeProvider) GetDefaultModel() string {
|
|
if p.defaultModel != "" {
|
|
return p.defaultModel
|
|
}
|
|
return "runtime-test-model"
|
|
}
|