// picoclaw/pkg/evolution/runtime_cold_path_test.go

package evolution_test

import (
	"context"
	"errors"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/sipeed/picoclaw/pkg/config"
	"github.com/sipeed/picoclaw/pkg/evolution"
	"github.com/sipeed/picoclaw/pkg/providers"
	"github.com/sipeed/picoclaw/pkg/skills"
)

// stubDraftGenerator is a test double whose GenerateDraft method returns the
// same preconfigured draft and error on every call.
type stubDraftGenerator struct {
	// draft is the value handed back by GenerateDraft.
	draft evolution.SkillDraft
	// err is the error handed back by GenerateDraft alongside draft.
	err error
}

// GenerateDraft disregards every argument and hands back the draft and error
// the stub was constructed with.
func (g stubDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	draft, err := g.draft, g.err
	return draft, err
}

// sequenceDraftGenerator is a test double that replays a scripted list of
// draft-generation outcomes, one per GenerateDraft call.
type sequenceDraftGenerator struct {
	// results holds the scripted outcomes in the order they are returned.
	results []draftGenerationResult
	// index is the position in results of the next outcome to return.
	index int
}

// draftGenerationResult pairs a draft with an error; it is the element type
// of sequenceDraftGenerator.results.
type draftGenerationResult struct {
	// draft is the draft returned for this scripted call.
	draft evolution.SkillDraft
	// err is the error returned for this scripted call.
	err error
}

// evidenceCaptureDraftGenerator is a test double that remembers the evidence
// passed to GenerateDraftWithEvidence so a test can inspect it afterwards.
type evidenceCaptureDraftGenerator struct {
	// evidence is overwritten on each GenerateDraftWithEvidence call with the
	// evidence argument of that call.
	evidence evolution.DraftEvidence
}

// GenerateDraft is the evidence-free entry point. It ignores its arguments and
// always yields a zero-value draft with a nil error.
func (g *evidenceCaptureDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	var empty evolution.SkillDraft
	return empty, nil
}

// GenerateDraftWithEvidence stores the supplied evidence on the receiver and
// returns a fixed "weather" create-draft with a nil error. The context,
// record, and skill list are ignored.
func (g *evidenceCaptureDraftGenerator) GenerateDraftWithEvidence(
	_ context.Context,
	_ evolution.LearningRecord,
	_ []skills.SkillInfo,
	evidence evolution.DraftEvidence,
) (evolution.SkillDraft, error) {
	// Capture first so the test can inspect what the runtime passed in.
	g.evidence = evidence

	var draft evolution.SkillDraft
	draft.ID = "draft-evidence"
	draft.TargetSkillName = "weather"
	draft.DraftType = evolution.DraftTypeShortcut
	draft.ChangeKind = evolution.ChangeKindCreate
	draft.HumanSummary = "weather helper"
	draft.BodyOrPatch = "---\nname: weather\ndescription: weather helper\n---\n# Weather\nUse current workspace evidence.\n"
	return draft, nil
}

// stubSuccessJudge is a test double for task-success judging. It returns
// scripted decisions keyed by record ID and logs which records it was asked
// about.
type stubSuccessJudge struct {
	// decisions maps a record ID to the decision JudgeTaskRecord returns for it.
	decisions map[string]evolution.TaskSuccessDecision
	// calls accumulates the ID of every record passed to JudgeTaskRecord, in
	// call order.
	calls []string
}

// JudgeTaskRecord appends the record's ID to j.calls and returns the scripted
// decision for that ID. When no decision is scripted it returns a successful
// decision with the reason "default success". The error is always nil.
func (j *stubSuccessJudge) JudgeTaskRecord(
	_ context.Context,
	record evolution.LearningRecord,
) (evolution.TaskSuccessDecision, error) {
	id := record.ID
	j.calls = append(j.calls, id)

	// Indexing a nil decisions map is safe and simply reports "not scripted".
	decision, scripted := j.decisions[id]
	if !scripted {
		decision = evolution.TaskSuccessDecision{Success: true, Reason: "default success"}
	}
	return decision, nil
}

// GenerateDraft returns the next scripted outcome and advances the cursor.
// Once every scripted outcome has been consumed it returns a zero-value draft
// and a nil error. All arguments are ignored.
func (g *sequenceDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	if g.index < len(g.results) {
		next := g.results[g.index]
		g.index++
		return next.draft, next.err
	}
	return evolution.SkillDraft{}, nil
}

// TestRuntime_RunColdPathOnce_GeneratesCandidateDraft seeds one ready rule
// record, runs the cold path once with a stub generator, and expects exactly
// one stored draft whose status is DraftStatusCandidate.
func TestRuntime_RunColdPathOnce_GeneratesCandidateDraft(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Draft the stub generator hands back for whatever record it is given.
	canned := evolution.SkillDraft{
		ID:              "draft-1",
		WorkspaceID:     workspace,
		SourceRecordID:  "rule-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer native-name path first",
		BodyOrPatch:     "## Start Here\nUse native-name query first.",
	}
	// Fixed clock injected into the runtime.
	clock := time.Unix(1700001000, 0).UTC()

	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Now:            func() time.Time { return clock },
		DraftGenerator: stubDraftGenerator{draft: canned},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 1:
		t.Fatalf("len(drafts) = %d, want 1", len(drafts))
	case drafts[0].Status != evolution.DraftStatusCandidate:
		t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate)
	}
}

// TestRuntime_RunColdPathOnce_AdmitsOnlyRecordsApprovedBySuccessJudge stores
// three task records: one already marked failed, one marked successful but
// rejected by the judge, and one marked successful and approved by the judge.
// After one cold-path run it expects the judge to have been consulted only for
// the two self-reported successes (in that order), the first pattern record to
// reference only the approved task, and a single draft sourced from that
// pattern.
func TestRuntime_RunColdPathOnce_AdmitsOnlyRecordsApprovedBySuccessJudge(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	succeeded, didFail := true, false

	seed := []evolution.LearningRecord{
		{
			ID:             "task-failed",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000000, 0).UTC(),
			Summary:        "failed weather attempt",
			UserGoal:       "check weather in shanghai",
			FinalOutput:    "tool failed",
			Status:         evolution.RecordStatus("new"),
			Success:        &didFail,
			UsedSkillNames: []string{"weather", "native-name"},
			ToolKinds:      []string{"read_file"},
		},
		{
			ID:             "task-rejected",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000100, 0).UTC(),
			Summary:        "partial weather answer",
			UserGoal:       "check weather in shanghai",
			FinalOutput:    "I will check it next",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather", "native-name"},
			ToolKinds:      []string{"read_file"},
			ToolExecutions: []evolution.ToolExecutionRecord{
				{Name: "read_file", Success: true},
				{Name: "read_file", Success: true},
			},
		},
		{
			ID:              "task-admitted",
			Kind:            evolution.RecordKindTask,
			WorkspaceID:     workspace,
			CreatedAt:       time.Unix(1700000200, 0).UTC(),
			Summary:         "weather answer delivered",
			UserGoal:        "check weather in shanghai",
			FinalOutput:     "sunny, 26C",
			Status:          evolution.RecordStatus("new"),
			Success:         &succeeded,
			UsedSkillNames:  []string{"weather", "native-name"},
			AddedSkillNames: []string{"native-name"},
			ToolKinds:       []string{"read_file"},
			ToolExecutions: []evolution.ToolExecutionRecord{
				{Name: "read_file", Success: true},
				{Name: "read_file", Success: true},
			},
			AttemptTrail: &evolution.AttemptTrail{
				AttemptedSkills:     []string{"weather"},
				FinalSuccessfulPath: []string{"weather"},
			},
		},
	}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Scripted verdicts: reject the partial answer, accept the delivered one.
	judge := &stubSuccessJudge{
		decisions: map[string]evolution.TaskSuccessDecision{
			"task-rejected": {Success: false, Reason: "only partial reasoning"},
			"task-admitted": {Success: true, Reason: "goal achieved"},
		},
	}
	canned := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}

	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1},
		Store:          store,
		SuccessJudge:   judge,
		Organizer:      evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: canned},
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	// The judge must have seen exactly the two self-reported successes, in order.
	judgedAsExpected := len(judge.calls) == 2 &&
		judge.calls[0] == "task-rejected" &&
		judge.calls[1] == "task-admitted"
	if !judgedAsExpected {
		t.Fatalf("judge calls = %v, want [task-rejected task-admitted]", judge.calls)
	}

	allRecords, err := store.LoadLearningRecords()
	if err != nil {
		t.Fatalf("LoadLearningRecords: %v", err)
	}

	// Pick the first pattern record in load order.
	var pattern *evolution.LearningRecord
	for i := range allRecords {
		if allRecords[i].Kind == evolution.RecordKindPattern {
			pattern = &allRecords[i]
			break
		}
	}
	if pattern == nil {
		t.Fatal("expected generated pattern record")
	}
	if !(len(pattern.TaskRecordIDs) == 1 && pattern.TaskRecordIDs[0] == "task-admitted") {
		t.Fatalf("TaskRecordIDs = %v, want [task-admitted]", pattern.TaskRecordIDs)
	}
	if pattern.Label == "" {
		t.Fatal("pattern Label should not be empty")
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 1:
		t.Fatalf("len(drafts) = %d, want 1", len(drafts))
	case drafts[0].SourceRecordID != pattern.ID:
		t.Fatalf("draft SourceRecordID = %q, want %q", drafts[0].SourceRecordID, pattern.ID)
	}
}

// TestRuntime_RunColdPathOnce_RejectsClusterBelowMinSuccessRatio stores one
// successful and two failed "weather" task records, configures the runtime
// with MinSuccessRatio 0.8, and expects a cold-path run to leave neither
// pattern records nor drafts behind.
func TestRuntime_RunColdPathOnce_RejectsClusterBelowMinSuccessRatio(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	succeeded, didFail := true, false

	seed := []evolution.LearningRecord{
		{
			ID:             "task-success",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000200, 0).UTC(),
			Summary:        "weather lookup 100",
			FinalOutput:    "sunny",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		},
		{
			ID:             "task-failed-1",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000100, 0).UTC(),
			Summary:        "weather lookup 200",
			FinalOutput:    "failed",
			Status:         evolution.RecordStatus("new"),
			Success:        &didFail,
			UsedSkillNames: []string{"weather"},
		},
		{
			ID:             "task-failed-2",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000000, 0).UTC(),
			Summary:        "weather lookup 300",
			FinalOutput:    "failed",
			Status:         evolution.RecordStatus("new"),
			Success:        &didFail,
			UsedSkillNames: []string{"weather"},
		},
	}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	canned := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8},
		Store:          store,
		SuccessJudge:   &stubSuccessJudge{},
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: canned},
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	patterns, err := store.LoadPatternRecords()
	switch {
	case err != nil:
		t.Fatalf("LoadPatternRecords: %v", err)
	case len(patterns) != 0:
		t.Fatalf("len(patterns) = %d, want 0", len(patterns))
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 0:
		t.Fatalf("len(drafts) = %d, want 0", len(drafts))
	}
}

// TestRuntime_RunColdPathOnce_FallbackUsesJudgeAdjustedSuccessRatio stores two
// task records that both self-report success, scripts the judge to reject one
// of them, and requires MinSuccessRatio 0.8. The LLM clusterer's provider
// returns the content "not-json" and is given a heuristic clusterer
// (presumably used as the fallback, per the test name). The test expects no
// pattern records and no drafts after one cold-path run.
func TestRuntime_RunColdPathOnce_FallbackUsesJudgeAdjustedSuccessRatio(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	succeeded := true

	seed := []evolution.LearningRecord{
		{
			ID:             "task-success",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000200, 0).UTC(),
			Summary:        "weather lookup 100",
			FinalOutput:    "sunny",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		},
		{
			ID:             "task-judge-rejected",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000100, 0).UTC(),
			Summary:        "weather lookup 200",
			FinalOutput:    "partial answer",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		},
	}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{
		decisions: map[string]evolution.TaskSuccessDecision{
			"task-success":        {Success: true, Reason: "goal achieved"},
			"task-judge-rejected": {Success: false, Reason: "partial result"},
		},
	}

	// LLM clusterer whose provider answers with non-JSON content.
	provider := &llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"}
	heuristic := evolution.NewHeuristicPatternClusterer(1, nil)
	clusterNow := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	clusterer := evolution.NewLLMPatternClusterer(provider, "test-model", heuristic, 1, clusterNow)

	canned := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:           config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8},
		Store:            store,
		PatternClusterer: clusterer,
		SuccessJudge:     judge,
		SkillsRecaller:   evolution.NewSkillsRecaller(workspace),
		DraftGenerator:   stubDraftGenerator{draft: canned},
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	patterns, err := store.LoadPatternRecords()
	switch {
	case err != nil:
		t.Fatalf("LoadPatternRecords: %v", err)
	case len(patterns) != 0:
		t.Fatalf("len(patterns) = %d, want 0", len(patterns))
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 0:
		t.Fatalf("len(drafts) = %d, want 0", len(drafts))
	}
}

// TestRuntime_RunColdPathOnce_FallbackMarksAcceptedFailureEvidenceClustered
// uses the same two-record fixture as the judge-adjusted ratio scenario but
// with MinSuccessRatio 0.5. It expects one pattern record referencing only
// "task-success", and both task records to end up with status "clustered".
func TestRuntime_RunColdPathOnce_FallbackMarksAcceptedFailureEvidenceClustered(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	succeeded := true

	seed := []evolution.LearningRecord{
		{
			ID:             "task-success",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000200, 0).UTC(),
			Summary:        "weather lookup 100",
			FinalOutput:    "sunny",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		},
		{
			ID:             "task-judge-rejected",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspace,
			CreatedAt:      time.Unix(1700000100, 0).UTC(),
			Summary:        "weather lookup 200",
			FinalOutput:    "partial answer",
			Status:         evolution.RecordStatus("new"),
			Success:        &succeeded,
			UsedSkillNames: []string{"weather"},
		},
	}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	judge := &stubSuccessJudge{
		decisions: map[string]evolution.TaskSuccessDecision{
			"task-success":        {Success: true, Reason: "goal achieved"},
			"task-judge-rejected": {Success: false, Reason: "partial result"},
		},
	}

	// LLM clusterer whose provider answers with non-JSON content.
	provider := &llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"}
	heuristic := evolution.NewHeuristicPatternClusterer(1, nil)
	clusterNow := func() time.Time { return time.Unix(1700000000, 0).UTC() }
	clusterer := evolution.NewLLMPatternClusterer(provider, "test-model", heuristic, 1, clusterNow)

	canned := evolution.SkillDraft{
		ID:              "draft-weather",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the proven weather path",
		BodyOrPatch:     "## Start Here\nUse the weather path directly.",
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:           config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.5},
		Store:            store,
		PatternClusterer: clusterer,
		SuccessJudge:     judge,
		SkillsRecaller:   evolution.NewSkillsRecaller(workspace),
		DraftGenerator:   stubDraftGenerator{draft: canned},
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	patterns, err := store.LoadPatternRecords()
	switch {
	case err != nil:
		t.Fatalf("LoadPatternRecords: %v", err)
	case len(patterns) != 1:
		t.Fatalf("len(patterns) = %d, want 1", len(patterns))
	}
	if joined := strings.Join(patterns[0].TaskRecordIDs, ","); joined != "task-success" {
		t.Fatalf("pattern TaskRecordIDs = %v, want only successful task", patterns[0].TaskRecordIDs)
	}

	taskRecords, err := store.LoadTaskRecords()
	if err != nil {
		t.Fatalf("LoadTaskRecords: %v", err)
	}
	// Index statuses by ID; a later record with the same ID overwrites an earlier one.
	statusByID := make(map[string]evolution.RecordStatus)
	for i := range taskRecords {
		statusByID[taskRecords[i].ID] = taskRecords[i].Status
	}
	wantClustered := []string{"task-success", "task-judge-rejected"}
	for _, id := range wantClustered {
		if status := statusByID[id]; status != evolution.RecordStatus("clustered") {
			t.Fatalf("statusByID[%s] = %q, want clustered", id, status)
		}
	}
}

// TestRuntime_RunColdPathOnce_DraftEvidenceDoesNotCrossWorkspaceWithDuplicateTaskID
// writes two task records that share the ID "main-turn-1" but belong to
// different workspaces into a store rooted at a shared state directory, plus a
// ready pattern in workspace A that references that ID. After running the cold
// path for workspace A, the evidence captured by the generator must contain
// exactly one task record, and it must be workspace A's.
func TestRuntime_RunColdPathOnce_DraftEvidenceDoesNotCrossWorkspaceWithDuplicateTaskID(t *testing.T) {
	ctx := context.Background()
	sharedState := t.TempDir()
	workspaceA := t.TempDir()
	workspaceB := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState))
	succeeded := true

	// Same task ID in both workspaces; the foreign one is stored first.
	tasks := []evolution.LearningRecord{
		{
			ID:             "main-turn-1",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspaceB,
			CreatedAt:      time.Unix(1700000000, 0).UTC(),
			Summary:        "other workspace weather",
			FinalOutput:    "foreign workspace output",
			Status:         evolution.RecordStatus("clustered"),
			Success:        &succeeded,
			UsedSkillNames: []string{"foreign-skill"},
		},
		{
			ID:             "main-turn-1",
			Kind:           evolution.RecordKindTask,
			WorkspaceID:    workspaceA,
			CreatedAt:      time.Unix(1700000001, 0).UTC(),
			Summary:        "current workspace weather",
			FinalOutput:    "current workspace output",
			Status:         evolution.RecordStatus("clustered"),
			Success:        &succeeded,
			UsedSkillNames: []string{"current-skill"},
		},
	}
	if err := store.AppendTaskRecords(ctx, tasks); err != nil {
		t.Fatalf("AppendTaskRecords: %v", err)
	}

	patterns := []evolution.LearningRecord{{
		ID:            "pattern-workspace-a",
		Kind:          evolution.RecordKindPattern,
		WorkspaceID:   workspaceA,
		CreatedAt:     time.Unix(1700000002, 0).UTC(),
		Summary:       "current workspace weather",
		Status:        evolution.RecordStatus("ready"),
		TaskRecordIDs: []string{"main-turn-1"},
	}}
	if err := store.AppendPatternRecords(patterns); err != nil {
		t.Fatalf("AppendPatternRecords: %v", err)
	}

	generator := &evidenceCaptureDraftGenerator{}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft", StateDir: sharedState},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspaceA),
		DraftGenerator: generator,
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(ctx, workspaceA); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	if len(generator.evidence.TaskRecords) != 1 {
		t.Fatalf(
			"evidence task count = %d, want 1: %#v",
			len(generator.evidence.TaskRecords),
			generator.evidence.TaskRecords,
		)
	}

	captured := generator.evidence.TaskRecords[0]
	switch {
	case captured.WorkspaceID != workspaceA:
		t.Fatalf("evidence workspace = %q, want %q", captured.WorkspaceID, workspaceA)
	case captured.FinalOutput != "current workspace output":
		t.Fatalf("evidence FinalOutput = %q, want current workspace output", captured.FinalOutput)
	case !(len(captured.UsedSkillNames) == 1 && captured.UsedSkillNames[0] == "current-skill"):
		t.Fatalf("evidence UsedSkillNames = %v, want [current-skill]", captured.UsedSkillNames)
	}
}

// TestRuntime_RunColdPathOnce_AdmitsSingleSkillTaskButWaitsForMinTaskCount
// stores a single successful one-skill task record and runs the cold path with
// a judge that approves everything. It expects the judge to have been asked
// about that record exactly once and no draft to have been stored (presumably
// because the default minimum task count is not yet reached, per the test
// name; MinTaskCount is left unset here).
func TestRuntime_RunColdPathOnce_AdmitsSingleSkillTaskButWaitsForMinTaskCount(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	succeeded := true

	seed := []evolution.LearningRecord{{
		ID:              "task-simple",
		Kind:            evolution.RecordKindTask,
		WorkspaceID:     workspace,
		CreatedAt:       time.Unix(1700000250, 0).UTC(),
		Summary:         "simple weather lookup",
		UserGoal:        "check weather",
		FinalOutput:     "sunny",
		Status:          evolution.RecordStatus("new"),
		Success:         &succeeded,
		UsedSkillNames:  []string{"weather"},
		AddedSkillNames: []string{"weather"},
		ToolKinds:       []string{"read_file"},
		ToolExecutions: []evolution.ToolExecutionRecord{
			{Name: "read_file", Success: true, SkillNames: []string{"weather"}},
		},
		AttemptTrail: &evolution.AttemptTrail{
			AttemptedSkills:     []string{"weather"},
			FinalSuccessfulPath: []string{"weather"},
		},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// No scripted decisions: the stub approves every record it is asked about.
	judge := &stubSuccessJudge{}
	canned := evolution.SkillDraft{
		ID:              "draft-simple",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "simple draft",
		BodyOrPatch:     "## Start Here\nUse weather.",
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		SuccessJudge:   judge,
		Organizer:      evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: canned},
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}
	if !(len(judge.calls) == 1 && judge.calls[0] == "task-simple") {
		t.Fatalf("judge calls = %v, want [task-simple]", judge.calls)
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 0:
		t.Fatalf("len(drafts) = %d, want 0", len(drafts))
	}
}

// TestRuntime_RunColdPathOnce_RejectsTaskWhenSuccessJudgeRejects stores one
// self-reported-successful task record with a three-skill path, scripts the
// judge to reject it, and expects no pattern record among the learning records
// after a cold-path run.
func TestRuntime_RunColdPathOnce_RejectsTaskWhenSuccessJudgeRejects(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	succeeded := true

	seed := []evolution.LearningRecord{{
		ID:              "task-detailed-path",
		Kind:            evolution.RecordKindTask,
		WorkspaceID:     workspace,
		CreatedAt:       time.Unix(1700000300, 0).UTC(),
		Summary:         "computed theorem chain",
		UserGoal:        "调用三一定理计算100",
		FinalOutput:     "最终结果：100 通过三一定理计算得到 120",
		Status:          evolution.RecordStatus("new"),
		Success:         &succeeded,
		UsedSkillNames:  []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
		AddedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
		ToolKinds:       []string{"read_file"},
		ToolExecutions: []evolution.ToolExecutionRecord{
			{Name: "read_file", Success: true, SkillNames: []string{"three-one-theorem"}},
			{Name: "read_file", Success: true, SkillNames: []string{"four-two-theorem"}},
			{Name: "read_file", Success: true, SkillNames: []string{"five-three-theorem"}},
		},
		AttemptTrail: &evolution.AttemptTrail{
			AttemptedSkills:     []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
			FinalSuccessfulPath: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
		},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// The judge overrules the record's own Success flag.
	judge := &stubSuccessJudge{
		decisions: map[string]evolution.TaskSuccessDecision{
			"task-detailed-path": {Success: false, Reason: "llm false negative"},
		},
	}
	canned := evolution.SkillDraft{
		ID:              "draft-detailed-path",
		TargetSkillName: "three-one-theorem",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer the full theorem chain",
		BodyOrPatch:     "## Start Here\nUse the full three-one, four-two, five-three theorem chain.",
	}

	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		SuccessJudge:   judge,
		Organizer:      evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}),
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
		DraftGenerator: stubDraftGenerator{draft: canned},
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	allRecords, err := store.LoadLearningRecords()
	if err != nil {
		t.Fatalf("LoadLearningRecords: %v", err)
	}

	// Any pattern record at all means the rejected task was admitted.
	for i := range allRecords {
		if allRecords[i].Kind == evolution.RecordKindPattern {
			t.Fatal("unexpected pattern record for rejected task")
		}
	}
}

// TestRuntime_RunColdPathOnce_QuarantinesInvalidDraft has the stub generator
// return a draft with an empty TargetSkillName and an empty BodyOrPatch. It
// expects the stored draft to carry DraftStatusQuarantined and at least one
// scan finding.
func TestRuntime_RunColdPathOnce_QuarantinesInvalidDraft(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "release path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Deliberately incomplete draft: no target skill and no body.
	broken := evolution.SkillDraft{
		ID:              "draft-1",
		WorkspaceID:     workspace,
		SourceRecordID:  "rule-1",
		TargetSkillName: "",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "broken",
		BodyOrPatch:     "",
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		DraftGenerator: stubDraftGenerator{draft: broken},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 1:
		t.Fatalf("len(drafts) = %d, want 1", len(drafts))
	case drafts[0].Status != evolution.DraftStatusQuarantined:
		t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusQuarantined)
	case len(drafts[0].ScanFindings) == 0:
		t.Fatal("expected scan findings for invalid draft")
	}
}

// TestRuntime_RunColdPathOnce_DoesNotWriteSkillFile creates a weather SKILL.md
// in the workspace, runs the cold path with Mode "apply" and a stub generator
// that targets that skill, and verifies the file's contents are unchanged
// afterwards.
func TestRuntime_RunColdPathOnce_DoesNotWriteSkillFile(t *testing.T) {
	workspace := t.TempDir()
	skillDir := filepath.Join(workspace, "skills", "weather")
	skillPath := filepath.Join(skillDir, "SKILL.md")

	if err := os.MkdirAll(skillDir, 0o755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}
	initial := []byte("---\nname: weather\ndescription: test\n---\n# Weather")
	if err := os.WriteFile(skillPath, initial, 0o644); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	store := evolution.NewStore(evolution.NewPaths(workspace, ""))
	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// Snapshot the on-disk bytes before the runtime gets a chance to touch them.
	original, err := os.ReadFile(skillPath)
	if err != nil {
		t.Fatalf("ReadFile(original): %v", err)
	}

	canned := evolution.SkillDraft{
		ID:              "draft-1",
		WorkspaceID:     workspace,
		SourceRecordID:  "rule-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "prefer native-name path first",
		BodyOrPatch:     "## Start Here\nUse native-name query first.",
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "apply"},
		DraftGenerator: stubDraftGenerator{draft: canned},
		Store:          store,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	after, err := os.ReadFile(skillPath)
	if err != nil {
		t.Fatalf("ReadFile(after): %v", err)
	}
	if before, now := string(original), string(after); now != before {
		t.Fatalf("skill file changed unexpectedly:\n%s", now)
	}
}

// TestRuntime_RunColdPathOnce_UsesDefaultDraftGenerator builds a runtime with
// no DraftGenerator set and seeds a ready rule whose WinningPath is
// ["weather"]. It expects one candidate draft targeting "weather" with a
// non-empty body.
func TestRuntime_RunColdPathOnce_UsesDefaultDraftGenerator(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	// DraftGenerator is intentionally left unset.
	opts := evolution.RuntimeOptions{
		Config: config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:  store,
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 1:
		t.Fatalf("len(drafts) = %d, want 1", len(drafts))
	case drafts[0].TargetSkillName != "weather":
		t.Fatalf("TargetSkillName = %q, want weather", drafts[0].TargetSkillName)
	case drafts[0].Status != evolution.DraftStatusCandidate:
		t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate)
	case drafts[0].BodyOrPatch == "":
		t.Fatal("expected generated draft body")
	}
}

// TestRuntime_RunColdPathOnce_UsesLLMDraftGeneratorWhenProviderAvailable wires
// a generator built by NewDraftGeneratorForWorkspace around a test provider
// that returns a JSON draft. It expects one stored draft, exactly one provider
// call, and the draft's HumanSummary to match the JSON payload.
func TestRuntime_RunColdPathOnce_UsesLLMDraftGeneratorWhenProviderAvailable(t *testing.T) {
	// Untyped constants so they stay assignable exactly like the inline literals.
	const (
		llmPayload  = `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`
		wantSummary = "Prefer native-name path first"
	)

	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	seed := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if err := store.AppendLearningRecords(seed); err != nil {
		t.Fatalf("AppendLearningRecords: %v", err)
	}

	provider := &llmDraftRuntimeProvider{
		response: &providers.LLMResponse{Content: llmPayload},
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: evolution.NewDraftGeneratorForWorkspace(workspace, provider, "runtime-explicit-model"),
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	switch {
	case err != nil:
		t.Fatalf("LoadDrafts: %v", err)
	case len(drafts) != 1:
		t.Fatalf("len(drafts) = %d, want 1", len(drafts))
	case provider.calls != 1:
		t.Fatalf("provider.calls = %d, want 1", provider.calls)
	case drafts[0].HumanSummary != wantSummary:
		t.Fatalf("HumanSummary = %q, want %q", drafts[0].HumanSummary, wantSummary)
	}
}

// TestRuntime_RunColdPathOnce_UsesDefaultDraftGeneratorWhenFactoryHasNoProvider
// checks that a generator built with a nil provider and an empty model name
// still yields exactly one draft for a ready rule, targeting the "weather"
// skill and carrying a non-empty body.
func TestRuntime_RunColdPathOnce_UsesDefaultDraftGeneratorWhenFactoryHasNoProvider(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	// Seed the one ready rule the cold path will draft from.
	records := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if appendErr := store.AppendLearningRecords(records); appendErr != nil {
		t.Fatalf("AppendLearningRecords: %v", appendErr)
	}

	opts := evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: evolution.NewDraftGeneratorForWorkspace(workspace, nil, ""),
	}
	rt, err := evolution.NewRuntime(opts)
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}

	// Cases are evaluated in order and each failure aborts the test, so
	// drafts[0] is only touched once the length check has passed.
	switch {
	case len(drafts) != 1:
		t.Fatalf("len(drafts) = %d, want 1", len(drafts))
	case drafts[0].TargetSkillName != "weather":
		t.Fatalf("TargetSkillName = %q, want weather", drafts[0].TargetSkillName)
	case drafts[0].BodyOrPatch == "":
		t.Fatal("expected generated draft body")
	}
}

// TestRuntime_RunColdPathOnce_UsesGeneratorFactoryWorkspaceForFallback writes
// a "weather" skill into the workspace, wires the runtime through a
// GeneratorFactory whose provider replies with non-JSON content, and checks
// that the single stored draft is an append containing a
// "## Learned Evolution" section.
func TestRuntime_RunColdPathOnce_UsesGeneratorFactoryWorkspaceForFallback(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	// Lay down an existing skill file inside the workspace.
	skillDir := filepath.Join(workspace, "skills", "weather")
	if mkErr := os.MkdirAll(skillDir, 0o755); mkErr != nil {
		t.Fatalf("MkdirAll: %v", mkErr)
	}
	existingSkill := []byte(
		"---\nname: weather\ndescription: workspace weather helper\n---\n# Weather\n## Start Here\nUse the workspace-specific path.\n",
	)
	if writeErr := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), existingSkill, 0o644); writeErr != nil {
		t.Fatalf("WriteFile: %v", writeErr)
	}

	records := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
		SuccessRate: 1,
		WinningPath: []string{"weather"},
	}}
	if appendErr := store.AppendLearningRecords(records); appendErr != nil {
		t.Fatalf("AppendLearningRecords: %v", appendErr)
	}

	// The provider's reply is deliberately not JSON.
	llm := &llmDraftRuntimeProvider{
		response:     &providers.LLMResponse{Content: `not-json`},
		defaultModel: "runtime-test-model",
	}
	// The generator is built from the workspace argument the runtime passes in.
	factory := func(ws string) evolution.DraftGenerator {
		return evolution.NewDraftGeneratorForWorkspace(ws, llm, "runtime-explicit-model")
	}

	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:           config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:            store,
		GeneratorFactory: factory,
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if count := len(drafts); count != 1 {
		t.Fatalf("len(drafts) = %d, want 1", count)
	}

	draft := drafts[0]
	if got, want := draft.ChangeKind, evolution.ChangeKindAppend; got != want {
		t.Fatalf("ChangeKind = %q, want %q", got, want)
	}
	if body := draft.BodyOrPatch; !strings.Contains(body, "## Learned Evolution") {
		t.Fatalf("BodyOrPatch = %q, want appended learned evolution section", body)
	}
}

// TestRuntime_RunColdPathOnce_PersistsEarlierDraftWhenLaterRuleFails seeds two
// ready rules and a generator scripted to return a draft and then
// context.DeadlineExceeded. It checks that RunColdPathOnce reports that error
// while the store still holds exactly one draft, sourced from "rule-1".
func TestRuntime_RunColdPathOnce_PersistsEarlierDraftWhenLaterRuleFails(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	firstRule := evolution.LearningRecord{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}
	secondRule := evolution.LearningRecord{
		ID:          "rule-2",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000100, 0).UTC(),
		Summary:     "release path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}
	appendErr := store.AppendLearningRecords([]evolution.LearningRecord{firstRule, secondRule})
	if appendErr != nil {
		t.Fatalf("AppendLearningRecords: %v", appendErr)
	}

	// Scripted outcomes: one usable draft, then a deadline error.
	okResult := draftGenerationResult{
		draft: evolution.SkillDraft{
			ID:              "draft-1",
			TargetSkillName: "weather",
			DraftType:       evolution.DraftTypeShortcut,
			ChangeKind:      evolution.ChangeKindAppend,
			HumanSummary:    "prefer native-name path first",
			BodyOrPatch:     "## Start Here\nUse native-name query first.",
		},
	}
	failResult := draftGenerationResult{err: context.DeadlineExceeded}
	seq := &sequenceDraftGenerator{
		results: []draftGenerationResult{okResult, failResult},
	}

	rt, newErr := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: seq,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	})
	if newErr != nil {
		t.Fatalf("NewRuntime: %v", newErr)
	}

	// The run as a whole must report the generator's failure.
	runErr := rt.RunColdPathOnce(context.Background(), workspace)
	if !errors.Is(runErr, context.DeadlineExceeded) {
		t.Fatalf("RunColdPathOnce error = %v, want %v", runErr, context.DeadlineExceeded)
	}

	drafts, loadErr := store.LoadDrafts()
	if loadErr != nil {
		t.Fatalf("LoadDrafts: %v", loadErr)
	}
	if count := len(drafts); count != 1 {
		t.Fatalf("len(drafts) = %d, want 1", count)
	}
	if source := drafts[0].SourceRecordID; source != "rule-1" {
		t.Fatalf("SourceRecordID = %q, want rule-1", source)
	}
}

// TestRuntime_RunColdPathOnce_RegeneratesAfterQuarantinedDraft pre-saves a
// quarantined draft for "rule-1" and then runs the cold path with a stub
// generator. It checks that the store ends up with two drafts and that the
// second one is the newly generated "draft-new".
func TestRuntime_RunColdPathOnce_RegeneratesAfterQuarantinedDraft(t *testing.T) {
	workspace := t.TempDir()
	store := evolution.NewStore(evolution.NewPaths(workspace, ""))

	records := []evolution.LearningRecord{{
		ID:          "rule-1",
		Kind:        evolution.RecordKindRule,
		WorkspaceID: workspace,
		CreatedAt:   time.Unix(1700000000, 0).UTC(),
		Summary:     "weather native-name path",
		Status:      evolution.RecordStatus("ready"),
		EventCount:  4,
	}}
	if appendErr := store.AppendLearningRecords(records); appendErr != nil {
		t.Fatalf("AppendLearningRecords: %v", appendErr)
	}

	// An earlier attempt for the same rule that is already quarantined.
	quarantined := evolution.SkillDraft{
		ID:              "draft-old",
		WorkspaceID:     workspace,
		CreatedAt:       time.Unix(1700000100, 0).UTC(),
		SourceRecordID:  "rule-1",
		TargetSkillName: "weather",
		DraftType:       evolution.DraftTypeShortcut,
		ChangeKind:      evolution.ChangeKindAppend,
		HumanSummary:    "broken attempt",
		BodyOrPatch:     "## Start Here\nBroken content.",
		Status:          evolution.DraftStatusQuarantined,
		ScanFindings:    []string{"apply failed"},
	}
	if saveErr := store.SaveDrafts([]evolution.SkillDraft{quarantined}); saveErr != nil {
		t.Fatalf("SaveDrafts: %v", saveErr)
	}

	// The stub generator returns this replacement draft.
	replacement := stubDraftGenerator{
		draft: evolution.SkillDraft{
			ID:              "draft-new",
			TargetSkillName: "weather",
			DraftType:       evolution.DraftTypeShortcut,
			ChangeKind:      evolution.ChangeKindAppend,
			HumanSummary:    "fixed attempt",
			BodyOrPatch:     "## Start Here\nUse native-name query first.",
		},
	}
	rt, err := evolution.NewRuntime(evolution.RuntimeOptions{
		Config:         config.EvolutionConfig{Enabled: true, Mode: "draft"},
		Store:          store,
		DraftGenerator: replacement,
		SkillsRecaller: evolution.NewSkillsRecaller(workspace),
	})
	if err != nil {
		t.Fatalf("NewRuntime: %v", err)
	}

	if err = rt.RunColdPathOnce(context.Background(), workspace); err != nil {
		t.Fatalf("RunColdPathOnce: %v", err)
	}

	drafts, err := store.LoadDrafts()
	if err != nil {
		t.Fatalf("LoadDrafts: %v", err)
	}
	if count := len(drafts); count != 2 {
		t.Fatalf("len(drafts) = %d, want 2", count)
	}
	if newest := drafts[1].ID; newest != "draft-new" {
		t.Fatalf("drafts[1].ID = %q, want draft-new", newest)
	}
}

// llmDraftRuntimeProvider is a scripted provider stub used by the cold-path
// tests. Chat increments calls and returns response and err unchanged;
// GetDefaultModel returns defaultModel, falling back to "runtime-test-model"
// when defaultModel is empty.
type llmDraftRuntimeProvider struct {
	response     *providers.LLMResponse
	err          error
	calls        int
	defaultModel string
}

// Chat ignores all of its arguments, counts the invocation in p.calls, and
// returns the response and error configured on the stub.
func (p *llmDraftRuntimeProvider) Chat(
	_ context.Context,
	_ []providers.Message,
	_ []providers.ToolDefinition,
	_ string,
	_ map[string]any,
) (*providers.LLMResponse, error) {
	// Count first so tests can assert how often the provider was consulted.
	p.calls++

	canned, cannedErr := p.response, p.err
	return canned, cannedErr
}

// GetDefaultModel returns the configured defaultModel, or
// "runtime-test-model" when none was set on the stub.
func (p *llmDraftRuntimeProvider) GetDefaultModel() string {
	if p.defaultModel == "" {
		return "runtime-test-model"
	}
	return p.defaultModel
}