diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index bc6d2b39b..5749149c1 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -55,6 +55,7 @@ type AgentLoop struct { transcriber asr.Transcriber cmdRegistry *commands.Registry mcp mcpRuntime + evolution *evolutionBridge hookRuntime hookRuntime steering *steeringQueue pendingSkills sync.Map @@ -310,6 +311,15 @@ func (al *AgentLoop) Close() { }) } } + evolution := al.currentEvolutionBridge() + if evolution != nil { + if err := evolution.Close(); err != nil { + logger.ErrorCF("agent", "Failed to close evolution bridge", + map[string]any{ + "error": err.Error(), + }) + } + } al.GetRegistry().Close() if al.hooks != nil { @@ -394,14 +404,29 @@ func (al *AgentLoop) ReloadProviderAndConfig( // Ensure shared tools are re-registered on the new registry registerSharedTools(al, cfg, al.bus, registry, provider) + newEvolution, evolutionErr := newEvolutionBridge(registry, cfg, provider) + if evolutionErr != nil { + logger.WarnCF("agent", "Failed to reinitialize evolution bridge during reload", + map[string]any{"error": evolutionErr.Error()}) + } + if newEvolution != nil { + newEvolution.setCurrentCheck(al.isCurrentEvolutionBridge) + if err := newEvolution.subscribeRuntimeEvents(al.runtimeEvents.Channel()); err != nil { + logger.WarnCF("agent", "Failed to subscribe reloaded evolution bridge to runtime events", + map[string]any{"error": err.Error()}) + } + } + // Atomically swap the config and registry under write lock // This ensures readers see a consistent pair al.mu.Lock() oldRegistry := al.registry + oldEvolution := al.evolution // Store new values al.cfg = cfg al.registry = registry + al.evolution = newEvolution // Also update fallback chain with new config; rebuild rate limiter registry. 
newRL := providers.NewRateLimiterRegistry() @@ -429,6 +454,12 @@ func (al *AgentLoop) ReloadProviderAndConfig( map[string]any{"error": err.Error()}) } } + if oldEvolution != nil { + if err := oldEvolution.Close(); err != nil { + logger.WarnCF("agent", "Failed to close previous evolution bridge during reload", + map[string]any{"error": err.Error()}) + } + } if err := al.ensureMCPInitialized(ctx); err != nil { logger.WarnCF("agent", "MCP failed to reinitialize after reload", map[string]any{"error": err.Error()}) diff --git a/pkg/agent/agent_event.go b/pkg/agent/agent_event.go index 99ea2a18e..174b93e38 100644 --- a/pkg/agent/agent_event.go +++ b/pkg/agent/agent_event.go @@ -47,9 +47,40 @@ func (al *AgentLoop) emitEvent(kind runtimeevents.Kind, meta HookMeta, payload a return } + deliveredToEvolution := false + if kind == runtimeevents.KindAgentTurnEnd { + evolution := al.currentEvolutionBridge() + if evolution != nil { + deliveredToEvolution = evolution.handleRuntimeTurnEnd(evt) + } + } + if deliveredToEvolution { + if evt.Attrs == nil { + evt.Attrs = make(map[string]any, 1) + } + evt.Attrs[evolutionDirectDeliveryAttr] = true + } al.publishRuntimeEvent(evt) } +func (al *AgentLoop) currentEvolutionBridge() *evolutionBridge { + if al == nil { + return nil + } + al.mu.RLock() + defer al.mu.RUnlock() + return al.evolution +} + +func (al *AgentLoop) isCurrentEvolutionBridge(bridge *evolutionBridge) bool { + if al == nil || bridge == nil { + return false + } + al.mu.RLock() + defer al.mu.RUnlock() + return al.evolution == bridge +} + // MountHook registers an in-process hook on the agent loop. 
func (al *AgentLoop) MountHook(reg HookRegistration) error { if al == nil || al.hooks == nil { diff --git a/pkg/agent/agent_init.go b/pkg/agent/agent_init.go index 8420cd101..50f0227a1 100644 --- a/pkg/agent/agent_init.go +++ b/pkg/agent/agent_init.go @@ -49,6 +49,13 @@ func NewAgentLoop( stateManager = state.NewManager(defaultAgent.Workspace) } + bridge, err := newEvolutionBridge(registry, cfg, provider) + if err != nil { + logger.WarnCF("agent", "Failed to initialize evolution bridge", map[string]any{ + "error": err.Error(), + }) + } + // Determine worker pool size from config (default: 1 = sequential) workerPoolSize := cfg.Agents.Defaults.MaxParallelTurns if workerPoolSize <= 0 { @@ -62,6 +69,7 @@ func NewAgentLoop( state: stateManager, fallback: fallbackChain, cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()), + evolution: bridge, steering: newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)), workerSem: make(chan struct{}, workerPoolSize), ownsRuntimeEvents: true, @@ -75,6 +83,14 @@ func NewAgentLoop( al.runtimeEvents = runtimeevents.NewBus() al.ownsRuntimeEvents = true } + if bridge != nil { + bridge.setCurrentCheck(al.isCurrentEvolutionBridge) + if err := bridge.subscribeRuntimeEvents(al.runtimeEvents.Channel()); err != nil { + logger.WarnCF("agent", "Failed to subscribe evolution bridge to runtime events", map[string]any{ + "error": err.Error(), + }) + } + } al.refreshRuntimeEventLogger(cfg) al.providerFactory = providers.CreateProviderFromConfig al.hooks = NewHookManager(al.runtimeEvents.Channel()) diff --git a/pkg/agent/context.go b/pkg/agent/context.go index 7f5b32fef..87bdd6b41 100644 --- a/pkg/agent/context.go +++ b/pkg/agent/context.go @@ -1024,10 +1024,28 @@ func (cb *ContextBuilder) AddAssistantMessage( } func (cb *ContextBuilder) buildActiveSkillsContext(skillNames []string) string { - if cb.skillsLoader == nil || len(skillNames) == 0 { + ordered := cb.ResolveActiveSkillsForContext(skillNames) + if len(ordered) == 0 
{ return "" } + content := cb.skillsLoader.LoadSkillsForContext(ordered) + if strings.TrimSpace(content) == "" { + return "" + } + + return fmt.Sprintf(`# Active Skills + +The following skills are active for this request. Follow them when relevant. + +%s`, content) +} + +func (cb *ContextBuilder) ResolveActiveSkillsForContext(skillNames []string) []string { + if cb.skillsLoader == nil || len(skillNames) == 0 { + return nil + } + var ordered []string seen := make(map[string]struct{}, len(skillNames)) for _, name := range skillNames { @@ -1042,19 +1060,9 @@ func (cb *ContextBuilder) buildActiveSkillsContext(skillNames []string) string { ordered = append(ordered, canonical) } if len(ordered) == 0 { - return "" + return nil } - - content := cb.skillsLoader.LoadSkillsForContext(ordered) - if strings.TrimSpace(content) == "" { - return "" - } - - return fmt.Sprintf(`# Active Skills - -The following skills are active for this request. Follow them when relevant. - -%s`, content) + return ordered } func (cb *ContextBuilder) buildActiveSkillsPromptParts(skillNames []string) []PromptPart { diff --git a/pkg/agent/event_payloads.go b/pkg/agent/event_payloads.go index 18fcbd4a0..dee3e620a 100644 --- a/pkg/agent/event_payloads.go +++ b/pkg/agent/event_payloads.go @@ -20,12 +20,39 @@ type TurnStartPayload struct { MediaCount int } +const ( + skillContextTriggerInitialBuild = "initial_build" + skillContextTriggerContextRetryRebuild = "context_retry_rebuild" +) + +type SkillContextSnapshot struct { + Sequence int `json:"sequence"` + Trigger string `json:"trigger"` + SkillNames []string `json:"skill_names,omitempty"` +} + +type ToolExecutionRecord struct { + Name string `json:"name"` + Success bool `json:"success"` + ErrorSummary string `json:"error_summary,omitempty"` + SkillNames []string `json:"skill_names,omitempty"` +} + // TurnEndPayload describes the completion of a turn. 
type TurnEndPayload struct { - Status TurnEndStatus - Iterations int - Duration time.Duration - FinalContentLen int + Status TurnEndStatus + Workspace string + Iterations int + Duration time.Duration + FinalContentLen int + UserMessage string + FinalContent string + ActiveSkills []string + AttemptedSkills []string + FinalSuccessfulPath []string + SkillContextSnapshots []SkillContextSnapshot + ToolKinds []string + ToolExecutions []ToolExecutionRecord } // LLMRequestPayload describes an outbound LLM request. diff --git a/pkg/agent/events.go b/pkg/agent/events.go index 0dd861f43..b23350774 100644 --- a/pkg/agent/events.go +++ b/pkg/agent/events.go @@ -1,5 +1,11 @@ package agent +import ( + "time" + + runtimeevents "github.com/sipeed/picoclaw/pkg/events" +) + // HookMeta contains correlation fields shared by agent hook requests and // runtime events emitted from turn processing. type HookMeta struct { @@ -12,3 +18,42 @@ type HookMeta struct { Source string turnContext *TurnContext } + +// EventKind is the legacy in-agent event kind alias kept for tests and +// compatibility shims on top of the runtime event bus. 
+type EventKind = runtimeevents.Kind + +const ( + EventKindTurnStart EventKind = runtimeevents.KindAgentTurnStart + EventKindTurnEnd EventKind = runtimeevents.KindAgentTurnEnd + EventKindLLMRequest EventKind = runtimeevents.KindAgentLLMRequest + EventKindLLMDelta EventKind = runtimeevents.KindAgentLLMDelta + EventKindLLMResponse EventKind = runtimeevents.KindAgentLLMResponse + EventKindLLMRetry EventKind = runtimeevents.KindAgentLLMRetry + EventKindContextCompress EventKind = runtimeevents.KindAgentContextCompress + EventKindSessionSummarize EventKind = runtimeevents.KindAgentSessionSummarize + EventKindToolExecStart EventKind = runtimeevents.KindAgentToolExecStart + EventKindToolExecEnd EventKind = runtimeevents.KindAgentToolExecEnd + EventKindToolExecSkipped EventKind = runtimeevents.KindAgentToolExecSkipped + EventKindSteeringInjected EventKind = runtimeevents.KindAgentSteeringInjected + EventKindFollowUpQueued EventKind = runtimeevents.KindAgentFollowUpQueued + EventKindInterruptReceived EventKind = runtimeevents.KindAgentInterruptReceived + EventKindSubTurnSpawn EventKind = runtimeevents.KindAgentSubTurnSpawn + EventKindSubTurnEnd EventKind = runtimeevents.KindAgentSubTurnEnd + EventKindSubTurnResultDelivered EventKind = runtimeevents.KindAgentSubTurnResultDelivered + EventKindSubTurnOrphan EventKind = runtimeevents.KindAgentSubTurnOrphan + EventKindError EventKind = runtimeevents.KindAgentError +) + +// EventMeta is the legacy name for hook metadata. +type EventMeta = HookMeta + +// Event is the legacy agent event envelope exposed by SubscribeEvents and a +// handful of tests. Runtime code publishes pkg/events.Event internally. 
+type Event struct { + Kind EventKind + Time time.Time + Meta EventMeta + Context *TurnContext + Payload any +} diff --git a/pkg/agent/evolution_bridge.go b/pkg/agent/evolution_bridge.go new file mode 100644 index 000000000..2e54c8690 --- /dev/null +++ b/pkg/agent/evolution_bridge.go @@ -0,0 +1,444 @@ +package agent + +import ( + "context" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/sipeed/picoclaw/pkg/config" + runtimeevents "github.com/sipeed/picoclaw/pkg/events" + "github.com/sipeed/picoclaw/pkg/evolution" + "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/providers" +) + +type evolutionBridge struct { + cfg config.EvolutionConfig + registry *AgentRegistry + runtime *evolution.Runtime + coldPathRunner *evolution.ColdPathRunner + runtimeSub runtimeevents.Subscription + bgCtx context.Context + cancel context.CancelFunc + closeMu sync.Mutex + closed bool + wg sync.WaitGroup + isCurrent func(*evolutionBridge) bool + + scheduledMu sync.Mutex + scheduledWorkspaces map[string]struct{} +} + +const evolutionDirectDeliveryAttr = "evolution_direct_delivery" + +func newEvolutionBridge( + registry *AgentRegistry, + cfg *config.Config, + provider providers.LLMProvider, +) (*evolutionBridge, error) { + if cfg == nil { + return nil, nil + } + + modelID := resolvedEvolutionModelID(cfg, provider) + runtime, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: cfg.Evolution, + PatternClusterer: evolution.NewLLMPatternClusterer( + provider, + modelID, + evolution.NewHeuristicPatternClusterer(cfg.Evolution.EffectiveMinTaskCount(), nil), + cfg.Evolution.EffectiveMinTaskCount(), + nil, + ), + GeneratorFactory: func(workspace string) evolution.DraftGenerator { + return evolution.NewDraftGeneratorForWorkspace(workspace, provider, modelID) + }, + SuccessJudgeFactory: func(workspace string) evolution.SuccessJudge { + return evolution.NewLLMTaskSuccessJudge(provider, modelID, &evolution.HeuristicSuccessJudge{}) + }, + 
ApplierFactory: func(workspace string) *evolution.Applier { + return evolution.NewApplier(evolution.NewPaths(workspace, cfg.Evolution.StateDir), nil) + }, + }) + if err != nil { + return nil, err + } + bgCtx, cancel := context.WithCancel(context.Background()) + + bridge := &evolutionBridge{ + cfg: cfg.Evolution, + registry: registry, + runtime: runtime, + bgCtx: bgCtx, + cancel: cancel, + } + if cfg.Evolution.RunsColdPathAutomatically() { + bridge.coldPathRunner = evolution.NewColdPathRunnerWithErrorHandler(runtime, func(err error) { + logger.WarnCF("agent", "Cold path run failed", map[string]any{ + "error": err.Error(), + }) + }) + } + if cfg.Evolution.RunsColdPathScheduled() { + bridge.startScheduledColdPath(cfg.Agents.Defaults.Workspace, cfg.Evolution.EffectiveColdPathTimes()) + bridge.rememberScheduledColdPathWorkspaces(registryWorkspaces(registry)) + } + + return bridge, nil +} + +func resolvedEvolutionModelID(cfg *config.Config, provider providers.LLMProvider) string { + if cfg != nil { + if modelID := cfg.Agents.Defaults.GetModelName(); modelID != "" { + return modelID + } + } + if provider != nil { + return provider.GetDefaultModel() + } + return "" +} + +func (b *evolutionBridge) Close() error { + if b == nil { + return nil + } + + if b.runtimeSub != nil { + if err := b.runtimeSub.Close(); err != nil { + logger.WarnCF("agent", "Failed to close evolution runtime subscription", map[string]any{ + "error": err.Error(), + }) + } + <-b.runtimeSub.Done() + } + + b.closeMu.Lock() + alreadyClosed := b.closed + b.closed = true + b.closeMu.Unlock() + if alreadyClosed { + return nil + } + if b.cancel != nil { + b.cancel() + } + var closeErr error + if b.coldPathRunner != nil { + closeErr = b.coldPathRunner.Close() + } + b.wg.Wait() + return closeErr +} + +func (b *evolutionBridge) OnEvent(_ context.Context, evt Event) error { + if b == nil || !b.cfg.Enabled || b.runtime == nil { + return nil + } + + switch evt.Kind { + case EventKindTurnEnd: + payload, ok := 
evt.Payload.(TurnEndPayload) + if !ok { + return nil + } + b.handleTurnEndAsync(evt.Meta, payload) + return nil + } + + return nil +} + +func (b *evolutionBridge) OnRuntimeEvent(_ context.Context, evt runtimeevents.Event) error { + if b == nil || !b.cfg.Enabled || b.runtime == nil || evt.Kind != runtimeevents.KindAgentTurnEnd { + return nil + } + if b.isCurrent != nil && !b.isCurrent(b) { + return nil + } + if deliveredDirectly, _ := evt.Attrs[evolutionDirectDeliveryAttr].(bool); deliveredDirectly { + return nil + } + payload, ok := evt.Payload.(TurnEndPayload) + if !ok { + return nil + } + b.handleTurnEndAsync(hookMetaFromRuntimeEvent(evt), payload) + return nil +} + +func (b *evolutionBridge) handleRuntimeTurnEnd(evt runtimeevents.Event) bool { + if b == nil || !b.cfg.Enabled || b.runtime == nil || evt.Kind != runtimeevents.KindAgentTurnEnd { + return false + } + payload, ok := evt.Payload.(TurnEndPayload) + if !ok { + return false + } + return b.handleTurnEndAsync(hookMetaFromRuntimeEvent(evt), payload) +} + +func (b *evolutionBridge) handleTurnEndAsync(meta EventMeta, payload TurnEndPayload) bool { + if b == nil || b.runtime == nil { + return false + } + + input := evolution.TurnCaseInput{ + Workspace: payload.Workspace, + WorkspaceID: payload.Workspace, + TurnID: meta.TurnID, + SessionKey: meta.SessionKey, + AgentID: meta.AgentID, + Status: string(payload.Status), + UserMessage: payload.UserMessage, + FinalContent: payload.FinalContent, + ToolKinds: append([]string(nil), payload.ToolKinds...), + ToolExecutions: toEvolutionToolExecutions(payload.ToolExecutions), + ActiveSkillNames: append([]string(nil), payload.ActiveSkills...), + AttemptedSkillNames: append([]string(nil), payload.AttemptedSkills...), + FinalSuccessfulPath: append([]string(nil), payload.FinalSuccessfulPath...), + SkillContextSnapshots: toEvolutionSkillContextSnapshots(payload.SkillContextSnapshots), + } + b.rememberScheduledColdPathWorkspace(input.Workspace) + + b.closeMu.Lock() + if b.closed { 
+ b.closeMu.Unlock() + return false + } + b.wg.Add(1) + b.closeMu.Unlock() + go func() { + defer b.wg.Done() + if err := b.runtime.FinalizeTurn(b.bgCtx, input); err != nil { + logger.WarnCF("agent", "Evolution finalize turn failed", map[string]any{ + "error": err.Error(), + "turn_id": input.TurnID, + "workspace": input.Workspace, + }) + return + } + if b.coldPathRunner != nil && b.cfg.RunsColdPathAfterTurn() { + b.coldPathRunner.Trigger(input.Workspace) + } + }() + return true +} + +func (b *evolutionBridge) subscribeRuntimeEvents(ch runtimeevents.EventChannel) error { + if b == nil || ch == nil { + return nil + } + sub, err := ch.Source("agent").OfKind(runtimeevents.KindAgentTurnEnd).Subscribe( + b.bgCtx, + runtimeevents.SubscribeOptions{ + Name: "evolution-bridge", + Buffer: hookObserverBufferSize, + Backpressure: runtimeevents.Block, + Concurrency: runtimeevents.Locked, + }, + func(ctx context.Context, evt runtimeevents.Event) error { + return b.OnRuntimeEvent(ctx, evt) + }, + ) + if err != nil { + return err + } + b.runtimeSub = sub + return nil +} + +func (b *evolutionBridge) setCurrentCheck(check func(*evolutionBridge) bool) { + if b == nil { + return + } + b.closeMu.Lock() + defer b.closeMu.Unlock() + b.isCurrent = check +} + +func (b *evolutionBridge) startScheduledColdPath(workspace string, times []string) { + if b == nil || b.coldPathRunner == nil || len(times) == 0 { + return + } + b.rememberScheduledColdPathWorkspace(workspace) + schedule := parseColdPathSchedule(times) + if len(schedule) == 0 { + logger.WarnCF("agent", "No valid evolution cold path schedule times configured", map[string]any{ + "times": times, + }) + return + } + + b.wg.Add(1) + go func() { + defer b.wg.Done() + for { + now := time.Now() + next := nextColdPathScheduledTime(now, schedule) + timer := time.NewTimer(time.Until(next)) + select { + case <-timer.C: + for _, workspace := range b.scheduledColdPathWorkspaces() { + b.coldPathRunner.Trigger(workspace) + } + case <-b.bgCtx.Done(): + 
if !timer.Stop() { + select { + case <-timer.C: + default: + } + } + return + } + } + }() +} + +func (b *evolutionBridge) rememberScheduledColdPathWorkspace(workspace string) { + if b == nil || !b.cfg.RunsColdPathScheduled() { + return + } + workspace = strings.TrimSpace(workspace) + if workspace == "" { + return + } + b.scheduledMu.Lock() + defer b.scheduledMu.Unlock() + if b.scheduledWorkspaces == nil { + b.scheduledWorkspaces = make(map[string]struct{}) + } + b.scheduledWorkspaces[workspace] = struct{}{} +} + +func (b *evolutionBridge) rememberScheduledColdPathWorkspaces(workspaces []string) { + for _, workspace := range workspaces { + b.rememberScheduledColdPathWorkspace(workspace) + } +} + +func (b *evolutionBridge) scheduledColdPathWorkspaces() []string { + if b == nil { + return nil + } + b.scheduledMu.Lock() + defer b.scheduledMu.Unlock() + out := make([]string, 0, len(b.scheduledWorkspaces)) + for workspace := range b.scheduledWorkspaces { + out = append(out, workspace) + } + sort.Strings(out) + return out +} + +func registryWorkspaces(registry *AgentRegistry) []string { + if registry == nil { + return nil + } + registry.mu.RLock() + defer registry.mu.RUnlock() + + out := make([]string, 0, len(registry.agents)) + seen := make(map[string]struct{}, len(registry.agents)) + for _, agent := range registry.agents { + if agent == nil { + continue + } + workspace := strings.TrimSpace(agent.Workspace) + if workspace == "" { + continue + } + if _, ok := seen[workspace]; ok { + continue + } + seen[workspace] = struct{}{} + out = append(out, workspace) + } + sort.Strings(out) + return out +} + +type coldPathScheduleTime struct { + hour int + minute int +} + +func parseColdPathSchedule(values []string) []coldPathScheduleTime { + out := make([]coldPathScheduleTime, 0, len(values)) + seen := make(map[coldPathScheduleTime]struct{}, len(values)) + for _, value := range values { + parts := strings.Split(strings.TrimSpace(value), ":") + if len(parts) != 2 { + continue + } + 
hour, err := strconv.Atoi(parts[0]) + if err != nil || hour < 0 || hour > 23 { + continue + } + minute, err := strconv.Atoi(parts[1]) + if err != nil || minute < 0 || minute > 59 { + continue + } + item := coldPathScheduleTime{hour: hour, minute: minute} + if _, ok := seen[item]; ok { + continue + } + seen[item] = struct{}{} + out = append(out, item) + } + sort.Slice(out, func(i, j int) bool { + if out[i].hour != out[j].hour { + return out[i].hour < out[j].hour + } + return out[i].minute < out[j].minute + }) + return out +} + +func nextColdPathScheduledTime(now time.Time, schedule []coldPathScheduleTime) time.Time { + for _, item := range schedule { + candidate := time.Date(now.Year(), now.Month(), now.Day(), item.hour, item.minute, 0, 0, now.Location()) + if candidate.After(now) { + return candidate + } + } + first := schedule[0] + tomorrow := now.AddDate(0, 0, 1) + return time.Date(tomorrow.Year(), tomorrow.Month(), tomorrow.Day(), first.hour, first.minute, 0, 0, now.Location()) +} + +func toEvolutionSkillContextSnapshots(input []SkillContextSnapshot) []evolution.SkillContextSnapshot { + if len(input) == 0 { + return nil + } + + out := make([]evolution.SkillContextSnapshot, 0, len(input)) + for _, snapshot := range input { + out = append(out, evolution.SkillContextSnapshot{ + Sequence: snapshot.Sequence, + Trigger: snapshot.Trigger, + SkillNames: append([]string(nil), snapshot.SkillNames...), + }) + } + return out +} + +func toEvolutionToolExecutions(input []ToolExecutionRecord) []evolution.ToolExecutionRecord { + if len(input) == 0 { + return nil + } + + out := make([]evolution.ToolExecutionRecord, 0, len(input)) + for _, record := range input { + out = append(out, evolution.ToolExecutionRecord{ + Name: record.Name, + Success: record.Success, + ErrorSummary: record.ErrorSummary, + SkillNames: append([]string(nil), record.SkillNames...), + }) + } + return out +} diff --git a/pkg/agent/evolution_bridge_test.go b/pkg/agent/evolution_bridge_test.go new file mode 
100644 index 000000000..8469acd80 --- /dev/null +++ b/pkg/agent/evolution_bridge_test.go @@ -0,0 +1,1344 @@ +package agent + +import ( + "context" + "encoding/json" + "errors" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/bus" + "github.com/sipeed/picoclaw/pkg/config" + runtimeevents "github.com/sipeed/picoclaw/pkg/events" + "github.com/sipeed/picoclaw/pkg/evolution" + "github.com/sipeed/picoclaw/pkg/providers" +) + +func TestEvolutionBridge_DisabledWritesNothing(t *testing.T) { + tmpDir := t.TempDir() + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: false, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + resp, err := al.ProcessDirectWithChannel(context.Background(), "hello", "session-disabled", "cli", "direct") + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "ok" { + t.Fatalf("response = %q, want %q", resp, "ok") + } + + assertNotExists(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + assertNotExists(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json")) +} + +func TestEvolutionBridge_ObserveWritesCaseRecord(t *testing.T) { + tmpDir := t.TempDir() + provider := &toolCallRespProvider{ + toolName: "echo_text", + toolArgs: map[string]any{"text": "bridge"}, + response: "done", + } + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, provider) + defer al.Close() + + defaultAgent := al.registry.GetDefaultAgent() + if defaultAgent == nil { + t.Fatal("expected default agent") + } + defaultAgent.SkillsFilter = []string{"observe-skill"} + al.RegisterTool(&echoTextTool{}) + + resp, err := al.ProcessDirectWithChannel(context.Background(), "hello", "session-observe", "cli", "direct") + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "done" { + t.Fatalf("response = %q, want %q", resp, "done") + 
} + + record := waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + + if got := record["kind"]; got != string(evolution.RecordKindCase) { + t.Fatalf("kind = %v, want %q", got, evolution.RecordKindCase) + } + if got := record["workspace_id"]; got != tmpDir { + t.Fatalf("workspace_id = %v, want %q", got, tmpDir) + } + if got := record["status"]; got != "new" { + t.Fatalf("status = %v, want %q", got, "new") + } + + for _, field := range []string{"tool_kinds", "tool_executions", "initial_skill_names", "active_skill_names", "attempt_trail", "source"} { + if _, exists := record[field]; exists { + t.Fatalf("%s should not be persisted in slim task record: %#v", field, record[field]) + } + } +} + +func TestEvolutionBridge_TurnEndBypassesHookObserverBackpressure(t *testing.T) { + tmpDir := t.TempDir() + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + blocker := &blockingRuntimeObserver{ + started: make(chan struct{}), + release: make(chan struct{}), + } + defer close(blocker.release) + al.hooks.ConfigureTimeouts(5*time.Second, 0, 0) + if err := al.MountHook(NamedHook("aaa-block-runtime-events", blocker)); err != nil { + t.Fatalf("MountHook: %v", err) + } + + al.publishRuntimeEvent(runtimeevents.Event{ + Kind: runtimeevents.KindAgentTurnStart, + Source: runtimeevents.Source{Component: "agent", Name: "main"}, + }) + select { + case <-blocker.started: + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for blocking runtime observer") + } + + for i := 0; i < hookObserverBufferSize+10; i++ { + al.publishRuntimeEvent(runtimeevents.Event{ + Kind: runtimeevents.KindAgentLLMDelta, + Source: runtimeevents.Source{Component: "agent", Name: "main"}, + }) + } + + al.emitEvent(runtimeevents.KindAgentTurnEnd, EventMeta{ + AgentID: "main", + TurnID: "turn-backpressure", + SessionKey: "session-backpressure", + }, 
TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: tmpDir, + UserMessage: "hello", + FinalContent: "ok", + }) + + record := waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + if got := record["session_key"]; got != "session-backpressure" { + t.Fatalf("session_key = %v, want session-backpressure", got) + } + if got := record["summary"]; got != "hello" { + t.Fatalf("summary = %v, want hello", got) + } +} + +func TestEvolutionBridge_RuntimeBusTurnEndWritesCaseRecord(t *testing.T) { + tmpDir := t.TempDir() + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + result := al.RuntimeEventBus().Publish(context.Background(), runtimeevents.Event{ + Kind: runtimeevents.KindAgentTurnEnd, + Source: runtimeevents.Source{Component: "agent", Name: "main"}, + Scope: runtimeevents.Scope{ + AgentID: "main", + TurnID: "turn-runtime-bus", + SessionKey: "session-runtime-bus", + }, + Payload: TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: tmpDir, + UserMessage: "runtime bus task", + FinalContent: "ok", + }, + }) + if result.Delivered == 0 { + t.Fatalf("runtime bus publish delivered = %d, want > 0", result.Delivered) + } + + record := waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + if got := record["session_key"]; got != "session-runtime-bus" { + t.Fatalf("session_key = %v, want session-runtime-bus", got) + } + if got := record["summary"]; got != "runtime bus task" { + t.Fatalf("summary = %v, want runtime bus task", got) + } +} + +func TestEvolutionBridge_RuntimeBusOnlyCurrentBridgeConsumesTurnEnd(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, + } + eventBus := runtimeevents.NewBus() + defer eventBus.Close() + + oldBridge, err := newEvolutionBridge(nil, cfg, nil) + if 
err != nil { + t.Fatalf("newEvolutionBridge(old): %v", err) + } + defer oldBridge.Close() + newBridge, err := newEvolutionBridge(nil, cfg, nil) + if err != nil { + t.Fatalf("newEvolutionBridge(new): %v", err) + } + defer newBridge.Close() + + current := newBridge + oldBridge.setCurrentCheck(func(bridge *evolutionBridge) bool { + return current == bridge + }) + newBridge.setCurrentCheck(func(bridge *evolutionBridge) bool { + return current == bridge + }) + if err := oldBridge.subscribeRuntimeEvents(eventBus.Channel()); err != nil { + t.Fatalf("old subscribeRuntimeEvents: %v", err) + } + if err := newBridge.subscribeRuntimeEvents(eventBus.Channel()); err != nil { + t.Fatalf("new subscribeRuntimeEvents: %v", err) + } + + eventBus.Publish(context.Background(), runtimeevents.Event{ + Kind: runtimeevents.KindAgentTurnEnd, + Source: runtimeevents.Source{Component: "agent", Name: "main"}, + Scope: runtimeevents.Scope{ + AgentID: "main", + TurnID: "turn-current-bridge", + SessionKey: "session-current-bridge", + }, + Payload: TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: tmpDir, + UserMessage: "current bridge task", + FinalContent: "ok", + }, + }) + + recordsPath := filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl") + waitForEvolutionRecord(t, recordsPath) + time.Sleep(100 * time.Millisecond) + if got := countEvolutionTaskRecords(t, recordsPath); got != 1 { + t.Fatalf("task record count = %d, want 1", got) + } +} + +func TestEvolutionBridge_DirectDeliveryFailureFallsBackToCurrentRuntimeBridge(t *testing.T) { + tmpDir := t.TempDir() + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + oldBridge := al.evolution + if oldBridge == nil { + t.Fatal("expected initial evolution bridge") + } + defer oldBridge.Close() + + newBridge, err := newEvolutionBridge(al.registry, al.cfg, &simpleMockProvider{response: "ok"}) + if err != nil { + 
t.Fatalf("newEvolutionBridge: %v", err) + } + newBridge.setCurrentCheck(al.isCurrentEvolutionBridge) + if err := newBridge.subscribeRuntimeEvents(al.RuntimeEventBus().Channel()); err != nil { + t.Fatalf("subscribeRuntimeEvents: %v", err) + } + + oldBridge.closeMu.Lock() + done := make(chan struct{}) + go func() { + defer close(done) + al.emitEvent(runtimeevents.KindAgentTurnEnd, EventMeta{ + AgentID: "main", + TurnID: "turn-direct-fallback", + SessionKey: "session-direct-fallback", + }, TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: tmpDir, + UserMessage: "direct fallback task", + FinalContent: "ok", + }) + }() + + time.Sleep(20 * time.Millisecond) + al.mu.Lock() + al.evolution = newBridge + al.mu.Unlock() + oldBridge.closed = true + oldBridge.closeMu.Unlock() + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for emitEvent") + } + + recordsPath := filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl") + record := waitForEvolutionRecord(t, recordsPath) + if got := record["session_key"]; got != "session-direct-fallback" { + t.Fatalf("session_key = %v, want session-direct-fallback", got) + } + if got := countEvolutionTaskRecords(t, recordsPath); got != 1 { + t.Fatalf("task record count = %d, want 1", got) + } +} + +func TestEvolutionBridge_CloseCancelsPendingTurnEndRecord(t *testing.T) { + tmpDir := t.TempDir() + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + + al.emitEvent(runtimeevents.KindAgentTurnEnd, EventMeta{ + AgentID: "main", + TurnID: "turn-close-flush", + SessionKey: "session-close-flush", + }, TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: tmpDir, + UserMessage: "close flush task", + FinalContent: "ok", + }) + + done := make(chan struct{}) + go func() { + al.Close() + close(done) + }() + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("Close timed 
out") + } +} + +func TestEvolutionBridge_ObserveTurnEndPayloadIncludesResolvedAttemptTrail(t *testing.T) { + tmpDir := t.TempDir() + skillDir := filepath.Join(tmpDir, "skills", "observe-skill") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile( + filepath.Join(skillDir, "SKILL.md"), + []byte("---\nname: observe-skill\ndescription: observe test skill\n---\n# Observe Skill\n"), + 0o644, + ); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + defaultAgent := al.registry.GetDefaultAgent() + if defaultAgent == nil { + t.Fatal("expected default agent") + } + defaultAgent.SkillsFilter = []string{"missing-skill", "observe-skill", "observe-skill"} + + sub := al.SubscribeEvents(16) + defer al.UnsubscribeEvents(sub.ID) + + resp, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-observe-attempt-trail", + "cli", + "direct", + ) + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "ok" { + t.Fatalf("response = %q, want %q", resp, "ok") + } + + turnEndEvt := waitForEvent(t, sub.C, 2*time.Second, func(evt Event) bool { + return evt.Kind == EventKindTurnEnd + }) + turnEndPayload, ok := turnEndEvt.Payload.(TurnEndPayload) + if !ok { + t.Fatalf("expected TurnEndPayload, got %T", turnEndEvt.Payload) + } + if got := turnEndPayload.AttemptedSkills; len(got) != 1 || got[0] != "observe-skill" { + t.Fatalf("AttemptedSkills = %v, want [observe-skill]", got) + } + if got := turnEndPayload.FinalSuccessfulPath; len(got) != 1 || got[0] != "observe-skill" { + t.Fatalf("FinalSuccessfulPath = %v, want [observe-skill]", got) + } + if got := turnEndPayload.SkillContextSnapshots; len(got) != 1 || got[0].Trigger != skillContextTriggerInitialBuild { + t.Fatalf("SkillContextSnapshots = %+v, want single 
initial_build snapshot", got) + } +} + +func TestEvolutionBridge_ObserveTurnEndUsesLatestSkillSnapshotAfterRetry(t *testing.T) { + tmpDir := t.TempDir() + baseSkillDir := filepath.Join(tmpDir, "skills", "base-skill") + if err := os.MkdirAll(baseSkillDir, 0o755); err != nil { + t.Fatalf("MkdirAll(baseSkillDir): %v", err) + } + if err := os.WriteFile( + filepath.Join(baseSkillDir, "SKILL.md"), + []byte("---\nname: base-skill\ndescription: base test skill\n---\n# Base Skill\n"), + 0o644, + ); err != nil { + t.Fatalf("WriteFile(base-skill): %v", err) + } + + lateSkillPath := filepath.Join(tmpDir, "skills", "late-skill", "SKILL.md") + provider := &lateSkillOnRetryProvider{lateSkillPath: lateSkillPath} + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, provider) + defer al.Close() + + defaultAgent := al.registry.GetDefaultAgent() + if defaultAgent == nil { + t.Fatal("expected default agent") + } + defaultAgent.SkillsFilter = []string{"base-skill", "late-skill"} + + sub := al.SubscribeEvents(16) + defer al.UnsubscribeEvents(sub.ID) + + resp, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-observe-retry-snapshot", + "cli", + "direct", + ) + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "Recovered after retry" { + t.Fatalf("response = %q, want %q", resp, "Recovered after retry") + } + + turnEndEvt := waitForEvent(t, sub.C, 2*time.Second, func(evt Event) bool { + return evt.Kind == EventKindTurnEnd + }) + turnEndPayload, ok := turnEndEvt.Payload.(TurnEndPayload) + if !ok { + t.Fatalf("expected TurnEndPayload, got %T", turnEndEvt.Payload) + } + if got := turnEndPayload.AttemptedSkills; len(got) != 2 || got[0] != "base-skill" || got[1] != "late-skill" { + t.Fatalf("AttemptedSkills = %v, want [base-skill late-skill]", got) + } + if got := turnEndPayload.FinalSuccessfulPath; len(got) != 2 || got[0] != "base-skill" || got[1] != "late-skill" { + 
t.Fatalf("FinalSuccessfulPath = %v, want [base-skill late-skill]", got) + } + if got := turnEndPayload.SkillContextSnapshots; len(got) != 2 { + t.Fatalf("len(SkillContextSnapshots) = %d, want 2", len(got)) + } + if turnEndPayload.SkillContextSnapshots[0].Trigger != skillContextTriggerInitialBuild { + t.Fatalf( + "SkillContextSnapshots[0].Trigger = %q, want %q", + turnEndPayload.SkillContextSnapshots[0].Trigger, + skillContextTriggerInitialBuild, + ) + } + if turnEndPayload.SkillContextSnapshots[1].Trigger != skillContextTriggerContextRetryRebuild { + t.Fatalf( + "SkillContextSnapshots[1].Trigger = %q, want %q", + turnEndPayload.SkillContextSnapshots[1].Trigger, + skillContextTriggerContextRetryRebuild, + ) + } + if got := turnEndPayload.SkillContextSnapshots[1].SkillNames; len(got) != 2 || got[0] != "base-skill" || + got[1] != "late-skill" { + t.Fatalf("SkillContextSnapshots[1].SkillNames = %v, want [base-skill late-skill]", got) + } +} + +func TestEvolutionBridge_ObserveDoesNotCreateDraftFile(t *testing.T) { + tmpDir := t.TempDir() + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + resp, err := al.ProcessDirectWithChannel(context.Background(), "hello", "session-observe-no-draft", "cli", "direct") + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "ok" { + t.Fatalf("response = %q, want %q", resp, "ok") + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + assertNotExists(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json")) +} + +func TestEvolutionBridge_DraftModeAutomaticallyRunsColdPathAndCreatesDraftFile(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + resp, err := 
al.ProcessDirectWithChannel(context.Background(), "hello", "session-auto-cold-path", "cli", "direct") + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "ok" { + t.Fatalf("response = %q, want %q", resp, "ok") + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + waitForDrafts(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json"), 1) +} + +func TestEvolutionBridge_ScheduledModeDoesNotRunColdPathAfterTurn(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + ColdPathTrigger: "scheduled", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + resp, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-scheduled-cold-path", + "cli", + "direct", + ) + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "ok" { + t.Fatalf("response = %q, want %q", resp, "ok") + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + time.Sleep(150 * time.Millisecond) + assertNotExists(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json")) +} + +func TestEvolutionBridge_DraftModeUsesProviderBackedDraftGenerator(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, &simpleMockProvider{ + response: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`, + }) + defer al.Close() + + resp, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-auto-cold-path-llm", + "cli", + "direct", + ) + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp 
== "" { + t.Fatal("expected non-empty response") + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + drafts := waitForDrafts(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json"), 1) + if drafts[0].HumanSummary != "Prefer native-name path first" { + t.Fatalf("HumanSummary = %q, want %q", drafts[0].HumanSummary, "Prefer native-name path first") + } +} + +func TestEvolutionBridge_DraftModeUsesProviderDefaultModel(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + provider := &capturingEvolutionDraftProvider{ + defaultModel: "provider-explicit-model", + response: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`, + } + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, + } + + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + defer al.Close() + + if _, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-auto-cold-path-model", + "cli", + "direct", + ); err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + waitForDrafts(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json"), 1) + if provider.lastModel != "provider-explicit-model" { + t.Fatalf("lastModel = %q, want provider-explicit-model", provider.lastModel) + } +} + +func TestEvolutionBridge_DraftModePrefersConfigDefaultModelName(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + provider := &capturingEvolutionDraftProvider{ + defaultModel: "provider-default-model", + response: 
`{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`, + } + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, + } + cfg.Agents.Defaults.ModelName = "resolved-config-model" + + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + defer al.Close() + + if _, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-auto-cold-path-model-config", + "cli", + "direct", + ); err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + waitForDrafts(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json"), 1) + if provider.lastModel != "resolved-config-model" { + t.Fatalf("lastModel = %q, want resolved-config-model", provider.lastModel) + } +} + +func TestEvolutionBridge_DraftModeKeepsCandidateDraft(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, &simpleMockProvider{ + response: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"create","human_summary":"Create weather helper","body_or_patch":"---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n"}`, + }) + defer al.Close() + + if _, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-apply-no-auto-apply", + "cli", + "direct", + ); err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + 
drafts := waitForDrafts(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json"), 1) + if drafts[0].Status != evolution.DraftStatusCandidate { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate) + } + + assertNotExists(t, filepath.Join(tmpDir, "skills", "weather", "SKILL.md")) + assertProfileNotExists(t, tmpDir, "weather") +} + +func TestEvolutionBridge_ApplyModeAutomaticallyRunsColdPathAndAppliesMergeDraft(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + skillDir := filepath.Join(tmpDir, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + skillPath := filepath.Join(skillDir, "SKILL.md") + original := "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse city names.\n" + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "apply", + }, &simpleMockProvider{ + response: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"merge","human_summary":"Merge native-name path","body_or_patch":"Prefer native-name query first."}`, + }) + defer al.Close() + + if _, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-apply-merge", + "cli", + "direct", + ); err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + drafts := waitForDrafts(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json"), 1) + if drafts[0].Status != evolution.DraftStatusAccepted { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusAccepted) + } + + merged := waitForSkillBody(t, skillPath) + if !strings.Contains(merged, "Use city names.") { + t.Fatalf("merged skill lost original content:\n%s", merged) + 
} + if !strings.Contains(merged, "## Merged Knowledge") { + t.Fatalf("merged skill missing merged section:\n%s", merged) + } + if !strings.Contains(merged, "Prefer native-name query first.") { + t.Fatalf("merged skill missing learned knowledge:\n%s", merged) + } + + profile := waitForProfile(t, tmpDir, "weather") + if profile.Status != evolution.SkillStatusActive { + t.Fatalf("profile status = %q, want %q", profile.Status, evolution.SkillStatusActive) + } + if profile.CurrentVersion == "" { + t.Fatal("expected applied profile current version") + } +} + +func TestEvolutionBridge_ObserveModeDoesNotRunColdPathOrCreateDraftFile(t *testing.T) { + tmpDir := t.TempDir() + seedReadyRule(t, tmpDir) + + al := newEvolutionTestLoop(t, tmpDir, config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, &simpleMockProvider{response: "ok"}) + defer al.Close() + + resp, err := al.ProcessDirectWithChannel( + context.Background(), + "hello", + "session-no-auto-cold-path", + "cli", + "direct", + ) + if err != nil { + t.Fatalf("ProcessDirectWithChannel failed: %v", err) + } + if resp != "ok" { + t.Fatalf("response = %q, want %q", resp, "ok") + } + + waitForEvolutionRecord(t, filepath.Join(tmpDir, "state", "evolution", "task-records.jsonl")) + assertNotExists(t, filepath.Join(tmpDir, "state", "evolution", "skill-drafts.json")) +} + +func TestEvolutionBridge_TurnEndUsesPayloadWorkspace(t *testing.T) { + workspace := t.TempDir() + cfg := &config.Config{ + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, + } + + bridge, err := newEvolutionBridge(nil, cfg, nil) + if err != nil { + t.Fatalf("newEvolutionBridge: %v", err) + } + + err = bridge.OnEvent(context.Background(), Event{ + Kind: EventKindTurnEnd, + Meta: EventMeta{ + AgentID: "main", + TurnID: "turn-1", + SessionKey: "session-1", + }, + Payload: TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: workspace, + ActiveSkills: []string{"observe-skill"}, + ToolKinds: []string{"echo_text"}, + }, + 
}) + if err != nil { + t.Fatalf("OnEvent: %v", err) + } + + record := waitForEvolutionRecord(t, filepath.Join(workspace, "state", "evolution", "task-records.jsonl")) + if got := record["workspace_id"]; got != workspace { + t.Fatalf("workspace_id = %v, want %q", got, workspace) + } +} + +func TestEvolutionBridge_TurnEndUsesExplicitAttemptTrail(t *testing.T) { + workspace := t.TempDir() + cfg := &config.Config{ + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, + } + + bridge, err := newEvolutionBridge(nil, cfg, nil) + if err != nil { + t.Fatalf("newEvolutionBridge: %v", err) + } + + err = bridge.OnEvent(context.Background(), Event{ + Kind: EventKindTurnEnd, + Meta: EventMeta{ + AgentID: "main", + TurnID: "turn-1", + SessionKey: "session-1", + }, + Payload: TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: workspace, + ActiveSkills: []string{"weather"}, + AttemptedSkills: []string{"geocode", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + SkillContextSnapshots: []SkillContextSnapshot{ + {Sequence: 1, Trigger: skillContextTriggerInitialBuild, SkillNames: []string{"weather"}}, + { + Sequence: 2, + Trigger: skillContextTriggerContextRetryRebuild, + SkillNames: []string{"geocode", "weather"}, + }, + }, + ToolKinds: []string{"echo_text"}, + }, + }) + if err != nil { + t.Fatalf("OnEvent: %v", err) + } + + record := waitForEvolutionRecord(t, filepath.Join(workspace, "state", "evolution", "task-records.jsonl")) + usedSkills, ok := record["used_skill_names"].([]any) + if !ok || len(usedSkills) != 2 || usedSkills[0] != "geocode" || usedSkills[1] != "weather" { + t.Fatalf("used_skill_names = %#v, want [geocode weather]", record["used_skill_names"]) + } + for _, field := range []string{"attempt_trail", "initial_skill_names", "added_skill_names"} { + if _, exists := record[field]; exists { + t.Fatalf("%s should not be persisted in slim task record: %#v", field, record[field]) + } + } +} + +func 
TestEvolutionBridge_CloseStopsColdPathRunnerIdempotently(t *testing.T) { + cfg := &config.Config{ + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, + } + + bridge, err := newEvolutionBridge(nil, cfg, nil) + if err != nil { + t.Fatalf("newEvolutionBridge: %v", err) + } + if bridge.coldPathRunner == nil { + t.Fatal("expected cold path runner") + } + + if err := bridge.Close(); err != nil { + t.Fatalf("first Close() error = %v", err) + } + if err := bridge.Close(); err != nil { + t.Fatalf("second Close() error = %v", err) + } + if bridge.coldPathRunner.Trigger(t.TempDir()) { + t.Fatal("expected closed bridge runner to reject new work") + } +} + +func TestEvolutionBridge_CloseRejectsLateTurnEndEvents(t *testing.T) { + workspace := t.TempDir() + cfg := &config.Config{ + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, + } + + bridge, err := newEvolutionBridge(nil, cfg, nil) + if err != nil { + t.Fatalf("newEvolutionBridge: %v", err) + } + + if closeErr := bridge.Close(); closeErr != nil { + t.Fatalf("Close() error = %v", closeErr) + } + + err = bridge.OnEvent(context.Background(), Event{ + Kind: EventKindTurnEnd, + Meta: EventMeta{ + TurnID: "turn-after-close", + SessionKey: "session-after-close", + AgentID: "agent-after-close", + }, + Payload: TurnEndPayload{ + Status: TurnEndStatusCompleted, + Workspace: workspace, + }, + }) + if err != nil { + t.Fatalf("OnEvent() error = %v", err) + } + + assertNotExists(t, filepath.Join(workspace, "state", "evolution", "task-records.jsonl")) +} + +func TestAgentLoop_ReloadProviderAndConfig_RebuildsEvolutionBridge(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + Evolution: config.EvolutionConfig{ + Enabled: false, + Mode: "observe", + }, + } + + al := NewAgentLoop(cfg, bus.NewMessageBus(), &mockProvider{}) + defer 
al.Close() + + oldBridge := al.evolution + if oldBridge == nil { + t.Fatal("expected initial evolution bridge") + } + + reloadCfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "apply", + StateDir: filepath.Join(t.TempDir(), "evolution-state"), + }, + } + + if err := al.ReloadProviderAndConfig(context.Background(), &mockProvider{}, reloadCfg); err != nil { + t.Fatalf("ReloadProviderAndConfig failed: %v", err) + } + + if al.evolution == nil { + t.Fatal("expected evolution bridge after reload") + } + if al.evolution == oldBridge { + t.Fatal("expected evolution bridge to be rebuilt on reload") + } + if al.evolution.cfg.Enabled != reloadCfg.Evolution.Enabled { + t.Fatalf("reloaded evolution enabled = %v, want %v", al.evolution.cfg.Enabled, reloadCfg.Evolution.Enabled) + } + if al.evolution.cfg.Mode != reloadCfg.Evolution.Mode { + t.Fatalf("reloaded evolution mode = %q, want %q", al.evolution.cfg.Mode, reloadCfg.Evolution.Mode) + } + if al.evolution.cfg.StateDir != reloadCfg.Evolution.StateDir { + t.Fatalf("reloaded evolution state_dir = %q, want %q", al.evolution.cfg.StateDir, reloadCfg.Evolution.StateDir) + } +} + +func TestEvolutionBridge_ColdPathScheduleParsing(t *testing.T) { + schedule := parseColdPathSchedule([]string{"18:30", "bad", "03:05", "18:30", "24:00", "09:99"}) + if len(schedule) != 2 { + t.Fatalf("len(schedule) = %d, want 2: %+v", len(schedule), schedule) + } + if schedule[0].hour != 3 || schedule[0].minute != 5 { + t.Fatalf("schedule[0] = %+v, want 03:05", schedule[0]) + } + if schedule[1].hour != 18 || schedule[1].minute != 30 { + t.Fatalf("schedule[1] = %+v, want 18:30", schedule[1]) + } + + now := time.Date(2026, 5, 7, 4, 0, 0, 0, time.Local) + next := nextColdPathScheduledTime(now, schedule) + want := time.Date(2026, 5, 7, 18, 30, 0, 0, 
time.Local) + if !next.Equal(want) { + t.Fatalf("next = %v, want %v", next, want) + } + + now = time.Date(2026, 5, 7, 19, 0, 0, 0, time.Local) + next = nextColdPathScheduledTime(now, schedule) + want = time.Date(2026, 5, 8, 3, 5, 0, 0, time.Local) + if !next.Equal(want) { + t.Fatalf("next after day end = %v, want %v", next, want) + } +} + +func TestEvolutionBridge_ScheduledColdPathTracksObservedWorkspaces(t *testing.T) { + bridge := &evolutionBridge{ + cfg: config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + ColdPathTrigger: "scheduled", + ColdPathTimes: []string{"03:00"}, + }, + } + + bridge.rememberScheduledColdPathWorkspace("/tmp/workspace-b") + bridge.rememberScheduledColdPathWorkspace("/tmp/workspace-a") + bridge.rememberScheduledColdPathWorkspace("/tmp/workspace-b") + bridge.rememberScheduledColdPathWorkspace("") + + got := bridge.scheduledColdPathWorkspaces() + want := []string{"/tmp/workspace-a", "/tmp/workspace-b"} + if strings.Join(got, "\n") != strings.Join(want, "\n") { + t.Fatalf("scheduled workspaces = %v, want %v", got, want) + } +} + +func TestEvolutionBridge_ScheduledColdPathSeedsConfiguredAgentWorkspaces(t *testing.T) { + defaultWorkspace := t.TempDir() + workerWorkspace := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: defaultWorkspace, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + List: []config.AgentConfig{ + {ID: "main", Default: true}, + {ID: "worker", Workspace: workerWorkspace}, + }, + }, + Evolution: config.EvolutionConfig{ + Enabled: true, + Mode: "draft", + ColdPathTrigger: "scheduled", + ColdPathTimes: []string{"03:00"}, + }, + } + registry := NewAgentRegistry(cfg, &simpleMockProvider{response: "ok"}) + bridge, err := newEvolutionBridge(registry, cfg, &simpleMockProvider{response: "ok"}) + if err != nil { + t.Fatalf("newEvolutionBridge: %v", err) + } + defer bridge.Close() + + got := bridge.scheduledColdPathWorkspaces() + want := 
[]string{defaultWorkspace, workerWorkspace} + if strings.Join(got, "\n") != strings.Join(want, "\n") { + t.Fatalf("scheduled workspaces = %v, want %v", got, want) + } +} + +func seedReadyRule(t *testing.T, workspace string) { + t.Helper() + + store := evolution.NewStore(evolution.NewPaths(workspace, "")) + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: workspace, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Label: "weather-native-name-path", + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + TaskRecordIDs: []string{"task-1", "task-2"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } +} + +func newEvolutionTestLoop( + t *testing.T, + workspace string, + evo config.EvolutionConfig, + provider providers.LLMProvider, +) *AgentLoop { + t.Helper() + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: workspace, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + Evolution: evo, + } + + return NewAgentLoop(cfg, bus.NewMessageBus(), provider) +} + +func waitForEvolutionRecord(t *testing.T, path string) map[string]any { + t.Helper() + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + data, err := os.ReadFile(path) + if err == nil { + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + for i := len(lines) - 1; i >= 0; i-- { + if strings.TrimSpace(lines[i]) == "" { + continue + } + var record map[string]any + if err := json.Unmarshal([]byte(lines[i]), &record); err != nil { + t.Fatalf("json.Unmarshal(%s): %v", path, err) + } + if kind, _ := record["kind"].(string); kind == string(evolution.RecordKindTask) { + return record + } + } + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("timed out waiting for evolution record at %s", path) + return nil +} + +func 
countEvolutionTaskRecords(t *testing.T, path string) int { + t.Helper() + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile(%s): %v", path, err) + } + count := 0 + for _, line := range strings.Split(strings.TrimSpace(string(data)), "\n") { + if strings.TrimSpace(line) == "" { + continue + } + var record map[string]any + if err := json.Unmarshal([]byte(line), &record); err != nil { + t.Fatalf("json.Unmarshal(%s): %v", path, err) + } + if kind, _ := record["kind"].(string); kind == string(evolution.RecordKindTask) { + count++ + } + } + return count +} + +func waitForDrafts(t *testing.T, path string, want int) []evolution.SkillDraft { + t.Helper() + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + data, err := os.ReadFile(path) + if err == nil { + var drafts []evolution.SkillDraft + if err := json.Unmarshal(data, &drafts); err != nil { + t.Fatalf("json.Unmarshal(%s): %v", path, err) + } + if len(drafts) == want { + return drafts + } + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("timed out waiting for %d drafts at %s", want, path) + return nil +} + +func waitForSkillBody(t *testing.T, path string) string { + t.Helper() + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + data, err := os.ReadFile(path) + if err == nil { + return string(data) + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("timed out waiting for skill file at %s", path) + return "" +} + +func waitForProfile(t *testing.T, workspace, skillName string) evolution.SkillProfile { + t.Helper() + + store := evolution.NewStore(evolution.NewPaths(workspace, "")) + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + profile, err := store.LoadProfile(skillName) + if err == nil { + return profile + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("timed out waiting for profile %q in %s", skillName, workspace) + return evolution.SkillProfile{} +} + +func 
assertProfileNotExists(t *testing.T, workspace, skillName string) { + t.Helper() + + store := evolution.NewStore(evolution.NewPaths(workspace, "")) + if _, loadErr := store.LoadProfile(skillName); !os.IsNotExist(loadErr) { + t.Fatalf("profile %q should not exist, got err = %v", skillName, loadErr) + } +} + +func assertNotExists(t *testing.T, path string) { + t.Helper() + if _, statErr := os.Stat(path); !os.IsNotExist(statErr) { + t.Fatalf("%s should not exist, stat err = %v", path, statErr) + } +} + +func waitForEvent(t *testing.T, ch <-chan Event, timeout time.Duration, match func(Event) bool) Event { + t.Helper() + + timer := time.NewTimer(timeout) + defer timer.Stop() + for { + select { + case evt, ok := <-ch: + if !ok { + t.Fatal("event channel closed") + } + if match == nil || match(evt) { + return evt + } + case <-timer.C: + t.Fatal("timed out waiting for event") + } + } +} + +type blockingRuntimeObserver struct { + once sync.Once + started chan struct{} + release chan struct{} +} + +func (o *blockingRuntimeObserver) OnRuntimeEvent(ctx context.Context, _ runtimeevents.Event) error { + o.once.Do(func() { + close(o.started) + }) + select { + case <-o.release: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +type capturingEvolutionDraftProvider struct { + response string + defaultModel string + lastModel string +} + +type lateSkillOnRetryProvider struct { + calls int + lateSkillPath string +} + +func (p *lateSkillOnRetryProvider) Chat( + _ context.Context, + _ []providers.Message, + _ []providers.ToolDefinition, + _ string, + _ map[string]any, +) (*providers.LLMResponse, error) { + p.calls++ + if p.calls == 1 { + if err := os.MkdirAll(filepath.Dir(p.lateSkillPath), 0o755); err != nil { + return nil, err + } + if err := os.WriteFile( + p.lateSkillPath, + []byte("---\nname: late-skill\ndescription: late test skill\n---\n# Late Skill\n"), + 0o644, + ); err != nil { + return nil, err + } + return nil, errors.New("context_window_exceeded") + } + + 
return &providers.LLMResponse{Content: "Recovered after retry"}, nil +} + +func (p *lateSkillOnRetryProvider) GetDefaultModel() string { + return "mock-model" +} + +func (p *capturingEvolutionDraftProvider) Chat( + _ context.Context, + _ []providers.Message, + _ []providers.ToolDefinition, + model string, + _ map[string]any, +) (*providers.LLMResponse, error) { + p.lastModel = model + return &providers.LLMResponse{Content: p.response}, nil +} + +func (p *capturingEvolutionDraftProvider) GetDefaultModel() string { + return p.defaultModel +} diff --git a/pkg/agent/legacy_events.go b/pkg/agent/legacy_events.go new file mode 100644 index 000000000..30761e8e6 --- /dev/null +++ b/pkg/agent/legacy_events.go @@ -0,0 +1,177 @@ +package agent + +import ( + "context" + "sync" + "sync/atomic" + + "github.com/sipeed/picoclaw/pkg/bus" + runtimeevents "github.com/sipeed/picoclaw/pkg/events" +) + +const defaultEventSubscriberBuffer = 16 + +// EventSubscription identifies a legacy subscriber channel returned by +// AgentLoop.SubscribeEvents. +type EventSubscription struct { + ID uint64 + C <-chan Event +} + +type legacyEventSubscription struct { + cancel context.CancelFunc + sub runtimeevents.Subscription +} + +var ( + legacyEventSubSeq atomic.Uint64 + legacyEventSubLock sync.Map +) + +// SubscribeEvents exposes the previous in-agent event subscription API on top +// of the runtime event bus for tests and compatibility. +func (al *AgentLoop) SubscribeEvents(buffer int) EventSubscription { + if buffer <= 0 { + buffer = defaultEventSubscriberBuffer + } + + out := make(chan Event, buffer) + if al == nil || al.runtimeEvents == nil { + close(out) + return EventSubscription{C: out} + } + + ctx, cancel := context.WithCancel(context.Background()) + sub, in, err := al.runtimeEvents.Channel(). + Source("agent"). + OfKind(legacyAgentEventKinds()...). 
+ SubscribeChan(ctx, runtimeevents.SubscribeOptions{ + Name: "legacy-agent-events", + Buffer: buffer, + }) + if err != nil { + cancel() + close(out) + return EventSubscription{C: out} + } + + id := legacyEventSubSeq.Add(1) + legacyEventSubLock.Store(id, legacyEventSubscription{cancel: cancel, sub: sub}) + go func() { + defer legacyEventSubLock.LoadAndDelete(id) + defer close(out) + for { + select { + case <-ctx.Done(): + return + case evt, ok := <-in: + if !ok { + return + } + select { + case out <- legacyEventFromRuntimeEvent(evt): + case <-ctx.Done(): + return + } + } + } + }() + + return EventSubscription{ID: id, C: out} +} + +func (al *AgentLoop) UnsubscribeEvents(id uint64) { + if id == 0 { + return + } + value, ok := legacyEventSubLock.LoadAndDelete(id) + if !ok { + return + } + sub := value.(legacyEventSubscription) + sub.cancel() + if sub.sub != nil { + _ = sub.sub.Close() + } +} + +func legacyEventFromRuntimeEvent(evt runtimeevents.Event) Event { + meta := hookMetaFromRuntimeEvent(evt) + return Event{ + Kind: evt.Kind, + Time: evt.Time, + Meta: meta, + Context: turnContextFromRuntimeScope(evt.Scope), + Payload: evt.Payload, + } +} + +func hookMetaFromRuntimeEvent(evt runtimeevents.Event) HookMeta { + meta := HookMeta{ + AgentID: evt.Scope.AgentID, + TurnID: evt.Scope.TurnID, + ParentTurnID: evt.Correlation.ParentTurnID, + SessionKey: evt.Scope.SessionKey, + TracePath: evt.Correlation.TraceID, + } + if evt.Attrs != nil { + if source, ok := evt.Attrs["agent_source"].(string); ok { + meta.Source = source + } + if iteration, ok := evt.Attrs["iteration"].(int); ok { + meta.Iteration = iteration + } + } + return meta +} + +func turnContextFromRuntimeScope(scope runtimeevents.Scope) *TurnContext { + if scope.Channel == "" && + scope.Account == "" && + scope.ChatID == "" && + scope.ChatType == "" && + scope.TopicID == "" && + scope.SpaceID == "" && + scope.SpaceType == "" && + scope.SenderID == "" && + scope.MessageID == "" { + return nil + } + return 
&TurnContext{ + Inbound: &bus.InboundContext{ + Channel: scope.Channel, + Account: scope.Account, + ChatID: scope.ChatID, + ChatType: scope.ChatType, + TopicID: scope.TopicID, + SpaceID: scope.SpaceID, + SpaceType: scope.SpaceType, + SenderID: scope.SenderID, + MessageID: scope.MessageID, + }, + } +} + +func legacyAgentEventKinds() []runtimeevents.Kind { + return []runtimeevents.Kind{ + EventKindTurnStart, + EventKindTurnEnd, + EventKindLLMRequest, + EventKindLLMDelta, + EventKindLLMResponse, + EventKindLLMRetry, + EventKindContextCompress, + EventKindSessionSummarize, + EventKindToolExecStart, + EventKindToolExecEnd, + EventKindToolExecSkipped, + EventKindSteeringInjected, + EventKindFollowUpQueued, + EventKindInterruptReceived, + EventKindSubTurnSpawn, + EventKindSubTurnEnd, + EventKindSubTurnResultDelivered, + EventKindSubTurnOrphan, + EventKindError, + } +} diff --git a/pkg/agent/legacy_events_test.go b/pkg/agent/legacy_events_test.go new file mode 100644 index 000000000..3aa7782fe --- /dev/null +++ b/pkg/agent/legacy_events_test.go @@ -0,0 +1,76 @@ +package agent + +import ( + "context" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/bus" + "github.com/sipeed/picoclaw/pkg/config" + runtimeevents "github.com/sipeed/picoclaw/pkg/events" +) + +func TestSubscribeEventsFiltersRuntimeBusToLegacyAgentEvents(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + } + al := NewAgentLoop(cfg, bus.NewMessageBus(), &simpleMockProvider{response: "ok"}) + defer al.Close() + + sub := al.SubscribeEvents(4) + defer al.UnsubscribeEvents(sub.ID) + + al.RuntimeEventBus().Publish(context.Background(), runtimeevents.Event{ + Kind: runtimeevents.KindGatewayReady, + Source: runtimeevents.Source{Component: "gateway"}, + }) + select { + case evt := <-sub.C: + t.Fatalf("legacy subscriber received non-agent runtime event: 
%s", evt.Kind) + case <-time.After(50 * time.Millisecond): + } + + al.RuntimeEventBus().Publish(context.Background(), runtimeevents.Event{ + Kind: runtimeevents.KindAgentTurnStart, + Source: runtimeevents.Source{Component: "agent", Name: "main"}, + Scope: runtimeevents.Scope{ + AgentID: "main", + TurnID: "turn-1", + SessionKey: "session-1", + Channel: "telegram", + Account: "bot-1", + ChatID: "chat-1", + ChatType: "private", + TopicID: "topic-1", + SpaceID: "space-1", + SpaceType: "dm", + SenderID: "sender-1", + MessageID: "message-1", + }, + Payload: TurnStartPayload{UserMessage: "hello"}, + }) + + evt := waitForEvent(t, sub.C, 2*time.Second, nil) + if evt.Kind != EventKindTurnStart { + t.Fatalf("event kind = %q, want %q", evt.Kind, EventKindTurnStart) + } + if evt.Context == nil || evt.Context.Inbound == nil { + t.Fatalf("expected legacy event inbound context, got %#v", evt.Context) + } + if got := evt.Context.Inbound.Channel; got != "telegram" { + t.Fatalf("inbound channel = %q, want telegram", got) + } + if got := evt.Context.Inbound.ChatID; got != "chat-1" { + t.Fatalf("inbound chat_id = %q, want chat-1", got) + } + if got := evt.Context.Inbound.MessageID; got != "message-1" { + t.Fatalf("inbound message_id = %q, want message-1", got) + } +} diff --git a/pkg/agent/pipeline_execute.go b/pkg/agent/pipeline_execute.go index 0f71c7432..567e56d17 100644 --- a/pkg/agent/pipeline_execute.go +++ b/pkg/agent/pipeline_execute.go @@ -6,6 +6,9 @@ import ( "context" "encoding/json" "fmt" + "path/filepath" + "sort" + "strings" "time" "github.com/sipeed/picoclaw/pkg/bus" @@ -17,6 +20,89 @@ import ( "github.com/sipeed/picoclaw/pkg/utils" ) +func toolErrorSummary(result *tools.ToolResult) string { + if result == nil || !result.IsError { + return "" + } + content := strings.TrimSpace(result.ContentForLLM()) + if content == "" && result.Err != nil { + content = strings.TrimSpace(result.Err.Error()) + } + return utils.Truncate(content, 200) +} + +func 
inferSkillNamesFromToolCall(ts *turnState, toolName string, toolArgs map[string]any) []string { + if ts == nil || toolName != "read_file" { + return nil + } + + rawPath, ok := toolArgs["path"].(string) + if !ok { + return nil + } + path := strings.TrimSpace(rawPath) + if path == "" { + return nil + } + + cleanPath := filepath.Clean(path) + if !filepath.IsAbs(cleanPath) { + cleanPath = filepath.Join(ts.workspace, cleanPath) + } + if filepath.Base(cleanPath) != "SKILL.md" { + return nil + } + + var roots []string + if ts.agent != nil && ts.agent.ContextBuilder != nil { + roots = ts.agent.ContextBuilder.skillRoots() + } + if len(roots) == 0 && strings.TrimSpace(ts.workspace) != "" { + roots = []string{filepath.Join(ts.workspace, "skills")} + } + + found := make(map[string]struct{}) + for _, root := range roots { + root = strings.TrimSpace(root) + if root == "" { + continue + } + rel, err := filepath.Rel(filepath.Clean(root), cleanPath) + if err != nil { + continue + } + if rel == "." || rel == "" || strings.HasPrefix(rel, "..") { + continue + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) != 2 || parts[1] != "SKILL.md" { + continue + } + + skillName := strings.TrimSpace(parts[0]) + if skillName == "" { + continue + } + if ts.agent != nil && ts.agent.ContextBuilder != nil { + if canonical, ok := ts.agent.ContextBuilder.ResolveSkillName(skillName); ok { + skillName = canonical + } + } + found[skillName] = struct{}{} + } + + if len(found) == 0 { + return nil + } + + names := make([]string, 0, len(found)) + for skillName := range found { + names = append(names, skillName) + } + sort.Strings(names) + return names +} + // ExecuteTools executes the tool loop, handling BeforeTool/ApproveTool/AfterTool hooks, // tool execution with async callbacks, media delivery, and steering injection. 
// Returns ToolControl indicating what the coordinator should do next: @@ -203,6 +289,12 @@ toolLoop: Async: hookResult.Async, }, ) + ts.recordToolExecution( + toolName, + !hookResult.IsError, + toolErrorSummary(hookResult), + inferSkillNamesFromToolCall(ts, toolName, toolArgs), + ) messages = append(messages, toolResultMsg) if !ts.opts.NoHistory { @@ -579,6 +671,12 @@ toolLoop: Async: toolResult.Async, }, ) + ts.recordToolExecution( + toolName, + !toolResult.IsError, + toolErrorSummary(toolResult), + inferSkillNamesFromToolCall(ts, toolName, toolArgs), + ) messages = append(messages, toolResultMsg) if !ts.opts.NoHistory { ts.agent.Sessions.AddFullMessage(ts.sessionKey, toolResultMsg) diff --git a/pkg/agent/pipeline_execute_test.go b/pkg/agent/pipeline_execute_test.go new file mode 100644 index 000000000..404da320c --- /dev/null +++ b/pkg/agent/pipeline_execute_test.go @@ -0,0 +1,50 @@ +package agent + +import ( + "os" + "path/filepath" + "testing" +) + +func TestInferSkillNamesFromToolCall_ReadFileSkillMarkdown(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "three-one") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile( + filepath.Join(skillDir, "SKILL.md"), + []byte("---\nname: three-one\ndescription: test\n---\n# Three One\n"), + 0o644, + ); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + cb := NewContextBuilder(workspace) + ts := &turnState{ + workspace: workspace, + agent: &AgentInstance{ + Workspace: workspace, + ContextBuilder: cb, + }, + } + + got := inferSkillNamesFromToolCall(ts, "read_file", map[string]any{ + "path": filepath.Join(workspace, "skills", "three-one", "SKILL.md"), + }) + if len(got) != 1 || got[0] != "three-one" { + t.Fatalf("inferSkillNamesFromToolCall = %v, want [three-one]", got) + } +} + +func TestInferSkillNamesFromToolCall_NonSkillFileIgnored(t *testing.T) { + workspace := t.TempDir() + ts := &turnState{workspace: 
workspace} + + got := inferSkillNamesFromToolCall(ts, "read_file", map[string]any{ + "path": filepath.Join(workspace, "README.md"), + }) + if len(got) != 0 { + t.Fatalf("inferSkillNamesFromToolCall = %v, want empty", got) + } +} diff --git a/pkg/agent/pipeline_llm.go b/pkg/agent/pipeline_llm.go index 496fcd7e4..3a3c496f6 100644 --- a/pkg/agent/pipeline_llm.go +++ b/pkg/agent/pipeline_llm.go @@ -364,9 +364,14 @@ func (p *Pipeline) CallLLM( exec.history = asmResp.History exec.summary = asmResp.Summary } - exec.messages = ts.agent.ContextBuilder.BuildMessagesFromPrompt( - promptBuildRequestForTurn(ts, exec.history, exec.summary, "", nil), - ) + contextualSkills := ts.activeSkills + if ts.agent.ContextBuilder != nil { + contextualSkills = ts.agent.ContextBuilder.ResolveActiveSkillsForContext(ts.activeSkills) + } + ts.recordSkillContextSnapshot(skillContextTriggerContextRetryRebuild, contextualSkills) + rebuildPromptReq := promptBuildRequestForTurn(ts, exec.history, exec.summary, "", nil) + rebuildPromptReq.ActiveSkills = append([]string(nil), contextualSkills...) + exec.messages = ts.agent.ContextBuilder.BuildMessagesFromPrompt(rebuildPromptReq) exec.callMessages = exec.messages if exec.gracefulTerminal { msgs := append([]providers.Message(nil), exec.messages...) 
diff --git a/pkg/agent/pipeline_setup.go b/pkg/agent/pipeline_setup.go index 219e4e5de..f6fed09de 100644 --- a/pkg/agent/pipeline_setup.go +++ b/pkg/agent/pipeline_setup.go @@ -31,9 +31,14 @@ func (p *Pipeline) SetupTurn(ctx context.Context, ts *turnState) (*turnExecution } ts.captureRestorePoint(history, summary) - messages := ts.agent.ContextBuilder.BuildMessagesFromPrompt( - promptBuildRequestForTurn(ts, history, summary, ts.userMessage, ts.media), - ) + contextualSkills := ts.activeSkills + if ts.agent.ContextBuilder != nil { + contextualSkills = ts.agent.ContextBuilder.ResolveActiveSkillsForContext(ts.activeSkills) + } + ts.recordSkillContextSnapshot(skillContextTriggerInitialBuild, contextualSkills) + initialPromptReq := promptBuildRequestForTurn(ts, history, summary, ts.userMessage, ts.media) + initialPromptReq.ActiveSkills = append([]string(nil), contextualSkills...) + messages := ts.agent.ContextBuilder.BuildMessagesFromPrompt(initialPromptReq) messages = resolveMediaRefs(messages, p.MediaStore, maxMediaSize) @@ -61,9 +66,9 @@ func (p *Pipeline) SetupTurn(ctx context.Context, ts *turnState) (*turnExecution history = resp.History summary = resp.Summary } - messages = ts.agent.ContextBuilder.BuildMessagesFromPrompt( - promptBuildRequestForTurn(ts, history, summary, ts.userMessage, ts.media), - ) + rebuildPromptReq := promptBuildRequestForTurn(ts, history, summary, ts.userMessage, ts.media) + rebuildPromptReq.ActiveSkills = append([]string(nil), contextualSkills...) 
+ messages = ts.agent.ContextBuilder.BuildMessagesFromPrompt(rebuildPromptReq) messages = resolveMediaRefs(messages, p.MediaStore, maxMediaSize) } } diff --git a/pkg/agent/turn_coord.go b/pkg/agent/turn_coord.go index 2826e662c..060346339 100644 --- a/pkg/agent/turn_coord.go +++ b/pkg/agent/turn_coord.go @@ -32,14 +32,33 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel turnStatus := TurnEndStatusCompleted defer func() { + attemptedSkills := ts.attemptedSkillsSnapshot() + skillContextSnapshots := ts.skillContextSnapshotsSnapshot() + finalSuccessfulPath := []string(nil) + if turnStatus == TurnEndStatusCompleted { + if latest := ts.latestSkillContextSnapshot(); len(latest) > 0 { + finalSuccessfulPath = latest + } else { + finalSuccessfulPath = append([]string(nil), attemptedSkills...) + } + } al.emitEvent( runtimeevents.KindAgentTurnEnd, ts.eventMeta("runTurn", "turn.end"), TurnEndPayload{ - Status: turnStatus, - Iterations: ts.currentIteration(), - Duration: time.Since(ts.startedAt), - FinalContentLen: ts.finalContentLen(), + Status: turnStatus, + Workspace: ts.workspace, + Iterations: ts.currentIteration(), + Duration: time.Since(ts.startedAt), + FinalContentLen: ts.finalContentLen(), + UserMessage: ts.userMessage, + FinalContent: ts.finalContentSnapshot(), + ActiveSkills: append([]string(nil), ts.activeSkills...), + AttemptedSkills: attemptedSkills, + FinalSuccessfulPath: finalSuccessfulPath, + SkillContextSnapshots: skillContextSnapshots, + ToolKinds: ts.toolKindsSnapshot(), + ToolExecutions: ts.toolExecutionsSnapshot(), }, ) }() @@ -200,7 +219,11 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel if finalContent == "" { finalContent = ts.opts.DefaultResponse } - return pipeline.Finalize(ctx, turnCtx, ts, exec, turnStatus, finalContent) + result, finalizeErr := pipeline.Finalize(ctx, turnCtx, ts, exec, turnStatus, finalContent) + if finalizeErr != nil { + turnStatus = TurnEndStatusError + } + 
return result, finalizeErr case ControlToolLoop: // Execute tools via Pipeline toolCtrl := pipeline.ExecuteTools(ctx, turnCtx, ts, exec, iteration) @@ -227,7 +250,11 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel if exec.allResponsesHandled { finalContent = "" } - return pipeline.Finalize(ctx, turnCtx, ts, exec, turnStatus, finalContent) + result, finalizeErr := pipeline.Finalize(ctx, turnCtx, ts, exec, turnStatus, finalContent) + if finalizeErr != nil { + turnStatus = TurnEndStatusError + } + return result, finalizeErr } } } @@ -251,7 +278,11 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel return al.abortTurn(ts) } - return pipeline.Finalize(ctx, turnCtx, ts, exec, turnStatus, finalContent) + result, err := pipeline.Finalize(ctx, turnCtx, ts, exec, turnStatus, finalContent) + if err != nil { + turnStatus = TurnEndStatusError + } + return result, err } func (al *AgentLoop) abortTurn(ts *turnState) (turnResult, error) { diff --git a/pkg/agent/turn_coord_test.go b/pkg/agent/turn_coord_test.go index 898ae3931..c7cdd8a32 100644 --- a/pkg/agent/turn_coord_test.go +++ b/pkg/agent/turn_coord_test.go @@ -10,6 +10,7 @@ import ( "github.com/sipeed/picoclaw/pkg/bus" "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/providers" + "github.com/sipeed/picoclaw/pkg/session" ) // ============================================================================= @@ -198,6 +199,15 @@ func makeTestProcessOpts(sessionKey string) processOptions { } } +type saveFailingSessionStore struct { + session.SessionStore + err error +} + +func (s *saveFailingSessionStore) Save(_ string) error { + return s.err +} + // ============================================================================= // Pipeline Method Tests: SetupTurn // ============================================================================= @@ -261,6 +271,44 @@ func TestPipeline_CallLLM_SimpleResponse(t *testing.T) { } } +func 
TestRunTurn_FinalizeSaveErrorEmitsErrorTurnEnd(t *testing.T) { + al, agent, cleanup := newTurnCoordTestLoop(t, &simpleConvProvider{}) + defer cleanup() + + saveErr := errors.New("session save failed") + agent.Sessions = &saveFailingSessionStore{ + SessionStore: session.NewSessionManager(""), + err: saveErr, + } + + sub := al.SubscribeEvents(8) + defer al.UnsubscribeEvents(sub.ID) + + if _, err := al.ProcessDirect(context.Background(), "hello", "session-save-fail"); err == nil { + t.Fatal("expected ProcessDirect to fail") + } + + deadline := time.After(2 * time.Second) + for { + select { + case evt := <-sub.C: + if evt.Kind != EventKindTurnEnd { + continue + } + payload, ok := evt.Payload.(TurnEndPayload) + if !ok { + t.Fatalf("TurnEnd payload type = %T", evt.Payload) + } + if payload.Status != TurnEndStatusError { + t.Fatalf("TurnEnd status = %q, want %q", payload.Status, TurnEndStatusError) + } + return + case <-deadline: + t.Fatal("timed out waiting for turn_end event") + } + } +} + func TestPipeline_CallLLM_WithToolCall(t *testing.T) { provider := &toolCallRespProvider{ toolName: "web_search", @@ -780,3 +828,30 @@ func TestTurnState_HardAbortRequested(t *testing.T) { t.Error("expected hard abort to be requested") } } + +func TestTurnState_SkillContextSnapshotsTrackLatestSuccessfulPath(t *testing.T) { + ts := &turnState{} + + ts.recordSkillContextSnapshot(skillContextTriggerInitialBuild, []string{"skill-a"}) + ts.recordSkillContextSnapshot(skillContextTriggerContextRetryRebuild, []string{"skill-b", "skill-c"}) + + if got := ts.attemptedSkillsSnapshot(); len(got) != 3 || got[0] != "skill-a" || got[1] != "skill-b" || + got[2] != "skill-c" { + t.Fatalf("attemptedSkillsSnapshot = %v, want [skill-a skill-b skill-c]", got) + } + + if got := ts.latestSkillContextSnapshot(); len(got) != 2 || got[0] != "skill-b" || got[1] != "skill-c" { + t.Fatalf("latestSkillContextSnapshot = %v, want [skill-b skill-c]", got) + } + + snapshots := ts.skillContextSnapshotsSnapshot() + if 
len(snapshots) != 2 { + t.Fatalf("len(skillContextSnapshotsSnapshot()) = %d, want 2", len(snapshots)) + } + if snapshots[0].Sequence != 1 || snapshots[0].Trigger != skillContextTriggerInitialBuild { + t.Fatalf("snapshots[0] = %+v, want sequence=1 trigger=%q", snapshots[0], skillContextTriggerInitialBuild) + } + if snapshots[1].Sequence != 2 || snapshots[1].Trigger != skillContextTriggerContextRetryRebuild { + t.Fatalf("snapshots[1] = %+v, want sequence=2 trigger=%q", snapshots[1], skillContextTriggerContextRetryRebuild) + } +} diff --git a/pkg/agent/turn_state.go b/pkg/agent/turn_state.go index b769ebcd0..ae058e49d 100644 --- a/pkg/agent/turn_state.go +++ b/pkg/agent/turn_state.go @@ -5,6 +5,7 @@ package agent import ( "context" "reflect" + "strings" "sync" "sync/atomic" "time" @@ -176,13 +177,19 @@ type turnState struct { opts processOptions scope turnEventScope - turnID string - agentID string - sessionKey string - turnCtx *TurnContext + turnID string + agentID string + sessionKey string + activeSkills []string + attemptedSkills []string + skillContextTrace []SkillContextSnapshot + toolKinds []string + toolExecutions []ToolExecutionRecord + turnCtx *TurnContext channel string chatID string + workspace string userMessage string media []string @@ -238,19 +245,21 @@ type turnState struct { func newTurnState(agent *AgentInstance, opts processOptions, scope turnEventScope) *turnState { ts := &turnState{ - agent: agent, - opts: opts, - scope: scope, - turnID: scope.turnID, - agentID: agent.ID, - sessionKey: opts.Dispatch.SessionKey, - turnCtx: cloneTurnContext(scope.context), - channel: opts.Dispatch.Channel(), - chatID: opts.Dispatch.ChatID(), - userMessage: opts.Dispatch.UserMessage, - media: append([]string(nil), opts.Dispatch.Media...), - phase: TurnPhaseSetup, - startedAt: time.Now(), + agent: agent, + opts: opts, + scope: scope, + turnID: scope.turnID, + agentID: agent.ID, + sessionKey: opts.Dispatch.SessionKey, + activeSkills: activeSkillNames(agent, opts), + 
turnCtx: cloneTurnContext(scope.context), + channel: opts.Dispatch.Channel(), + chatID: opts.Dispatch.ChatID(), + workspace: agent.Workspace, + userMessage: opts.Dispatch.UserMessage, + media: append([]string(nil), opts.Dispatch.Media...), + phase: TurnPhaseSetup, + startedAt: time.Now(), } // Bind session store and capture initial history length for rollback logic @@ -378,6 +387,160 @@ func (ts *turnState) finalContentLen() int { return len(ts.finalContent) } +func (ts *turnState) finalContentSnapshot() string { + ts.mu.RLock() + defer ts.mu.RUnlock() + return ts.finalContent +} + +func (ts *turnState) recordToolKind(tool string) { + tool = strings.TrimSpace(tool) + if tool == "" { + return + } + + ts.mu.Lock() + defer ts.mu.Unlock() + + for _, existing := range ts.toolKinds { + if existing == tool { + return + } + } + ts.toolKinds = append(ts.toolKinds, tool) +} + +func (ts *turnState) toolKindsSnapshot() []string { + ts.mu.RLock() + defer ts.mu.RUnlock() + return append([]string(nil), ts.toolKinds...) 
+} + +func (ts *turnState) recordToolExecution(tool string, success bool, errorSummary string, skillNames []string) { + tool = strings.TrimSpace(tool) + if tool == "" { + return + } + + ts.recordToolKind(tool) + + ts.mu.Lock() + defer ts.mu.Unlock() + ts.toolExecutions = append(ts.toolExecutions, ToolExecutionRecord{ + Name: tool, + Success: success, + ErrorSummary: strings.TrimSpace(errorSummary), + SkillNames: append([]string(nil), skillNames...), + }) +} + +func (ts *turnState) toolExecutionsSnapshot() []ToolExecutionRecord { + ts.mu.RLock() + defer ts.mu.RUnlock() + if len(ts.toolExecutions) == 0 { + return nil + } + + out := make([]ToolExecutionRecord, 0, len(ts.toolExecutions)) + for _, exec := range ts.toolExecutions { + out = append(out, ToolExecutionRecord{ + Name: exec.Name, + Success: exec.Success, + ErrorSummary: exec.ErrorSummary, + SkillNames: append([]string(nil), exec.SkillNames...), + }) + } + return out +} + +func (ts *turnState) recordAttemptedSkills(skillNames []string) { + if len(skillNames) == 0 { + return + } + + ts.mu.Lock() + defer ts.mu.Unlock() + + for _, skillName := range skillNames { + skillName = strings.TrimSpace(skillName) + if skillName == "" { + continue + } + seen := false + for _, existing := range ts.attemptedSkills { + if existing == skillName { + seen = true + break + } + } + if seen { + continue + } + ts.attemptedSkills = append(ts.attemptedSkills, skillName) + } +} + +func (ts *turnState) attemptedSkillsSnapshot() []string { + ts.mu.RLock() + defer ts.mu.RUnlock() + return append([]string(nil), ts.attemptedSkills...) 
+} + +func (ts *turnState) recordSkillContextSnapshot(trigger string, skillNames []string) { + if len(skillNames) == 0 { + return + } + + filtered := make([]string, 0, len(skillNames)) + for _, skillName := range skillNames { + skillName = strings.TrimSpace(skillName) + if skillName == "" { + continue + } + filtered = append(filtered, skillName) + } + if len(filtered) == 0 { + return + } + + ts.recordAttemptedSkills(filtered) + + ts.mu.Lock() + defer ts.mu.Unlock() + ts.skillContextTrace = append(ts.skillContextTrace, SkillContextSnapshot{ + Sequence: len(ts.skillContextTrace) + 1, + Trigger: trigger, + SkillNames: append([]string(nil), filtered...), + }) +} + +func (ts *turnState) latestSkillContextSnapshot() []string { + ts.mu.RLock() + defer ts.mu.RUnlock() + if len(ts.skillContextTrace) == 0 { + return nil + } + return append([]string(nil), ts.skillContextTrace[len(ts.skillContextTrace)-1].SkillNames...) +} + +func (ts *turnState) skillContextSnapshotsSnapshot() []SkillContextSnapshot { + ts.mu.RLock() + defer ts.mu.RUnlock() + if len(ts.skillContextTrace) == 0 { + return nil + } + + snapshots := make([]SkillContextSnapshot, 0, len(ts.skillContextTrace)) + for _, snapshot := range ts.skillContextTrace { + snapshots = append(snapshots, SkillContextSnapshot{ + Sequence: snapshot.Sequence, + Trigger: snapshot.Trigger, + SkillNames: append([]string(nil), snapshot.SkillNames...), + }) + } + return snapshots +} + func (ts *turnState) setTurnCancel(cancel context.CancelFunc) { ts.mu.Lock() defer ts.mu.Unlock() diff --git a/pkg/config/config.go b/pkg/config/config.go index c9d90e0f8..11d179911 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -37,6 +37,7 @@ type Config struct { Isolation IsolationConfig `json:"isolation,omitempty" yaml:"-"` Agents AgentsConfig `json:"agents" yaml:"-"` Session SessionConfig `json:"session,omitempty" yaml:"-"` + Evolution EvolutionConfig `json:"evolution,omitempty" yaml:"-"` Channels ChannelsConfig `json:"channel_list" 
yaml:"channel_list"` ModelList SecureModelList `json:"model_list" yaml:"model_list"` // New model-centric provider configuration Gateway GatewayConfig `json:"gateway" yaml:"-"` @@ -53,6 +54,126 @@ type Config struct { sensitiveCache *SensitiveDataCache } +type EvolutionConfig struct { + Enabled bool `json:"enabled,omitempty"` + Mode string `json:"mode,omitempty"` + StateDir string `json:"state_dir,omitempty"` + MinTaskCount int `json:"min_task_count,omitempty"` + MinSuccessRatio float64 `json:"min_success_ratio,omitempty"` + ColdPathTrigger string `json:"cold_path_trigger,omitempty"` + ColdPathTimes []string `json:"cold_path_times,omitempty"` + // Deprecated: use MinTaskCount. + MinCaseCount int `json:"min_case_count,omitempty"` + // Deprecated: use MinSuccessRatio. + MinSuccessRate float64 `json:"min_success_rate,omitempty"` +} + +func (c EvolutionConfig) MarshalJSON() ([]byte, error) { + out := struct { + Enabled bool `json:"enabled,omitempty"` + Mode string `json:"mode,omitempty"` + StateDir string `json:"state_dir,omitempty"` + MinTaskCount int `json:"min_task_count,omitempty"` + MinSuccessRatio float64 `json:"min_success_ratio,omitempty"` + ColdPathTrigger string `json:"cold_path_trigger,omitempty"` + ColdPathTimes []string `json:"cold_path_times,omitempty"` + }{ + Enabled: c.Enabled, + Mode: c.Mode, + StateDir: c.StateDir, + MinTaskCount: c.EffectiveMinTaskCount(), + MinSuccessRatio: c.EffectiveMinSuccessRatio(), + ColdPathTrigger: strings.TrimSpace(c.ColdPathTrigger), + ColdPathTimes: c.EffectiveColdPathTimes(), + } + if !out.Enabled { + out.Mode = "" + out.ColdPathTrigger = "" + out.ColdPathTimes = nil + } + return json.Marshal(out) +} + +func (c EvolutionConfig) EffectiveMode() string { + if !c.Enabled { + return "" + } + switch strings.ToLower(strings.TrimSpace(c.Mode)) { + case "draft": + return "draft" + case "apply": + return "apply" + case "", "observe": + return "observe" + default: + return "observe" + } +} + +func (c EvolutionConfig) 
RunsColdPathAutomatically() bool { + return c.RunsColdPathAfterTurn() || c.RunsColdPathScheduled() +} + +func (c EvolutionConfig) ColdPathTriggerMode() string { + if c.EffectiveMode() != "draft" && c.EffectiveMode() != "apply" { + return "" + } + switch strings.ToLower(strings.TrimSpace(c.ColdPathTrigger)) { + case "", "after_turn": + return "after_turn" + case "scheduled": + return "scheduled" + case "manual", "none", "off": + return "manual" + default: + return "after_turn" + } +} + +func (c EvolutionConfig) RunsColdPathAfterTurn() bool { + return c.ColdPathTriggerMode() == "after_turn" +} + +func (c EvolutionConfig) RunsColdPathScheduled() bool { + return c.ColdPathTriggerMode() == "scheduled" +} + +func (c EvolutionConfig) EffectiveMinTaskCount() int { + if c.MinTaskCount > 0 { + return c.MinTaskCount + } + if c.MinCaseCount > 0 { + return c.MinCaseCount + } + return 2 +} + +func (c EvolutionConfig) EffectiveMinSuccessRatio() float64 { + if c.MinSuccessRatio > 0 { + return c.MinSuccessRatio + } + if c.MinSuccessRate > 0 { + return c.MinSuccessRate + } + return 0.7 +} + +func (c EvolutionConfig) EffectiveColdPathTimes() []string { + out := make([]string, 0, len(c.ColdPathTimes)) + for _, value := range c.ColdPathTimes { + value = strings.TrimSpace(value) + if value == "" { + continue + } + out = append(out, value) + } + return out +} + +func (c EvolutionConfig) AutoAppliesDrafts() bool { + return c.EffectiveMode() == "apply" +} + // IsolationConfig controls subprocess isolation for commands started by PicoClaw. // It is applied by the isolation package rather than by sandboxing the main process. 
type IsolationConfig struct { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index fc810ad7d..d744e15dc 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -171,6 +171,317 @@ func TestDefaultConfig_MCPMaxInlineTextChars(t *testing.T) { } } +func TestDefaultConfig_EvolutionDefaults(t *testing.T) { + cfg := DefaultConfig() + + assert.False(t, cfg.Evolution.Enabled) + assert.Equal(t, "observe", cfg.Evolution.Mode) + assert.Equal(t, "", cfg.Evolution.StateDir) + assert.Equal(t, 2, cfg.Evolution.MinTaskCount) + assert.Equal(t, 0.7, cfg.Evolution.MinSuccessRatio) + assert.Equal(t, "after_turn", cfg.Evolution.ColdPathTrigger) + assert.Equal(t, 2, cfg.Evolution.EffectiveMinTaskCount()) + assert.Equal(t, 0.7, cfg.Evolution.EffectiveMinSuccessRatio()) + assert.False(t, cfg.Evolution.RunsColdPathAutomatically()) + assert.False(t, cfg.Evolution.AutoAppliesDrafts()) +} + +func TestEvolutionConfig_EffectiveMode(t *testing.T) { + tests := []struct { + name string + cfg EvolutionConfig + want string + }{ + { + name: "disabled returns empty", + cfg: EvolutionConfig{ + Enabled: false, + Mode: "apply", + }, + want: "", + }, + { + name: "enabled empty mode defaults to observe", + cfg: EvolutionConfig{ + Enabled: true, + }, + want: "observe", + }, + { + name: "enabled whitespace mode defaults to observe", + cfg: EvolutionConfig{ + Enabled: true, + Mode: " \t\n ", + }, + want: "observe", + }, + { + name: "enabled returns configured mode", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, + want: "draft", + }, + { + name: "enabled trims and normalizes mode", + cfg: EvolutionConfig{ + Enabled: true, + Mode: " Draft ", + }, + want: "draft", + }, + { + name: "enabled returns apply mode", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "apply", + }, + want: "apply", + }, + { + name: "enabled normalizes uppercase apply", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "APPLY", + }, + want: "apply", + }, + { + name: "enabled unknown mode 
falls back to observe", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "propose", + }, + want: "observe", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.cfg.EffectiveMode()) + }) + } +} + +func TestEvolutionConfig_ModeSemantics(t *testing.T) { + tests := []struct { + name string + cfg EvolutionConfig + wantRunsCold bool + wantAutoApply bool + }{ + { + name: "disabled does not run cold path", + cfg: EvolutionConfig{ + Enabled: false, + Mode: "apply", + }, + wantRunsCold: false, + wantAutoApply: false, + }, + { + name: "observe only records hot path", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, + wantRunsCold: false, + wantAutoApply: false, + }, + { + name: "draft runs cold path without applying", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "draft", + }, + wantRunsCold: true, + wantAutoApply: false, + }, + { + name: "draft scheduled runs cold path without after turn", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "draft", + ColdPathTrigger: "scheduled", + ColdPathTimes: []string{"03:00"}, + }, + wantRunsCold: true, + wantAutoApply: false, + }, + { + name: "apply runs cold path and auto applies", + cfg: EvolutionConfig{ + Enabled: true, + Mode: "apply", + }, + wantRunsCold: true, + wantAutoApply: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.wantRunsCold, tt.cfg.RunsColdPathAutomatically()) + assert.Equal(t, tt.wantAutoApply, tt.cfg.AutoAppliesDrafts()) + }) + } +} + +func TestEvolutionConfig_ColdPathTriggerMode(t *testing.T) { + assert.Equal(t, "after_turn", (EvolutionConfig{Enabled: true, Mode: "draft"}).ColdPathTriggerMode()) + assert.True(t, (EvolutionConfig{Enabled: true, Mode: "draft"}).RunsColdPathAfterTurn()) + assert.False(t, (EvolutionConfig{Enabled: true, Mode: "draft"}).RunsColdPathScheduled()) + + scheduled := EvolutionConfig{ + Enabled: true, + Mode: "apply", + ColdPathTrigger: "scheduled", + 
ColdPathTimes: []string{"03:00"}, + } + assert.Equal(t, "scheduled", scheduled.ColdPathTriggerMode()) + assert.False(t, scheduled.RunsColdPathAfterTurn()) + assert.True(t, scheduled.RunsColdPathScheduled()) + + manual := EvolutionConfig{Enabled: true, Mode: "draft", ColdPathTrigger: "manual"} + assert.Equal(t, "manual", manual.ColdPathTriggerMode()) + assert.False(t, manual.RunsColdPathAutomatically()) +} + +func TestEvolutionConfig_NewThresholdNamesPreferLegacyAliases(t *testing.T) { + cfg := EvolutionConfig{MinTaskCount: 4, MinSuccessRatio: 0.9, MinCaseCount: 1, MinSuccessRate: 0.2} + assert.Equal(t, 4, cfg.EffectiveMinTaskCount()) + assert.Equal(t, 0.9, cfg.EffectiveMinSuccessRatio()) + + legacy := EvolutionConfig{MinCaseCount: 5, MinSuccessRate: 0.8} + assert.Equal(t, 5, legacy.EffectiveMinTaskCount()) + assert.Equal(t, 0.8, legacy.EffectiveMinSuccessRatio()) +} + +func TestEvolutionConfig_MarshalUsesNewThresholdNames(t *testing.T) { + data, err := json.Marshal(EvolutionConfig{ + Enabled: true, + Mode: "draft", + MinCaseCount: 5, + MinSuccessRate: 0.8, + }) + if err != nil { + t.Fatalf("json.Marshal: %v", err) + } + + var raw map[string]any + if err := json.Unmarshal(data, &raw); err != nil { + t.Fatalf("json.Unmarshal: %v", err) + } + if raw["min_task_count"] != float64(5) { + t.Fatalf("min_task_count = %#v, want 5", raw["min_task_count"]) + } + if raw["min_success_ratio"] != 0.8 { + t.Fatalf("min_success_ratio = %#v, want 0.8", raw["min_success_ratio"]) + } + if _, ok := raw["min_case_count"]; ok { + t.Fatalf("min_case_count should not be marshaled: %#v", raw) + } + if _, ok := raw["min_success_rate"]; ok { + t.Fatalf("min_success_rate should not be marshaled: %#v", raw) + } +} + +func TestLoadConfig_EvolutionEnabledWithoutModeUsesObserveSemantics(t *testing.T) { + dir := t.TempDir() + configPath := filepath.Join(dir, "config.json") + raw := `{ + "version": 3, + "evolution": { + "enabled": true + } + }` + if err := os.WriteFile(configPath, []byte(raw), 
0o644); err != nil { + t.Fatalf("WriteFile(configPath): %v", err) + } + + cfg, err := LoadConfig(configPath) + if err != nil { + t.Fatalf("LoadConfig() error: %v", err) + } + + assert.True(t, cfg.Evolution.Enabled) + assert.Equal(t, "", cfg.Evolution.Mode) + assert.Equal(t, "observe", cfg.Evolution.EffectiveMode()) + assert.False(t, cfg.Evolution.RunsColdPathAutomatically()) + assert.False(t, cfg.Evolution.AutoAppliesDrafts()) +} + +func TestLoadConfig_EvolutionExplicitApplyModeAutoApplies(t *testing.T) { + dir := t.TempDir() + configPath := filepath.Join(dir, "config.json") + raw := `{ + "version": 3, + "evolution": { + "enabled": true, + "mode": "apply" + } + }` + if err := os.WriteFile(configPath, []byte(raw), 0o644); err != nil { + t.Fatalf("WriteFile(configPath): %v", err) + } + + cfg, err := LoadConfig(configPath) + if err != nil { + t.Fatalf("LoadConfig() error: %v", err) + } + + assert.True(t, cfg.Evolution.Enabled) + assert.Equal(t, "apply", cfg.Evolution.Mode) + assert.Equal(t, "apply", cfg.Evolution.EffectiveMode()) + assert.True(t, cfg.Evolution.RunsColdPathAutomatically()) + assert.True(t, cfg.Evolution.AutoAppliesDrafts()) +} + +func TestSaveConfig_DisabledEvolutionOmitsApplyMode(t *testing.T) { + dir := t.TempDir() + configPath := filepath.Join(dir, "config.json") + cfg := DefaultConfig() + + if err := SaveConfig(configPath, cfg); err != nil { + t.Fatalf("SaveConfig() error: %v", err) + } + + data, err := os.ReadFile(configPath) + if err != nil { + t.Fatalf("ReadFile(configPath): %v", err) + } + + var raw map[string]any + if unmarshalErr := json.Unmarshal(data, &raw); unmarshalErr != nil { + t.Fatalf("Unmarshal saved config: %v", unmarshalErr) + } + evolutionRaw, ok := raw["evolution"].(map[string]any) + if !ok { + t.Fatalf("saved evolution config = %#v, want object", raw["evolution"]) + } + if _, ok := evolutionRaw["mode"]; ok { + t.Fatalf("disabled evolution should not persist mode: %#v", evolutionRaw) + } + + evolutionRaw["enabled"] = true + 
edited, err := json.Marshal(raw) + if err != nil { + t.Fatalf("Marshal edited config: %v", err) + } + if writeErr := os.WriteFile(configPath, edited, 0o600); writeErr != nil { + t.Fatalf("WriteFile(configPath): %v", writeErr) + } + + loaded, err := LoadConfig(configPath) + if err != nil { + t.Fatalf("LoadConfig() error: %v", err) + } + assert.True(t, loaded.Evolution.Enabled) + assert.Equal(t, "observe", loaded.Evolution.EffectiveMode()) + assert.False(t, loaded.Evolution.AutoAppliesDrafts()) +} + func TestLoadConfig_MCPMaxInlineTextChars(t *testing.T) { dir := t.TempDir() configPath := filepath.Join(dir, "config.json") diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index 8e2494ae5..1742a4b87 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -47,6 +47,13 @@ func DefaultConfig() *Config { Session: SessionConfig{ Dimensions: []string{"chat"}, }, + Evolution: EvolutionConfig{ + Enabled: false, + Mode: "observe", + MinTaskCount: 2, + MinSuccessRatio: 0.7, + ColdPathTrigger: "after_turn", + }, Channels: defaultChannels(), Hooks: HooksConfig{ Enabled: true, diff --git a/pkg/config/migration.go b/pkg/config/migration.go index 96914819e..40ef1a5a2 100644 --- a/pkg/config/migration.go +++ b/pkg/config/migration.go @@ -83,6 +83,8 @@ func migrateLegacyAgentDefaultsModel(m map[string]any) { // loadConfigV1 loads a version 1 config (current schema) func loadConfig(data []byte) (*Config, error) { cfg := DefaultConfig() + evolutionModeExplicit := configObjectHasField(data, "evolution", "mode") + evolutionExplicitWithoutMode := configObjectHasTopLevelField(data, "evolution") && !evolutionModeExplicit // Pre-scan the JSON to check how many model_list entries the user provided. 
// configObjectHasField reports whether the JSON document in data has a
// top-level key objectField whose value is an object containing nestedField.
// Any decode failure along the way (invalid JSON, a non-object document, or a
// non-object value under objectField) yields false. Only key presence is
// checked, so a nested key whose value is null still counts as present.
func configObjectHasField(data []byte, objectField, nestedField string) bool {
	current := json.RawMessage(data)
	// Descend one level per key: decode the current value as an object and
	// step into the requested member.
	for _, key := range [...]string{objectField, nestedField} {
		var members map[string]json.RawMessage
		if json.Unmarshal(current, &members) != nil {
			return false
		}
		child, present := members[key]
		if !present {
			return false
		}
		current = child
	}
	return true
}
string, + draft SkillDraft, +) (func() error, error) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + if validateErr := skills.ValidateSkillName(draft.TargetSkillName); validateErr != nil { + return nil, validateErr + } + + existingBody, backupPath, hadOriginal, err := a.backupCurrentSkill(workspace, draft.TargetSkillName) + if err != nil { + return nil, err + } + + renderedBody, err := renderAppliedBody(draft, existingBody, hadOriginal) + if err != nil { + return nil, err + } + + if err := validateAppliedSkillBody( + renderedBody, + draft.TargetSkillName, + allowsExistingFrontmatterFields(draft.ChangeKind, hadOriginal), + ); err != nil { + return nil, err + } + + skillDir := filepath.Join(workspace, "skills", draft.TargetSkillName) + if mkdirErr := os.MkdirAll(skillDir, 0o755); mkdirErr != nil { + return nil, mkdirErr + } + + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := fileutil.WriteFileAtomic(skillPath, []byte(renderedBody), 0o644); err != nil { + return nil, err + } + + return func() error { + return a.rollbackSkill(skillPath, backupPath, hadOriginal) + }, nil +} + +func (a *Applier) backupCurrentSkill( + workspace, skillName string, +) (currentBody, backupPath string, hadOriginal bool, err error) { + if validateErr := skills.ValidateSkillName(skillName); validateErr != nil { + return "", "", false, validateErr + } + + skillPath := filepath.Join(workspace, "skills", skillName, "SKILL.md") + data, err := os.ReadFile(skillPath) + if os.IsNotExist(err) { + return "", "", false, nil + } + if err != nil { + return "", "", false, err + } + + backupDir := filepath.Join( + a.paths.BackupsDir, + workspaceScopeDir(workspace), + skillName, + a.now().Format("20060102-150405.000000000"), + ) + if err := os.MkdirAll(backupDir, 0o755); err != nil { + return "", "", false, err + } + + backupPath = filepath.Join(backupDir, "SKILL.md") + if err := fileutil.WriteFileAtomic(backupPath, data, 0o644); err != nil { + return "", "", false, err 
// isDirNotEmptyError reports whether err looks like the failure produced when
// removing a directory that still contains entries. A nil err yields false.
//
// The check is a case-insensitive match on the error text. Two phrasings are
// recognized: the Unix ENOTEMPTY message ("directory not empty") and the
// Windows ERROR_DIR_NOT_EMPTY message ("The directory is not empty."). The
// previous single-substring check missed the Windows wording, so a rollback
// that found other files in the skill directory surfaced a spurious error there.
func isDirNotEmptyError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, phrase := range [...]string{"directory not empty", "directory is not empty"} {
		if strings.Contains(msg, phrase) {
			return true
		}
	}
	return false
}
%q: skill already exists", draft.TargetSkillName) + } + return renderDeployableSkillBody(draft.BodyOrPatch), nil + case ChangeKindReplace: + if !hadOriginal { + return "", fmt.Errorf("cannot replace skill %q: skill does not exist", draft.TargetSkillName) + } + return renderDeployableSkillBody(draft.BodyOrPatch), nil + case ChangeKindAppend: + patch, err := renderDeployablePatchBody(draft.BodyOrPatch, draft.TargetSkillName) + if err != nil { + return "", err + } + if !hadOriginal || strings.TrimSpace(existingBody) == "" { + return renderDeployableSkillBody(draft.BodyOrPatch), nil + } + return strings.TrimRight(existingBody, "\n") + "\n\n" + strings.TrimLeft(patch, "\n"), nil + case ChangeKindMerge: + patch, err := renderDeployablePatchBody(draft.BodyOrPatch, draft.TargetSkillName) + if err != nil { + return "", err + } + if !hadOriginal || strings.TrimSpace(existingBody) == "" { + return renderDeployableSkillBody(draft.BodyOrPatch), nil + } + mergedSection := strings.Join([]string{ + "", + "## Merged Knowledge", + strings.TrimSpace(patch), + "", + }, "\n") + return strings.TrimRight(existingBody, "\n") + mergedSection, nil + default: + return "", fmt.Errorf("unsupported change_kind %q", draft.ChangeKind) + } +} + +func renderDeployablePatchBody(body, targetSkillName string) (string, error) { + body = renderDeployableSkillBody(body) + frontmatter, markdownBody := splitSkillFrontmatter(body) + if frontmatter == "" { + markdownBody = body + } else { + fields, err := parseSkillFrontmatterFields(frontmatter, true) + if err != nil { + return "", err + } + if name := strings.TrimSpace(fields["name"]); name != "" && name != targetSkillName { + return "", fmt.Errorf( + "skill patch frontmatter name %q does not match target skill %q", + name, + targetSkillName, + ) + } + } + return strings.TrimSpace(stripLeadingH1(markdownBody)), nil +} + +func splitSkillFrontmatter(body string) (frontmatter, markdownBody string) { + normalized := strings.ReplaceAll(strings.TrimSpace(body), 
// errorsJoin combines the non-nil errors in errs into a single error.
//
// It returns nil when every argument is nil (or none are given) and returns
// the error itself, unwrapped, when exactly one is non-nil. With several
// non-nil errors the messages are joined with "; " in argument order.
//
// Every joined error is wrapped with %w, so errors.Is and errors.As can find
// any of them. Previously only the first error was wrapped and later ones were
// flattened to text with %v.
func errorsJoin(errs ...error) error {
	var joined error
	for _, err := range errs {
		switch {
		case err == nil:
			// Skip absent errors so callers can pass results unconditionally.
		case joined == nil:
			joined = err
		default:
			joined = fmt.Errorf("%w; %w", joined, err)
		}
	}
	return joined
}
"path/filepath" + "strings" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestApplier_CreateDraftWritesSkillFile(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: workspace, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n", + Status: evolution.DraftStatusAccepted, + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + skillPath := filepath.Join(workspace, "skills", "weather", "SKILL.md") + data, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if !strings.Contains(string(data), "# Weather") { + t.Fatalf("unexpected content: %s", string(data)) + } +} + +func TestApplier_CreateDraftRendersDeployableSkillWithoutLearningTrace(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: workspace, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: strings.Join([]string{ + "---", + "name: weather", + "description: Create combined shortcut perform-mathematical-calculations-by-via-theorems for: Perform mathematical calculations by applying specific theorems and their associated rules.", + "---", + "# Weather", + "", + "## Learned Context", + "- Learned task: use 
native-name weather lookup.", + "", + "## Source Evidence", + "- Evidence: learned from task records: task-1", + "", + "## Procedure", + "Use native-name query first.", + "", + }, "\n"), + Status: evolution.DraftStatusAccepted, + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + data, err := os.ReadFile(filepath.Join(workspace, "skills", "weather", "SKILL.md")) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(data) + for _, forbidden := range []string{ + "Create combined shortcut", + "perform-mathematical-calculations-by-via-theorems for:", + "Learned Context", + "Learned task", + "Source Evidence", + "task records", + } { + if strings.Contains(content, forbidden) { + t.Fatalf("deployed skill contains %q:\n%s", forbidden, content) + } + } + if !strings.Contains(content, "Use native-name query first.") { + t.Fatalf("deployed skill lost procedure:\n%s", content) + } + if !strings.Contains( + content, + "description: Perform mathematical calculations by applying specific theorems and their associated rules.", + ) { + t.Fatalf("deployed skill did not clean description:\n%s", content) + } +} + +func TestApplier_CreateDraftDoesNotRewriteEvolutionDomainTextOrFrontmatter(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-evolution-domain", + WorkspaceID: workspace, + SourceRecordID: "rule-evolution-domain", + TargetSkillName: "agent-evolution-helper", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "agent evolution helper", + BodyOrPatch: "---\nname: agent-evolution-helper\ndescription: Explain agent evolution workflows.\n---\n# Agent Evolution Helper\nUse this skill to reason about agent evolution behavior.\n", + Status: 
evolution.DraftStatusAccepted, + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + data, err := os.ReadFile(filepath.Join(workspace, "skills", "agent-evolution-helper", "SKILL.md")) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(data) + if !strings.Contains(content, "name: agent-evolution-helper") { + t.Fatalf("frontmatter name was rewritten:\n%s", content) + } + if strings.Contains(content, "agent-update-helper") { + t.Fatalf("frontmatter name should not be rewritten:\n%s", content) + } + if !strings.Contains(content, "agent evolution behavior") { + t.Fatalf("domain text should preserve evolution wording:\n%s", content) + } +} + +func TestApplier_CreateDraftFailsWhenSkillAlreadyExists(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\nold body\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-create-existing", + WorkspaceID: workspace, + SourceRecordID: "rule-create-existing", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: replacement\n---\n# Weather\nnew body\n", + } + + err := applier.ApplyDraft(context.Background(), workspace, draft) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + if !strings.Contains(err.Error(), "already exists") { + t.Fatalf("error = %v, want already exists", err) + } 
+ + got, readErr := os.ReadFile(skillPath) + if readErr != nil { + t.Fatalf("ReadFile: %v", readErr) + } + if string(got) != original { + t.Fatalf("skill content changed unexpectedly:\n%s", string(got)) + } +} + +func TestApplier_CreateDraftRejectsMismatchedFrontmatterName(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-mismatched-name", + WorkspaceID: workspace, + SourceRecordID: "rule-mismatched-name", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: other-skill\ndescription: other helper\n---\n# Other\nUse something else.\n", + } + + err := applier.ApplyDraft(context.Background(), workspace, draft) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + if !strings.Contains(err.Error(), "frontmatter name") { + t.Fatalf("error = %v, want frontmatter name mismatch", err) + } + if _, statErr := os.Stat(filepath.Join(workspace, "skills", "weather", "SKILL.md")); !os.IsNotExist(statErr) { + t.Fatalf("expected no skill file, got err=%v", statErr) + } +} + +func TestApplier_RollsBackOnInvalidSkillBody(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\nold body\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-2", + WorkspaceID: workspace, + SourceRecordID: 
"rule-2", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindReplace, + HumanSummary: "broken draft", + BodyOrPatch: "invalid-frontmatter", + Status: evolution.DraftStatusAccepted, + } + + err := applier.ApplyDraft(context.Background(), workspace, draft) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + + got, readErr := os.ReadFile(skillPath) + if readErr != nil { + t.Fatalf("ReadFile: %v", readErr) + } + if string(got) != original { + t.Fatalf("skill content changed after rollback:\n%s", string(got)) + } +} + +func TestApplier_FailedNewSkillDoesNotLeaveEmptyDirectory(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-invalid-new-skill", + WorkspaceID: workspace, + SourceRecordID: "rule-invalid-new-skill", + TargetSkillName: "calculate-100-via-theorems", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "broken new skill", + BodyOrPatch: "invalid-frontmatter", + Status: evolution.DraftStatusAccepted, + } + + err := applier.ApplyDraft(context.Background(), workspace, draft) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + + skillPath := filepath.Join(workspace, "skills", "calculate-100-via-theorems", "SKILL.md") + if _, statErr := os.Stat(skillPath); !os.IsNotExist(statErr) { + t.Fatalf("expected no skill file, got err=%v", statErr) + } + skillDir := filepath.Dir(skillPath) + if _, statErr := os.Stat(skillDir); !os.IsNotExist(statErr) { + t.Fatalf("expected no leftover skill dir, got err=%v", statErr) + } +} + +func TestApplier_ReplaceDraftFailsWhenSkillDoesNotExist(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := 
evolution.SkillDraft{ + ID: "draft-replace-missing", + WorkspaceID: workspace, + SourceRecordID: "rule-replace-missing", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindReplace, + HumanSummary: "replace missing skill", + BodyOrPatch: "---\nname: weather\ndescription: replacement\n---\n# Weather\nnew body\n", + } + + err := applier.ApplyDraft(context.Background(), workspace, draft) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + if !strings.Contains(err.Error(), "does not exist") { + t.Fatalf("error = %v, want does not exist", err) + } + + skillPath := filepath.Join(workspace, "skills", "weather", "SKILL.md") + if _, statErr := os.Stat(skillPath); !os.IsNotExist(statErr) { + t.Fatalf("expected no skill file, got err=%v", statErr) + } +} + +func TestApplier_AppendDraftPreservesOriginalBody(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\n## Start Here\nUse city names.\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-append", + WorkspaceID: workspace, + SourceRecordID: "rule-append", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "append draft", + BodyOrPatch: "\n## Learned Pattern\nPrefer native-name query first.\n", + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + 
t.Fatalf("ReadFile: %v", err) + } + content := string(got) + if !strings.Contains(content, "Use city names.") { + t.Fatalf("appended content lost original body:\n%s", content) + } + if !strings.Contains(content, "Prefer native-name query first.") { + t.Fatalf("appended content missing new body:\n%s", content) + } +} + +func TestApplier_AppendDraftAllowsExistingExtraFrontmatterFields(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := strings.Join([]string{ + "---", + "name: weather", + "description: valid", + "# Human-authored metadata should not block append updates.", + "homepage: https://example.com/weather", + "aliases:", + "- forecast", + "metadata:", + " owner: human", + "---", + "# Weather", + "## Start Here", + "Use city names.", + "", + }, "\n") + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-append-extra-frontmatter", + WorkspaceID: workspace, + SourceRecordID: "rule-append-extra-frontmatter", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "append draft", + BodyOrPatch: "\n## Learned Pattern\nPrefer native-name query first.\n", + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(got) + for _, want := range []string{ + "homepage: https://example.com/weather", + "aliases:", + "- forecast", + "metadata:", + " owner: human", + "Prefer native-name 
query first.", + } { + if !strings.Contains(content, want) { + t.Fatalf("appended content missing %q:\n%s", want, content) + } + } +} + +func TestApplier_CreateDraftRejectsExtraFrontmatterFields(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-create-extra-frontmatter", + WorkspaceID: workspace, + SourceRecordID: "rule-create-extra-frontmatter", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\nhomepage: https://example.com/weather\n---\n# Weather\nUse weather.\n", + } + + err := applier.ApplyDraft(context.Background(), workspace, draft) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + if !strings.Contains(err.Error(), "unsupported skill frontmatter field") { + t.Fatalf("error = %v, want unsupported field", err) + } +} + +func TestApplier_AppendDraftDoesNotRewriteExistingLearningTerms(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\n## Evolution Notes\nKeep this manually-authored Learned phrase unchanged.\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-append-clean", + WorkspaceID: workspace, + SourceRecordID: "rule-append-clean", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + 
ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "append draft", + BodyOrPatch: "\n## Learned Pattern\nPrefer native-name query first.\n", + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(got) + if !strings.Contains(content, "## Evolution Notes") { + t.Fatalf("existing heading was rewritten:\n%s", content) + } + if !strings.Contains(content, "Keep this manually-authored Learned phrase unchanged.") { + t.Fatalf("existing body was rewritten:\n%s", content) + } + if strings.Contains(content, "## Learned Pattern") { + t.Fatalf("new patch should be deploy-sanitized:\n%s", content) + } + if !strings.Contains(content, "## Usage Pattern") { + t.Fatalf("new patch missing sanitized heading:\n%s", content) + } +} + +func TestApplier_AppendDraftStripsPlainMarkdownTopLevelHeading(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\n## Start Here\nUse city names.\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-append-plain-doc", + WorkspaceID: workspace, + SourceRecordID: "rule-append-plain-doc", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "append draft", + BodyOrPatch: "# Weather\n## Procedure\nPrefer native-name query first.\n", + } + + if err := applier.ApplyDraft(context.Background(), 
workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(got) + if strings.Count(content, "# Weather") != 1 { + t.Fatalf("appended content should not duplicate top-level heading:\n%s", content) + } + if !strings.Contains(content, "## Procedure") || !strings.Contains(content, "Prefer native-name query first.") { + t.Fatalf("appended content lost patch body:\n%s", content) + } +} + +func TestApplier_AppendAndMergeRejectFullDocumentPatchWithMismatchedName(t *testing.T) { + for _, kind := range []evolution.ChangeKind{evolution.ChangeKindAppend, evolution.ChangeKindMerge} { + t.Run(string(kind), func(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\n## Start Here\nUse city names.\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + err := applier.ApplyDraft(context.Background(), workspace, evolution.SkillDraft{ + ID: "draft-mismatched-patch", + WorkspaceID: workspace, + SourceRecordID: "rule-mismatched-patch", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: kind, + HumanSummary: "append draft", + BodyOrPatch: "---\nname: other-skill\ndescription: wrong target\n---\n# Other Skill\n## Procedure\nDo something else.\n", + }) + if err == nil { + t.Fatal("expected ApplyDraft to fail") + } + if !strings.Contains(err.Error(), "patch frontmatter name") { + t.Fatalf("error = %v, want patch frontmatter name mismatch", err) + } + got, readErr := 
os.ReadFile(skillPath) + if readErr != nil { + t.Fatalf("ReadFile: %v", readErr) + } + if string(got) != original { + t.Fatalf("skill content changed unexpectedly:\n%s", string(got)) + } + }) + } +} + +func TestApplier_AppendDraftStripsFullSkillDocumentPatch(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\n## Start Here\nUse city names.\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-append-full-doc", + WorkspaceID: workspace, + SourceRecordID: "rule-append-full-doc", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "append draft", + BodyOrPatch: "---\nname: weather\ndescription: duplicate document\n---\n# Weather\n## Procedure\nPrefer native-name query first.\n", + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(got) + if strings.Count(content, "---") != 2 { + t.Fatalf("appended content should keep only original frontmatter:\n%s", content) + } + if strings.Count(content, "# Weather") != 1 { + t.Fatalf("appended content should not duplicate top-level heading:\n%s", content) + } + if !strings.Contains(content, "## Procedure") || !strings.Contains(content, "Prefer native-name query first.") { + t.Fatalf("appended content lost patch body:\n%s", content) + } +} + +func 
TestApplier_BackupsAreScopedByWorkspace(t *testing.T) { + sharedState := t.TempDir() + workspaceA := t.TempDir() + workspaceB := t.TempDir() + now := time.Unix(1700000000, 0).UTC() + + for workspace, body := range map[string]string{ + workspaceA: "---\nname: weather\ndescription: valid\n---\n# Weather\nworkspace A\n", + workspaceB: "---\nname: weather\ndescription: valid\n---\n# Weather\nworkspace B\n", + } { + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll(%s): %v", workspace, err) + } + if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(body), 0o644); err != nil { + t.Fatalf("WriteFile(%s): %v", workspace, err) + } + } + + for _, workspace := range []string{workspaceA, workspaceB} { + applier := evolution.NewApplier(evolution.NewPaths(workspace, sharedState), func() time.Time { + return now + }) + if err := applier.ApplyDraft(context.Background(), workspace, evolution.SkillDraft{ + ID: "draft-replace", + WorkspaceID: workspace, + SourceRecordID: "rule-replace", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindReplace, + HumanSummary: "replace weather", + BodyOrPatch: "---\nname: weather\ndescription: replacement\n---\n# Weather\nreplacement\n", + }); err != nil { + t.Fatalf("ApplyDraft(%s): %v", workspace, err) + } + } + + var backupBodies []string + if err := filepath.WalkDir( + filepath.Join(sharedState, "backups"), + func(path string, entry os.DirEntry, err error) error { + if err != nil { + return err + } + if entry.IsDir() || entry.Name() != "SKILL.md" { + return nil + } + data, err := os.ReadFile(path) + if err != nil { + return err + } + backupBodies = append(backupBodies, string(data)) + return nil + }, + ); err != nil { + t.Fatalf("WalkDir(backups): %v", err) + } + + if len(backupBodies) != 2 { + t.Fatalf("backup count = %d, want 2", len(backupBodies)) + } + joined := strings.Join(backupBodies, 
"\n") + if !strings.Contains(joined, "workspace A") || !strings.Contains(joined, "workspace B") { + t.Fatalf("backups should preserve both workspace bodies:\n%s", joined) + } +} + +func TestApplier_MergeDraftAddsMergedKnowledgeSection(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + original := "---\nname: weather\ndescription: valid\n---\n# Weather\n## Start Here\nUse city names.\n" + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + draft := evolution.SkillDraft{ + ID: "draft-merge", + WorkspaceID: workspace, + SourceRecordID: "rule-merge", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindMerge, + HumanSummary: "merge draft", + BodyOrPatch: "Prefer native-name query first.", + } + + if err := applier.ApplyDraft(context.Background(), workspace, draft); err != nil { + t.Fatalf("ApplyDraft: %v", err) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(got) + if !strings.Contains(content, "Use city names.") { + t.Fatalf("merged content lost original body:\n%s", content) + } + if !strings.Contains(content, "## Merged Knowledge") { + t.Fatalf("merged content missing merged section:\n%s", content) + } + if !strings.Contains(content, "Prefer native-name query first.") { + t.Fatalf("merged content missing new knowledge:\n%s", content) + } +} + +func TestApplier_RejectsInvalidSkillName(t *testing.T) { + workspace := t.TempDir() + applier := evolution.NewApplier(evolution.NewPaths(workspace, ""), func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + + 
for _, name := range []string{"../escape", "/tmp/escape"} { + err := applier.ApplyDraft(context.Background(), workspace, evolution.SkillDraft{ + ID: "draft-invalid-name", + WorkspaceID: workspace, + SourceRecordID: "rule-invalid-name", + TargetSkillName: name, + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "bad name", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\nbody\n", + }) + if err == nil { + t.Fatalf("TargetSkillName %q expected error", name) + } + } +} diff --git a/pkg/evolution/case_writer.go b/pkg/evolution/case_writer.go new file mode 100644 index 000000000..6948ff69e --- /dev/null +++ b/pkg/evolution/case_writer.go @@ -0,0 +1,21 @@ +package evolution + +import ( + "context" +) + +type CaseWriter struct { + paths Paths + store *Store +} + +func NewCaseWriter(paths Paths) *CaseWriter { + return &CaseWriter{ + paths: paths, + store: NewStore(paths), + } +} + +func (w *CaseWriter) AppendCase(ctx context.Context, record LearningRecord) error { + return w.store.AppendTaskRecord(ctx, record) +} diff --git a/pkg/evolution/case_writer_test.go b/pkg/evolution/case_writer_test.go new file mode 100644 index 000000000..e6d0742e8 --- /dev/null +++ b/pkg/evolution/case_writer_test.go @@ -0,0 +1,77 @@ +package evolution_test + +import ( + "context" + "encoding/json" + "os" + "strings" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestCaseWriter_AppendsOneRecord(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + writer := evolution.NewCaseWriter(paths) + + record1 := testRecord("rec-1", "ws-1", true) + record2 := testRecord("rec-2", "ws-2", false) + + if err := writer.AppendCase(context.Background(), record1); err != nil { + t.Fatalf("AppendCase: %v", err) + } + if err := writer.AppendCase(context.Background(), record2); err != nil { + t.Fatalf("AppendCase second record: %v", err) + } + + data, err := 
// coldPathRuntime is the dependency a ColdPathRunner drives: one cold-path
// pass for a single workspace.
type coldPathRuntime interface {
	RunColdPathOnce(ctx context.Context, workspace string) error
}

// workspaceRunState is the per-workspace bookkeeping kept by ColdPathRunner.
// running marks that a pass loop owns the workspace; pending marks that another
// trigger arrived while that loop was busy.
type workspaceRunState struct {
	running bool
	pending bool
}

// ColdPathRunner runs cold-path passes per workspace. Triggers that arrive
// while a workspace is already being processed are coalesced into a single
// follow-up pass.
type ColdPathRunner struct {
	runtime coldPathRuntime
	async   func(func())
	onError func(error)
	ctx     context.Context
	cancel  context.CancelFunc

	mu        sync.Mutex
	wg        sync.WaitGroup
	closeOnce sync.Once
	closed    bool
	running   map[string]workspaceRunState
}

// NewColdPathRunner builds a runner that discards errors returned by runtime.
func NewColdPathRunner(runtime coldPathRuntime) *ColdPathRunner {
	return NewColdPathRunnerWithErrorHandler(runtime, nil)
}

// NewColdPathRunnerWithErrorHandler builds a runner that reports every
// non-cancellation error from runtime to onError. A nil onError is replaced
// with a no-op.
func NewColdPathRunnerWithErrorHandler(runtime coldPathRuntime, onError func(error)) *ColdPathRunner {
	handler := onError
	if handler == nil {
		handler = func(error) {}
	}
	runCtx, stop := context.WithCancel(context.Background())

	runner := &ColdPathRunner{
		runtime: runtime,
		onError: handler,
		ctx:     runCtx,
		cancel:  stop,
		running: make(map[string]workspaceRunState),
	}
	runner.async = func(run func()) {
		go run()
	}
	return runner
}

// Trigger requests a cold-path pass for workspace. It returns false when the
// receiver or its runtime is nil, when workspace is empty, or after Close;
// otherwise it returns true, either because a new pass loop was started or
// because a follow-up pass was queued behind the one already in flight.
func (r *ColdPathRunner) Trigger(workspace string) bool {
	if r == nil || r.runtime == nil || workspace == "" {
		return false
	}

	accepted, launch := r.admit(workspace)
	if launch {
		r.async(func() {
			defer r.wg.Done()
			r.runWorkspace(workspace)
		})
	}
	return accepted
}

// admit records the trigger under the mutex. accepted reports whether the
// trigger was taken at all; launch reports whether the caller must start a new
// pass loop (in which case the wait group has already been incremented).
func (r *ColdPathRunner) admit(workspace string) (accepted, launch bool) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if r.closed {
		return false, false
	}
	if current, tracked := r.running[workspace]; tracked && current.running {
		current.pending = true
		r.running[workspace] = current
		return true, false
	}
	r.running[workspace] = workspaceRunState{running: true}
	r.wg.Add(1)
	return true, true
}

// runWorkspace executes passes for workspace until no follow-up is pending or
// the runner has been closed. context.Canceled is not reported to onError.
func (r *ColdPathRunner) runWorkspace(workspace string) {
	for {
		err := r.runtime.RunColdPathOnce(r.ctx, workspace)
		if err != nil && !errors.Is(err, context.Canceled) {
			r.onError(err)
		}
		if !r.settle(workspace) {
			return
		}
	}
}

// settle decides, under the mutex, whether another pass is owed. It consumes a
// pending flag and returns true when one is; otherwise it forgets the
// workspace and returns false.
func (r *ColdPathRunner) settle(workspace string) bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	state, tracked := r.running[workspace]
	if tracked && !r.closed && state.pending {
		state.pending = false
		r.running[workspace] = state
		return true
	}
	delete(r.running, workspace)
	return false
}

// Close marks the runner closed, cancels the context handed to the runtime,
// and waits for every started pass loop to return. It is safe on a nil
// receiver and always returns nil.
func (r *ColdPathRunner) Close() error {
	if r == nil {
		return nil
	}

	r.closeOnce.Do(func() {
		r.mu.Lock()
		r.closed = true
		r.mu.Unlock()
		r.cancel()
	})
	r.wg.Wait()
	return nil
}
b/pkg/evolution/cold_path_runner_test.go new file mode 100644 index 000000000..2a0b28309 --- /dev/null +++ b/pkg/evolution/cold_path_runner_test.go @@ -0,0 +1,142 @@ +package evolution + +import ( + "context" + "sync/atomic" + "testing" + "time" +) + +type blockingColdPathRuntime struct { + runCount atomic.Int32 + cancelCount atomic.Int32 + started chan string + release chan struct{} +} + +func (r *blockingColdPathRuntime) RunColdPathOnce(ctx context.Context, workspace string) error { + r.runCount.Add(1) + r.started <- workspace + select { + case <-r.release: + return nil + case <-ctx.Done(): + r.cancelCount.Add(1) + return ctx.Err() + } +} + +func TestColdPathRunner_QueuesPendingRunForWorkspace(t *testing.T) { + runtime := &blockingColdPathRuntime{ + started: make(chan string, 4), + release: make(chan struct{}, 4), + } + runner := NewColdPathRunner(runtime) + defer runner.Close() + + if scheduled := runner.Trigger("workspace-a"); !scheduled { + t.Fatal("expected first trigger to be scheduled") + } + + select { + case workspace := <-runtime.started: + if workspace != "workspace-a" { + t.Fatalf("workspace = %q, want workspace-a", workspace) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for first cold path run") + } + + if scheduled := runner.Trigger("workspace-a"); !scheduled { + t.Fatal("expected second trigger to queue a pending run") + } + + select { + case workspace := <-runtime.started: + t.Fatalf("unexpected early pending cold path run for %q", workspace) + case <-time.After(150 * time.Millisecond): + } + + runtime.release <- struct{}{} + + select { + case workspace := <-runtime.started: + if workspace != "workspace-a" { + t.Fatalf("workspace = %q, want workspace-a", workspace) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for pending cold path run") + } + + runtime.release <- struct{}{} + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if runtime.runCount.Load() == 2 { + 
return + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("runCount = %d, want 2", runtime.runCount.Load()) +} + +func TestColdPathRunner_CloseCancelsActiveRunAndDropsPendingWork(t *testing.T) { + runtime := &blockingColdPathRuntime{ + started: make(chan string, 4), + release: make(chan struct{}, 4), + } + runner := NewColdPathRunner(runtime) + + if scheduled := runner.Trigger("workspace-a"); !scheduled { + t.Fatal("expected first trigger to be scheduled") + } + + select { + case <-runtime.started: + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for first cold path run") + } + + if scheduled := runner.Trigger("workspace-a"); !scheduled { + t.Fatal("expected second trigger to mark pending work") + } + + closeDone := make(chan struct{}) + go func() { + defer close(closeDone) + if err := runner.Close(); err != nil { + t.Errorf("Close() error = %v", err) + } + }() + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if !runner.Trigger("workspace-a") { + break + } + time.Sleep(10 * time.Millisecond) + } + if runner.Trigger("workspace-a") { + t.Fatal("expected Trigger to reject new work after Close") + } + + select { + case <-closeDone: + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for Close to finish") + } + + select { + case workspace := <-runtime.started: + t.Fatalf("unexpected pending cold path run after Close for %q", workspace) + case <-time.After(150 * time.Millisecond): + } + + if got := runtime.runCount.Load(); got != 1 { + t.Fatalf("runCount = %d, want 1", got) + } + if got := runtime.cancelCount.Load(); got != 1 { + t.Fatalf("cancelCount = %d, want 1", got) + } +} diff --git a/pkg/evolution/draft_review.go b/pkg/evolution/draft_review.go new file mode 100644 index 000000000..da44d6365 --- /dev/null +++ b/pkg/evolution/draft_review.go @@ -0,0 +1,38 @@ +package evolution + +import "strings" + +type DraftReviewResult struct { + Status DraftStatus + Findings []string + ReviewNotes 
[]string +} + +func ReviewDraft(draft SkillDraft) DraftReviewResult { + findings := append([]string(nil), ValidateDraft(draft)...) + findings = append(findings, scanDraftContent(draft)...) + + result := DraftReviewResult{ + Status: DraftStatusCandidate, + Findings: findings, + ReviewNotes: []string{"local structural validation completed"}, + } + if len(findings) > 0 { + result.Status = DraftStatusQuarantined + } + return result +} + +func scanDraftContent(draft SkillDraft) []string { + body := strings.ToLower(draft.BodyOrPatch) + findings := make([]string, 0, 2) + + if strings.Contains(body, "sk-live-") || strings.Contains(body, "sk_test_") || strings.Contains(body, "api_key=") { + findings = append(findings, "secret-like token detected in body_or_patch") + } + if strings.Contains(body, "-----begin private key-----") { + findings = append(findings, "private key material detected in body_or_patch") + } + + return findings +} diff --git a/pkg/evolution/draft_review_test.go b/pkg/evolution/draft_review_test.go new file mode 100644 index 000000000..70bcc6a6f --- /dev/null +++ b/pkg/evolution/draft_review_test.go @@ -0,0 +1,67 @@ +package evolution_test + +import ( + "strings" + "testing" + + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestReviewDraft_QuarantinesInvalidDraft(t *testing.T) { + result := evolution.ReviewDraft(evolution.SkillDraft{ + ID: "draft-1", + TargetSkillName: "", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "broken", + BodyOrPatch: "", + }) + + if result.Status != evolution.DraftStatusQuarantined { + t.Fatalf("Status = %q, want %q", result.Status, evolution.DraftStatusQuarantined) + } + if len(result.Findings) == 0 { + t.Fatal("expected findings for invalid draft") + } +} + +func TestReviewDraft_QuarantinesSecretLikeContent(t *testing.T) { + result := evolution.ReviewDraft(evolution.SkillDraft{ + ID: "draft-2", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + 
ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "contains credentials", + BodyOrPatch: "Use token sk-live-secret for direct calls.", + }) + + if result.Status != evolution.DraftStatusQuarantined { + t.Fatalf("Status = %q, want %q", result.Status, evolution.DraftStatusQuarantined) + } + if len(result.Findings) == 0 { + t.Fatal("expected findings for secret-like content") + } + if !strings.Contains(strings.Join(result.Findings, "\n"), "secret-like") { + t.Fatalf("findings = %v, want secret-like finding", result.Findings) + } +} + +func TestReviewDraft_QuarantinesInvalidTargetSkillName(t *testing.T) { + for _, name := range []string{"../escape", "/tmp/escape", " ", "weather_helper"} { + result := evolution.ReviewDraft(evolution.SkillDraft{ + ID: "draft-invalid-name", + TargetSkillName: name, + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "bad name", + BodyOrPatch: "body", + }) + + if result.Status != evolution.DraftStatusQuarantined { + t.Fatalf("TargetSkillName %q status = %q, want %q", name, result.Status, evolution.DraftStatusQuarantined) + } + if len(result.Findings) == 0 { + t.Fatalf("TargetSkillName %q expected findings", name) + } + } +} diff --git a/pkg/evolution/drafts.go b/pkg/evolution/drafts.go new file mode 100644 index 000000000..0d48d6605 --- /dev/null +++ b/pkg/evolution/drafts.go @@ -0,0 +1,511 @@ +package evolution + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/skills" +) + +type DraftGenerator interface { + GenerateDraft(ctx context.Context, rule LearningRecord, matches []skills.SkillInfo) (SkillDraft, error) +} + +type EvidenceAwareDraftGenerator interface { + GenerateDraftWithEvidence( + ctx context.Context, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, + ) (SkillDraft, error) +} + +type DraftEvidence struct { + TaskRecords []LearningRecord +} + +func 
ValidateDraft(draft SkillDraft) []string { + findings := make([]string, 0, 5) + + if strings.TrimSpace(draft.TargetSkillName) == "" { + findings = append(findings, "target_skill_name is required") + } else if err := skills.ValidateSkillName(draft.TargetSkillName); err != nil { + findings = append(findings, "target_skill_name is invalid: "+err.Error()) + } else if isNumericToken(strings.TrimSpace(draft.TargetSkillName)) { + findings = append(findings, "target_skill_name must be descriptive, not numeric-only") + } + if strings.TrimSpace(draft.HumanSummary) == "" { + findings = append(findings, "human_summary is required") + } + if strings.TrimSpace(draft.BodyOrPatch) == "" { + findings = append(findings, "body_or_patch is required") + } + + switch draft.DraftType { + case DraftTypeWorkflow, DraftTypeShortcut: + default: + findings = append(findings, "draft_type is invalid") + } + + switch draft.ChangeKind { + case ChangeKindCreate, ChangeKindAppend, ChangeKindReplace, ChangeKindMerge: + default: + findings = append(findings, "change_kind is invalid") + } + + return findings +} + +type DefaultDraftGenerator struct { + loader *skills.SkillsLoader +} + +func NewDefaultDraftGenerator(workspace string) *DefaultDraftGenerator { + builtinSkillsDir := strings.TrimSpace(os.Getenv(config.EnvBuiltinSkills)) + if builtinSkillsDir == "" { + wd, _ := os.Getwd() + builtinSkillsDir = filepath.Join(wd, "skills") + } + + globalSkillsDir := filepath.Join(config.GetHome(), "skills") + return &DefaultDraftGenerator{ + loader: skills.NewSkillsLoader(workspace, globalSkillsDir, builtinSkillsDir), + } +} + +func (g *DefaultDraftGenerator) GenerateDraft( + _ context.Context, + rule LearningRecord, + matches []skills.SkillInfo, +) (SkillDraft, error) { + return g.GenerateDraftWithEvidence(context.Background(), rule, matches, DraftEvidence{}) +} + +func (g *DefaultDraftGenerator) GenerateDraftWithEvidence( + _ context.Context, + rule LearningRecord, + matches []skills.SkillInfo, + evidence 
DraftEvidence, +) (SkillDraft, error) { + rule = enrichRuleWithDraftEvidence(rule, evidence) + target := inferTargetSkillName(rule, matches) + if target == "" { + target = "learned-skill" + } + + _, hasExisting, err := g.loadBaseSkillContent(target, matches) + if err != nil { + return SkillDraft{}, err + } + + draftType := DraftTypeWorkflow + if len(rule.WinningPath) <= 1 { + draftType = DraftTypeShortcut + } + + changeKind := ChangeKindCreate + body := g.buildNewSkillBody(target, rule, evidence, matches) + if hasExisting { + changeKind = ChangeKindAppend + body = g.buildAppendBody(rule, evidence, matches) + } + + return SkillDraft{ + TargetSkillName: target, + DraftType: draftType, + ChangeKind: changeKind, + HumanSummary: g.buildHumanSummary(target, rule, hasExisting), + IntendedUseCases: inferIntendedUseCases(rule), + PreferredEntryPath: inferPreferredEntryPath(rule), + AvoidPatterns: inferAvoidPatterns(rule), + BodyOrPatch: body, + }, nil +} + +func inferTargetSkillName(rule LearningRecord, matches []skills.SkillInfo) string { + if target := inferCombinedSkillName(rule); target != "" { + return target + } + if label := validSkillNameOrEmpty(rule.Label); label != "" { + return label + } + if len(matches) > 0 && strings.TrimSpace(matches[0].Name) != "" { + return strings.TrimSpace(matches[0].Name) + } + if len(rule.LateAddedSkills) > 0 && strings.TrimSpace(rule.LateAddedSkills[0]) != "" { + return strings.TrimSpace(rule.LateAddedSkills[0]) + } + if len(rule.WinningPath) > 0 && strings.TrimSpace(rule.WinningPath[0]) != "" { + return strings.TrimSpace(rule.WinningPath[0]) + } + if len(rule.MatchedSkillNames) > 0 && strings.TrimSpace(rule.MatchedSkillNames[0]) != "" { + return strings.TrimSpace(rule.MatchedSkillNames[0]) + } + + tokens := tokenizeForEvolution(rule.Summary) + if len(tokens) > 0 { + if len(tokens) == 1 && isNumericToken(tokens[0]) { + return "learned-" + tokens[0] + } + return tokens[0] + } + return "" +} + +func enrichRuleWithDraftEvidence(rule 
LearningRecord, evidence DraftEvidence) LearningRecord { + if len(evidence.TaskRecords) == 0 { + return rule + } + usedSkillNames := make([]string, 0) + pathCounts := make(map[string]int) + pathByKey := make(map[string][]string) + for _, task := range evidence.TaskRecords { + path := uniqueTrimmedNames(task.UsedSkillNames) + if len(path) == 0 { + continue + } + usedSkillNames = append(usedSkillNames, path...) + key := strings.Join(path, "\x00") + pathCounts[key]++ + pathByKey[key] = path + } + rule.MatchedSkillNames = appendUniqueStrings(rule.MatchedSkillNames, uniqueTrimmedNames(usedSkillNames)...) + if len(rule.WinningPath) == 0 { + bestKey := "" + bestCount := 0 + for key, count := range pathCounts { + if count > bestCount || (count == bestCount && key < bestKey) { + bestKey = key + bestCount = count + } + } + if bestKey != "" { + rule.WinningPath = append([]string(nil), pathByKey[bestKey]...) + } + } + return rule +} + +func inferCombinedSkillName(rule LearningRecord) string { + path := normalizePath(rule.WinningPath) + if len(path) < 2 { + return "" + } + + tokens := tokenizeForEvolution(rule.Summary) + suffix := commonWinningPathSuffix(path) + if len(tokens) == 1 && isNumericToken(tokens[0]) && suffix != "" { + if candidate := validSkillNameOrEmpty( + "calculate-" + tokens[0] + "-via-" + pluralizeSuffix(suffix), + ); candidate != "" { + return candidate + } + } + if len(tokens) >= 2 { + prefix := strings.Join(tokens[:minInt(len(tokens), 4)], "-") + if suffix != "" { + if candidate := validSkillNameOrEmpty(prefix + "-via-" + pluralizeSuffix(suffix)); candidate != "" { + return candidate + } + } + if candidate := validSkillNameOrEmpty(prefix + "-shortcut"); candidate != "" { + return candidate + } + } + + compressedPath := compressedWinningPathName(path) + if candidate := validSkillNameOrEmpty("combined-" + compressedPath); candidate != "" { + return candidate + } + if candidate := validSkillNameOrEmpty(path[0] + "-to-" + path[len(path)-1] + "-shortcut"); 
// commonWinningPathSuffix returns the final "-"-separated segment shared by
// every entry of path. It returns "" when path has fewer than two entries,
// when any entry's final segment is blank, or when the segments differ.
func commonWinningPathSuffix(path []string) string {
	if len(path) < 2 {
		return ""
	}

	shared := ""
	for idx, entry := range path {
		segment := strings.TrimSpace(entry)
		if cut := strings.LastIndex(segment, "-"); cut >= 0 {
			segment = segment[cut+1:]
		}
		tail := strings.TrimSpace(segment)

		switch {
		case tail == "":
			return ""
		case idx == 0:
			shared = tail
		case tail != shared:
			return ""
		}
	}
	return shared
}

// compressedWinningPathName joins the entries of path with "-", factoring out
// a suffix common to all of them: the suffix is stripped from each entry and
// appended once, pluralized, at the end. Blank entries are skipped. If nothing
// survives, the raw entries are joined unchanged.
func compressedWinningPathName(path []string) string {
	shared := commonWinningPathSuffix(path)
	pieces := make([]string, 0, len(path)+1)

	for _, entry := range path {
		piece := strings.TrimSpace(entry)
		if piece == "" {
			continue
		}
		if shared != "" {
			piece = strings.TrimSuffix(piece, "-"+shared)
			piece = strings.TrimSuffix(piece, shared)
			piece = strings.Trim(piece, "-")
		}
		if piece == "" {
			continue
		}
		pieces = append(pieces, piece)
	}

	if shared != "" {
		pieces = append(pieces, pluralizeSuffix(shared))
	}
	if len(pieces) > 0 {
		return strings.Join(pieces, "-")
	}
	return strings.Join(path, "-")
}

// pluralizeSuffix lower-cases and trims suffix, then appends "s" unless the
// result is empty or already ends in "s".
func pluralizeSuffix(suffix string) string {
	normalized := strings.TrimSpace(strings.ToLower(suffix))
	switch {
	case normalized == "":
		return ""
	case strings.HasSuffix(normalized, "s"):
		return normalized
	default:
		return normalized + "s"
	}
}

// isNumericToken reports whether value is non-empty and consists solely of the
// ASCII digits 0-9.
func isNumericToken(value string) bool {
	if value == "" {
		return false
	}
	nonDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(value, nonDigit) < 0
}
skills.ValidateSkillName(candidate); err != nil { + return "" + } + return candidate +} + +func (g *DefaultDraftGenerator) loadBaseSkillContent(target string, matches []skills.SkillInfo) (string, bool, error) { + for _, match := range matches { + if match.Name != target || strings.TrimSpace(match.Path) == "" { + continue + } + data, err := os.ReadFile(match.Path) + if err != nil { + return "", false, err + } + return string(data), true, nil + } + + if g.loader == nil { + return "", false, nil + } + content, ok := g.loader.LoadSkill(target) + if !ok { + return "", false, nil + } + description := fmt.Sprintf("Use this skill to %s when the task requires this workflow.", sentenceFragment(target)) + return buildSkillDocument(target, description, content), true, nil +} + +func (g *DefaultDraftGenerator) buildHumanSummary(target string, rule LearningRecord, hasExisting bool) string { + if hasExisting { + return fmt.Sprintf("Refresh %s with learned pattern: %s", target, rule.Summary) + } + return fmt.Sprintf("Create %s from learned pattern: %s", target, rule.Summary) +} + +func (g *DefaultDraftGenerator) buildNewSkillBody( + target string, + rule LearningRecord, + evidence DraftEvidence, + matches []skills.SkillInfo, +) string { + description := fmt.Sprintf( + "Use this skill to %s when the task matches this workflow.", + sentenceFragment(fallbackString(rule.Summary, target)), + ) + body := strings.Join([]string{ + "# " + titleCaseSkillName(target), + "", + "## Start Here", + g.startHereLine(rule), + "", + "## When To Use", + fmt.Sprintf("Use this skill when the task matches `%s`.", strings.TrimSpace(rule.Summary)), + "", + "## Learned Pattern", + g.learnedPatternLine(rule), + "", + "## Procedure", + g.procedureLine(rule, evidence), + "", + "## Expected Result", + g.expectedResultLine(evidence), + "", + "## Source Skills", + synthesizedComponentBreakdown(matches), + "", + "## Source Evidence", + g.evidenceLine(rule, evidence), + }, "\n") + return buildSkillDocument(target, 
// buildSkillDocument renders a skill file: a front-matter block carrying the
// trimmed name and description, a blank line, the trimmed body, and a trailing
// newline.
func buildSkillDocument(name, description, body string) string {
	return strings.Join([]string{
		"---",
		"name: " + strings.TrimSpace(name),
		"description: " + strings.TrimSpace(description),
		"---",
		"",
		strings.TrimSpace(body),
		"",
	}, "\n")
}

// titleCaseSkillName turns a skill identifier into a heading: the name is
// split on '-', '_' and ' ', the first character of every part is upper-cased,
// and the parts are joined with single spaces. A name with no parts yields
// "Learned Skill".
//
// The first character is taken as a whole rune rather than a single byte, so
// parts that start with a multi-byte UTF-8 character are capitalised (or left
// alone when they have no upper case) instead of being corrupted.
func titleCaseSkillName(name string) string {
	parts := strings.FieldsFunc(name, func(r rune) bool { return r == '-' || r == '_' || r == ' ' })
	if len(parts) == 0 {
		return "Learned Skill"
	}
	for i, part := range parts {
		// Find the byte offset where the second rune starts; for a
		// single-rune part the whole string is the head.
		head := len(part)
		for offset := range part {
			if offset > 0 {
				head = offset
				break
			}
		}
		parts[i] = strings.ToUpper(part[:head]) + part[head:]
	}
	return strings.Join(parts, " ")
}
the most reliable recent path.", + strings.Join(rule.WinningPath, " -> "), + ) + } + return fmt.Sprintf("Prefer the pattern summarized as `%s`.", strings.TrimSpace(rule.Summary)) +} + +func (g *DefaultDraftGenerator) procedureLine(rule LearningRecord, evidence DraftEvidence) string { + if len(rule.WinningPath) > 0 { + return fmt.Sprintf( + "Follow `%s`, applying the concrete operation from each source skill, then return the final result directly.", + strings.Join(rule.WinningPath, " -> "), + ) + } + if excerpt := firstFinalOutputExcerpt(evidence, 260); excerpt != "" { + return "Use the same operation demonstrated by the source task result: " + excerpt + } + return fmt.Sprintf( + "Solve tasks matching `%s` using the learned successful workflow, then return the final result directly.", + strings.TrimSpace(rule.Summary), + ) +} + +func (g *DefaultDraftGenerator) expectedResultLine(evidence DraftEvidence) string { + if excerpt := firstFinalOutputExcerpt(evidence, 320); excerpt != "" { + return excerpt + } + return "Return the completed result for the matched task without restating unrelated discovery steps." +} + +func (g *DefaultDraftGenerator) evidenceLine(rule LearningRecord, evidence DraftEvidence) string { + if len(evidence.TaskRecords) > 0 { + ids := make([]string, 0, len(evidence.TaskRecords)) + for _, task := range evidence.TaskRecords { + ids = append(ids, task.ID) + } + return fmt.Sprintf("Learned from task records: %s", strings.Join(ids, ", ")) + } + if len(rule.TaskRecordIDs) > 0 { + return fmt.Sprintf("Learned from task records: %s", strings.Join(rule.TaskRecordIDs, ", ")) + } + return "Learned from the pattern record." 
// triggerSuffix formats an optional trigger name as a sentence fragment.
// A trigger that is empty after trimming whitespace yields "". Otherwise the
// result is " during `<trigger>`", with a leading space so it can be appended
// directly to a preceding clause.
func triggerSuffix(trigger string) string {
	if name := strings.TrimSpace(trigger); name != "" {
		return fmt.Sprintf(" during `%s`", name)
	}
	return ""
}
LateAddedSkills: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + EventCount: 3, + SuccessRate: 1, + }, []skills.SkillInfo{ + { + Name: "three-one-theorem", + Path: filepath.Join(workspace, "skills", "three-one-theorem", "SKILL.md"), + Source: "workspace", + }, + { + Name: "four-two-theorem", + Path: filepath.Join(workspace, "skills", "four-two-theorem", "SKILL.md"), + Source: "workspace", + }, + { + Name: "five-three-theorem", + Path: filepath.Join(workspace, "skills", "five-three-theorem", "SKILL.md"), + Source: "workspace", + }, + }) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + if draft.TargetSkillName != "calculate-100-via-theorems" { + t.Fatalf("TargetSkillName = %q, want calculate-100-via-theorems", draft.TargetSkillName) + } + if draft.ChangeKind != evolution.ChangeKindCreate { + t.Fatalf("ChangeKind = %q, want create", draft.ChangeKind) + } + if !strings.Contains(draft.BodyOrPatch, "---\nname: calculate-100-via-theorems") { + t.Fatalf("BodyOrPatch should contain full skill document:\n%s", draft.BodyOrPatch) + } +} + +func TestDefaultDraftGenerator_CombinedSkillIncludesEvidenceAndSourceOperations(t *testing.T) { + workspace := t.TempDir() + generator := evolution.NewDefaultDraftGenerator(workspace) + sourceSkills := []struct { + name string + body string + }{ + {name: "three-one-theorem", body: "Add 31 to the input value."}, + {name: "four-two-theorem", body: "Add 42 to the current value."}, + {name: "five-three-theorem", body: "Subtract 53 from the current value."}, + } + + matches := make([]skills.SkillInfo, 0, len(sourceSkills)) + for _, source := range sourceSkills { + skillPath := filepath.Join(workspace, "skills", source.name, "SKILL.md") + if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + content := "---\nname: " + source.name + "\ndescription: theorem helper\n---\n# " + source.name + "\n" + source.body + "\n" + if err := os.WriteFile(skillPath, 
[]byte(content), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + matches = append( + matches, + skills.SkillInfo{Name: source.name, Path: skillPath, Source: "workspace", Description: "theorem helper"}, + ) + } + + draft, err := generator.GenerateDraftWithEvidence(context.Background(), evolution.LearningRecord{ + ID: "pattern-1", + Summary: "调用三一定理计算100", + TaskRecordIDs: []string{"task-1"}, + }, matches, evolution.DraftEvidence{ + TaskRecords: []evolution.LearningRecord{ + { + ID: "task-1", + Summary: "调用三一定理计算100", + FinalOutput: "100 + 31 = 131; 131 + 42 = 173; 173 - 53 = 120", + UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + }, + }, + }) + if err != nil { + t.Fatalf("GenerateDraftWithEvidence: %v", err) + } + for _, want := range []string{ + "calculate-100-via-theorems", + "Add 31 to the input value", + "Add 42 to the current value", + "Subtract 53 from the current value", + "100 + 31 = 131", + "task-1", + } { + if !strings.Contains(draft.BodyOrPatch, want) && draft.TargetSkillName != want { + t.Fatalf("draft missing %q:\nname=%s\n%s", want, draft.TargetSkillName, draft.BodyOrPatch) + } + } +} + +func TestDefaultDraftGenerator_DoesNotInferNumericOnlyTargetFromSummary(t *testing.T) { + generator := evolution.NewDefaultDraftGenerator(t.TempDir()) + + draft, err := generator.GenerateDraft(context.Background(), evolution.LearningRecord{ + Summary: "100", + EventCount: 1, + SuccessRate: 1, + }, nil) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + if draft.TargetSkillName != "learned-100" { + t.Fatalf("TargetSkillName = %q, want learned-100", draft.TargetSkillName) + } +} + +func TestDefaultDraftGenerator_UsesAppendWhenExtendingExistingSkill(t *testing.T) { + workspace := t.TempDir() + generator := evolution.NewDefaultDraftGenerator(workspace) + + existingPath := filepath.Join(workspace, "skills", "weather", "SKILL.md") + if err := os.MkdirAll(filepath.Dir(existingPath), 0o755); err != nil { + 
t.Fatalf("MkdirAll: %v", err) + } + existing := "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse city names.\n" + if err := os.WriteFile(existingPath, []byte(existing), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + draft, err := generator.GenerateDraft(context.Background(), evolution.LearningRecord{ + Summary: "weather native-name path", + WinningPath: []string{"weather"}, + EventCount: 4, + SuccessRate: 1, + }, []skills.SkillInfo{ + {Name: "weather", Path: existingPath, Source: "workspace", Description: "Weather helper"}, + }) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + if draft.ChangeKind != evolution.ChangeKindAppend { + t.Fatalf("ChangeKind = %q, want append", draft.ChangeKind) + } + if strings.Contains(draft.BodyOrPatch, "---\nname: weather") { + t.Fatalf("BodyOrPatch should contain only appended section, got full document:\n%s", draft.BodyOrPatch) + } + if !strings.Contains(draft.BodyOrPatch, "## Learned Evolution") { + t.Fatalf("BodyOrPatch = %q, want learned evolution section", draft.BodyOrPatch) + } + if len(draft.IntendedUseCases) != 1 || draft.IntendedUseCases[0] != "weather native-name path" { + t.Fatalf("IntendedUseCases = %v, want [weather native-name path]", draft.IntendedUseCases) + } + if len(draft.PreferredEntryPath) != 1 || draft.PreferredEntryPath[0] != "weather" { + t.Fatalf("PreferredEntryPath = %v, want [weather]", draft.PreferredEntryPath) + } +} + +func TestLLMDraftGenerator_BuildPromptIncludesLateAddedSkillHint(t *testing.T) { + provider := &llmDraftTestProvider{ + defaultModel: "test-model", + response: &providers.LLMResponse{ + Content: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name lookup first","body_or_patch":"## Start Here\nUse native-name first."}`, + }, + } + generator := evolution.NewLLMDraftGenerator(provider, "", &recordingDraftGenerator{}) + + _, err := 
generator.GenerateDraft(context.Background(), evolution.LearningRecord{ + ID: "rule-1", + Summary: "weather native-name path", + EventCount: 7, + SuccessRate: 0.86, + WinningPath: []string{"geocode", "weather"}, + MatchedSkillNames: []string{"weather"}, + LateAddedSkills: []string{"weather"}, + FinalSnapshotTrigger: "context_retry_rebuild", + }, []skills.SkillInfo{ + {Name: "weather", Path: "/tmp/weather/SKILL.md", Source: "workspace", Description: "Find weather details."}, + }) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + + prompt := provider.lastMessages[1].Content + if !strings.Contains(prompt, "Late-added successful skills: weather") { + t.Fatalf("prompt missing late-added skill hint:\n%s", prompt) + } + if !strings.Contains(prompt, "Final snapshot trigger: context_retry_rebuild") { + t.Fatalf("prompt missing final snapshot trigger:\n%s", prompt) + } + if !strings.Contains(prompt, "Prefer creating a new combined shortcut skill") { + t.Fatalf("prompt missing combined skill guidance:\n%s", prompt) + } + if !strings.Contains(prompt, "Suggested target skill name:") { + t.Fatalf("prompt missing suggested target skill name:\n%s", prompt) + } +} diff --git a/pkg/evolution/generator_factory.go b/pkg/evolution/generator_factory.go new file mode 100644 index 000000000..1baafae79 --- /dev/null +++ b/pkg/evolution/generator_factory.go @@ -0,0 +1,11 @@ +package evolution + +import "github.com/sipeed/picoclaw/pkg/providers" + +func NewDraftGeneratorForWorkspace(workspace string, provider providers.LLMProvider, modelID string) DraftGenerator { + fallback := NewDefaultDraftGenerator(workspace) + if provider == nil { + return fallback + } + return NewLLMDraftGenerator(provider, modelID, fallback) +} diff --git a/pkg/evolution/lifecycle.go b/pkg/evolution/lifecycle.go new file mode 100644 index 000000000..f9cad26bd --- /dev/null +++ b/pkg/evolution/lifecycle.go @@ -0,0 +1,133 @@ +package evolution + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "time" + 
+ "github.com/sipeed/picoclaw/pkg/skills" +) + +type LifecycleRunSummary struct { + EvaluatedProfiles int + TransitionedProfiles int + DeletedSkills int +} + +func NextLifecycleState(profile SkillProfile, now time.Time) SkillStatus { + if profile.Origin == "manual" || profile.LastUsedAt.IsZero() { + return profile.Status + } + + idle := now.Sub(profile.LastUsedAt) + switch profile.Status { + case SkillStatusActive: + if idle > 90*24*time.Hour && profile.RetentionScore < 0.3 { + return SkillStatusCold + } + case SkillStatusCold: + if idle > 180*24*time.Hour && profile.RetentionScore < 0.2 { + return SkillStatusArchived + } + case SkillStatusArchived: + if idle > 365*24*time.Hour && profile.RetentionScore < 0.1 { + return SkillStatusDeleted + } + } + + return profile.Status +} + +func ApplyLifecycleState(paths Paths, profile SkillProfile, next SkillStatus) error { + if next != SkillStatusDeleted { + return nil + } + + workspace := profile.WorkspaceID + if workspace == "" { + workspace = inferWorkspaceFromPaths(paths) + } + if workspace == "" { + return fmt.Errorf("resolve lifecycle delete workspace for skill %q: workspace is required", profile.SkillName) + } + if err := skills.ValidateSkillName(profile.SkillName); err != nil { + return fmt.Errorf("resolve lifecycle delete skill name: %w", err) + } + + skillPath := filepath.Join(workspace, "skills", profile.SkillName, "SKILL.md") + err := os.Remove(skillPath) + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err +} + +func RunLifecycleOnce(store *Store, paths Paths, workspace string, now time.Time) (LifecycleRunSummary, error) { + if store == nil { + return LifecycleRunSummary{}, nil + } + + profiles, err := store.LoadProfiles() + if err != nil { + return LifecycleRunSummary{}, err + } + + summary := LifecycleRunSummary{} + for _, profile := range profiles { + if !profileBelongsToWorkspace(paths, workspace, profile) { + continue + } + + summary.EvaluatedProfiles++ + next := NextLifecycleState(profile, 
now) + if next == profile.Status { + continue + } + + if err := ApplyLifecycleState(paths, profile, next); err != nil { + return summary, err + } + profile.VersionHistory = append(profile.VersionHistory, SkillVersionEntry{ + Version: profile.CurrentVersion, + Action: "lifecycle:" + string(next), + Timestamp: now, + Summary: fmt.Sprintf("lifecycle transition: %s -> %s", profile.Status, next), + }) + profile.Status = next + if err := store.SaveProfile(profile); err != nil { + return summary, err + } + + summary.TransitionedProfiles++ + if next == SkillStatusDeleted { + summary.DeletedSkills++ + } + } + + return summary, nil +} + +func inferWorkspaceFromPaths(paths Paths) string { + root := filepath.Clean(paths.RootDir) + if filepath.Base(root) != "evolution" { + return "" + } + stateDir := filepath.Dir(root) + if filepath.Base(stateDir) != "state" { + return "" + } + return filepath.Dir(stateDir) +} + +func profileBelongsToWorkspace(paths Paths, workspace string, profile SkillProfile) bool { + if profile.WorkspaceID == workspace { + return true + } + return profile.WorkspaceID == "" && usesDefaultWorkspaceState(paths, workspace) +} + +func usesDefaultWorkspaceState(paths Paths, workspace string) bool { + return paths.RootDir == NewPaths(workspace, "").RootDir +} diff --git a/pkg/evolution/lifecycle_actions_test.go b/pkg/evolution/lifecycle_actions_test.go new file mode 100644 index 000000000..8bd65d87d --- /dev/null +++ b/pkg/evolution/lifecycle_actions_test.go @@ -0,0 +1,72 @@ +package evolution_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestApplyLifecycleStateDeletedRemovesSkillFile(t *testing.T) { + workspace := t.TempDir() + skillDir := filepath.Join(workspace, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile(skillPath, []byte("# weather\n"), 0o644); err != 
nil { + t.Fatalf("WriteFile: %v", err) + } + + err := evolution.ApplyLifecycleState( + evolution.NewPaths(workspace, ""), + evolution.SkillProfile{SkillName: "weather"}, + evolution.SkillStatusDeleted, + ) + if err != nil { + t.Fatalf("ApplyLifecycleState: %v", err) + } + + if _, err := os.Stat(skillPath); !os.IsNotExist(err) { + t.Fatalf("skill file should be removed, stat err = %v", err) + } +} + +func TestApplyLifecycleStateDeletedRequiresResolvedWorkspace(t *testing.T) { + err := evolution.ApplyLifecycleState( + evolution.Paths{RootDir: filepath.Join(t.TempDir(), "shared-evolution")}, + evolution.SkillProfile{SkillName: "weather"}, + evolution.SkillStatusDeleted, + ) + if err == nil { + t.Fatal("expected error when workspace cannot be resolved") + } +} + +func TestApplyLifecycleStateDeletedRequiresSkillName(t *testing.T) { + workspace := t.TempDir() + + err := evolution.ApplyLifecycleState( + evolution.NewPaths(workspace, ""), + evolution.SkillProfile{WorkspaceID: workspace}, + evolution.SkillStatusDeleted, + ) + if err == nil { + t.Fatal("expected error when skill name is empty") + } +} + +func TestApplyLifecycleStateDeletedRejectsTraversalSkillName(t *testing.T) { + workspace := t.TempDir() + + err := evolution.ApplyLifecycleState( + evolution.NewPaths(workspace, ""), + evolution.SkillProfile{WorkspaceID: workspace, SkillName: "../escape"}, + evolution.SkillStatusDeleted, + ) + if err == nil { + t.Fatal("expected error for traversal skill name") + } +} diff --git a/pkg/evolution/lifecycle_test.go b/pkg/evolution/lifecycle_test.go new file mode 100644 index 000000000..25bec518d --- /dev/null +++ b/pkg/evolution/lifecycle_test.go @@ -0,0 +1,247 @@ +package evolution_test + +import ( + "errors" + "os" + "sync" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestStore_SaveAndLoadProfile(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + profile := evolution.SkillProfile{ + 
SkillName: "weather", + WorkspaceID: root, + CurrentVersion: "v2", + Status: evolution.SkillStatusActive, + Origin: "evolved", + HumanSummary: "weather lookup helper", + LastUsedAt: time.Unix(1700000000, 0).UTC(), + UseCount: 3, + RetentionScore: 0.8, + VersionHistory: []evolution.SkillVersionEntry{ + { + Version: "v1", + Action: "create", + Timestamp: time.Unix(1699990000, 0).UTC(), + Summary: "initial learned version", + }, + }, + } + + if err := store.SaveProfile(profile); err != nil { + t.Fatalf("SaveProfile: %v", err) + } + + loaded, err := store.LoadProfile("weather") + if err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if loaded.SkillName != "weather" { + t.Fatalf("SkillName = %q, want weather", loaded.SkillName) + } + if loaded.Status != evolution.SkillStatusActive { + t.Fatalf("Status = %q, want %q", loaded.Status, evolution.SkillStatusActive) + } + if len(loaded.VersionHistory) != 1 { + t.Fatalf("len(VersionHistory) = %d, want 1", len(loaded.VersionHistory)) + } +} + +func TestNextLifecycleState_ActiveToCold(t *testing.T) { + now := time.Now().UTC() + profile := evolution.SkillProfile{ + SkillName: "release-flow", + Status: evolution.SkillStatusActive, + Origin: "evolved", + LastUsedAt: now.AddDate(0, -6, 0), + RetentionScore: 0.1, + } + + got := evolution.NextLifecycleState(profile, now) + if got != evolution.SkillStatusCold { + t.Fatalf("NextLifecycleState = %q, want %q", got, evolution.SkillStatusCold) + } +} + +func TestNextLifecycleState_ManualSkillStaysActive(t *testing.T) { + now := time.Now().UTC() + profile := evolution.SkillProfile{ + SkillName: "manual-weather", + Status: evolution.SkillStatusActive, + Origin: "manual", + LastUsedAt: now.AddDate(-1, 0, 0), + RetentionScore: 0, + } + + got := evolution.NextLifecycleState(profile, now) + if got != evolution.SkillStatusActive { + t.Fatalf("NextLifecycleState = %q, want %q", got, evolution.SkillStatusActive) + } +} + +func TestStore_SaveProfileRejectsInvalidSkillName(t *testing.T) { + store 
:= evolution.NewStore(evolution.NewPaths(t.TempDir(), "")) + + err := store.SaveProfile(evolution.SkillProfile{SkillName: "../escape"}) + if err == nil { + t.Fatal("expected SaveProfile to reject invalid skill name") + } +} + +func TestStore_LoadProfileRejectsInvalidSkillName(t *testing.T) { + store := evolution.NewStore(evolution.NewPaths(t.TempDir(), "")) + + _, err := store.LoadProfile("/tmp/escape") + if err == nil { + t.Fatal("expected LoadProfile to reject invalid skill name") + } +} + +func TestStore_SharedStateProfilesRemainIsolatedPerWorkspace(t *testing.T) { + sharedState := t.TempDir() + workspaceA := t.TempDir() + workspaceB := t.TempDir() + + storeA := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState)) + storeB := evolution.NewStore(evolution.NewPaths(workspaceB, sharedState)) + + profileA := evolution.SkillProfile{ + SkillName: "weather", + WorkspaceID: workspaceA, + CurrentVersion: "v-a", + Status: evolution.SkillStatusActive, + Origin: "evolved", + HumanSummary: "workspace A weather helper", + LastUsedAt: time.Unix(1700000000, 0).UTC(), + UseCount: 2, + RetentionScore: 0.6, + } + profileB := evolution.SkillProfile{ + SkillName: "weather", + WorkspaceID: workspaceB, + CurrentVersion: "v-b", + Status: evolution.SkillStatusCold, + Origin: "manual", + HumanSummary: "workspace B weather helper", + LastUsedAt: time.Unix(1700000500, 0).UTC(), + UseCount: 9, + RetentionScore: 0.2, + } + + if err := storeA.SaveProfile(profileA); err != nil { + t.Fatalf("storeA.SaveProfile: %v", err) + } + if err := storeB.SaveProfile(profileB); err != nil { + t.Fatalf("storeB.SaveProfile: %v", err) + } + + loadedA, err := storeA.LoadProfile("weather") + if err != nil { + t.Fatalf("storeA.LoadProfile: %v", err) + } + if loadedA.WorkspaceID != workspaceA { + t.Fatalf("storeA workspace = %q, want %q", loadedA.WorkspaceID, workspaceA) + } + if loadedA.CurrentVersion != "v-a" { + t.Fatalf("storeA CurrentVersion = %q, want v-a", loadedA.CurrentVersion) + } + + 
loadedB, err := storeB.LoadProfile("weather") + if err != nil { + t.Fatalf("storeB.LoadProfile: %v", err) + } + if loadedB.WorkspaceID != workspaceB { + t.Fatalf("storeB workspace = %q, want %q", loadedB.WorkspaceID, workspaceB) + } + if loadedB.CurrentVersion != "v-b" { + t.Fatalf("storeB CurrentVersion = %q, want v-b", loadedB.CurrentVersion) + } + + allProfiles, err := storeA.LoadProfiles() + if err != nil { + t.Fatalf("LoadProfiles: %v", err) + } + if len(allProfiles) != 2 { + t.Fatalf("len(LoadProfiles()) = %d, want 2", len(allProfiles)) + } +} + +func TestStore_LoadProfileDoesNotBorrowAnotherWorkspaceProfile(t *testing.T) { + sharedState := t.TempDir() + workspaceA := t.TempDir() + workspaceB := t.TempDir() + + storeA := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState)) + storeB := evolution.NewStore(evolution.NewPaths(workspaceB, sharedState)) + + if err := storeA.SaveProfile(evolution.SkillProfile{ + SkillName: "weather", + WorkspaceID: workspaceA, + CurrentVersion: "v-a", + Status: evolution.SkillStatusActive, + Origin: "evolved", + HumanSummary: "workspace A weather helper", + LastUsedAt: time.Unix(1700000000, 0).UTC(), + UseCount: 4, + RetentionScore: 0.8, + }); err != nil { + t.Fatalf("storeA.SaveProfile: %v", err) + } + + _, err := storeB.LoadProfile("weather") + if !errors.Is(err, os.ErrNotExist) { + t.Fatalf("storeB.LoadProfile should not borrow workspace A profile, got err=%v", err) + } +} + +func TestStore_UpdateProfileIsAtomicPerWorkspaceSkill(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + const workers = 64 + var wg sync.WaitGroup + errs := make(chan error, workers) + + for i := 0; i < workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + errs <- store.UpdateProfile(root, "weather", func(profile *evolution.SkillProfile, exists bool) error { + if !exists { + *profile = evolution.SkillProfile{ + SkillName: "weather", + WorkspaceID: root, + Status: 
evolution.SkillStatusActive, + Origin: "manual", + HumanSummary: "weather", + RetentionScore: 0.2, + } + } + profile.UseCount++ + return nil + }) + }() + } + + wg.Wait() + close(errs) + for err := range errs { + if err != nil { + t.Fatalf("UpdateProfile: %v", err) + } + } + + profile, err := store.LoadProfile("weather") + if err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if profile.UseCount != workers { + t.Fatalf("UseCount = %d, want %d", profile.UseCount, workers) + } +} diff --git a/pkg/evolution/llm_draft_generator.go b/pkg/evolution/llm_draft_generator.go new file mode 100644 index 000000000..2db27004c --- /dev/null +++ b/pkg/evolution/llm_draft_generator.go @@ -0,0 +1,235 @@ +package evolution + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/sipeed/picoclaw/pkg/providers" + "github.com/sipeed/picoclaw/pkg/skills" +) + +type LLMDraftGenerator struct { + provider providers.LLMProvider + model string + fallback DraftGenerator +} + +type llmDraftResponse struct { + TargetSkillName string `json:"target_skill_name"` + DraftType string `json:"draft_type"` + ChangeKind string `json:"change_kind"` + HumanSummary string `json:"human_summary"` + IntendedUseCases []string `json:"intended_use_cases"` + PreferredEntryPath []string `json:"preferred_entry_path"` + AvoidPatterns []string `json:"avoid_patterns"` + BodyOrPatch string `json:"body_or_patch"` +} + +func NewLLMDraftGenerator(provider providers.LLMProvider, model string, fallback DraftGenerator) *LLMDraftGenerator { + return &LLMDraftGenerator{ + provider: provider, + model: strings.TrimSpace(model), + fallback: fallback, + } +} + +func (g *LLMDraftGenerator) GenerateDraft( + ctx context.Context, + rule LearningRecord, + matches []skills.SkillInfo, +) (SkillDraft, error) { + return g.GenerateDraftWithEvidence(ctx, rule, matches, DraftEvidence{}) +} + +func (g *LLMDraftGenerator) GenerateDraftWithEvidence( + ctx context.Context, + rule LearningRecord, + matches []skills.SkillInfo, 
+ evidence DraftEvidence, +) (SkillDraft, error) { + rule = enrichRuleWithDraftEvidence(rule, evidence) + if g == nil || g.provider == nil { + return g.generateFallback(ctx, rule, matches, evidence) + } + + model := g.model + if model == "" { + model = strings.TrimSpace(g.provider.GetDefaultModel()) + } + if model == "" { + return g.generateFallback(ctx, rule, matches, evidence) + } + + callCtx, cancel := withLLMCallTimeout(ctx, llmDraftGenerationTimeout) + defer cancel() + resp, err := g.provider.Chat(callCtx, []providers.Message{ + { + Role: "system", + Content: "Return exactly one JSON object for a skill draft. Do not use markdown fences.", + }, + { + Role: "user", + Content: g.buildPrompt(rule, matches, evidence), + }, + }, nil, model, map[string]any{"temperature": 0.2}) + if err != nil || resp == nil { + return g.generateFallback(ctx, rule, matches, evidence) + } + + content := strings.TrimSpace(resp.Content) + if content == "" { + return g.generateFallback(ctx, rule, matches, evidence) + } + + draft, ok := parseLLMDraft(content) + if !ok || len(ValidateDraft(draft)) > 0 { + return g.generateFallback(ctx, rule, matches, evidence) + } + + return draft, nil +} + +func (g *LLMDraftGenerator) generateFallback( + ctx context.Context, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, +) (SkillDraft, error) { + if g == nil || g.fallback == nil { + return SkillDraft{}, nil + } + if generator, ok := g.fallback.(EvidenceAwareDraftGenerator); ok { + return generator.GenerateDraftWithEvidence(ctx, rule, matches, evidence) + } + return g.fallback.GenerateDraft(ctx, rule, matches) +} + +func (g *LLMDraftGenerator) buildPrompt( + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, +) string { + return strings.Join([]string{ + "Generate a skill draft JSON object with these required string fields:", + `target_skill_name, draft_type, change_kind, human_summary, body_or_patch.`, + "Optional array fields: intended_use_cases, 
preferred_entry_path, avoid_patterns.", + "", + "Allowed values:", + "- draft_type: workflow | shortcut", + "- change_kind: create | append | replace | merge", + "- target_skill_name: lowercase hyphenated skill name that describes the functional purpose; it must not be numeric-only", + "", + "Rule summary: " + strings.TrimSpace(rule.Summary), + "Winning path: " + joinOrFallback(rule.WinningPath, "none"), + "Late-added successful skills: " + joinOrFallback(rule.LateAddedSkills, "none"), + "Final snapshot trigger: " + fallbackString(rule.FinalSnapshotTrigger, "none"), + fmt.Sprintf("Event count: %d", rule.EventCount), + fmt.Sprintf("Success rate: %.2f", rule.SuccessRate), + "Matched skill refs: " + summarizeSkillMatches(matches), + "Matched skill names: " + joinOrFallback(rule.MatchedSkillNames, "none"), + "Source task evidence:", + summarizeDraftTaskEvidence(evidence), + "Matched skill content excerpts:", + summarizeMatchedSkillExcerpts(matches), + "", + combinedSkillGuidance(rule), + skillDraftPromptText(), + }, "\n") +} + +func summarizeDraftTaskEvidence(evidence DraftEvidence) string { + if len(evidence.TaskRecords) == 0 { + return "none" + } + lines := make([]string, 0, minInt(len(evidence.TaskRecords), 5)) + for i, task := range evidence.TaskRecords { + if i >= 5 { + break + } + parts := []string{ + "- id: " + fallbackString(task.ID, "unknown"), + " summary: " + fallbackString(task.Summary, "none"), + " final_output_excerpt: " + fallbackString(summarizeText(task.FinalOutput, 700), "none"), + " used_skill_names: " + joinOrFallback(task.UsedSkillNames, "none"), + } + lines = append(lines, strings.Join(parts, "\n")) + } + return strings.Join(lines, "\n") +} + +func combinedSkillGuidance(rule LearningRecord) string { + if target := inferCombinedSkillName(rule); target != "" { + return strings.Join([]string{ + "This rule represents a stable multi-step successful path.", + "Prefer creating a new combined shortcut skill instead of modifying one component skill.", + 
"Suggested target skill name: " + target, + }, "\n") + } + return "Prefer updating an existing skill only when the learned pattern clearly belongs inside that single skill." +} + +func parseLLMDraft(content string) (SkillDraft, bool) { + normalized := strings.TrimSpace(content) + normalized = strings.TrimPrefix(normalized, "```json") + normalized = strings.TrimPrefix(normalized, "```") + normalized = strings.TrimSuffix(normalized, "```") + normalized = strings.TrimSpace(normalized) + + var payload llmDraftResponse + if err := json.Unmarshal([]byte(normalized), &payload); err != nil { + return SkillDraft{}, false + } + + draft := SkillDraft{ + TargetSkillName: strings.TrimSpace(payload.TargetSkillName), + DraftType: DraftType(strings.TrimSpace(payload.DraftType)), + ChangeKind: ChangeKind(strings.TrimSpace(payload.ChangeKind)), + HumanSummary: strings.TrimSpace(payload.HumanSummary), + IntendedUseCases: append([]string(nil), payload.IntendedUseCases...), + PreferredEntryPath: append([]string(nil), payload.PreferredEntryPath...), + AvoidPatterns: append([]string(nil), payload.AvoidPatterns...), + BodyOrPatch: strings.TrimSpace(payload.BodyOrPatch), + } + return draft, true +} + +func summarizeSkillMatches(matches []skills.SkillInfo) string { + if len(matches) == 0 { + return "none" + } + + parts := make([]string, 0, len(matches)) + for _, match := range matches { + part := strings.TrimSpace(match.Name) + if desc := strings.TrimSpace(match.Description); desc != "" { + part += ": " + desc + } + if path := strings.TrimSpace(match.Path); path != "" { + part += " (" + path + ")" + } + if part != "" { + parts = append(parts, part) + } + } + if len(parts) == 0 { + return "none" + } + return strings.Join(parts, "; ") +} + +func joinOrFallback(parts []string, fallback string) string { + if len(parts) == 0 { + return fallback + } + return strings.Join(parts, " -> ") +} + +func fallbackString(value, fallback string) string { + value = strings.TrimSpace(value) + if value == "" { 
+ return fallback + } + return value +} diff --git a/pkg/evolution/llm_draft_generator_test.go b/pkg/evolution/llm_draft_generator_test.go new file mode 100644 index 000000000..ffba62b84 --- /dev/null +++ b/pkg/evolution/llm_draft_generator_test.go @@ -0,0 +1,367 @@ +package evolution_test + +import ( + "context" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/sipeed/picoclaw/pkg/evolution" + "github.com/sipeed/picoclaw/pkg/providers" + "github.com/sipeed/picoclaw/pkg/skills" +) + +type recordingDraftGenerator struct { + draft evolution.SkillDraft + err error + calls int +} + +func (g *recordingDraftGenerator) GenerateDraft( + _ context.Context, + _ evolution.LearningRecord, + _ []skills.SkillInfo, +) (evolution.SkillDraft, error) { + g.calls++ + return g.draft, g.err +} + +type llmDraftTestProvider struct { + response *providers.LLMResponse + err error + defaultModel string + lastModel string + lastMessages []providers.Message + chatCallCount int +} + +func (p *llmDraftTestProvider) Chat( + _ context.Context, + messages []providers.Message, + _ []providers.ToolDefinition, + model string, + _ map[string]any, +) (*providers.LLMResponse, error) { + p.chatCallCount++ + p.lastModel = model + p.lastMessages = append([]providers.Message(nil), messages...) 
+ return p.response, p.err +} + +func (p *llmDraftTestProvider) GetDefaultModel() string { + return p.defaultModel +} + +func testLearningRule() evolution.LearningRecord { + return evolution.LearningRecord{ + ID: "rule-1", + Summary: "weather native-name path", + EventCount: 7, + SuccessRate: 0.86, + WinningPath: []string{"weather", "native-name"}, + MatchedSkillNames: []string{"weather"}, + } +} + +func testSkillMatches() []skills.SkillInfo { + return []skills.SkillInfo{ + { + Name: "weather", + Path: "/tmp/weather/SKILL.md", + Source: "workspace", + Description: "Find weather details.", + }, + } +} + +func TestLLMDraftGenerator_GenerateDraft_ParsesJSONResponse(t *testing.T) { + provider := &llmDraftTestProvider{ + defaultModel: "test-model", + response: &providers.LLMResponse{ + Content: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name lookup first","body_or_patch":"## Start Here\nUse native-name first."}`, + }, + } + fallback := &recordingDraftGenerator{ + draft: evolution.SkillDraft{TargetSkillName: "fallback"}, + } + generator := evolution.NewLLMDraftGenerator(provider, "", fallback) + + draft, err := generator.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches()) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + + if provider.chatCallCount != 1 { + t.Fatalf("chatCallCount = %d, want 1", provider.chatCallCount) + } + if provider.lastModel != "test-model" { + t.Fatalf("lastModel = %q, want test-model", provider.lastModel) + } + if len(provider.lastMessages) == 0 { + t.Fatal("expected prompt messages") + } + if fallback.calls != 0 { + t.Fatalf("fallback.calls = %d, want 0", fallback.calls) + } + if draft.TargetSkillName != "weather" { + t.Fatalf("TargetSkillName = %q, want weather", draft.TargetSkillName) + } + if draft.DraftType != evolution.DraftTypeShortcut { + t.Fatalf("DraftType = %q, want %q", draft.DraftType, evolution.DraftTypeShortcut) + } + if 
draft.ChangeKind != evolution.ChangeKindAppend { + t.Fatalf("ChangeKind = %q, want %q", draft.ChangeKind, evolution.ChangeKindAppend) + } + if draft.HumanSummary == "" || draft.BodyOrPatch == "" { + t.Fatal("expected non-empty draft content") + } +} + +func TestLLMDraftGenerator_BuildPromptIncludesMatchedSkillContent(t *testing.T) { + dir := t.TempDir() + skillPath := filepath.Join(dir, "skills", "three-one-theorem", "SKILL.md") + if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile( + skillPath, + []byte( + "---\nname: three-one-theorem\ndescription: Add 31 then delegate\n---\n# Three One\nAdd 31 to the input, then continue with the next theorem.\n", + ), + 0o644, + ); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + provider := &llmDraftTestProvider{ + defaultModel: "test-model", + response: &providers.LLMResponse{ + Content: `{"target_skill_name":"calculate-100-via-theorems","draft_type":"shortcut","change_kind":"create","human_summary":"Combine theorem chain","body_or_patch":"## Start Here\nAdd 31, then continue."}`, + }, + } + generator := evolution.NewLLMDraftGenerator(provider, "", &recordingDraftGenerator{}) + + _, err := generator.GenerateDraft(context.Background(), evolution.LearningRecord{ + ID: "rule-1", + Summary: "calculate 100", + WinningPath: []string{"three-one-theorem", "four-two-theorem"}, + EventCount: 2, + SuccessRate: 1, + }, []skills.SkillInfo{{ + Name: "three-one-theorem", + Path: skillPath, + Source: "workspace", + Description: "Add 31 then delegate", + }}) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + if len(provider.lastMessages) < 2 { + t.Fatal("expected user prompt") + } + prompt := provider.lastMessages[1].Content + if !strings.Contains(prompt, "Matched skill content excerpts") { + t.Fatalf("prompt missing content section:\n%s", prompt) + } + if !strings.Contains(prompt, "Add 31 to the input") { + t.Fatalf("prompt missing matched skill 
body:\n%s", prompt) + } + if !strings.Contains(prompt, "summarize the functional purpose and result") { + t.Fatalf("prompt missing synthesis instruction:\n%s", prompt) + } + if !strings.Contains(prompt, "complete SKILL.md file with exactly two parts") { + t.Fatalf("prompt missing complete skill instruction:\n%s", prompt) + } + if !strings.Contains(prompt, "The YAML frontmatter must contain only name and description fields") { + t.Fatalf("prompt missing frontmatter instruction:\n%s", prompt) + } + if !strings.Contains( + prompt, + "The description field must and only describe what this skill can do and when to use it", + ) { + t.Fatalf("prompt missing description field instruction:\n%s", prompt) + } + if !strings.Contains( + prompt, + "The deployable Markdown body should only contain what the skill is useful for and how to use it", + ) { + t.Fatalf("prompt missing deployable body scope instruction:\n%s", prompt) + } + if !strings.Contains( + prompt, + "provide detailed step-by-step instructions for the exact operation or execution process", + ) { + t.Fatalf("prompt missing step-by-step instruction:\n%s", prompt) + } + if !strings.Contains(prompt, "body_or_patch is an internal draft and review artifact") { + t.Fatalf("prompt missing internal draft instruction:\n%s", prompt) + } + if !strings.Contains(prompt, "the final deployed SKILL.md will be rendered without learning traces") { + t.Fatalf("prompt missing deploy-clean instruction:\n%s", prompt) + } + if !strings.Contains(prompt, "do not copy or directly include other skills' instructions") { + t.Fatalf("prompt missing no-copy instruction:\n%s", prompt) + } +} + +func TestLLMDraftGenerator_BuildPromptIncludesTaskEvidence(t *testing.T) { + provider := &llmDraftTestProvider{ + defaultModel: "test-model", + response: &providers.LLMResponse{ + Content: `{"target_skill_name":"calculate-with-three-one-theorem","draft_type":"shortcut","change_kind":"create","human_summary":"Calculate using theorem 
chain","body_or_patch":"---\nname: calculate-with-three-one-theorem\ndescription: Calculate with theorem chain.\n---\n# Calculate With Three One Theorem\n\n## Procedure\nAdd 31, add 42, then subtract 53."}`, + }, + } + generator := evolution.NewLLMDraftGenerator(provider, "", &recordingDraftGenerator{}) + + _, err := generator.GenerateDraftWithEvidence(context.Background(), evolution.LearningRecord{ + ID: "rule-1", + Label: "calculate-with-three-one-theorem", + Summary: "调用三一定理计算", + }, nil, evolution.DraftEvidence{ + TaskRecords: []evolution.LearningRecord{ + { + ID: "main-turn-6", + Summary: "调用三一定理计算100", + FinalOutput: "100 + 31 = 131; 131 + 42 = 173; 173 - 53 = 120", + UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + }, + }, + }) + if err != nil { + t.Fatalf("GenerateDraftWithEvidence: %v", err) + } + if len(provider.lastMessages) < 2 { + t.Fatal("expected user prompt") + } + prompt := provider.lastMessages[1].Content + for _, want := range []string{ + "Source task evidence", + "main-turn-6", + "调用三一定理计算100", + "100 + 31 = 131", + "three-one-theorem -> four-two-theorem -> five-three-theorem", + "directly usable by a future agent", + } { + if !strings.Contains(prompt, want) { + t.Fatalf("prompt missing %q:\n%s", want, prompt) + } + } +} + +func TestLLMDraftGenerator_GenerateDraft_PrefersExplicitModelIDOverProviderDefault(t *testing.T) { + provider := &llmDraftTestProvider{ + defaultModel: "provider-default-model", + response: &providers.LLMResponse{ + Content: `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name lookup first","body_or_patch":"## Start Here\nUse native-name first."}`, + }, + } + generator := evolution.NewLLMDraftGenerator(provider, "explicit-model-id", &recordingDraftGenerator{}) + + _, err := generator.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches()) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + if 
provider.lastModel != "explicit-model-id" { + t.Fatalf("lastModel = %q, want explicit-model-id", provider.lastModel) + } +} + +func TestLLMDraftGenerator_GenerateDraft_FallsBackOnProviderError(t *testing.T) { + fallback := &recordingDraftGenerator{ + draft: evolution.SkillDraft{ + TargetSkillName: "weather-fallback", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "fallback summary", + BodyOrPatch: "fallback body", + }, + } + generator := evolution.NewLLMDraftGenerator(&llmDraftTestProvider{ + defaultModel: "test-model", + err: errors.New("provider unavailable"), + }, "", fallback) + + draft, err := generator.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches()) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + + if fallback.calls != 1 { + t.Fatalf("fallback.calls = %d, want 1", fallback.calls) + } + if draft.TargetSkillName != "weather-fallback" { + t.Fatalf("TargetSkillName = %q, want weather-fallback", draft.TargetSkillName) + } +} + +func TestLLMDraftGenerator_GenerateDraft_FallsBackOnInvalidOrEmptyContent(t *testing.T) { + testCases := []struct { + name string + content string + }{ + {name: "invalid json", content: `not-json`}, + {name: "empty content", content: ``}, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + fallback := &recordingDraftGenerator{ + draft: evolution.SkillDraft{ + TargetSkillName: "weather-fallback", + DraftType: evolution.DraftTypeWorkflow, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "fallback summary", + BodyOrPatch: "fallback body", + }, + } + generator := evolution.NewLLMDraftGenerator(&llmDraftTestProvider{ + defaultModel: "test-model", + response: &providers.LLMResponse{Content: tt.content}, + }, "", fallback) + + draft, err := generator.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches()) + if err != nil { + t.Fatalf("GenerateDraft: %v", err) + } + + if fallback.calls != 1 { + 
// withLLMCallTimeout derives a cancellable child context for a single LLM call.
//
// A nil parent is replaced with context.Background(). The child only adds
// cancellation (no new deadline) when timeout is non-positive, or when parent
// already carries a deadline that expires within timeout from now. In every
// other case the child is bounded by timeout.
//
// The returned CancelFunc must be called by the caller to release resources.
func withLLMCallTimeout(parent context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
	base := parent
	if base == nil {
		base = context.Background()
	}

	applyTimeout := timeout > 0
	if applyTimeout {
		// A parent deadline that is already at least as tight makes an extra
		// timer pointless; keep the parent's deadline instead.
		if deadline, hasDeadline := base.Deadline(); hasDeadline && time.Until(deadline) <= timeout {
			applyTimeout = false
		}
	}

	if !applyTimeout {
		return context.WithCancel(base)
	}
	return context.WithTimeout(base, timeout)
}
+ sortCaseCluster(cluster) + + if len(cluster) < o.minCaseCount { + continue + } + + successRate := clusterSuccessRate(cluster) + if successRate < o.minSuccessRate { + continue + } + + ruleKey := clusterKey[strings.Index(clusterKey, "\x00")+1:] + winningPath := clusterWinningPath(cluster) + lateAddedSkills, finalSnapshotTrigger := clusterLateAddedSkills(cluster, winningPath) + matchedSkillNames := append([]string(nil), winningPath...) + + rules = append(rules, LearningRecord{ + ID: stableRuleID(cluster[0].WorkspaceID, ruleKey), + Kind: RecordKindPattern, + WorkspaceID: cluster[0].WorkspaceID, + CreatedAt: o.now(), + Summary: buildRuleSummary(cluster, ruleKey, winningPath), + Source: map[string]any{"cluster_key": ruleKey}, + Status: RecordStatus("ready"), + SourceRecordIDs: collectRecordIDs(cluster), + EventCount: len(cluster), + SuccessRate: successRate, + MaturityScore: computeMaturityScore(len(cluster), successRate), + WinningPath: winningPath, + LateAddedSkills: lateAddedSkills, + FinalSnapshotTrigger: finalSnapshotTrigger, + MatchedSkillNames: matchedSkillNames, + }) + } + + return rules, nil +} + +func normalizeRuleKey(record LearningRecord) string { + if path := preferredRulePath(record); len(path) > 0 { + return strings.Join(path, " ") + } + if path := normalizePath(record.ToolKinds); len(path) > 0 { + return strings.Join(path, " ") + } + + tokens := tokenizeForEvolution(record.Summary) + if len(tokens) == 0 { + return "" + } + if len(tokens) > 6 { + tokens = tokens[:6] + } + return strings.Join(tokens, " ") +} + +func preferredRulePath(record LearningRecord) []string { + if path := normalizeFinalSuccessfulPath(record); len(path) > 0 { + return path + } + if path := normalizePath(record.UsedSkillNames); len(path) > 0 { + return path + } + if path := normalizePath(record.AddedSkillNames); len(path) > 0 { + return path + } + if path := normalizeAttemptedSkills(record); len(path) > 0 { + return path + } + if path := normalizePath(record.ActiveSkillNames); 
// normalizePath trims surrounding whitespace from each value, lower-cases it,
// and drops entries that end up empty. Order and duplicates are preserved.
// It returns nil when the input is empty or nothing survives.
func normalizePath(values []string) []string {
	var cleaned []string
	for _, raw := range values {
		name := strings.ToLower(strings.TrimSpace(raw))
		if name == "" {
			continue
		}
		if cleaned == nil {
			// Allocate lazily so an all-blank input still yields nil.
			cleaned = make([]string, 0, len(values))
		}
		cleaned = append(cleaned, name)
	}
	return cleaned
}
+ order = append(order, key) + } + score.count++ + paths[key] = score + } + + for _, key := range order { + score := paths[key] + if score.count > best.count { + best = score + bestKey = key + } + } + + if bestKey == "" { + return nil + } + return best.path +} + +func clusterLateAddedSkills(cluster []LearningRecord, winningPath []string) ([]string, string) { + type lateAddedScore struct { + skills []string + trigger string + count int + } + + bestKey := "" + best := lateAddedScore{} + scores := make(map[string]lateAddedScore) + order := make([]string, 0) + + for _, record := range cluster { + skills, trigger := lateAddedSkillsFromRecord(record) + if len(skills) == 0 { + continue + } + if len(winningPath) > 0 && !pathsEqual(skills, tailAddedWithinWinningPath(winningPath, skills)) { + continue + } + + key := trigger + "\x00" + strings.Join(skills, "\x00") + score := scores[key] + if score.skills == nil { + score.skills = append([]string(nil), skills...) + score.trigger = trigger + order = append(order, key) + } + score.count++ + scores[key] = score + } + + for _, key := range order { + score := scores[key] + if score.count > best.count { + bestKey = key + best = score + } + } + + if bestKey == "" { + return nil, "" + } + return best.skills, best.trigger +} + +func lateAddedSkillsFromRecord(record LearningRecord) ([]string, string) { + if skills := normalizePath(record.AddedSkillNames); len(skills) > 0 { + return skills, "loaded_during_task" + } + if record.AttemptTrail == nil || len(record.AttemptTrail.SkillContextSnapshots) == 0 { + return nil, "" + } + + snapshots := record.AttemptTrail.SkillContextSnapshots + last := snapshots[len(snapshots)-1] + if len(last.SkillNames) == 0 { + return nil, "" + } + if len(snapshots) == 1 { + return nil, strings.TrimSpace(last.Trigger) + } + + prev := snapshots[len(snapshots)-2] + prevSet := make(map[string]struct{}, len(prev.SkillNames)) + for _, skill := range normalizePath(prev.SkillNames) { + prevSet[skill] = struct{}{} + } + 
// tailAddedWithinWinningPath returns the suffix of winningPath that has the
// same length as lateAdded, but only if that suffix equals lateAdded element
// by element. It returns nil when either slice is empty, when lateAdded is
// longer than winningPath, or when the suffix differs.
//
// The returned slice shares its backing array with winningPath.
func tailAddedWithinWinningPath(winningPath, lateAdded []string) []string {
	total, added := len(winningPath), len(lateAdded)
	switch {
	case total == 0, added == 0, added > total:
		return nil
	}

	suffix := winningPath[total-added:]
	if pathsEqual(suffix, lateAdded) {
		return suffix
	}
	return nil
}

// pathsEqual reports whether a and b have the same length and identical
// elements at every index. Two empty slices (nil or not) compare equal.
func pathsEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, want := range a {
		if b[i] != want {
			return false
		}
	}
	return true
}
a/pkg/evolution/organizer_test.go b/pkg/evolution/organizer_test.go new file mode 100644 index 000000000..397bfa5be --- /dev/null +++ b/pkg/evolution/organizer_test.go @@ -0,0 +1,310 @@ +package evolution_test + +import ( + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestOrganizer_BuildRulesCreatesRuleRecord(t *testing.T) { + ok := true + cases := []evolution.LearningRecord{ + { + ID: "case-1", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather shanghai", + Status: evolution.RecordStatus("new"), + Success: &ok, + ActiveSkillNames: []string{"weather"}, + }, + { + ID: "case-2", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "weather beijing", + Status: evolution.RecordStatus("new"), + Success: &ok, + ActiveSkillNames: []string{"weather"}, + }, + { + ID: "case-3", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather hangzhou", + Status: evolution.RecordStatus("new"), + Success: &ok, + ActiveSkillNames: []string{"weather"}, + }, + } + + org := evolution.NewOrganizer(evolution.OrganizerOptions{ + MinCaseCount: 3, + MinSuccessRate: 0.7, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + }) + + rules, err := org.BuildRules(cases) + if err != nil { + t.Fatalf("BuildRules: %v", err) + } + if len(rules) != 1 { + t.Fatalf("len(rules) = %d, want 1", len(rules)) + } + + rule := rules[0] + if rule.Kind != evolution.RecordKindRule { + t.Fatalf("Kind = %q, want %q", rule.Kind, evolution.RecordKindRule) + } + if rule.EventCount != 3 { + t.Fatalf("EventCount = %d, want 3", rule.EventCount) + } + if len(rule.SourceRecordIDs) != 3 { + t.Fatalf("SourceRecordIDs = %v", rule.SourceRecordIDs) + } + if rule.MaturityScore <= 0 { + t.Fatalf("MaturityScore = %v, want > 0", rule.MaturityScore) + } + if len(rule.WinningPath) != 1 || 
rule.WinningPath[0] != "weather" { + t.Fatalf("WinningPath = %v, want [weather]", rule.WinningPath) + } +} + +func TestOrganizer_BuildRulesSkipsImmatureCluster(t *testing.T) { + ok := true + cases := []evolution.LearningRecord{ + { + ID: "case-1", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "release build linux", + Status: evolution.RecordStatus("new"), + Success: &ok, + }, + } + + org := evolution.NewOrganizer(evolution.OrganizerOptions{ + MinCaseCount: 3, + MinSuccessRate: 0.7, + }) + + rules, err := org.BuildRules(cases) + if err != nil { + t.Fatalf("BuildRules: %v", err) + } + if len(rules) != 0 { + t.Fatalf("len(rules) = %d, want 0", len(rules)) + } +} + +func TestOrganizer_BuildRulesPrefersFinalSuccessfulPathFromAttemptTrail(t *testing.T) { + ok := true + cases := []evolution.LearningRecord{ + { + ID: "case-1", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather shanghai", + Status: evolution.RecordStatus("new"), + Success: &ok, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"geocode", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + }, + ActiveSkillNames: []string{"geocode", "weather"}, + }, + { + ID: "case-2", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "weather beijing", + Status: evolution.RecordStatus("new"), + Success: &ok, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"browser", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + }, + ActiveSkillNames: []string{"browser", "weather"}, + }, + { + ID: "case-3", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather hangzhou", + Status: evolution.RecordStatus("new"), + Success: &ok, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: 
[]string{"maps", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + }, + ActiveSkillNames: []string{"maps", "weather"}, + }, + } + + org := evolution.NewOrganizer(evolution.OrganizerOptions{ + MinCaseCount: 3, + MinSuccessRate: 0.7, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + }) + + rules, err := org.BuildRules(cases) + if err != nil { + t.Fatalf("BuildRules: %v", err) + } + if len(rules) != 1 { + t.Fatalf("len(rules) = %d, want 1", len(rules)) + } + if got := rules[0].WinningPath; len(got) != 2 || got[0] != "geocode" || got[1] != "weather" { + t.Fatalf("WinningPath = %v, want [geocode weather]", got) + } +} + +func TestOrganizer_BuildRulesCapturesLateAddedSkillHintFromSnapshots(t *testing.T) { + ok := true + cases := []evolution.LearningRecord{ + { + ID: "case-1", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather shanghai", + Status: evolution.RecordStatus("new"), + Success: &ok, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"geocode", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + SkillContextSnapshots: []evolution.SkillContextSnapshot{ + {Sequence: 1, Trigger: "initial_build", SkillNames: []string{"geocode"}}, + {Sequence: 2, Trigger: "context_retry_rebuild", SkillNames: []string{"geocode", "weather"}}, + }, + }, + }, + { + ID: "case-2", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "weather beijing", + Status: evolution.RecordStatus("new"), + Success: &ok, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"browser", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + SkillContextSnapshots: []evolution.SkillContextSnapshot{ + {Sequence: 1, Trigger: "initial_build", SkillNames: []string{"geocode"}}, + {Sequence: 2, Trigger: "context_retry_rebuild", SkillNames: []string{"geocode", "weather"}}, + }, + }, + 
}, + { + ID: "case-3", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather hangzhou", + Status: evolution.RecordStatus("new"), + Success: &ok, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"maps", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + SkillContextSnapshots: []evolution.SkillContextSnapshot{ + {Sequence: 1, Trigger: "initial_build", SkillNames: []string{"geocode"}}, + {Sequence: 2, Trigger: "context_retry_rebuild", SkillNames: []string{"geocode", "weather"}}, + }, + }, + }, + } + + org := evolution.NewOrganizer(evolution.OrganizerOptions{ + MinCaseCount: 3, + MinSuccessRate: 0.7, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + }) + + rules, err := org.BuildRules(cases) + if err != nil { + t.Fatalf("BuildRules: %v", err) + } + if len(rules) != 1 { + t.Fatalf("len(rules) = %d, want 1", len(rules)) + } + if got := rules[0].LateAddedSkills; len(got) != 1 || got[0] != "weather" { + t.Fatalf("LateAddedSkills = %v, want [weather]", got) + } + if got := rules[0].FinalSnapshotTrigger; got != "context_retry_rebuild" { + t.Fatalf("FinalSnapshotTrigger = %q, want context_retry_rebuild", got) + } +} + +func TestOrganizer_BuildRulesUsesAddedSkillNamesWithoutSnapshots(t *testing.T) { + ok := true + cases := []evolution.LearningRecord{ + { + ID: "case-1", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather shanghai", + UserGoal: "check weather in shanghai", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"geocode", "weather"}, + AddedSkillNames: []string{"weather"}, + }, + { + ID: "case-2", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "weather beijing", + UserGoal: "check weather in beijing", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: 
// Paths collects the file-system locations used by the evolution package.
type Paths struct {
	Workspace       string // workspace directory passed to NewPaths
	RootDir         string // directory under which every other entry lives
	LearningRecords string // <root>/learning-records.jsonl
	TaskRecords     string // <root>/task-records.jsonl
	PatternRecords  string // <root>/pattern-records.jsonl
	SkillDrafts     string // <root>/skill-drafts.json
	ProfilesDir     string // <root>/profiles
	BackupsDir      string // <root>/backups
}

// NewPaths resolves the evolution storage layout for workspace.
//
// The root directory is override with surrounding whitespace trimmed; when
// that is empty the root defaults to <workspace>/state/evolution. All other
// entries are derived from the root with filepath.Join.
func NewPaths(workspace, override string) Paths {
	root := filepath.Join(workspace, "state", "evolution")
	if custom := strings.TrimSpace(override); custom != "" {
		root = custom
	}

	under := func(name string) string {
		return filepath.Join(root, name)
	}

	var layout Paths
	layout.Workspace = workspace
	layout.RootDir = root
	layout.LearningRecords = under("learning-records.jsonl")
	layout.TaskRecords = under("task-records.jsonl")
	layout.PatternRecords = under("pattern-records.jsonl")
	layout.SkillDrafts = under("skill-drafts.json")
	layout.ProfilesDir = under("profiles")
	layout.BackupsDir = under("backups")
	return layout
}
filepath.Join(override, "task-records.jsonl") { + t.Fatalf("TaskRecords = %q", paths.TaskRecords) + } + if paths.PatternRecords != filepath.Join(override, "pattern-records.jsonl") { + t.Fatalf("PatternRecords = %q", paths.PatternRecords) + } + if paths.SkillDrafts != filepath.Join(override, "skill-drafts.json") { + t.Fatalf("SkillDrafts = %q", paths.SkillDrafts) + } + if paths.ProfilesDir != filepath.Join(override, "profiles") { + t.Fatalf("ProfilesDir = %q", paths.ProfilesDir) + } + if paths.BackupsDir != filepath.Join(override, "backups") { + t.Fatalf("BackupsDir = %q", paths.BackupsDir) + } +} + +func TestNewPaths_BlankOverrideFallsBackToDefaultRoot(t *testing.T) { + workspace := "/tmp/workspace" + + paths := NewPaths(workspace, " \t\n ") + + wantRoot := filepath.Join(workspace, "state", "evolution") + if paths.RootDir != wantRoot { + t.Fatalf("RootDir = %q, want %q", paths.RootDir, wantRoot) + } +} + +func TestNewPaths_TrimmedOverrideIsUsed(t *testing.T) { + workspace := "/tmp/workspace" + override := " /tmp/custom-evolution " + + paths := NewPaths(workspace, override) + + if paths.RootDir != "/tmp/custom-evolution" { + t.Fatalf("RootDir = %q, want %q", paths.RootDir, "/tmp/custom-evolution") + } +} diff --git a/pkg/evolution/pattern_clusterer.go b/pkg/evolution/pattern_clusterer.go new file mode 100644 index 000000000..b167c7432 --- /dev/null +++ b/pkg/evolution/pattern_clusterer.go @@ -0,0 +1,732 @@ +package evolution + +import ( + "context" + "crypto/sha1" + "encoding/hex" + "encoding/json" + "fmt" + "sort" + "strings" + "time" + "unicode" + + "github.com/sipeed/picoclaw/pkg/providers" +) + +type PatternClusterer interface { + BuildPatterns( + ctx context.Context, + workspace string, + tasks []LearningRecord, + existing []LearningRecord, + ) ([]LearningRecord, []string, error) +} + +type evidencePatternClusterer interface { + BuildPatternsWithEvidence( + ctx context.Context, + workspace string, + successfulTasks []LearningRecord, + evidenceTasks 
[]LearningRecord, + existing []LearningRecord, + minSuccessRatio float64, + ) ([]LearningRecord, []string, error) +} + +type HeuristicPatternClusterer struct { + minCaseCount int + now func() time.Time +} + +func NewHeuristicPatternClusterer(minCaseCount int, now func() time.Time) *HeuristicPatternClusterer { + if minCaseCount <= 0 { + minCaseCount = 3 + } + if now == nil { + now = time.Now + } + return &HeuristicPatternClusterer{minCaseCount: minCaseCount, now: now} +} + +func (c *HeuristicPatternClusterer) BuildPatterns( + _ context.Context, + workspace string, + tasks []LearningRecord, + existing []LearningRecord, +) ([]LearningRecord, []string, error) { + groups := make(map[string][]LearningRecord) + keys := make([]string, 0) + for _, task := range tasks { + if task.WorkspaceID != workspace { + continue + } + key := heuristicClusterKey(task) + if key == "" { + continue + } + if _, ok := groups[key]; !ok { + keys = append(keys, key) + } + groups[key] = append(groups[key], task) + } + sort.Strings(keys) + + existingByLabel := patternsByLabel(existing, workspace) + patterns := make([]LearningRecord, 0, len(keys)) + clusteredIDs := make([]string, 0) + for _, key := range keys { + cluster := groups[key] + label := heuristicClusterLabelForGroup(key, cluster) + if label == "" { + continue + } + existingPattern, hasExisting := existingByLabel[label] + if !hasExisting && len(cluster) < c.minCaseCount { + continue + } + pattern := buildPatternFromCluster( + workspace, + label, + heuristicClusterSummary(label, cluster), + "heuristic cluster by normalized task summary", + cluster, + existingPattern, + c.now(), + ) + patterns = append(patterns, pattern) + clusteredIDs = append(clusteredIDs, collectRecordIDs(cluster)...) 
+ } + return patterns, clusteredIDs, nil +} + +type LLMPatternClusterer struct { + provider providers.LLMProvider + model string + fallback PatternClusterer + minCount int + now func() time.Time +} + +type llmClusterResponse struct { + Clusters []llmCluster `json:"clusters"` +} + +type llmCluster struct { + Label string `json:"label"` + Summary string `json:"summary"` + TaskRecordIDs []string `json:"task_record_ids"` + Reason string `json:"cluster_reason"` +} + +func NewLLMPatternClusterer( + provider providers.LLMProvider, + model string, + fallback PatternClusterer, + minCount int, + now func() time.Time, +) *LLMPatternClusterer { + if fallback == nil { + fallback = NewHeuristicPatternClusterer(minCount, now) + } + if minCount <= 0 { + minCount = 3 + } + if now == nil { + now = time.Now + } + return &LLMPatternClusterer{ + provider: provider, + model: strings.TrimSpace(model), + fallback: fallback, + minCount: minCount, + now: now, + } +} + +func (c *LLMPatternClusterer) BuildPatterns( + ctx context.Context, + workspace string, + tasks []LearningRecord, + existing []LearningRecord, +) ([]LearningRecord, []string, error) { + if c == nil { + return NewHeuristicPatternClusterer(0, nil).BuildPatterns(ctx, workspace, tasks, existing) + } + fallback := c.fallback + if fallback == nil { + fallback = NewHeuristicPatternClusterer(c.minCount, c.now) + } + if c.provider == nil { + return fallback.BuildPatterns(ctx, workspace, tasks, existing) + } + model := strings.TrimSpace(c.model) + if model == "" { + model = strings.TrimSpace(c.provider.GetDefaultModel()) + } + if model == "" { + return fallback.BuildPatterns(ctx, workspace, tasks, existing) + } + + callCtx, cancel := withLLMCallTimeout(ctx, llmPatternClusterTimeout) + defer cancel() + resp, err := c.provider.Chat(callCtx, []providers.Message{ + { + Role: "system", + Content: "Cluster agent task records by task meaning. Return exactly one JSON object with clusters:[{label,summary,task_record_ids,cluster_reason}]. 
No markdown fences.", + }, + { + Role: "user", + Content: buildPatternClusterPrompt(workspace, tasks, existing), + }, + }, nil, model, map[string]any{"temperature": 0}) + if err != nil || resp == nil || strings.TrimSpace(resp.Content) == "" { + return fallback.BuildPatterns(ctx, workspace, tasks, existing) + } + + payload, ok := parseLLMClusterResponse(resp.Content) + if !ok { + return fallback.BuildPatterns(ctx, workspace, tasks, existing) + } + patterns, clusteredIDs := c.validateAndBuildPatterns(workspace, payload.Clusters, tasks, existing) + if len(patterns) == 0 { + return fallback.BuildPatterns(ctx, workspace, tasks, existing) + } + return patterns, clusteredIDs, nil +} + +func (c *LLMPatternClusterer) BuildPatternsWithEvidence( + ctx context.Context, + workspace string, + successfulTasks []LearningRecord, + evidenceTasks []LearningRecord, + existing []LearningRecord, + minSuccessRatio float64, +) ([]LearningRecord, []string, error) { + if c == nil { + return NewHeuristicPatternClusterer(0, nil).BuildPatterns(ctx, workspace, successfulTasks, existing) + } + fallback := c.fallback + if fallback == nil { + fallback = NewHeuristicPatternClusterer(c.minCount, c.now) + } + if c.provider == nil { + return buildFallbackPatternsWithEvidence( + ctx, + fallback, + workspace, + successfulTasks, + evidenceTasks, + existing, + minSuccessRatio, + ) + } + model := strings.TrimSpace(c.model) + if model == "" { + model = strings.TrimSpace(c.provider.GetDefaultModel()) + } + if model == "" { + return buildFallbackPatternsWithEvidence( + ctx, + fallback, + workspace, + successfulTasks, + evidenceTasks, + existing, + minSuccessRatio, + ) + } + if len(evidenceTasks) == 0 { + evidenceTasks = successfulTasks + } + + callCtx, cancel := withLLMCallTimeout(ctx, llmPatternClusterTimeout) + defer cancel() + resp, err := c.provider.Chat(callCtx, []providers.Message{ + { + Role: "system", + Content: "Cluster agent task records by task meaning. 
Include successful and failed task IDs in the same cluster when they share the same reusable meaning. Return exactly one JSON object with clusters:[{label,summary,task_record_ids,cluster_reason}]. No markdown fences.", + }, + { + Role: "user", + Content: buildPatternClusterPrompt(workspace, evidenceTasks, existing), + }, + }, nil, model, map[string]any{"temperature": 0}) + if err != nil || resp == nil || strings.TrimSpace(resp.Content) == "" { + return buildFallbackPatternsWithEvidence( + ctx, + fallback, + workspace, + successfulTasks, + evidenceTasks, + existing, + minSuccessRatio, + ) + } + + payload, ok := parseLLMClusterResponse(resp.Content) + if !ok { + return buildFallbackPatternsWithEvidence( + ctx, + fallback, + workspace, + successfulTasks, + evidenceTasks, + existing, + minSuccessRatio, + ) + } + if len(payload.Clusters) == 0 { + return buildFallbackPatternsWithEvidence( + ctx, + fallback, + workspace, + successfulTasks, + evidenceTasks, + existing, + minSuccessRatio, + ) + } + patterns, clusteredIDs := c.validateAndBuildPatternsWithEvidence( + workspace, + payload.Clusters, + successfulTasks, + evidenceTasks, + existing, + minSuccessRatio, + ) + return patterns, clusteredIDs, nil +} + +func buildFallbackPatternsWithEvidence( + ctx context.Context, + fallback PatternClusterer, + workspace string, + successfulTasks []LearningRecord, + evidenceTasks []LearningRecord, + existing []LearningRecord, + minSuccessRatio float64, +) ([]LearningRecord, []string, error) { + if fallback == nil { + fallback = NewHeuristicPatternClusterer(0, nil) + } + patterns, _, err := fallback.BuildPatterns(ctx, workspace, successfulTasks, existing) + if err != nil || len(patterns) == 0 { + return patterns, nil, err + } + if len(evidenceTasks) == 0 { + evidenceTasks = successfulTasks + } + + successByID := make(map[string]LearningRecord, len(successfulTasks)) + for _, task := range successfulTasks { + successByID[task.ID] = task + } + evidenceByKey := 
make(map[string][]LearningRecord) + for _, task := range evidenceTasks { + if task.WorkspaceID != workspace { + continue + } + key := heuristicClusterKey(task) + if key == "" { + continue + } + evidenceByKey[key] = append(evidenceByKey[key], task) + } + + filteredPatterns := make([]LearningRecord, 0, len(patterns)) + clusteredIDs := make([]string, 0) + for _, pattern := range patterns { + keys := make(map[string]struct{}) + for _, id := range pattern.TaskRecordIDs { + task, ok := successByID[id] + if !ok { + continue + } + key := heuristicClusterKey(task) + if key == "" { + continue + } + keys[key] = struct{}{} + } + + clusterEvidenceByID := make(map[string]LearningRecord) + for key := range keys { + for _, task := range evidenceByKey[key] { + clusterEvidenceByID[task.ID] = task + } + } + if len(clusterEvidenceByID) == 0 { + for _, id := range pattern.TaskRecordIDs { + if task, ok := successByID[id]; ok { + clusterEvidenceByID[task.ID] = task + } + } + } + if len(clusterEvidenceByID) == 0 { + continue + } + + successes := 0 + clusterEvidence := make([]LearningRecord, 0, len(clusterEvidenceByID)) + for _, task := range clusterEvidenceByID { + clusterEvidence = append(clusterEvidence, task) + if task.Success != nil && *task.Success { + successes++ + } + } + sort.Slice(clusterEvidence, func(i, j int) bool { + leftSuccess := clusterEvidence[i].Success != nil && *clusterEvidence[i].Success + rightSuccess := clusterEvidence[j].Success != nil && *clusterEvidence[j].Success + if leftSuccess != rightSuccess { + return leftSuccess + } + return clusterEvidence[i].ID < clusterEvidence[j].ID + }) + if successes == 0 { + continue + } + if minSuccessRatio > 0 { + ratio := float64(successes) / float64(len(clusterEvidence)) + if ratio < minSuccessRatio { + continue + } + } + + filteredPatterns = append(filteredPatterns, pattern) + clusteredIDs = append(clusteredIDs, collectRecordIDs(clusterEvidence)...) 
+ } + return filteredPatterns, appendUniqueStrings(nil, clusteredIDs...), nil +} + +func (c *LLMPatternClusterer) validateAndBuildPatterns( + workspace string, + clusters []llmCluster, + tasks []LearningRecord, + existing []LearningRecord, +) ([]LearningRecord, []string) { + taskByID := make(map[string]LearningRecord, len(tasks)) + for _, task := range tasks { + taskByID[task.ID] = task + } + existingByLabel := patternsByLabel(existing, workspace) + assigned := make(map[string]struct{}, len(tasks)) + patterns := make([]LearningRecord, 0, len(clusters)) + clusteredIDs := make([]string, 0) + + for _, cluster := range clusters { + label := validSkillNameOrEmpty(cluster.Label) + if label == "" { + continue + } + clusterTasks := make([]LearningRecord, 0, len(cluster.TaskRecordIDs)) + for _, id := range cluster.TaskRecordIDs { + id = strings.TrimSpace(id) + if id == "" { + continue + } + if _, exists := assigned[id]; exists { + continue + } + task, ok := taskByID[id] + if !ok { + continue + } + clusterTasks = append(clusterTasks, task) + assigned[id] = struct{}{} + } + existingPattern, hasExisting := existingByLabel[label] + if !hasExisting && len(clusterTasks) < c.minCount { + continue + } + if len(clusterTasks) == 0 { + continue + } + pattern := buildPatternFromCluster( + workspace, + label, + cluster.Summary, + cluster.Reason, + clusterTasks, + existingPattern, + c.now(), + ) + patterns = append(patterns, pattern) + clusteredIDs = append(clusteredIDs, collectRecordIDs(clusterTasks)...) 
+ } + return patterns, clusteredIDs +} + +func (c *LLMPatternClusterer) validateAndBuildPatternsWithEvidence( + workspace string, + clusters []llmCluster, + successfulTasks []LearningRecord, + evidenceTasks []LearningRecord, + existing []LearningRecord, + minSuccessRatio float64, +) ([]LearningRecord, []string) { + evidenceByID := make(map[string]LearningRecord, len(evidenceTasks)) + for _, task := range evidenceTasks { + evidenceByID[task.ID] = task + } + successfulByID := make(map[string]LearningRecord, len(successfulTasks)) + for _, task := range successfulTasks { + successfulByID[task.ID] = task + } + existingByLabel := patternsByLabel(existing, workspace) + assigned := make(map[string]struct{}, len(evidenceTasks)) + patterns := make([]LearningRecord, 0, len(clusters)) + clusteredIDs := make([]string, 0) + + for _, cluster := range clusters { + label := validSkillNameOrEmpty(cluster.Label) + if label == "" { + continue + } + clusterEvidence := make([]LearningRecord, 0, len(cluster.TaskRecordIDs)) + clusterSuccesses := make([]LearningRecord, 0, len(cluster.TaskRecordIDs)) + for _, id := range cluster.TaskRecordIDs { + id = strings.TrimSpace(id) + if id == "" { + continue + } + if _, exists := assigned[id]; exists { + continue + } + task, ok := evidenceByID[id] + if !ok { + continue + } + clusterEvidence = append(clusterEvidence, task) + if successTask, ok := successfulByID[id]; ok { + clusterSuccesses = append(clusterSuccesses, successTask) + } + assigned[id] = struct{}{} + } + if len(clusterEvidence) == 0 || len(clusterSuccesses) == 0 { + continue + } + if minSuccessRatio > 0 { + ratio := float64(len(clusterSuccesses)) / float64(len(clusterEvidence)) + if ratio < minSuccessRatio { + continue + } + } + existingPattern, hasExisting := existingByLabel[label] + if !hasExisting && len(clusterSuccesses) < c.minCount { + continue + } + pattern := buildPatternFromCluster( + workspace, + label, + cluster.Summary, + cluster.Reason, + clusterSuccesses, + existingPattern, 
+ c.now(), + ) + patterns = append(patterns, pattern) + clusteredIDs = append(clusteredIDs, collectRecordIDs(clusterEvidence)...) + } + if len(assigned) != len(evidenceByID) { + return nil, nil + } + return patterns, clusteredIDs +} + +func parseLLMClusterResponse(content string) (llmClusterResponse, bool) { + normalized := strings.TrimSpace(content) + normalized = strings.TrimPrefix(normalized, "```json") + normalized = strings.TrimPrefix(normalized, "```") + normalized = strings.TrimSuffix(normalized, "```") + normalized = strings.TrimSpace(normalized) + var payload llmClusterResponse + if err := json.Unmarshal([]byte(normalized), &payload); err != nil { + return llmClusterResponse{}, false + } + return payload, true +} + +func buildPatternClusterPrompt(workspace string, tasks []LearningRecord, existing []LearningRecord) string { + type taskPayload struct { + ID string `json:"id"` + Summary string `json:"summary"` + FinalOutputExcerpt string `json:"final_output_excerpt"` + Success *bool `json:"success,omitempty"` + } + type patternPayload struct { + Label string `json:"label"` + Summary string `json:"summary"` + } + payload := struct { + Instruction string `json:"instruction"` + ExistingPatterns []patternPayload `json:"existing_patterns,omitempty"` + Tasks []taskPayload `json:"tasks"` + }{ + Instruction: "Group tasks that have the same reusable task meaning. Use existing pattern labels when they fit. 
Labels must be lowercase hyphenated and must not include concrete values.", + } + for _, pattern := range existing { + if pattern.WorkspaceID != workspace { + continue + } + if strings.TrimSpace(pattern.Label) == "" { + continue + } + payload.ExistingPatterns = append(payload.ExistingPatterns, patternPayload{ + Label: strings.TrimSpace(pattern.Label), + Summary: strings.TrimSpace(pattern.Summary), + }) + } + for _, task := range tasks { + payload.Tasks = append(payload.Tasks, taskPayload{ + ID: task.ID, + Summary: task.Summary, + FinalOutputExcerpt: summarizeText(task.FinalOutput, 800), + Success: task.Success, + }) + } + data, err := json.MarshalIndent(payload, "", " ") + if err != nil { + return fmt.Sprintf("tasks: %d", len(tasks)) + } + return string(data) +} + +func buildPatternFromCluster( + workspace, label, summary, reason string, + tasks []LearningRecord, + existing LearningRecord, + now time.Time, +) LearningRecord { + taskIDs := append([]string(nil), existing.TaskRecordIDs...) + taskIDs = appendUniqueStrings(taskIDs, collectRecordIDs(tasks)...) 
+ if summary = strings.TrimSpace(summary); summary == "" { + summary = labelSummary(label) + } + pattern := existing + if strings.TrimSpace(pattern.ID) == "" { + pattern = LearningRecord{ + ID: stableRuleID(workspace, label), + Kind: RecordKindPattern, + WorkspaceID: workspace, + CreatedAt: now, + Status: RecordStatus("ready"), + } + } else { + updatedAt := now + pattern.UpdatedAt = &updatedAt + } + pattern.Label = label + pattern.Summary = summary + pattern.TaskRecordIDs = taskIDs + pattern.ClusterReason = strings.TrimSpace(reason) + pattern.Status = RecordStatus("ready") + pattern.Source = nil + pattern.SourceRecordIDs = nil + pattern.EventCount = 0 + pattern.SuccessRate = 0 + pattern.MaturityScore = 0 + pattern.WinningPath = nil + pattern.LateAddedSkills = nil + pattern.FinalSnapshotTrigger = "" + pattern.MatchedSkillNames = nil + return pattern +} + +func patternsByLabel(patterns []LearningRecord, workspace string) map[string]LearningRecord { + out := make(map[string]LearningRecord, len(patterns)) + for _, pattern := range patterns { + if pattern.WorkspaceID != workspace { + continue + } + label := strings.TrimSpace(pattern.Label) + if label == "" { + label = validSkillNameOrEmpty(pattern.Summary) + } + if label == "" { + continue + } + out[label] = pattern + } + return out +} + +func heuristicClusterLabel(record LearningRecord) string { + if label := heuristicASCIIClusterLabel(record.Summary); label != "" { + return label + } + if normalized := normalizeUnicodeTaskSummary(record.Summary); normalized != "" { + return hashedTaskLabel(normalized) + } + return "" +} + +func heuristicClusterKey(record LearningRecord) string { + if label := heuristicASCIIClusterLabel(record.Summary); label != "" { + return "ascii:" + label + } + if normalized := normalizeUnicodeTaskSummary(record.Summary); normalized != "" { + return "unicode:" + hashedTaskLabel(normalized) + } + return "" +} + +func heuristicClusterLabelForGroup(key string, cluster []LearningRecord) string { + if 
strings.HasPrefix(key, "ascii:") || strings.HasPrefix(key, "unicode:") { + return strings.TrimSpace(strings.TrimPrefix(strings.TrimPrefix(key, "ascii:"), "unicode:")) + } + for _, record := range cluster { + if label := heuristicClusterLabel(record); label != "" { + return label + } + } + return "" +} + +func heuristicClusterSummary(label string, cluster []LearningRecord) string { + for _, record := range cluster { + if summary := strings.TrimSpace(record.Summary); summary != "" { + return summary + } + } + return labelSummary(label) +} + +func heuristicASCIIClusterLabel(summary string) string { + tokens := tokenizeForEvolution(summary) + out := make([]string, 0, len(tokens)) + for _, token := range tokens { + if isNumericToken(token) { + continue + } + out = append(out, token) + if len(out) >= 5 { + break + } + } + return validSkillNameOrEmpty(strings.Join(out, "-")) +} + +func normalizeUnicodeTaskSummary(summary string) string { + var b strings.Builder + for _, r := range strings.ToLower(strings.TrimSpace(summary)) { + if unicode.IsDigit(r) || unicode.IsSpace(r) || unicode.IsPunct(r) || unicode.IsSymbol(r) { + continue + } + b.WriteRune(r) + } + return b.String() +} + +func hashedTaskLabel(value string) string { + sum := sha1.Sum([]byte(value)) + return "task-" + hex.EncodeToString(sum[:4]) +} + +func labelSummary(label string) string { + label = strings.ReplaceAll(strings.TrimSpace(label), "-", " ") + if label == "" { + return "Learned task pattern." + } + return strings.ToUpper(label[:1]) + label[1:] + "." 
+} diff --git a/pkg/evolution/pattern_clusterer_test.go b/pkg/evolution/pattern_clusterer_test.go new file mode 100644 index 000000000..0e0c91128 --- /dev/null +++ b/pkg/evolution/pattern_clusterer_test.go @@ -0,0 +1,402 @@ +package evolution_test + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/evolution" + "github.com/sipeed/picoclaw/pkg/providers" +) + +type llmClusterTestProvider struct { + content string + defaultModel string + messages []providers.Message +} + +func (p *llmClusterTestProvider) Chat( + _ context.Context, + messages []providers.Message, + _ []providers.ToolDefinition, + _ string, + _ map[string]any, +) (*providers.LLMResponse, error) { + p.messages = append([]providers.Message(nil), messages...) + return &providers.LLMResponse{Content: p.content}, nil +} + +func (p *llmClusterTestProvider) GetDefaultModel() string { + return p.defaultModel +} + +func TestHeuristicPatternClusterer_GroupsChineseSummariesWithoutLLM(t *testing.T) { + clusterer := evolution.NewHeuristicPatternClusterer(3, func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + success := true + tasks := []evolution.LearningRecord{ + { + ID: "task-1", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace", + Summary: "调用三一定理计算100", + FinalOutput: "100 + 31 = 131; 131 + 42 = 173; 173 - 53 = 120", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + { + ID: "task-2", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace", + Summary: "调用三一定理计算200", + FinalOutput: "200 + 31 = 231; 231 + 42 = 273; 273 - 53 = 220", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + { + ID: "task-3", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace", + Summary: "调用三一定理计算300", + FinalOutput: "300 + 31 = 331; 331 + 42 = 373; 373 - 53 = 320", + Status: evolution.RecordStatus("new"), + Success: &success, + UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + }, + } 
+ + patterns, clusteredIDs, err := clusterer.BuildPatterns(context.Background(), "workspace", tasks, nil) + if err != nil { + t.Fatalf("BuildPatterns: %v", err) + } + if len(patterns) != 1 { + t.Fatalf("len(patterns) = %d, want 1: %#v", len(patterns), patterns) + } + if !strings.HasPrefix(patterns[0].Label, "task-") { + t.Fatalf("Label = %q, want task-* fallback label", patterns[0].Label) + } + if patterns[0].Summary != "调用三一定理计算100" { + t.Fatalf("Summary = %q, want representative Chinese summary", patterns[0].Summary) + } + if len(patterns[0].TaskRecordIDs) != 3 { + t.Fatalf("TaskRecordIDs = %v, want 3 ids", patterns[0].TaskRecordIDs) + } + if len(clusteredIDs) != 3 { + t.Fatalf("clusteredIDs = %v, want 3 ids", clusteredIDs) + } +} + +func TestLLMPatternClusterer_FallsBackWhenLLMReturnsNoUsableClusters(t *testing.T) { + fallback := evolution.NewHeuristicPatternClusterer(2, func() time.Time { + return time.Unix(1700000000, 0).UTC() + }) + clusterer := evolution.NewLLMPatternClusterer( + &llmClusterTestProvider{content: `{"clusters":[]}`, defaultModel: "test-model"}, + "test-model", + fallback, + 2, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + success := true + tasks := []evolution.LearningRecord{ + { + ID: "task-1", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace", + Summary: "调用三一定理计算100", + FinalOutput: "100 + 31 = 131", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + { + ID: "task-2", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace", + Summary: "调用三一定理计算200", + FinalOutput: "200 + 31 = 231", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + } + + patterns, clusteredIDs, err := clusterer.BuildPatterns(context.Background(), "workspace", tasks, nil) + if err != nil { + t.Fatalf("BuildPatterns: %v", err) + } + if len(patterns) != 1 { + t.Fatalf("len(patterns) = %d, want fallback pattern: %#v", len(patterns), patterns) + } + if len(clusteredIDs) != 2 { + t.Fatalf("clusteredIDs = 
%v, want 2 task IDs", clusteredIDs) + } +} + +func TestLLMPatternClusterer_PromptFiltersExistingPatternsByWorkspace(t *testing.T) { + provider := &llmClusterTestProvider{ + content: `{"clusters":[{"label":"current-weather-path","summary":"current summary","task_record_ids":["task-1"],"cluster_reason":"same goal"}]}`, + defaultModel: "test-model", + } + clusterer := evolution.NewLLMPatternClusterer( + provider, + "test-model", + evolution.NewHeuristicPatternClusterer(1, nil), + 1, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + success := true + tasks := []evolution.LearningRecord{ + { + ID: "task-1", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: "weather lookup", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + } + existing := []evolution.LearningRecord{ + { + ID: "rule-a", + Kind: evolution.RecordKindPattern, + WorkspaceID: "workspace-a", + Label: "current-weather-path", + Summary: "current workspace pattern", + }, + { + ID: "rule-b", + Kind: evolution.RecordKindPattern, + WorkspaceID: "workspace-b", + Label: "other-workspace-secret-path", + Summary: "other workspace pattern", + }, + } + + if _, _, err := clusterer.BuildPatterns(context.Background(), "workspace-a", tasks, existing); err != nil { + t.Fatalf("BuildPatterns: %v", err) + } + if len(provider.messages) != 2 { + t.Fatalf("len(messages) = %d, want 2", len(provider.messages)) + } + prompt := provider.messages[1].Content + if !strings.Contains(prompt, "current-weather-path") { + t.Fatalf("prompt = %q, want current workspace pattern", prompt) + } + if strings.Contains(prompt, "other-workspace-secret-path") || strings.Contains(prompt, "other workspace pattern") { + t.Fatalf("prompt leaked other workspace pattern: %s", prompt) + } +} + +func TestLLMPatternClusterer_RejectsClusterBelowEvidenceSuccessRatio(t *testing.T) { + provider := &llmClusterTestProvider{ + content: 
`{"clusters":[{"label":"weather-lookup","summary":"lookup weather","task_record_ids":["task-success","task-failed"],"cluster_reason":"same weather lookup goal"}]}`, + defaultModel: "test-model", + } + clusterer := evolution.NewLLMPatternClusterer( + provider, + "test-model", + evolution.NewHeuristicPatternClusterer(1, nil), + 1, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + success := true + failed := false + successfulTasks := []evolution.LearningRecord{ + { + ID: "task-success", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: "weather lookup shanghai", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + } + evidenceTasks := []evolution.LearningRecord{ + successfulTasks[0], + { + ID: "task-failed", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: "forecast for shanghai", + FinalOutput: "could not complete", + Status: evolution.RecordStatus("new"), + Success: &failed, + }, + } + + patterns, clusteredIDs, err := clusterer.BuildPatternsWithEvidence( + context.Background(), + "workspace-a", + successfulTasks, + evidenceTasks, + nil, + 0.8, + ) + if err != nil { + t.Fatalf("BuildPatternsWithEvidence: %v", err) + } + if len(patterns) != 0 { + t.Fatalf("len(patterns) = %d, want 0: %#v", len(patterns), patterns) + } + if len(clusteredIDs) != 0 { + t.Fatalf("clusteredIDs = %v, want none", clusteredIDs) + } + prompt := provider.messages[1].Content + if !strings.Contains(prompt, `"success": true`) || !strings.Contains(prompt, `"success": false`) { + t.Fatalf("prompt should include success and failure evidence:\n%s", prompt) + } +} + +func TestLLMPatternClusterer_RejectsIncompleteEvidenceAssignment(t *testing.T) { + provider := &llmClusterTestProvider{ + content: `{"clusters":[{"label":"weather-lookup","summary":"lookup weather","task_record_ids":["task-success"],"cluster_reason":"same weather lookup goal"}]}`, + defaultModel: "test-model", + } + clusterer := 
evolution.NewLLMPatternClusterer( + provider, + "test-model", + evolution.NewHeuristicPatternClusterer(1, nil), + 1, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + success := true + failed := false + successfulTasks := []evolution.LearningRecord{ + { + ID: "task-success", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: "weather lookup shanghai", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + } + evidenceTasks := []evolution.LearningRecord{ + successfulTasks[0], + { + ID: "task-failed", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: "forecast for shanghai", + FinalOutput: "could not complete", + Status: evolution.RecordStatus("new"), + Success: &failed, + }, + } + + patterns, clusteredIDs, err := clusterer.BuildPatternsWithEvidence( + context.Background(), + "workspace-a", + successfulTasks, + evidenceTasks, + nil, + 0.8, + ) + if err != nil { + t.Fatalf("BuildPatternsWithEvidence: %v", err) + } + if len(patterns) != 0 { + t.Fatalf("len(patterns) = %d, want 0: %#v", len(patterns), patterns) + } + if len(clusteredIDs) != 0 { + t.Fatalf("clusteredIDs = %v, want none", clusteredIDs) + } +} + +func TestLLMPatternClusterer_MarksAllAcceptedEvidenceClusteredButStoresSuccessfulTaskIDs(t *testing.T) { + provider := &llmClusterTestProvider{ + content: `{"clusters":[{"label":"weather-lookup","summary":"lookup weather","task_record_ids":["task-success","task-failed"],"cluster_reason":"same weather lookup goal"}]}`, + defaultModel: "test-model", + } + assertClustererMarksAllAcceptedEvidenceClustered( + t, + provider, + "weather lookup shanghai", + "forecast for shanghai", + "could not complete", + "1", + ) +} + +func TestLLMPatternClusterer_FallbackMarksAllAcceptedEvidenceClustered(t *testing.T) { + provider := &llmClusterTestProvider{ + content: `not-json`, + defaultModel: "test-model", + } + assertClustererMarksAllAcceptedEvidenceClustered( + t, + provider, 
+ "weather lookup 100", + "weather lookup 200", + "partial result", + "fallback pattern", + ) +} + +func assertClustererMarksAllAcceptedEvidenceClustered( + t *testing.T, + provider *llmClusterTestProvider, + successSummary string, + failedSummary string, + failedOutput string, + wantPatternDescription string, +) { + t.Helper() + clusterer := evolution.NewLLMPatternClusterer( + provider, + "test-model", + evolution.NewHeuristicPatternClusterer(1, nil), + 1, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + success := true + failed := false + successfulTasks := []evolution.LearningRecord{ + { + ID: "task-success", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: successSummary, + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &success, + }, + } + evidenceTasks := []evolution.LearningRecord{ + successfulTasks[0], + { + ID: "task-failed", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + Summary: failedSummary, + FinalOutput: failedOutput, + Status: evolution.RecordStatus("new"), + Success: &failed, + }, + } + + patterns, clusteredIDs, err := clusterer.BuildPatternsWithEvidence( + context.Background(), + "workspace-a", + successfulTasks, + evidenceTasks, + nil, + 0.5, + ) + if err != nil { + t.Fatalf("BuildPatternsWithEvidence: %v", err) + } + if len(patterns) != 1 { + t.Fatalf("len(patterns) = %d, want %s: %#v", len(patterns), wantPatternDescription, patterns) + } + if got := strings.Join(patterns[0].TaskRecordIDs, ","); got != "task-success" { + t.Fatalf("pattern TaskRecordIDs = %v, want only successful task", patterns[0].TaskRecordIDs) + } + if got := strings.Join(clusteredIDs, ","); got != "task-success,task-failed" { + t.Fatalf("clusteredIDs = %v, want all accepted evidence IDs", clusteredIDs) + } +} diff --git a/pkg/evolution/preview.go b/pkg/evolution/preview.go new file mode 100644 index 000000000..ee2774136 --- /dev/null +++ b/pkg/evolution/preview.go @@ -0,0 +1,154 @@ 
+package evolution + +import ( + "os" + "path/filepath" + "strconv" + "strings" +) + +type DraftPreview struct { + CurrentBody string + RenderedBody string + DiffPreview string +} + +func BuildDraftPreview(workspace string, draft SkillDraft) (DraftPreview, error) { + currentBody, hadOriginal, err := loadCurrentSkillBody(workspace, draft.TargetSkillName) + if err != nil { + return DraftPreview{}, err + } + + renderedBody, err := renderAppliedBody(draft, currentBody, hadOriginal) + if err != nil { + return DraftPreview{}, err + } + + return DraftPreview{ + CurrentBody: currentBody, + RenderedBody: renderedBody, + DiffPreview: buildLineDiffPreview(currentBody, renderedBody), + }, nil +} + +func loadCurrentSkillBody(workspace, skillName string) (string, bool, error) { + skillPath := filepath.Join(workspace, "skills", skillName, "SKILL.md") + data, err := os.ReadFile(skillPath) + if os.IsNotExist(err) { + return "", false, nil + } + if err != nil { + return "", false, err + } + return string(data), true, nil +} + +func buildLineDiffPreview(currentBody, renderedBody string) string { + before := strings.Split(strings.TrimRight(currentBody, "\n"), "\n") + after := strings.Split(strings.TrimRight(renderedBody, "\n"), "\n") + + if len(before) == 1 && before[0] == "" { + before = nil + } + if len(after) == 1 && after[0] == "" { + after = nil + } + + prefixLen := sharedPrefixLen(before, after) + suffixLen := sharedSuffixLen(before[prefixLen:], after[prefixLen:]) + const contextRadius = 2 + + beforeChangeStart := prefixLen + beforeChangeEnd := len(before) - suffixLen + afterChangeStart := prefixLen + afterChangeEnd := len(after) - suffixLen + + hunkBeforeStart := previewMaxInt(0, beforeChangeStart-contextRadius) + hunkAfterStart := previewMaxInt(0, afterChangeStart-contextRadius) + hunkBeforeEnd := previewMinInt(len(before), beforeChangeEnd+contextRadius) + hunkAfterEnd := previewMinInt(len(after), afterChangeEnd+contextRadius) + + removed := before[prefixLen : 
len(before)-suffixLen] + added := after[prefixLen : len(after)-suffixLen] + if len(removed) == 0 && len(added) == 0 { + return "(no content change)" + } + + lines := make([]string, 0, (hunkBeforeEnd-hunkBeforeStart)+(hunkAfterEnd-hunkAfterStart)) + header := make([]string, 0, 3+len(lines)) + header = append(header, + "--- current", + "+++ rendered", + formatUnifiedHunkHeader( + hunkBeforeStart, + hunkBeforeEnd-hunkBeforeStart, + hunkAfterStart, + hunkAfterEnd-hunkAfterStart, + ), + ) + for _, line := range before[hunkBeforeStart:beforeChangeStart] { + lines = append(lines, " "+line) + } + for _, line := range removed { + lines = append(lines, "-"+line) + } + for _, line := range added { + lines = append(lines, "+"+line) + } + for _, line := range after[afterChangeEnd:hunkAfterEnd] { + lines = append(lines, " "+line) + } + return strings.Join(append(header, lines...), "\n") +} + +func formatUnifiedHunkHeader(beforeStart, beforeCount, afterStart, afterCount int) string { + return "@@ -" + formatUnifiedRange( + beforeStart+1, + beforeCount, + ) + " +" + formatUnifiedRange( + afterStart+1, + afterCount, + ) + " @@" +} + +func formatUnifiedRange(start, count int) string { + return strconv.Itoa(start) + "," + strconv.Itoa(count) +} + +func sharedPrefixLen(left, right []string) int { + limit := len(left) + if len(right) < limit { + limit = len(right) + } + n := 0 + for n < limit && left[n] == right[n] { + n++ + } + return n +} + +func sharedSuffixLen(left, right []string) int { + limit := len(left) + if len(right) < limit { + limit = len(right) + } + n := 0 + for n < limit && left[len(left)-1-n] == right[len(right)-1-n] { + n++ + } + return n +} + +func previewMinInt(a, b int) int { + if a < b { + return a + } + return b +} + +func previewMaxInt(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/pkg/evolution/preview_test.go b/pkg/evolution/preview_test.go new file mode 100644 index 000000000..e6f0a9ce5 --- /dev/null +++ b/pkg/evolution/preview_test.go 
@@ -0,0 +1,111 @@ +package evolution + +import ( + "strings" + "testing" +) + +func TestBuildLineDiffPreview_UsesUnifiedDiffStyle(t *testing.T) { + current := strings.Join([]string{ + "---", + "name: weather", + "description: weather helper", + "---", + "# Weather", + "## Start Here", + "Use city names first.", + "", + }, "\n") + rendered := strings.Join([]string{ + "---", + "name: weather", + "description: weather helper", + "---", + "# Weather", + "## Start Here", + "Use city names first.", + "", + "## Start Here", + "Use native-name query first.", + "", + }, "\n") + + diff := buildLineDiffPreview(current, rendered) + + for _, want := range []string{ + "--- current", + "+++ rendered", + "@@", + "+## Start Here", + "+Use native-name query first.", + } { + if !strings.Contains(diff, want) { + t.Fatalf("diff missing %q:\n%s", want, diff) + } + } +} + +func TestBuildLineDiffPreview_NoContentChange(t *testing.T) { + body := "---\nname: weather\n---\n# Weather\n" + diff := buildLineDiffPreview(body, body) + if diff != "(no content change)" { + t.Fatalf("diff = %q, want no-content marker", diff) + } +} + +func TestBuildLineDiffPreview_LimitsContextAroundChanges(t *testing.T) { + current := strings.Join([]string{ + "line-01", + "line-02", + "line-03", + "line-04", + "line-05", + "line-06", + "line-07", + "line-08", + "line-09", + "line-10", + "", + }, "\n") + rendered := strings.Join([]string{ + "line-01", + "line-02", + "line-03", + "line-04", + "line-05", + "line-06", + "inserted-a", + "inserted-b", + "line-07", + "line-08", + "line-09", + "line-10", + "", + }, "\n") + + diff := buildLineDiffPreview(current, rendered) + + for _, want := range []string{ + "@@", + " line-05", + " line-06", + "+inserted-a", + "+inserted-b", + " line-07", + " line-08", + } { + if !strings.Contains(diff, want) { + t.Fatalf("diff missing %q:\n%s", want, diff) + } + } + for _, unwanted := range []string{ + "line-01", + "line-02", + "line-09", + "line-10", + } { + if strings.Contains(diff, 
unwanted) { + t.Fatalf("diff should omit distant context %q:\n%s", unwanted, diff) + } + } +} diff --git a/pkg/evolution/profile_sync.go b/pkg/evolution/profile_sync.go new file mode 100644 index 000000000..1499a6d44 --- /dev/null +++ b/pkg/evolution/profile_sync.go @@ -0,0 +1,75 @@ +package evolution + +import ( + "strings" + "time" +) + +func SaveAppliedProfile(store *Store, workspace string, draft SkillDraft, now time.Time) error { + return store.UpdateProfile(workspace, draft.TargetSkillName, func(profile *SkillProfile, exists bool) error { + if !exists { + *profile = SkillProfile{ + SkillName: draft.TargetSkillName, + WorkspaceID: workspace, + Origin: "evolved", + } + } + + profile.SkillName = draft.TargetSkillName + profile.WorkspaceID = workspace + profile.CurrentVersion = draft.ID + profile.Status = SkillStatusActive + profile.Origin = profileOrigin(profile.Origin) + profile.HumanSummary = draft.HumanSummary + profile.ChangeReason = draft.HumanSummary + profile.IntendedUseCases = append([]string(nil), draft.IntendedUseCases...) + profile.PreferredEntryPath = append([]string(nil), draft.PreferredEntryPath...) + profile.AvoidPatterns = append([]string(nil), draft.AvoidPatterns...) + profile.LastUsedAt = now + if profile.RetentionScore <= 0 { + profile.RetentionScore = 1 + } + profile.VersionHistory = append(profile.VersionHistory, SkillVersionEntry{ + Version: draft.ID, + Action: string(draft.ChangeKind), + Timestamp: now, + DraftID: draft.ID, + Summary: draft.HumanSummary, + }) + return nil + }) +} + +func inferIntendedUseCases(rule LearningRecord) []string { + summary := strings.TrimSpace(rule.Summary) + if summary == "" { + return nil + } + return []string{summary} +} + +func inferPreferredEntryPath(rule LearningRecord) []string { + if len(rule.WinningPath) == 0 { + return nil + } + return append([]string(nil), rule.WinningPath...) 
+} + +func inferAvoidPatterns(rule LearningRecord) []string { + if len(rule.LateAddedSkills) == 0 || len(rule.WinningPath) <= len(rule.LateAddedSkills) { + return nil + } + prefix := rule.WinningPath[:len(rule.WinningPath)-len(rule.LateAddedSkills)] + if len(prefix) == 0 { + return nil + } + return []string{ + "avoid starting with " + strings.Join( + prefix, + " -> ", + ) + " before using " + strings.Join( + rule.LateAddedSkills, + " -> ", + ), + } +} diff --git a/pkg/evolution/record_kinds.go b/pkg/evolution/record_kinds.go new file mode 100644 index 000000000..db9af93a3 --- /dev/null +++ b/pkg/evolution/record_kinds.go @@ -0,0 +1,9 @@ +package evolution + +func isTaskRecordKind(kind RecordKind) bool { + return kind == RecordKindTask || kind == legacyRecordKindCase +} + +func isPatternRecordKind(kind RecordKind) bool { + return kind == RecordKindPattern || kind == legacyRecordKindRule +} diff --git a/pkg/evolution/runtime.go b/pkg/evolution/runtime.go new file mode 100644 index 000000000..cc88433f4 --- /dev/null +++ b/pkg/evolution/runtime.go @@ -0,0 +1,1579 @@ +package evolution + +import ( + "context" + "crypto/sha1" + "encoding/hex" + "errors" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "time" + "unicode/utf8" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/skills" +) + +var ErrApplyDraftFailed = errors.New("apply draft failed") + +type RuntimeOptions struct { + Config config.EvolutionConfig + Now func() time.Time + Store *Store + Organizer *Organizer + PatternClusterer PatternClusterer + SuccessJudge SuccessJudge + SkillsRecaller *SkillsRecaller + DraftGenerator DraftGenerator + GeneratorFactory func(workspace string) DraftGenerator + SuccessJudgeFactory func(workspace string) SuccessJudge + Applier *Applier + ApplierFactory func(workspace string) *Applier +} + +type Runtime struct { + cfg config.EvolutionConfig + mu sync.Mutex + now func() time.Time + writer 
*CaseWriter + store *Store + organizer *Organizer + patternClusterer PatternClusterer + successJudge SuccessJudge + skillsRecaller *SkillsRecaller + draftGenerator DraftGenerator + generatorFactory func(workspace string) DraftGenerator + successJudgeFactory func(workspace string) SuccessJudge + applier *Applier + applierFactory func(workspace string) *Applier +} + +type TurnCaseInput struct { + Workspace string + WorkspaceID string + TurnID string + SessionKey string + AgentID string + Status string + UserMessage string + FinalContent string + ToolKinds []string + ToolExecutions []ToolExecutionRecord + ActiveSkillNames []string + AttemptedSkillNames []string + FinalSuccessfulPath []string + SkillContextSnapshots []SkillContextSnapshot +} + +func NewRuntime(opts RuntimeOptions) (*Runtime, error) { + now := opts.Now + if now == nil { + now = time.Now + } + + organizer := opts.Organizer + if organizer == nil { + organizer = NewOrganizer(OrganizerOptions{ + MinCaseCount: opts.Config.EffectiveMinTaskCount(), + MinSuccessRate: opts.Config.EffectiveMinSuccessRatio(), + Now: now, + }) + } + + patternClusterer := opts.PatternClusterer + if patternClusterer == nil { + patternClusterer = NewHeuristicPatternClusterer(opts.Config.EffectiveMinTaskCount(), now) + } + + return &Runtime{ + cfg: opts.Config, + now: now, + store: opts.Store, + organizer: organizer, + patternClusterer: patternClusterer, + successJudge: opts.SuccessJudge, + skillsRecaller: opts.SkillsRecaller, + draftGenerator: opts.DraftGenerator, + generatorFactory: opts.GeneratorFactory, + successJudgeFactory: opts.SuccessJudgeFactory, + applier: opts.Applier, + applierFactory: opts.ApplierFactory, + }, nil +} + +func (rt *Runtime) FinalizeTurn(ctx context.Context, input TurnCaseInput) error { + if rt == nil || !rt.cfg.Enabled || input.Workspace == "" || shouldSkipLearningRecord(input) { + return nil + } + + success := input.Status == "completed" + usedSkillNames := buildUsedSkillNames(input) + workspaceID := 
input.Workspace + createdAt := rt.now() + + record := LearningRecord{ + ID: buildTaskRecordID(input, createdAt), + Kind: RecordKindTask, + WorkspaceID: workspaceID, + CreatedAt: createdAt, + SessionKey: input.SessionKey, + Summary: buildRecordSummary(input), + FinalOutput: summarizeText(input.FinalContent, 1200), + Status: RecordStatus("new"), + Success: &success, + UsedSkillNames: append([]string(nil), usedSkillNames...), + } + + paths := NewPaths(input.Workspace, rt.cfg.StateDir) + + rt.mu.Lock() + if rt.writer == nil || rt.writer.paths.RootDir != paths.RootDir { + rt.writer = NewCaseWriter(paths) + } + writer := rt.writer + rt.mu.Unlock() + + if err := writer.AppendCase(ctx, record); err != nil { + return err + } + + if err := rt.recordSkillUsage(input, success); err != nil { + return err + } + + logger.DebugCF("evolution", "Recorded hot path learning record", map[string]any{ + "workspace": input.Workspace, + "turn_id": input.TurnID, + "success": success, + "used_skills": len(record.UsedSkillNames), + }) + return nil +} + +func buildTaskRecordID(input TurnCaseInput, createdAt time.Time) string { + base := strings.TrimSpace(input.TurnID) + if base == "" { + base = "turn" + } + base = validSkillNameOrEmpty(base) + if base == "" { + base = "turn" + } + seed := strings.Join([]string{ + input.Workspace, + input.SessionKey, + input.AgentID, + input.TurnID, + createdAt.UTC().Format(time.RFC3339Nano), + }, "\x00") + sum := sha1.Sum([]byte(seed)) + return base + "-" + hex.EncodeToString(sum[:6]) +} + +func buildRecordSummary(input TurnCaseInput) string { + if goal := summarizeText(input.UserMessage, 160); goal != "" { + return goal + } + return fmt.Sprintf("turn %s finished with status=%s", input.TurnID, input.Status) +} + +func summarizeText(text string, maxLen int) string { + text = strings.TrimSpace(text) + if text == "" || maxLen <= 0 { + return text + } + if utf8.RuneCountInString(text) <= maxLen { + return text + } + if maxLen <= 3 { + runes := []rune(text) + 
// uniqueTrimmedNames returns values with surrounding whitespace removed,
// blank entries dropped and duplicates removed. Duplicates are detected
// case-insensitively; the first spelling encountered is the one kept, and the
// original order is preserved. The result is never nil.
func uniqueTrimmedNames(values []string) []string {
	seen := make(map[string]struct{}, len(values))
	result := make([]string, 0, len(values))
	for i := range values {
		name := strings.TrimSpace(values[i])
		if name == "" {
			continue
		}
		folded := strings.ToLower(name)
		if _, dup := seen[folded]; !dup {
			seen[folded] = struct{}{}
			result = append(result, name)
		}
	}
	return result
}
+ + admittedCount := 0 + newRuleCount := 0 + if rt.patternClusterer != nil { + recordsForOrganizer, evidenceRecordsForOrganizer, inputErr := rt.recordsForColdPathInputs( + ctx, + workspace, + taskRecords, + ) + if inputErr != nil { + return inputErr + } + recordsForOrganizer = rt.filterRecordsByMinSuccessRatio( + workspace, + evidenceRecordsForOrganizer, + recordsForOrganizer, + ) + admittedCount = countTaskLearningRecords(recordsForOrganizer) + logger.DebugCF("evolution", "Admitted task records for cold path", map[string]any{ + "workspace": workspace, + "admitted_tasks": admittedCount, + "organizer_input": len(recordsForOrganizer), + "task_ids": joinRecordIDs(recordsForOrganizer), + "run_id": runID, + }) + var rules []LearningRecord + var clusteredTaskIDs []string + if clusterer, ok := rt.patternClusterer.(evidencePatternClusterer); ok { + rules, clusteredTaskIDs, err = clusterer.BuildPatternsWithEvidence( + ctx, + workspace, + recordsForOrganizer, + evidenceRecordsForOrganizer, + patternRecords, + rt.cfg.EffectiveMinSuccessRatio(), + ) + } else { + rules, clusteredTaskIDs, err = rt.patternClusterer.BuildPatterns( + ctx, + workspace, + recordsForOrganizer, + patternRecords, + ) + } + if err != nil { + return err + } + newRuleCount = countNewPatterns(patternRecords, rules, workspace) + logger.DebugCF("evolution", "Built learning patterns", map[string]any{ + "workspace": workspace, + "pattern_count": len(rules), + "new_patterns": newRuleCount, + "admitted_tasks": admittedCount, + "patterns": summarizePatternRecords(rules), + "run_id": runID, + }) + if len(rules) > 0 { + merged := mergePatternRecords(patternRecords, rules, workspace) + if mergeErr := store.MergePatternRecords(rules); mergeErr != nil { + return mergeErr + } + patternRecords = merged + } + if len(clusteredTaskIDs) > 0 { + if markErr := markTaskRecordsClustered(store, clusteredTaskIDs); markErr != nil { + return markErr + } + } + } + + generator := rt.draftGeneratorForWorkspace(workspace) + if generator 
== nil { + logger.DebugCF("evolution", "Skipped drafting because no draft generator is available", map[string]any{ + "workspace": workspace, + "run_id": runID, + }) + return rt.runLifecycleMaintenance(workspace, store, runID) + } + + recaller := rt.skillsRecallerForWorkspace(workspace) + applier := rt.applierForWorkspace(workspace) + readyRules := filterReadyRules(patternRecords, workspace) + readyRules = enrichReadyRulesForDrafts(readyRules, taskRecords) + if len(readyRules) == 0 { + logger.DebugCF("evolution", "Finished cold path run without ready patterns", map[string]any{ + "workspace": workspace, + "record_count": len(taskRecords), + "new_patterns": newRuleCount, + "admitted_tasks": admittedCount, + "run_id": runID, + }) + return rt.runLifecycleMaintenance(workspace, store, runID) + } + + existingDrafts, err := store.LoadDrafts() + if err != nil { + return err + } + readyRuleByID := make(map[string]LearningRecord, len(readyRules)) + for _, rule := range readyRules { + readyRuleByID[rule.ID] = rule + } + appliedExistingDrafts := 0 + changedExistingDrafts := false + for _, draft := range existingDrafts { + if draft.WorkspaceID != workspace || draft.Status != DraftStatusCandidate { + continue + } + rule, ok := readyRuleByID[draft.SourceRecordID] + if !ok { + logger.DebugCF( + "evolution", + "Skipped existing candidate draft because its source pattern is not ready", + map[string]any{ + "workspace": workspace, + "draft_id": draft.ID, + "source_record_id": draft.SourceRecordID, + "run_id": runID, + }, + ) + continue + } + matches, recallErr := recaller.RecallSimilarSkills(rule) + if recallErr != nil { + return recallErr + } + draft.MatchedSkillRefs = collectSkillRefs(matches) + var normalizationNotes []string + evidence := draftEvidenceForRule(rule, taskRecords) + draft, normalizationNotes = rt.normalizeDraftForWorkspace(workspace, rule, matches, evidence, draft) + review := ReviewDraft(draft) + draft.Status = review.Status + draft.ReviewNotes = 
appendUniqueStrings(draft.ReviewNotes, append(review.ReviewNotes, normalizationNotes...)...) + draft.ScanFindings = appendUniqueStrings(draft.ScanFindings, review.Findings...) + changedExistingDrafts = true + if draft.Status != DraftStatusCandidate || mode != "apply" || applier == nil { + if saveErr := store.SaveDrafts([]SkillDraft{draft}); saveErr != nil { + return saveErr + } + continue + } + updatedDraft, applyErr := rt.applyCandidateDraft(ctx, workspace, store, applier, draft, runID) + if applyErr != nil { + return applyErr + } + if updatedDraft.Status == DraftStatusAccepted { + appliedExistingDrafts++ + changedExistingDrafts = true + } + } + if changedExistingDrafts { + existingDrafts, err = store.LoadDrafts() + if err != nil { + return err + } + } + existingBySource := existingDraftSourceSet(existingDrafts, workspace) + logger.DebugCF("evolution", "Selected ready patterns for drafting", map[string]any{ + "workspace": workspace, + "ready_patterns": len(readyRules), + "existing_draft_count": len(existingBySource), + "applied_existing": appliedExistingDrafts, + "ready_pattern_ids": joinRecordIDs(readyRules), + "ready_patterns_info": summarizePatternRecords(readyRules), + "run_id": runID, + }) + + processedRules := 0 + for _, rule := range readyRules { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if _, exists := existingBySource[rule.ID]; exists { + logger.DebugCF( + "evolution", + "Skipped pattern because a non-quarantined draft already exists", + map[string]any{ + "workspace": workspace, + "pattern_id": rule.ID, + "pattern_info": summarizePatternRecord(rule), + "run_id": runID, + }, + ) + continue + } + + evidence := draftEvidenceForRule(rule, taskRecords) + rule = enrichRuleWithDraftEvidence(rule, evidence) + matches, err := recaller.RecallSimilarSkills(rule) + if err != nil { + return err + } + logger.DebugCF("evolution", "Generating skill draft", map[string]any{ + "workspace": workspace, + "pattern_id": rule.ID, + 
"matched_skill_count": len(matches), + "pattern_info": summarizePatternRecord(rule), + "run_id": runID, + }) + + draft, err := generateDraftWithEvidence(ctx, generator, rule, matches, evidence) + if err != nil { + return err + } + + draft = rt.finalizeDraft(workspace, rule, matches, evidence, draft) + draftSaved := false + logger.DebugCF("evolution", "Finalized skill draft", map[string]any{ + "workspace": workspace, + "pattern_id": rule.ID, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "change_kind": string(draft.ChangeKind), + "status": string(draft.Status), + "run_id": runID, + }) + if mode == "apply" && applier != nil && draft.Status == DraftStatusCandidate { + var err error + draft, err = rt.applyCandidateDraft(ctx, workspace, store, applier, draft, runID) + if err != nil { + return err + } + draftSaved = true + } + + if !draftSaved { + if err := store.SaveDrafts([]SkillDraft{draft}); err != nil { + return err + } + } + logger.DebugCF("evolution", "Saved skill draft", map[string]any{ + "workspace": workspace, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "status": string(draft.Status), + "run_id": runID, + }) + existingBySource[rule.ID] = struct{}{} + processedRules++ + } + + logger.InfoCF("evolution", "Finished cold path run", map[string]any{ + "workspace": workspace, + "ready_patterns": len(readyRules), + "processed_patterns": processedRules, + "new_patterns": newRuleCount, + "run_id": runID, + }) + return rt.runLifecycleMaintenance(workspace, store, runID) +} + +func (rt *Runtime) recordsForColdPathInputs( + ctx context.Context, + workspace string, + records []LearningRecord, +) ([]LearningRecord, []LearningRecord, error) { + admitted := make([]LearningRecord, 0, len(records)) + evidence := make([]LearningRecord, 0, len(records)) + judge := rt.successJudgeForWorkspace(workspace) + + for _, record := range records { + if !isTaskRecordKind(record.Kind) || record.WorkspaceID != workspace { + continue + } + if reason := 
coldPathEvidenceRejectReason(record); reason != "" { + logger.DebugCF("evolution", "Rejected task record for cold path", map[string]any{ + "workspace": workspace, + "record_id": record.ID, + "reason": reason, + }) + continue + } + + evidenceRecord := record + if record.Success != nil && *record.Success && judge != nil { + decision, err := judge.JudgeTaskRecord(ctx, record) + if err != nil { + return nil, nil, err + } + judgedSuccess := decision.Success + evidenceRecord.Success = &judgedSuccess + if !decision.Success { + logger.DebugCF("evolution", "Rejected task record by success judge", map[string]any{ + "workspace": workspace, + "record_id": record.ID, + "reason": strings.TrimSpace(decision.Reason), + }) + } + } + evidence = append(evidence, evidenceRecord) + if evidenceRecord.Success == nil || !*evidenceRecord.Success { + continue + } + admitted = append(admitted, evidenceRecord) + } + return admitted, evidence, nil +} + +func (rt *Runtime) filterRecordsByMinSuccessRatio( + workspace string, + allRecords []LearningRecord, + admittedRecords []LearningRecord, +) []LearningRecord { + minRatio := rt.cfg.EffectiveMinSuccessRatio() + if minRatio <= 0 { + return admittedRecords + } + + type successStats struct { + success int + total int + } + statsByKey := make(map[string]successStats) + for _, record := range allRecords { + key, ok := coldPathSuccessRatioKey(workspace, record) + if !ok { + continue + } + stats := statsByKey[key] + stats.total++ + if record.Success != nil && *record.Success { + stats.success++ + } + statsByKey[key] = stats + } + + out := make([]LearningRecord, 0, len(admittedRecords)) + for _, record := range admittedRecords { + if !isTaskRecordKind(record.Kind) { + out = append(out, record) + continue + } + key, ok := coldPathSuccessRatioKey(workspace, record) + if !ok { + continue + } + stats := statsByKey[key] + if stats.total == 0 { + continue + } + ratio := float64(stats.success) / float64(stats.total) + if ratio < minRatio { + 
logger.DebugCF("evolution", "Rejected task record below cold path success ratio", map[string]any{ + "workspace": workspace, + "record_id": record.ID, + "success_ratio": ratio, + "min_success_ratio": minRatio, + "success_count": stats.success, + "total_count": stats.total, + }) + continue + } + out = append(out, record) + } + return out +} + +func coldPathSuccessRatioKey(workspace string, record LearningRecord) (string, bool) { + if !isTaskRecordKind(record.Kind) || record.WorkspaceID != workspace { + return "", false + } + if record.Status != "" && record.Status != RecordStatus("new") { + return "", false + } + if strings.EqualFold(strings.TrimSpace(record.SessionKey), "heartbeat") { + return "", false + } + if strings.EqualFold(strings.TrimSpace(record.FinalOutput), "HEARTBEAT_OK") { + return "", false + } + if strings.TrimSpace(record.Summary) == "" { + return "", false + } + key := heuristicClusterKey(record) + if key == "" { + return "", false + } + return key, true +} + +func coldPathEvidenceRejectReason(record LearningRecord) string { + if !isTaskRecordKind(record.Kind) { + return "not a task record" + } + if record.Success == nil { + return "task success unknown" + } + if record.Status != "" && record.Status != RecordStatus("new") { + return "task already processed" + } + if strings.EqualFold(strings.TrimSpace(record.SessionKey), "heartbeat") { + return "heartbeat session" + } + if strings.EqualFold(strings.TrimSpace(record.FinalOutput), "HEARTBEAT_OK") { + return "heartbeat output" + } + if strings.TrimSpace(record.Summary) == "" { + return "missing summary" + } + if strings.TrimSpace(record.FinalOutput) == "" { + return "missing final output" + } + return "" +} + +func (rt *Runtime) storeForWorkspace(workspace string) *Store { + paths := NewPaths(workspace, rt.cfg.StateDir) + if rt.store != nil && rt.store.paths.RootDir == paths.RootDir && rt.store.paths.Workspace == paths.Workspace { + return rt.store + } + return NewStore(paths) +} + +func (rt *Runtime) 
skillsRecallerForWorkspace(workspace string) *SkillsRecaller { + rt.mu.Lock() + defer rt.mu.Unlock() + + if rt.skillsRecaller == nil || rt.skillsRecaller.workspace != workspace { + rt.skillsRecaller = NewSkillsRecaller(workspace) + } + return rt.skillsRecaller +} + +func (rt *Runtime) draftGeneratorForWorkspace(workspace string) DraftGenerator { + if rt.generatorFactory != nil { + if generator := rt.generatorFactory(workspace); generator != nil { + return generator + } + } + if rt.draftGenerator != nil { + return rt.draftGenerator + } + return NewDefaultDraftGenerator(workspace) +} + +func (rt *Runtime) successJudgeForWorkspace(workspace string) SuccessJudge { + if rt.successJudgeFactory != nil { + if judge := rt.successJudgeFactory(workspace); judge != nil { + return judge + } + } + if rt.successJudge != nil { + return rt.successJudge + } + return &HeuristicSuccessJudge{} +} + +func (rt *Runtime) applierForWorkspace(workspace string) *Applier { + if rt.applierFactory != nil { + if applier := rt.applierFactory(workspace); applier != nil { + return applier + } + } + return rt.applier +} + +func (rt *Runtime) finalizeDraft( + workspace string, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, + draft SkillDraft, +) SkillDraft { + if draft.ID == "" { + draft.ID = "draft-" + rule.ID + } + if draft.CreatedAt.IsZero() { + draft.CreatedAt = rt.now() + } + draft.WorkspaceID = workspace + draft.SourceRecordID = rule.ID + draft.MatchedSkillRefs = collectSkillRefs(matches) + + draft, normalizationNotes := rt.normalizeDraftForWorkspace(workspace, rule, matches, evidence, draft) + review := ReviewDraft(draft) + draft.Status = review.Status + draft.ReviewNotes = append([]string(nil), review.ReviewNotes...) + draft.ReviewNotes = append(draft.ReviewNotes, normalizationNotes...) + if len(review.Findings) == 0 { + draft.ScanFindings = nil + return draft + } + draft.ScanFindings = append([]string(nil), review.Findings...) 
+ return draft +} + +func (rt *Runtime) normalizeDraftForWorkspace( + workspace string, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, + draft SkillDraft, +) (SkillDraft, []string) { + target := strings.TrimSpace(draft.TargetSkillName) + if workspace == "" || target == "" { + return draft, nil + } + + notes := make([]string, 0, 4) + if combinedTarget := inferCombinedSkillName(rule); combinedTarget != "" && combinedTarget != target { + originalTarget := target + draft.TargetSkillName = combinedTarget + target = combinedTarget + notes = append(notes, fmt.Sprintf( + "retargeted draft from %q to combined shortcut skill %q because the winning path was a stable multi-skill chain", + originalTarget, + combinedTarget, + )) + } + + skillPath := filepath.Join(workspace, "skills", target, "SKILL.md") + _, err := os.Stat(skillPath) + hasExisting := err == nil + if err != nil && !errors.Is(err, os.ErrNotExist) { + return draft, notes + } + + if combinedTarget := inferCombinedSkillName(rule); combinedTarget != "" && combinedTarget == target { + draft.HumanSummary = buildCombinedSkillHumanSummary(target, rule, hasExisting) + draft.PreferredEntryPath = []string{target} + draft.AvoidPatterns = appendUniqueStrings( + draft.AvoidPatterns, + buildCombinedSkillAvoidPattern(target, rule), + ) + if hasExisting { + draft.ChangeKind = ChangeKindAppend + draft.BodyOrPatch = synthesizeCombinedSkillAppendBody(target, draft, rule, matches, evidence) + notes = append(notes, "normalized combined shortcut draft to append onto the existing combined skill") + } else { + draft.ChangeKind = ChangeKindCreate + draft.BodyOrPatch = synthesizeCombinedSkillDocument(target, draft, rule, matches, evidence) + notes = append(notes, "normalized combined shortcut draft to create a new standalone shortcut skill") + } + return draft, notes + } + + if !hasExisting { + switch draft.ChangeKind { + case ChangeKindAppend, ChangeKindMerge, ChangeKindReplace: + draft.ChangeKind = 
// looksLikeSkillDocument reports whether body, ignoring surrounding
// whitespace, opens with a "---" line and contains a level-one Markdown
// heading ("# ") at the start of some later line.
func looksLikeSkillDocument(body string) bool {
	doc := strings.TrimSpace(body)
	if !strings.HasPrefix(doc, "---\n") {
		return false
	}
	return strings.Contains(doc, "\n# ")
}
+ } + if strings.HasPrefix(bodyContent, "# ") { + return buildSkillDocument(target, description, bodyContent) + } + + body := strings.Join([]string{ + "# " + titleCaseSkillName(target), + "", + "## Start Here", + synthesizedStartHereLine(rule, target), + "", + "## Learned Evolution", + bodyContent, + "", + "## Expected Result", + synthesizedExpectedResultLine(evidence), + "", + "## Source Evidence", + synthesizedEvidenceLine(rule, evidence), + "", + }, "\n") + return buildSkillDocument(target, description, body) +} + +func synthesizeCombinedSkillDocument( + target string, + draft SkillDraft, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, +) string { + description := strings.TrimSpace(draft.HumanSummary) + if description == "" { + description = buildCombinedSkillHumanSummary(target, rule, false) + } + + body := strings.Join([]string{ + "# " + titleCaseSkillName(target), + "", + "## When To Use", + synthesizedCombinedWhenToUseLine(rule, target), + "", + "## Procedure", + synthesizedCombinedStartHereLine(rule, target), + synthesizedCombinedProcedure(matches, rule), + "", + "## Source Skills", + synthesizedComponentBreakdown(matches), + "", + "## Learned Context", + synthesizedCombinedLearnedContent(draft.BodyOrPatch, rule), + "", + "## Expected Result", + synthesizedExpectedResultLine(evidence), + "", + "## Source Evidence", + synthesizedEvidenceLine(rule, evidence), + "", + }, "\n") + return buildSkillDocument(target, description, body) +} + +func synthesizeCombinedSkillAppendBody( + target string, + draft SkillDraft, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, +) string { + lines := []string{ + "## Learned Shortcut Update", + fmt.Sprintf("- Shortcut skill: `%s`", target), + fmt.Sprintf("- Task summary: %s", fallbackEvolutionSummary(rule)), + fmt.Sprintf("- Wrapped path: %s", synthesizedWrappedPathLine(rule)), + "- Guidance: prefer this shortcut directly instead of replaying the whole path when the 
task matches.", + fmt.Sprintf("- Expected result: %s", synthesizedExpectedResultLine(evidence)), + fmt.Sprintf("- Evidence: %s", synthesizedEvidenceLine(rule, evidence)), + "", + "### Source Skills", + synthesizedComponentBreakdown(matches), + "", + synthesizedCombinedLearnedContent(draft.BodyOrPatch, rule), + "", + } + return strings.Join(lines, "\n") +} + +func synthesizedStartHereLine(rule LearningRecord, target string) string { + if len(rule.WinningPath) > 0 { + return fmt.Sprintf( + "Start with `%s` for tasks like `%s`.", + strings.Join(rule.WinningPath, " -> "), + strings.TrimSpace(rule.Summary), + ) + } + if summary := strings.TrimSpace(rule.Summary); summary != "" { + return fmt.Sprintf("Use `%s` when the task matches `%s`.", target, summary) + } + return fmt.Sprintf("Use `%s` for the learned task pattern.", target) +} + +func synthesizedCombinedStartHereLine(rule LearningRecord, target string) string { + return fmt.Sprintf("Use `%s` directly when the task matches `%s`.", target, fallbackEvolutionSummary(rule)) +} + +func synthesizedCombinedWhenToUseLine(rule LearningRecord, target string) string { + if len(rule.WinningPath) == 0 { + return fmt.Sprintf("Use `%s` when the learned task pattern appears again.", target) + } + return fmt.Sprintf( + "Use `%s` as a direct shortcut instead of replaying `%s` step by step.", + target, + strings.Join(rule.WinningPath, " -> "), + ) +} + +func synthesizedCombinedProcedure(matches []skills.SkillInfo, rule LearningRecord) string { + components := synthesizedComponentBreakdown(matches) + if !strings.HasPrefix(strings.TrimSpace(components), "- `") { + if len(rule.WinningPath) == 0 { + return "Use the learned shortcut directly and keep the response focused on the requested result." 
+ } + return fmt.Sprintf( + "Apply the recorded path `%s`, then return the final result with only the necessary explanation.", + strings.Join(rule.WinningPath, " -> "), + ) + } + return "Follow the source skill guidance below as one compact procedure, then return the final result without replaying unnecessary discovery steps." +} + +func synthesizedExpectedResultLine(evidence DraftEvidence) string { + if excerpt := firstFinalOutputExcerpt(evidence, 360); excerpt != "" { + return excerpt + } + return "Return the completed result for the matched task without restating unrelated discovery steps." +} + +func synthesizedEvidenceLine(rule LearningRecord, evidence DraftEvidence) string { + if len(evidence.TaskRecords) > 0 { + ids := make([]string, 0, len(evidence.TaskRecords)) + for _, task := range evidence.TaskRecords { + if id := strings.TrimSpace(task.ID); id != "" { + ids = append(ids, id) + } + } + if len(ids) > 0 { + return "learned from task records: " + strings.Join(ids, ", ") + } + } + if len(rule.TaskRecordIDs) > 0 { + return "learned from task records: " + strings.Join(rule.TaskRecordIDs, ", ") + } + return "learned from the pattern record." +} + +func synthesizedWrappedPathLine(rule LearningRecord) string { + if len(rule.WinningPath) == 0 { + return "No explicit wrapped path was recorded." 
+ } + return strings.Join(rule.WinningPath, " -> ") +} + +func synthesizedCombinedLearnedContent(body string, rule LearningRecord) string { + content := strings.TrimSpace(stripSkillFrontmatter(body)) + if content == "" { + return fmt.Sprintf( + "Learned from `%s`; use this shortcut directly when the same task pattern appears again.", + fallbackEvolutionSummary(rule), + ) + } + content = removeVerboseCombinedSections(content) + content = strings.Join(strings.Fields(content), " ") + if content == "" { + return fmt.Sprintf( + "Learned from `%s`; use this shortcut directly when the same task pattern appears again.", + fallbackEvolutionSummary(rule), + ) + } + content = trimAtReadableBoundary(content, 1200) + return "- Learned task: " + fallbackEvolutionSummary(rule) + "\n- Reusable guidance: " + content +} + +func stripSkillFrontmatter(body string) string { + trimmed := strings.TrimSpace(body) + if !strings.HasPrefix(trimmed, "---\n") { + return trimmed + } + rest := strings.TrimPrefix(trimmed, "---\n") + end := strings.Index(rest, "\n---\n") + if end < 0 { + return trimmed + } + return strings.TrimSpace(rest[end+5:]) +} + +func removeVerboseCombinedSections(content string) string { + lines := strings.Split(content, "\n") + out := make([]string, 0, len(lines)) + skip := false + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "#") { + title := strings.TrimSpace(strings.TrimLeft(trimmed, "#")) + normalized := strings.ToLower(title) + switch normalized { + case "component skill breakdown", "source skills", "wrapped path", "start here", "when to use", "procedure": + skip = true + continue + default: + skip = false + } + } + if skip { + continue + } + out = append(out, line) + } + return strings.TrimSpace(strings.Join(out, "\n")) +} + +func fallbackEvolutionSummary(rule LearningRecord) string { + if summary := strings.TrimSpace(rule.Summary); summary != "" { + return summary + } + if len(rule.WinningPath) > 0 { + return 
strings.Join(rule.WinningPath, " -> ") + } + return "the learned task pattern" +} + +func buildCombinedSkillHumanSummary(target string, rule LearningRecord, hasExisting bool) string { + _ = hasExisting + summary := fallbackEvolutionSummary(rule) + if strings.TrimSpace(summary) == "" || summary == "the learned task pattern" { + summary = titleCaseSkillName(target) + } + return fmt.Sprintf("Use this skill to %s when the task requires this workflow.", sentenceFragment(summary)) +} + +func buildCombinedSkillAvoidPattern(target string, rule LearningRecord) string { + if len(rule.WinningPath) == 0 { + return fmt.Sprintf("avoid bypassing `%s` when the same learned task pattern appears again", target) + } + return fmt.Sprintf("avoid replaying %s before trying `%s` directly", strings.Join(rule.WinningPath, " -> "), target) +} + +func collectSkillRefs(matches []skills.SkillInfo) []string { + if len(matches) == 0 { + return nil + } + + refs := make([]string, 0, len(matches)) + for _, match := range matches { + if strings := match.Path; strings != "" { + refs = append(refs, strings) + continue + } + refs = append(refs, match.Source+":"+match.Name) + } + return refs +} + +func countTaskLearningRecords(records []LearningRecord) int { + count := 0 + for _, record := range records { + if isTaskRecordKind(record.Kind) { + count++ + } + } + return count +} + +func (rt *Runtime) runLifecycleMaintenance(workspace string, store *Store, runID string) error { + if rt == nil || store == nil || workspace == "" { + return nil + } + + paths := NewPaths(workspace, rt.cfg.StateDir) + logger.DebugCF("evolution", "Started lifecycle maintenance", map[string]any{ + "workspace": workspace, + "run_id": runID, + }) + + summary, err := RunLifecycleOnce(store, paths, workspace, rt.now()) + if err != nil { + logger.WarnCF("evolution", "Lifecycle maintenance failed", map[string]any{ + "workspace": workspace, + "run_id": runID, + "error": err.Error(), + }) + return err + } + + logger.DebugCF("evolution", 
"Finished lifecycle maintenance", map[string]any{ + "workspace": workspace, + "run_id": runID, + "evaluated_profiles": summary.EvaluatedProfiles, + "transitioned_profiles": summary.TransitionedProfiles, + "deleted_skills": summary.DeletedSkills, + }) + return nil +} + +func joinRecordIDs(records []LearningRecord) string { + if len(records) == 0 { + return "" + } + ids := make([]string, 0, len(records)) + for _, record := range records { + if strings.TrimSpace(record.ID) == "" { + continue + } + ids = append(ids, record.ID) + } + return strings.Join(ids, ",") +} + +func summarizePatternRecords(records []LearningRecord) string { + if len(records) == 0 { + return "" + } + parts := make([]string, 0, len(records)) + for _, record := range records { + parts = append(parts, summarizePatternRecord(record)) + } + return strings.Join(parts, " | ") +} + +func summarizePatternRecord(record LearningRecord) string { + label := strings.TrimSpace(record.ID) + if label == "" { + label = "unknown-pattern" + } + + path := strings.Join(record.WinningPath, " -> ") + if path == "" { + path = strings.TrimSpace(record.Summary) + } + if path == "" { + path = "no-summary" + } + + return fmt.Sprintf("%s[%s]", label, path) +} + +func enrichReadyRulesForDrafts(rules, taskRecords []LearningRecord) []LearningRecord { + if len(rules) == 0 || len(taskRecords) == 0 { + return rules + } + out := make([]LearningRecord, 0, len(rules)) + for _, rule := range rules { + evidence := draftEvidenceForRule(rule, taskRecords) + out = append(out, enrichRuleWithDraftEvidence(rule, evidence)) + } + return out +} + +func draftEvidenceForRule(rule LearningRecord, taskRecords []LearningRecord) DraftEvidence { + if len(rule.TaskRecordIDs) == 0 || len(taskRecords) == 0 { + return DraftEvidence{} + } + idSet := make(map[string]struct{}, len(rule.TaskRecordIDs)) + for _, id := range rule.TaskRecordIDs { + id = strings.TrimSpace(id) + if id == "" { + continue + } + idSet[id] = struct{}{} + } + if len(idSet) == 0 { + 
return DraftEvidence{} + } + tasks := make([]LearningRecord, 0, len(idSet)) + for _, task := range taskRecords { + if rule.WorkspaceID != "" && task.WorkspaceID != rule.WorkspaceID { + continue + } + if _, ok := idSet[task.ID]; !ok { + continue + } + tasks = append(tasks, task) + } + return DraftEvidence{TaskRecords: tasks} +} + +func generateDraftWithEvidence( + ctx context.Context, + generator DraftGenerator, + rule LearningRecord, + matches []skills.SkillInfo, + evidence DraftEvidence, +) (SkillDraft, error) { + if generator == nil { + return SkillDraft{}, nil + } + if evidenceAware, ok := generator.(EvidenceAwareDraftGenerator); ok { + return evidenceAware.GenerateDraftWithEvidence(ctx, rule, matches, evidence) + } + return generator.GenerateDraft(ctx, rule, matches) +} + +func countNewPatterns(existing, patterns []LearningRecord, workspace string) int { + existingIDs := make(map[string]struct{}, len(existing)) + for _, pattern := range existing { + if !isPatternRecordKind(pattern.Kind) || pattern.WorkspaceID != workspace { + continue + } + existingIDs[pattern.ID] = struct{}{} + } + count := 0 + for _, pattern := range patterns { + if pattern.WorkspaceID != workspace { + continue + } + if _, ok := existingIDs[pattern.ID]; ok { + continue + } + count++ + } + return count +} + +func mergePatternRecords(existing, updates []LearningRecord, workspace string) []LearningRecord { + out := append([]LearningRecord(nil), existing...) 
+ indexByID := make(map[string]int, len(out)) + for i, pattern := range out { + indexByID[pattern.ID] = i + } + for _, update := range updates { + if update.WorkspaceID != workspace { + continue + } + if idx, ok := indexByID[update.ID]; ok { + out[idx] = update + continue + } + indexByID[update.ID] = len(out) + out = append(out, update) + } + return out +} + +func markTaskRecordsClustered(store *Store, ids []string) error { + if store == nil || len(ids) == 0 { + return nil + } + return store.MarkTaskRecordsClustered(ids) +} + +func filterReadyRules(records []LearningRecord, workspace string) []LearningRecord { + seen := make(map[string]LearningRecord) + for _, record := range records { + if !isPatternRecordKind(record.Kind) || record.WorkspaceID != workspace || + record.Status != RecordStatus("ready") { + continue + } + seen[record.ID] = record + } + + out := make([]LearningRecord, 0, len(seen)) + for _, record := range seen { + out = append(out, record) + } + sort.Slice(out, func(i, j int) bool { + if !out[i].CreatedAt.Equal(out[j].CreatedAt) { + return out[i].CreatedAt.Before(out[j].CreatedAt) + } + return out[i].ID < out[j].ID + }) + return out +} + +func existingDraftSourceSet(drafts []SkillDraft, workspace string) map[string]struct{} { + out := make(map[string]struct{}, len(drafts)) + for _, draft := range drafts { + if draft.WorkspaceID != workspace || draft.SourceRecordID == "" { + continue + } + if draft.Status == DraftStatusQuarantined { + continue + } + out[draft.SourceRecordID] = struct{}{} + } + return out +} + +func (rt *Runtime) saveAppliedProfile(store *Store, workspace string, draft SkillDraft) error { + now := rt.now() + + return SaveAppliedProfile(store, workspace, draft, now) +} + +func (rt *Runtime) applyCandidateDraft( + ctx context.Context, + workspace string, + store *Store, + applier *Applier, + draft SkillDraft, + runID string, +) (SkillDraft, error) { + logger.InfoCF("evolution", "Applying skill draft", map[string]any{ + "workspace": 
workspace, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "change_kind": string(draft.ChangeKind), + "run_id": runID, + }) + rollbackApply, err := applier.applyDraftWithRollback(ctx, workspace, draft) + if err != nil { + logger.WarnCF("evolution", "Skill draft apply failed", map[string]any{ + "workspace": workspace, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "error": err.Error(), + "run_id": runID, + }) + draft.Status = DraftStatusQuarantined + draft.ScanFindings = appendUniqueStrings(draft.ScanFindings, fmt.Sprintf("apply failed: %v", err)) + if auditErr := rt.recordRollbackAudit(store, draft, err); auditErr != nil { + draft.ScanFindings = appendUniqueStrings( + draft.ScanFindings, + fmt.Sprintf("rollback audit failed: %v", auditErr), + ) + if saveErr := store.SaveDrafts([]SkillDraft{draft}); saveErr != nil { + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, err), auditErr, saveErr) + } + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, err), auditErr) + } + if saveErr := store.SaveDrafts([]SkillDraft{draft}); saveErr != nil { + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, err), saveErr) + } + return draft, fmt.Errorf("%w: %v", ErrApplyDraftFailed, err) + } + + draft.Status = DraftStatusAccepted + if saveErr := store.SaveDrafts([]SkillDraft{draft}); saveErr != nil { + logger.WarnCF("evolution", "Skill draft save failed after apply", map[string]any{ + "workspace": workspace, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "error": saveErr.Error(), + "run_id": runID, + }) + if rollbackErr := rollbackApply(); rollbackErr != nil { + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, saveErr), rollbackErr) + } + return draft, fmt.Errorf("%w: %v", ErrApplyDraftFailed, saveErr) + } + + if err := rt.saveAppliedProfile(store, workspace, draft); err != nil { + logger.WarnCF("evolution", "Skill profile save failed after apply", 
map[string]any{ + "workspace": workspace, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "error": err.Error(), + "run_id": runID, + }) + draft.Status = DraftStatusQuarantined + draft.ScanFindings = appendUniqueStrings(draft.ScanFindings, fmt.Sprintf("profile save failed: %v", err)) + if rollbackErr := rollbackApply(); rollbackErr != nil { + draft.ScanFindings = appendUniqueStrings( + draft.ScanFindings, + fmt.Sprintf("apply rollback failed: %v", rollbackErr), + ) + if saveErr := store.SaveDrafts([]SkillDraft{draft}); saveErr != nil { + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, err), rollbackErr, saveErr) + } + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, err), rollbackErr) + } + if saveErr := store.SaveDrafts([]SkillDraft{draft}); saveErr != nil { + return draft, errorsJoin(fmt.Errorf("%w: %v", ErrApplyDraftFailed, err), saveErr) + } + return draft, fmt.Errorf("%w: %v", ErrApplyDraftFailed, err) + } + logger.InfoCF("evolution", "Applied skill draft successfully", map[string]any{ + "workspace": workspace, + "draft_id": draft.ID, + "target_skill": draft.TargetSkillName, + "run_id": runID, + }) + return draft, nil +} + +func (rt *Runtime) recordRollbackAudit(store *Store, draft SkillDraft, applyErr error) error { + now := rt.now() + return store.UpdateProfile( + draft.WorkspaceID, + draft.TargetSkillName, + func(profile *SkillProfile, exists bool) error { + if !exists { + return nil + } + profile.VersionHistory = append(profile.VersionHistory, SkillVersionEntry{ + Version: profile.CurrentVersion, + Action: "rollback", + Timestamp: now, + DraftID: draft.ID, + Summary: fmt.Sprintf("Rolled back failed draft apply: %s", draft.HumanSummary), + Rollback: true, + RollbackReason: applyErr.Error(), + }) + return nil + }, + ) +} + +func profileOrigin(origin string) string { + if origin == "manual" { + return origin + } + return "evolved" +} + +func appendUniqueStrings(existing []string, values ...string) 
[]string { + seen := make(map[string]struct{}, len(existing)) + for _, value := range existing { + seen[value] = struct{}{} + } + for _, value := range values { + if strings.TrimSpace(value) == "" { + continue + } + if _, ok := seen[value]; ok { + continue + } + existing = append(existing, value) + seen[value] = struct{}{} + } + return existing +} + +type skillUsageSummary struct { + All []string +} + +func buildSkillUsage(input TurnCaseInput) skillUsageSummary { + capacity := len(input.ActiveSkillNames) + len(input.AttemptedSkillNames) + len(input.FinalSuccessfulPath) + for _, snapshot := range input.SkillContextSnapshots { + capacity += len(snapshot.SkillNames) + } + for _, exec := range input.ToolExecutions { + capacity += len(exec.SkillNames) + } + + all := make([]string, 0, capacity) + all = append(all, input.ActiveSkillNames...) + all = append(all, input.AttemptedSkillNames...) + all = append(all, input.FinalSuccessfulPath...) + for _, snapshot := range input.SkillContextSnapshots { + all = append(all, snapshot.SkillNames...) + } + for _, exec := range input.ToolExecutions { + all = append(all, exec.SkillNames...) 
+ } + return skillUsageSummary{All: uniqueTrimmedNames(all)} +} + +func (rt *Runtime) recordSkillUsage(input TurnCaseInput, success bool) error { + usage := buildSkillUsage(input) + if len(usage.All) == 0 { + return nil + } + + store := rt.storeForWorkspace(input.Workspace) + seen := make(map[string]struct{}, len(usage.All)) + for _, skillName := range usage.All { + skillName = strings.TrimSpace(skillName) + if skillName == "" { + continue + } + if _, ok := seen[skillName]; ok { + continue + } + seen[skillName] = struct{}{} + + if err := rt.touchSkillProfile(store, input, skillName, success); err != nil { + return err + } + } + return nil +} + +func (rt *Runtime) touchSkillProfile(store *Store, input TurnCaseInput, skillName string, success bool) error { + now := rt.now() + return store.UpdateProfile(input.Workspace, skillName, func(profile *SkillProfile, exists bool) error { + if !exists { + *profile = SkillProfile{ + SkillName: skillName, + WorkspaceID: input.Workspace, + Status: SkillStatusActive, + Origin: "manual", + HumanSummary: skillName, + RetentionScore: 0.2, + } + } + + profile.SkillName = skillName + profile.WorkspaceID = input.Workspace + if profile.Status == SkillStatusCold || profile.Status == SkillStatusArchived || profile.Status == "" { + profile.Status = SkillStatusActive + } + if profile.Origin == "" { + profile.Origin = "manual" + } + if strings.TrimSpace(profile.HumanSummary) == "" { + profile.HumanSummary = skillName + } + profile.LastUsedAt = now + profile.UseCount++ + profile.RetentionScore = nextRetentionScore(profile.RetentionScore, success) + return nil + }) +} + +func nextRetentionScore(current float64, success bool) float64 { + increment := 0.05 + if success { + increment = 0.1 + } + current += increment + if current > 1 { + return 1 + } + return current +} diff --git a/pkg/evolution/runtime_apply_test.go b/pkg/evolution/runtime_apply_test.go new file mode 100644 index 000000000..5f5a53185 --- /dev/null +++ 
b/pkg/evolution/runtime_apply_test.go @@ -0,0 +1,1170 @@ +package evolution_test + +import ( + "context" + "errors" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestRuntime_RunColdPathOnce_ApplyModeWritesSkillAndProfile(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + IntendedUseCases: []string{ + "weather native-name path", + }, + PreferredEntryPath: []string{"weather"}, + AvoidPatterns: []string{"avoid translating city names before querying weather"}, + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) 
+ } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + skillPath := filepath.Join(root, "skills", "weather", "SKILL.md") + if _, statErr := os.Stat(skillPath); statErr != nil { + t.Fatalf("expected skill file: %v", statErr) + } + + profile, err := store.LoadProfile("weather") + if err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if profile.Status != evolution.SkillStatusActive { + t.Fatalf("Status = %q, want %q", profile.Status, evolution.SkillStatusActive) + } + if profile.CurrentVersion == "" { + t.Fatal("CurrentVersion should not be empty") + } + if profile.ChangeReason != "weather helper" { + t.Fatalf("ChangeReason = %q, want weather helper", profile.ChangeReason) + } + if len(profile.IntendedUseCases) != 1 || profile.IntendedUseCases[0] != "weather native-name path" { + t.Fatalf("IntendedUseCases = %v, want [weather native-name path]", profile.IntendedUseCases) + } + if len(profile.PreferredEntryPath) != 1 || profile.PreferredEntryPath[0] != "weather" { + t.Fatalf("PreferredEntryPath = %v, want [weather]", profile.PreferredEntryPath) + } + if len(profile.AvoidPatterns) != 1 || + profile.AvoidPatterns[0] != "avoid translating city names before querying weather" { + t.Fatalf("AvoidPatterns = %v, want populated metadata", profile.AvoidPatterns) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusAccepted { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusAccepted) + } +} + +func TestRuntime_RunColdPathOnce_DraftModeKeepsCandidateDraft(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 
0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + if _, statErr := os.Stat(filepath.Join(root, "skills", "weather", "SKILL.md")); !os.IsNotExist(statErr) { + t.Fatalf("expected no applied skill file, got err=%v", statErr) + } + if _, loadErr := store.LoadProfile("weather"); !os.IsNotExist(loadErr) { + t.Fatalf("expected no profile, got err=%v", loadErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusCandidate { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate) + } +} + 
+func TestRuntime_RunColdPathOnce_DraftModeRefreshesExistingCandidateWithEvidence(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + for _, source := range []struct { + name string + body string + }{ + {name: "three-one-theorem", body: "Add 31 to the input value."}, + {name: "four-two-theorem", body: "Add 42 to the current value."}, + {name: "five-three-theorem", body: "Subtract 53 from the current value."}, + } { + skillPath := filepath.Join(root, "skills", source.name, "SKILL.md") + if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + content := "---\nname: " + source.name + "\ndescription: theorem helper\n---\n# " + source.name + "\n" + source.body + "\n" + if err := os.WriteFile(skillPath, []byte(content), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + } + + success := true + if err := store.SaveTaskRecords([]evolution.LearningRecord{{ + ID: "task-1", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "调用三一定理计算100", + FinalOutput: "100 + 31 = 131; 131 + 42 = 173; 173 - 53 = 120", + Status: evolution.RecordStatus("clustered"), + Success: &success, + UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + }}); err != nil { + t.Fatalf("SaveTaskRecords: %v", err) + } + if err := store.SavePatternRecords([]evolution.LearningRecord{{ + ID: "pattern-1", + Kind: evolution.RecordKindPattern, + WorkspaceID: root, + CreatedAt: time.Unix(1700000001, 0).UTC(), + Summary: "调用三一定理计算100", + Status: evolution.RecordStatus("ready"), + TaskRecordIDs: []string{"task-1"}, + }}); err != nil { + t.Fatalf("SavePatternRecords: %v", err) + } + if err := store.SaveDrafts([]evolution.SkillDraft{ + { + ID: "draft-pattern-1", + WorkspaceID: root, + SourceRecordID: "pattern-1", + TargetSkillName: "learned-skill", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: 
evolution.ChangeKindCreate, + HumanSummary: "old generic draft", + BodyOrPatch: "---\nname: learned-skill\ndescription: old\n---\n# Learned Skill\n\nNo explicit winning path was recorded.\n", + Status: evolution.DraftStatusCandidate, + }, + }); err != nil { + t.Fatalf("SaveDrafts: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusCandidate { + t.Fatalf("draft status = %q, want candidate", drafts[0].Status) + } + for _, want := range []string{ + "calculate-100-via-theorems", + "Add 31 to the input value", + "Subtract 53 from the current value", + "100 + 31 = 131", + "task-1", + } { + if !strings.Contains(drafts[0].BodyOrPatch, want) && drafts[0].TargetSkillName != want { + t.Fatalf("refreshed draft missing %q:\nname=%s\n%s", want, drafts[0].TargetSkillName, drafts[0].BodyOrPatch) + } + } +} + +func TestRuntime_RunColdPathOnce_ApplyModeAppliesExistingCandidateDraft(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + if err := 
store.SaveDrafts([]evolution.SkillDraft{ + { + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n", + Status: evolution.DraftStatusCandidate, + }, + }); err != nil { + t.Fatalf("SaveDrafts: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-unused", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "unused-weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "unused", + BodyOrPatch: "---\nname: unused-weather\ndescription: unused\n---\n# Unused\n", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + if _, statErr := os.Stat(filepath.Join(root, "skills", "weather", "SKILL.md")); statErr != nil { + t.Fatalf("expected existing candidate to be applied: %v", statErr) + } + if _, statErr := os.Stat(filepath.Join(root, "skills", "unused-weather", "SKILL.md")); !os.IsNotExist(statErr) { + t.Fatalf("expected source rule to stay skipped after applying existing draft, got err=%v", statErr) + } + profile, err := store.LoadProfile("weather") + if 
err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if profile.CurrentVersion != "draft-1" { + t.Fatalf("CurrentVersion = %q, want draft-1", profile.CurrentVersion) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusAccepted { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusAccepted) + } +} + +func TestRuntime_RunColdPathOnce_ApplyModeSkipsOrphanCandidateDraft(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + if err := store.SaveDrafts([]evolution.SkillDraft{ + { + ID: "draft-orphan", + WorkspaceID: root, + SourceRecordID: "missing-rule", + TargetSkillName: "orphan-weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "orphan weather helper", + BodyOrPatch: "---\nname: orphan-weather\ndescription: orphan weather helper\n---\n# Orphan Weather\n## Start Here\nUse stale guidance.\n", + Status: evolution.DraftStatusCandidate, + }, + }); err != nil { + t.Fatalf("SaveDrafts: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: 
evolution.SkillDraft{ + ID: "draft-valid", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "valid-weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "valid weather helper", + BodyOrPatch: "---\nname: valid-weather\ndescription: valid weather helper\n---\n# Valid Weather\n", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + if _, statErr := os.Stat(filepath.Join(root, "skills", "orphan-weather", "SKILL.md")); !os.IsNotExist(statErr) { + t.Fatalf("orphan candidate draft should not be applied, got err=%v", statErr) + } + if _, statErr := os.Stat(filepath.Join(root, "skills", "valid-weather", "SKILL.md")); statErr != nil { + t.Fatalf("expected current ready rule draft to be applied: %v", statErr) + } + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + statusByID := map[string]evolution.DraftStatus{} + for _, draft := range drafts { + statusByID[draft.ID] = draft.Status + } + if statusByID["draft-orphan"] != evolution.DraftStatusCandidate { + t.Fatalf("orphan draft status = %q, want candidate", statusByID["draft-orphan"]) + } +} + +func TestRuntime_RunColdPathOnce_ApplyModeNormalizesExistingCombinedCandidateDraft(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + writeSkillForCombinedShortcutTest(t, root, "three-one-theorem", "Add 31 to the input before continuing.") + writeSkillForCombinedShortcutTest(t, root, "four-two-theorem", "Add 42 to the intermediate result.") + writeSkillForCombinedShortcutTest(t, root, "five-three-theorem", "Subtract 53 to produce the final result.") + + rule := 
evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "calculate 100", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + if err := store.SaveDrafts([]evolution.SkillDraft{ + { + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "five-three-theorem", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "messy generated combined skill", + BodyOrPatch: strings.Join([]string{ + "Prefer the full theorem chain directly.", + "", + "## Component Skill Breakdown", + "messy raw component dump should be removed before apply.", + "", + "## Learned Shortcut", + "Net effect: input + 20.", + }, "\n"), + Status: evolution.DraftStatusCandidate, + }, + }); err != nil { + t.Fatalf("SaveDrafts: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier( + evolution.NewPaths(root, ""), + func() time.Time { return time.Unix(1700001000, 0).UTC() }, + ), + DraftGenerator: stubDraftGenerator{}, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + data, err := os.ReadFile(filepath.Join(root, "skills", "calculate-100-via-theorems", "SKILL.md")) + if err != nil { + 
t.Fatalf("ReadFile: %v", err) + } + content := string(data) + if !strings.Contains(content, "## Procedure Details") || !strings.Contains(content, "## Procedure") { + t.Fatalf("expected clean combined skill sections:\n%s", content) + } + if strings.Contains(content, "Learned") || strings.Contains(content, "Source Evidence") { + t.Fatalf("deployed skill should not expose learning traces:\n%s", content) + } + if strings.Contains(content, "messy raw component dump") || + strings.Contains(content, "## Component Skill Breakdown") { + t.Fatalf("expected old verbose draft content to be cleaned:\n%s", content) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusAccepted { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusAccepted) + } + if drafts[0].TargetSkillName != "calculate-100-via-theorems" { + t.Fatalf("TargetSkillName = %q, want calculate-100-via-theorems", drafts[0].TargetSkillName) + } +} + +func TestRuntime_RunColdPathOnce_ApplyModeRetargetsStableMultiSkillPathIntoCombinedShortcut(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + writeSkillForCombinedShortcutTest(t, root, "three-one-theorem", "Add 31 to the input before continuing.") + writeSkillForCombinedShortcutTest(t, root, "four-two-theorem", "Add 42 to the intermediate result.") + writeSkillForCombinedShortcutTest(t, root, "five-three-theorem", "Subtract 53 to produce the final result.") + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "calculate 100", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + } + if err := 
store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "five-three-theorem", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "combine the theorem chain into one shortcut skill", + BodyOrPatch: strings.Join([]string{ + "Prefer the full theorem chain directly.", + "", + "## Component Skill Breakdown", + "messy raw component dump should be removed.", + "", + "## Learned Shortcut", + "Net effect: input + 20.", + }, "\n"), + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + skillPath := filepath.Join(root, "skills", "calculate-100-via-theorems", "SKILL.md") + data, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(data) + if !strings.Contains(content, "name: calculate-100-via-theorems") { + t.Fatalf("unexpected content:\n%s", content) + } + if !strings.Contains(content, "# Calculate 100 Via Theorems") { + t.Fatalf("missing synthesized heading:\n%s", content) + } + if !strings.Contains(content, "Prefer the full theorem chain directly.") { + t.Fatalf("missing learned content:\n%s", content) + } + if 
!strings.Contains(content, "## Procedure") { + t.Fatalf("missing compact procedure:\n%s", content) + } + if !strings.Contains(content, "## Procedure Details") { + t.Fatalf("missing source skill summary:\n%s", content) + } + if strings.Contains(content, "Learned") || strings.Contains(content, "Source Evidence") { + t.Fatalf("deployed skill should not expose learning traces:\n%s", content) + } + if !strings.Contains(content, "Add 31 to the input") || + !strings.Contains(content, "Subtract 53 to produce the final result") { + t.Fatalf("missing extracted component skill content:\n%s", content) + } + if strings.Contains(content, "Extracted guidance") { + t.Fatalf("component content should be concise, not raw extracted guidance:\n%s", content) + } + if strings.Contains(content, "messy raw component dump") { + t.Fatalf("learned context should remove verbose component dumps:\n%s", content) + } + if !strings.Contains(content, "Use `calculate-100-via-theorems` directly") { + t.Fatalf("missing direct shortcut guidance:\n%s", content) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusAccepted { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusAccepted) + } + if drafts[0].ChangeKind != evolution.ChangeKindCreate { + t.Fatalf("ChangeKind = %q, want %q", drafts[0].ChangeKind, evolution.ChangeKindCreate) + } + if drafts[0].TargetSkillName != "calculate-100-via-theorems" { + t.Fatalf("TargetSkillName = %q, want calculate-100-via-theorems", drafts[0].TargetSkillName) + } + if len(drafts[0].PreferredEntryPath) != 1 || drafts[0].PreferredEntryPath[0] != "calculate-100-via-theorems" { + t.Fatalf("PreferredEntryPath = %v, want [calculate-100-via-theorems]", drafts[0].PreferredEntryPath) + } + if len(drafts[0].ReviewNotes) == 0 { + t.Fatal("expected normalization review notes") + } +} + 
+func TestRuntime_RunColdPathOnce_CombinedShortcutKeepsReadableLongGuidance(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + longOperation := strings.Join([]string{ + "Step 1: normalize the incoming number and keep the original input available for reporting.", + "Step 2: add 31 to the normalized value and record the intermediate value.", + "Step 3: add 42 to the intermediate value and verify that arithmetic was performed exactly once.", + "Step 4: subtract 53 from the second intermediate value and return only the final value.", + "Step 5: if the user asks for explanation, include the compact arithmetic chain without unrelated context.", + }, " ") + writeSkillForCombinedShortcutTest(t, root, "three-one-theorem", longOperation) + writeSkillForCombinedShortcutTest(t, root, "four-two-theorem", "Add 42 to the intermediate result.") + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "calculate with theorem chain", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"three-one-theorem", "four-two-theorem"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier( + evolution.NewPaths(root, ""), + func() time.Time { return time.Unix(1700001000, 0).UTC() }, + ), + DraftGenerator: stubDraftGenerator{draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "three-one-theorem", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "combine 
theorem chain", + BodyOrPatch: strings.Join([]string{ + "Prefer the theorem chain directly.", + "Include Step A, Step B, Step C, Step D, Step E, Step F, Step G, Step H, Step I, Step J, Step K, Step L, Step M, Step N, Step O, Step P, Step Q, Step R, Step S, Step T, Step U, Step V, Step W, Step X, Step Y, Step Z, and then return the answer.", + "Finish with a short arithmetic explanation.", + }, " "), + }}, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + data, err := os.ReadFile(filepath.Join(root, "skills", "calculate-with-theorem-chain-via-theorems", "SKILL.md")) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + content := string(data) + if !strings.Contains(content, "Step 5: if the user asks for explanation") { + t.Fatalf("procedure details were cut too aggressively:\n%s", content) + } + if !strings.Contains(content, "Step Z, and then return the answer.") { + t.Fatalf("procedure notes were cut too aggressively:\n%s", content) + } +} + +func writeSkillForCombinedShortcutTest(t *testing.T, root, name, body string) { + t.Helper() + + skillPath := filepath.Join(root, "skills", name, "SKILL.md") + if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + content := strings.Join([]string{ + "---", + "name: " + name, + "description: test component skill", + "---", + "# " + name, + body, + "", + }, "\n") + if err := os.WriteFile(skillPath, []byte(content), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } +} + +func TestRuntime_RunColdPathOnce_ApplyFailureQuarantinesDraftAndWritesRollbackAudit(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + profile := 
evolution.SkillProfile{ + SkillName: "weather", + WorkspaceID: root, + CurrentVersion: "v1", + Status: evolution.SkillStatusActive, + Origin: "evolved", + HumanSummary: "weather helper", + LastUsedAt: time.Unix(1700000000, 0).UTC(), + RetentionScore: 1, + VersionHistory: []evolution.SkillVersionEntry{ + { + Version: "v1", + Action: "create", + Timestamp: time.Unix(1700000000, 0).UTC(), + DraftID: "draft-old", + Summary: "initial", + }, + }, + } + if err := store.SaveProfile(profile); err != nil { + t.Fatalf("SaveProfile: %v", err) + } + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + skillDir := filepath.Join(root, "skills", "weather") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + skillPath := filepath.Join(skillDir, "SKILL.md") + original := "---\nname: weather\ndescription: valid\n---\n# Weather\nold body\n" + if err := os.WriteFile(skillPath, []byte(original), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-rollback", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindReplace, + HumanSummary: "broken weather helper", + BodyOrPatch: 
"invalid-frontmatter", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + err = rt.RunColdPathOnce(context.Background(), root) + if err == nil { + t.Fatal("expected RunColdPathOnce to fail") + } + if !errors.Is(err, evolution.ErrApplyDraftFailed) { + t.Fatalf("error = %v, want ErrApplyDraftFailed", err) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusQuarantined { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusQuarantined) + } + if len(drafts[0].ScanFindings) == 0 { + t.Fatal("expected apply error in ScanFindings") + } + + loadedProfile, err := store.LoadProfile("weather") + if err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if len(loadedProfile.VersionHistory) != 2 { + t.Fatalf("len(VersionHistory) = %d, want 2", len(loadedProfile.VersionHistory)) + } + last := loadedProfile.VersionHistory[len(loadedProfile.VersionHistory)-1] + if !last.Rollback { + t.Fatal("expected rollback audit entry") + } + if last.DraftID != "draft-rollback" { + t.Fatalf("DraftID = %q, want draft-rollback", last.DraftID) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(got) != original { + t.Fatalf("skill content changed after runtime rollback:\n%s", string(got)) + } +} + +func TestRuntime_RunColdPathOnce_FirstApplyFailureDoesNotCreateGhostProfile(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: 
evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(evolution.NewPaths(root, ""), func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-ghost-profile", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "broken weather helper", + BodyOrPatch: "invalid-frontmatter", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + err = rt.RunColdPathOnce(context.Background(), root) + if err == nil { + t.Fatal("expected RunColdPathOnce to fail") + } + if !errors.Is(err, evolution.ErrApplyDraftFailed) { + t.Fatalf("error = %v, want ErrApplyDraftFailed", err) + } + + if _, loadErr := store.LoadProfile("weather"); !os.IsNotExist(loadErr) { + t.Fatalf("expected no profile after first apply failure, got err=%v", loadErr) + } +} + +func TestRuntime_RunColdPathOnce_DraftSaveFailureRollsBackAppliedSkill(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("directory permission behavior differs on Windows") + } + + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: 
evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + if err := os.Chmod(paths.RootDir, 0o555); err != nil { + t.Fatalf("Chmod(root read-only): %v", err) + } + t.Cleanup(func() { + _ = os.Chmod(paths.RootDir, 0o755) + }) + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(paths, func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-save-fail", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + err = rt.RunColdPathOnce(context.Background(), root) + if err == nil { + t.Fatal("expected RunColdPathOnce to fail") + } + if !errors.Is(err, evolution.ErrApplyDraftFailed) { + t.Fatalf("error = %v, want ErrApplyDraftFailed", err) + } + + skillPath := filepath.Join(root, "skills", "weather", "SKILL.md") + if _, statErr := os.Stat(skillPath); !os.IsNotExist(statErr) { + t.Fatalf("expected applied skill to be rolled back, got err=%v", statErr) + } + if _, loadErr := store.LoadProfile("weather"); !os.IsNotExist(loadErr) { + t.Fatalf("expected no profile after draft save failure, got err=%v", loadErr) + } +} + +func 
TestRuntime_RunColdPathOnce_AutoRunsLifecycleMaintenance(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + now := time.Unix(1700001000, 0).UTC() + + if err := store.SaveProfile(evolution.SkillProfile{ + SkillName: "stale-active-skill", + WorkspaceID: root, + Status: evolution.SkillStatusActive, + Origin: "evolved", + HumanSummary: "stale active skill", + LastUsedAt: now.Add(-91 * 24 * time.Hour), + RetentionScore: 0.1, + }); err != nil { + t.Fatalf("SaveProfile(active): %v", err) + } + + skillDir := filepath.Join(root, "skills", "stale-archived-skill") + if err := os.MkdirAll(skillDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + skillPath := filepath.Join(skillDir, "SKILL.md") + if err := os.WriteFile( + skillPath, + []byte("---\nname: stale-archived-skill\ndescription: stale\n---\n# Stale Archived Skill\n"), + 0o644, + ); err != nil { + t.Fatalf("WriteFile: %v", err) + } + if err := store.SaveProfile(evolution.SkillProfile{ + SkillName: "stale-archived-skill", + WorkspaceID: root, + Status: evolution.SkillStatusArchived, + Origin: "evolved", + HumanSummary: "stale archived skill", + LastUsedAt: now.Add(-366 * 24 * time.Hour), + RetentionScore: 0.05, + }); err != nil { + t.Fatalf("SaveProfile(archived): %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return now }, + Store: store, + Applier: evolution.NewApplier(paths, func() time.Time { + return now + }), + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + activeProfile, err := store.LoadProfile("stale-active-skill") + if err != 
nil { + t.Fatalf("LoadProfile(active): %v", err) + } + if activeProfile.Status != evolution.SkillStatusCold { + t.Fatalf("active profile Status = %q, want %q", activeProfile.Status, evolution.SkillStatusCold) + } + if len(activeProfile.VersionHistory) != 1 || activeProfile.VersionHistory[0].Action != "lifecycle:cold" { + t.Fatalf("active profile VersionHistory = %+v, want lifecycle:cold entry", activeProfile.VersionHistory) + } + + archivedProfile, err := store.LoadProfile("stale-archived-skill") + if err != nil { + t.Fatalf("LoadProfile(archived): %v", err) + } + if archivedProfile.Status != evolution.SkillStatusDeleted { + t.Fatalf("archived profile Status = %q, want %q", archivedProfile.Status, evolution.SkillStatusDeleted) + } + if len(archivedProfile.VersionHistory) != 1 || archivedProfile.VersionHistory[0].Action != "lifecycle:deleted" { + t.Fatalf("archived profile VersionHistory = %+v, want lifecycle:deleted entry", archivedProfile.VersionHistory) + } + + if _, statErr := os.Stat(skillPath); !os.IsNotExist(statErr) { + t.Fatalf("expected lifecycle delete to remove skill file, stat err = %v", statErr) + } +} + +func TestRuntime_RunColdPathOnce_ProfileSaveFailureRollsBackSkillAndQuarantinesDraft(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + if err := os.MkdirAll(filepath.Dir(paths.ProfilesDir), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile(paths.ProfilesDir, []byte("not-a-directory"), 0o644); err != nil { + t.Fatalf("WriteFile(profiles): %v", err) + } + + rt, err := 
evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + Store: store, + Applier: evolution.NewApplier(paths, func() time.Time { + return time.Unix(1700001000, 0).UTC() + }), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-profile-fail", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\n## Start Here\nUse native-name query first.\n", + }, + }, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 3, MinSuccessRate: 0.7}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + err = rt.RunColdPathOnce(context.Background(), root) + if err == nil { + t.Fatal("expected RunColdPathOnce to fail") + } + if !errors.Is(err, evolution.ErrApplyDraftFailed) { + t.Fatalf("error = %v, want ErrApplyDraftFailed", err) + } + + skillPath := filepath.Join(root, "skills", "weather", "SKILL.md") + if _, statErr := os.Stat(skillPath); !os.IsNotExist(statErr) { + t.Fatalf("expected rolled back skill file, got err=%v", statErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusQuarantined { + t.Fatalf("draft status = %q, want %q", drafts[0].Status, evolution.DraftStatusQuarantined) + } + if len(drafts[0].ScanFindings) == 0 { + t.Fatal("expected scan findings for profile save failure") + } +} diff --git a/pkg/evolution/runtime_cold_path_test.go b/pkg/evolution/runtime_cold_path_test.go new file mode 100644 index 000000000..19c23ebf0 --- /dev/null +++ 
b/pkg/evolution/runtime_cold_path_test.go @@ -0,0 +1,1285 @@ +package evolution_test + +import ( + "context" + "errors" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/evolution" + "github.com/sipeed/picoclaw/pkg/providers" + "github.com/sipeed/picoclaw/pkg/skills" +) + +type stubDraftGenerator struct { + draft evolution.SkillDraft + err error +} + +func (g stubDraftGenerator) GenerateDraft( + _ context.Context, + _ evolution.LearningRecord, + _ []skills.SkillInfo, +) (evolution.SkillDraft, error) { + return g.draft, g.err +} + +type sequenceDraftGenerator struct { + results []draftGenerationResult + index int +} + +type draftGenerationResult struct { + draft evolution.SkillDraft + err error +} + +type evidenceCaptureDraftGenerator struct { + evidence evolution.DraftEvidence +} + +func (g *evidenceCaptureDraftGenerator) GenerateDraft( + _ context.Context, + _ evolution.LearningRecord, + _ []skills.SkillInfo, +) (evolution.SkillDraft, error) { + return evolution.SkillDraft{}, nil +} + +func (g *evidenceCaptureDraftGenerator) GenerateDraftWithEvidence( + _ context.Context, + _ evolution.LearningRecord, + _ []skills.SkillInfo, + evidence evolution.DraftEvidence, +) (evolution.SkillDraft, error) { + g.evidence = evidence + return evolution.SkillDraft{ + ID: "draft-evidence", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindCreate, + HumanSummary: "weather helper", + BodyOrPatch: "---\nname: weather\ndescription: weather helper\n---\n# Weather\nUse current workspace evidence.\n", + }, nil +} + +type stubSuccessJudge struct { + decisions map[string]evolution.TaskSuccessDecision + calls []string +} + +func (j *stubSuccessJudge) JudgeTaskRecord( + _ context.Context, + record evolution.LearningRecord, +) (evolution.TaskSuccessDecision, error) { + j.calls = append(j.calls, record.ID) + if decision, ok := j.decisions[record.ID]; ok { 
+ return decision, nil + } + return evolution.TaskSuccessDecision{Success: true, Reason: "default success"}, nil +} + +func (g *sequenceDraftGenerator) GenerateDraft( + _ context.Context, + _ evolution.LearningRecord, + _ []skills.SkillInfo, +) (evolution.SkillDraft, error) { + if g.index >= len(g.results) { + return evolution.SkillDraft{}, nil + } + result := g.results[g.index] + g.index++ + return result.draft, result.err +} + +func TestRuntime_RunColdPathOnce_GeneratesCandidateDraft(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Now: func() time.Time { return time.Unix(1700001000, 0).UTC() }, + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer native-name path first", + BodyOrPatch: "## Start Here\nUse native-name query first.", + }, + }, + Store: store, + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if 
drafts[0].Status != evolution.DraftStatusCandidate { + t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate) + } +} + +func TestRuntime_RunColdPathOnce_AdmitsOnlyRecordsApprovedBySuccessJudge(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + ok := true + failed := false + + records := []evolution.LearningRecord{ + { + ID: "task-failed", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "failed weather attempt", + UserGoal: "check weather in shanghai", + FinalOutput: "tool failed", + Status: evolution.RecordStatus("new"), + Success: &failed, + UsedSkillNames: []string{"weather", "native-name"}, + ToolKinds: []string{"read_file"}, + }, + { + ID: "task-rejected", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "partial weather answer", + UserGoal: "check weather in shanghai", + FinalOutput: "I will check it next", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather", "native-name"}, + ToolKinds: []string{"read_file"}, + ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "read_file", Success: true}, + {Name: "read_file", Success: true}, + }, + }, + { + ID: "task-admitted", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather answer delivered", + UserGoal: "check weather in shanghai", + FinalOutput: "sunny, 26C", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather", "native-name"}, + AddedSkillNames: []string{"native-name"}, + ToolKinds: []string{"read_file"}, + ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "read_file", Success: true}, + {Name: "read_file", Success: true}, + }, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"weather"}, + FinalSuccessfulPath: []string{"weather"}, + }, 
+ }, + } + if err := store.AppendLearningRecords(records); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + judge := &stubSuccessJudge{ + decisions: map[string]evolution.TaskSuccessDecision{ + "task-rejected": {Success: false, Reason: "only partial reasoning"}, + "task-admitted": {Success: true, Reason: "goal achieved"}, + }, + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1}, + Store: store, + SuccessJudge: judge, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-weather", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer the proven weather path", + BodyOrPatch: "## Start Here\nUse the weather path directly.", + }, + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + if len(judge.calls) != 2 || judge.calls[0] != "task-rejected" || judge.calls[1] != "task-admitted" { + t.Fatalf("judge calls = %v, want [task-rejected task-admitted]", judge.calls) + } + + allRecords, err := store.LoadLearningRecords() + if err != nil { + t.Fatalf("LoadLearningRecords: %v", err) + } + + var pattern evolution.LearningRecord + foundPattern := false + for _, record := range allRecords { + if record.Kind != evolution.RecordKindPattern { + continue + } + pattern = record + foundPattern = true + break + } + if !foundPattern { + t.Fatal("expected generated pattern record") + } + if len(pattern.TaskRecordIDs) != 1 || pattern.TaskRecordIDs[0] != "task-admitted" { + t.Fatalf("TaskRecordIDs = %v, want [task-admitted]", pattern.TaskRecordIDs) + } + if pattern.Label == "" { + 
t.Fatal("pattern Label should not be empty") + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].SourceRecordID != pattern.ID { + t.Fatalf("draft SourceRecordID = %q, want %q", drafts[0].SourceRecordID, pattern.ID) + } +} + +func TestRuntime_RunColdPathOnce_RejectsClusterBelowMinSuccessRatio(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + ok := true + failed := false + + records := []evolution.LearningRecord{ + { + ID: "task-success", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather lookup 100", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather"}, + }, + { + ID: "task-failed-1", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "weather lookup 200", + FinalOutput: "failed", + Status: evolution.RecordStatus("new"), + Success: &failed, + UsedSkillNames: []string{"weather"}, + }, + { + ID: "task-failed-2", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather lookup 300", + FinalOutput: "failed", + Status: evolution.RecordStatus("new"), + Success: &failed, + UsedSkillNames: []string{"weather"}, + }, + } + if err := store.AppendLearningRecords(records); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8}, + Store: store, + SuccessJudge: &stubSuccessJudge{}, + SkillsRecaller: evolution.NewSkillsRecaller(root), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-weather", + TargetSkillName: "weather", + DraftType: 
evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer the proven weather path", + BodyOrPatch: "## Start Here\nUse the weather path directly.", + }, + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + patterns, err := store.LoadPatternRecords() + if err != nil { + t.Fatalf("LoadPatternRecords: %v", err) + } + if len(patterns) != 0 { + t.Fatalf("len(patterns) = %d, want 0", len(patterns)) + } + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 0 { + t.Fatalf("len(drafts) = %d, want 0", len(drafts)) + } +} + +func TestRuntime_RunColdPathOnce_FallbackUsesJudgeAdjustedSuccessRatio(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + ok := true + + records := []evolution.LearningRecord{ + { + ID: "task-success", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather lookup 100", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather"}, + }, + { + ID: "task-judge-rejected", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "weather lookup 200", + FinalOutput: "partial answer", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather"}, + }, + } + if err := store.AppendLearningRecords(records); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + judge := &stubSuccessJudge{ + decisions: map[string]evolution.TaskSuccessDecision{ + "task-success": {Success: true, Reason: "goal achieved"}, + "task-judge-rejected": {Success: false, Reason: "partial result"}, + }, + } + clusterer := evolution.NewLLMPatternClusterer( + &llmClusterTestProvider{content: 
`not-json`, defaultModel: "test-model"}, + "test-model", + evolution.NewHeuristicPatternClusterer(1, nil), + 1, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.8}, + Store: store, + PatternClusterer: clusterer, + SuccessJudge: judge, + SkillsRecaller: evolution.NewSkillsRecaller(root), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-weather", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer the proven weather path", + BodyOrPatch: "## Start Here\nUse the weather path directly.", + }, + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + patterns, err := store.LoadPatternRecords() + if err != nil { + t.Fatalf("LoadPatternRecords: %v", err) + } + if len(patterns) != 0 { + t.Fatalf("len(patterns) = %d, want 0", len(patterns)) + } + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 0 { + t.Fatalf("len(drafts) = %d, want 0", len(drafts)) + } +} + +func TestRuntime_RunColdPathOnce_FallbackMarksAcceptedFailureEvidenceClustered(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + ok := true + + records := []evolution.LearningRecord{ + { + ID: "task-success", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000200, 0).UTC(), + Summary: "weather lookup 100", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather"}, + }, + { + ID: "task-judge-rejected", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000100, 
0).UTC(), + Summary: "weather lookup 200", + FinalOutput: "partial answer", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather"}, + }, + } + if err := store.AppendLearningRecords(records); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + judge := &stubSuccessJudge{ + decisions: map[string]evolution.TaskSuccessDecision{ + "task-success": {Success: true, Reason: "goal achieved"}, + "task-judge-rejected": {Success: false, Reason: "partial result"}, + }, + } + clusterer := evolution.NewLLMPatternClusterer( + &llmClusterTestProvider{content: `not-json`, defaultModel: "test-model"}, + "test-model", + evolution.NewHeuristicPatternClusterer(1, nil), + 1, + func() time.Time { return time.Unix(1700000000, 0).UTC() }, + ) + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft", MinTaskCount: 1, MinSuccessRatio: 0.5}, + Store: store, + PatternClusterer: clusterer, + SuccessJudge: judge, + SkillsRecaller: evolution.NewSkillsRecaller(root), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-weather", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer the proven weather path", + BodyOrPatch: "## Start Here\nUse the weather path directly.", + }, + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + patterns, err := store.LoadPatternRecords() + if err != nil { + t.Fatalf("LoadPatternRecords: %v", err) + } + if len(patterns) != 1 { + t.Fatalf("len(patterns) = %d, want 1", len(patterns)) + } + if got := strings.Join(patterns[0].TaskRecordIDs, ","); got != "task-success" { + t.Fatalf("pattern TaskRecordIDs = %v, want only successful task", patterns[0].TaskRecordIDs) + } + taskRecords, err := 
store.LoadTaskRecords() + if err != nil { + t.Fatalf("LoadTaskRecords: %v", err) + } + statusByID := make(map[string]evolution.RecordStatus) + for _, record := range taskRecords { + statusByID[record.ID] = record.Status + } + for _, id := range []string{"task-success", "task-judge-rejected"} { + if statusByID[id] != evolution.RecordStatus("clustered") { + t.Fatalf("statusByID[%s] = %q, want clustered", id, statusByID[id]) + } + } +} + +func TestRuntime_RunColdPathOnce_DraftEvidenceDoesNotCrossWorkspaceWithDuplicateTaskID(t *testing.T) { + sharedState := t.TempDir() + workspaceA := t.TempDir() + workspaceB := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState)) + ok := true + + if err := store.AppendTaskRecords(context.Background(), []evolution.LearningRecord{ + { + ID: "main-turn-1", + Kind: evolution.RecordKindTask, + WorkspaceID: workspaceB, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "other workspace weather", + FinalOutput: "foreign workspace output", + Status: evolution.RecordStatus("clustered"), + Success: &ok, + UsedSkillNames: []string{"foreign-skill"}, + }, + { + ID: "main-turn-1", + Kind: evolution.RecordKindTask, + WorkspaceID: workspaceA, + CreatedAt: time.Unix(1700000001, 0).UTC(), + Summary: "current workspace weather", + FinalOutput: "current workspace output", + Status: evolution.RecordStatus("clustered"), + Success: &ok, + UsedSkillNames: []string{"current-skill"}, + }, + }); err != nil { + t.Fatalf("AppendTaskRecords: %v", err) + } + if err := store.AppendPatternRecords([]evolution.LearningRecord{{ + ID: "pattern-workspace-a", + Kind: evolution.RecordKindPattern, + WorkspaceID: workspaceA, + CreatedAt: time.Unix(1700000002, 0).UTC(), + Summary: "current workspace weather", + Status: evolution.RecordStatus("ready"), + TaskRecordIDs: []string{"main-turn-1"}, + }}); err != nil { + t.Fatalf("AppendPatternRecords: %v", err) + } + + generator := &evidenceCaptureDraftGenerator{} + rt, err := 
evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft", StateDir: sharedState}, + Store: store, + SkillsRecaller: evolution.NewSkillsRecaller(workspaceA), + DraftGenerator: generator, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), workspaceA); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + if len(generator.evidence.TaskRecords) != 1 { + t.Fatalf( + "evidence task count = %d, want 1: %#v", + len(generator.evidence.TaskRecords), + generator.evidence.TaskRecords, + ) + } + task := generator.evidence.TaskRecords[0] + if task.WorkspaceID != workspaceA { + t.Fatalf("evidence workspace = %q, want %q", task.WorkspaceID, workspaceA) + } + if task.FinalOutput != "current workspace output" { + t.Fatalf("evidence FinalOutput = %q, want current workspace output", task.FinalOutput) + } + if len(task.UsedSkillNames) != 1 || task.UsedSkillNames[0] != "current-skill" { + t.Fatalf("evidence UsedSkillNames = %v, want [current-skill]", task.UsedSkillNames) + } +} + +func TestRuntime_RunColdPathOnce_AdmitsSingleSkillTaskButWaitsForMinTaskCount(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + ok := true + + record := evolution.LearningRecord{ + ID: "task-simple", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000250, 0).UTC(), + Summary: "simple weather lookup", + UserGoal: "check weather", + FinalOutput: "sunny", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"weather"}, + AddedSkillNames: []string{"weather"}, + ToolKinds: []string{"read_file"}, + ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "read_file", Success: true, SkillNames: []string{"weather"}}, + }, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"weather"}, + FinalSuccessfulPath: []string{"weather"}, + }, + } + if err := 
store.AppendLearningRecords([]evolution.LearningRecord{record}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + judge := &stubSuccessJudge{} + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + SuccessJudge: judge, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-simple", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "simple draft", + BodyOrPatch: "## Start Here\nUse weather.", + }, + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + if len(judge.calls) != 1 || judge.calls[0] != "task-simple" { + t.Fatalf("judge calls = %v, want [task-simple]", judge.calls) + } + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 0 { + t.Fatalf("len(drafts) = %d, want 0", len(drafts)) + } +} + +func TestRuntime_RunColdPathOnce_RejectsTaskWhenSuccessJudgeRejects(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + ok := true + + record := evolution.LearningRecord{ + ID: "task-detailed-path", + Kind: evolution.RecordKindTask, + WorkspaceID: root, + CreatedAt: time.Unix(1700000300, 0).UTC(), + Summary: "computed theorem chain", + UserGoal: "调用三一定理计算100", + FinalOutput: "最终结果:100 通过三一定理计算得到 120", + Status: evolution.RecordStatus("new"), + Success: &ok, + UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + AddedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + ToolKinds: []string{"read_file"}, + 
ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "read_file", Success: true, SkillNames: []string{"three-one-theorem"}}, + {Name: "read_file", Success: true, SkillNames: []string{"four-two-theorem"}}, + {Name: "read_file", Success: true, SkillNames: []string{"five-three-theorem"}}, + }, + AttemptTrail: &evolution.AttemptTrail{ + AttemptedSkills: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + FinalSuccessfulPath: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"}, + }, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{record}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + judge := &stubSuccessJudge{ + decisions: map[string]evolution.TaskSuccessDecision{ + "task-detailed-path": {Success: false, Reason: "llm false negative"}, + }, + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + SuccessJudge: judge, + Organizer: evolution.NewOrganizer(evolution.OrganizerOptions{MinCaseCount: 1, MinSuccessRate: 1}), + SkillsRecaller: evolution.NewSkillsRecaller(root), + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-detailed-path", + TargetSkillName: "three-one-theorem", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer the full theorem chain", + BodyOrPatch: "## Start Here\nUse the full three-one, four-two, five-three theorem chain.", + }, + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + allRecords, err := store.LoadLearningRecords() + if err != nil { + t.Fatalf("LoadLearningRecords: %v", err) + } + + foundPattern := false + for _, record := range allRecords { + if record.Kind != evolution.RecordKindPattern { + continue + } + foundPattern = true + break + 
} + if foundPattern { + t.Fatal("unexpected pattern record for rejected task") + } +} + +func TestRuntime_RunColdPathOnce_QuarantinesInvalidDraft(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "release path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "broken", + BodyOrPatch: "", + }, + }, + Store: store, + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].Status != evolution.DraftStatusQuarantined { + t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusQuarantined) + } + if len(drafts[0].ScanFindings) == 0 { + t.Fatal("expected scan findings for invalid draft") + } +} + +func TestRuntime_RunColdPathOnce_DoesNotWriteSkillFile(t *testing.T) { + root := t.TempDir() + skillPath := filepath.Join(root, "skills", "weather", "SKILL.md") + if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + 
} + if err := os.WriteFile( + skillPath, + []byte("---\nname: weather\ndescription: test\n---\n# Weather"), + 0o644, + ); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + store := evolution.NewStore(evolution.NewPaths(root, "")) + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + original, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile(original): %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: root, + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer native-name path first", + BodyOrPatch: "## Start Here\nUse native-name query first.", + }, + }, + Store: store, + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + got, err := os.ReadFile(skillPath) + if err != nil { + t.Fatalf("ReadFile(after): %v", err) + } + if string(got) != string(original) { + t.Fatalf("skill file changed unexpectedly:\n%s", string(got)) + } +} + +func TestRuntime_RunColdPathOnce_UsesDefaultDraftGenerator(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: 
time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"weather"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].TargetSkillName != "weather" { + t.Fatalf("TargetSkillName = %q, want weather", drafts[0].TargetSkillName) + } + if drafts[0].Status != evolution.DraftStatusCandidate { + t.Fatalf("Status = %q, want %q", drafts[0].Status, evolution.DraftStatusCandidate) + } + if drafts[0].BodyOrPatch == "" { + t.Fatal("expected generated draft body") + } +} + +func TestRuntime_RunColdPathOnce_UsesLLMDraftGeneratorWhenProviderAvailable(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"weather"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + provider := &llmDraftRuntimeProvider{ + response: &providers.LLMResponse{ + Content: 
`{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name path first","body_or_patch":"## Start Here\nUse native-name query first."}`, + }, + } + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + DraftGenerator: evolution.NewDraftGeneratorForWorkspace(root, provider, "runtime-explicit-model"), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if provider.calls != 1 { + t.Fatalf("provider.calls = %d, want 1", provider.calls) + } + if drafts[0].HumanSummary != "Prefer native-name path first" { + t.Fatalf("HumanSummary = %q, want %q", drafts[0].HumanSummary, "Prefer native-name path first") + } +} + +func TestRuntime_RunColdPathOnce_UsesDefaultDraftGeneratorWhenFactoryHasNoProvider(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"weather"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + DraftGenerator: evolution.NewDraftGeneratorForWorkspace(root, nil, ""), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := 
rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].TargetSkillName != "weather" { + t.Fatalf("TargetSkillName = %q, want weather", drafts[0].TargetSkillName) + } + if drafts[0].BodyOrPatch == "" { + t.Fatal("expected generated draft body") + } +} + +func TestRuntime_RunColdPathOnce_UsesGeneratorFactoryWorkspaceForFallback(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + if err := os.MkdirAll(filepath.Join(root, "skills", "weather"), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + skillBody := "---\nname: weather\ndescription: workspace weather helper\n---\n# Weather\n## Start Here\nUse the workspace-specific path.\n" + if err := os.WriteFile(filepath.Join(root, "skills", "weather", "SKILL.md"), []byte(skillBody), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + SuccessRate: 1, + WinningPath: []string{"weather"}, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + provider := &llmDraftRuntimeProvider{ + response: &providers.LLMResponse{Content: `not-json`}, + defaultModel: "runtime-test-model", + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + GeneratorFactory: func(workspace string) evolution.DraftGenerator { + return evolution.NewDraftGeneratorForWorkspace(workspace, provider, "runtime-explicit-model") + }, + }) 
+ if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].ChangeKind != evolution.ChangeKindAppend { + t.Fatalf("ChangeKind = %q, want %q", drafts[0].ChangeKind, evolution.ChangeKindAppend) + } + if !strings.Contains(drafts[0].BodyOrPatch, "## Learned Evolution") { + t.Fatalf("BodyOrPatch = %q, want appended learned evolution section", drafts[0].BodyOrPatch) + } +} + +func TestRuntime_RunColdPathOnce_PersistsEarlierDraftWhenLaterRuleFails(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rules := []evolution.LearningRecord{ + { + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + }, + { + ID: "rule-2", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "release path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + }, + } + if err := store.AppendLearningRecords(rules); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + generator := &sequenceDraftGenerator{ + results: []draftGenerationResult{ + { + draft: evolution.SkillDraft{ + ID: "draft-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer native-name path first", + BodyOrPatch: "## Start Here\nUse native-name query first.", + }, + }, + { + err: context.DeadlineExceeded, + }, + }, + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: 
store, + DraftGenerator: generator, + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + err = rt.RunColdPathOnce(context.Background(), root) + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("RunColdPathOnce error = %v, want %v", err, context.DeadlineExceeded) + } + + drafts, loadErr := store.LoadDrafts() + if loadErr != nil { + t.Fatalf("LoadDrafts: %v", loadErr) + } + if len(drafts) != 1 { + t.Fatalf("len(drafts) = %d, want 1", len(drafts)) + } + if drafts[0].SourceRecordID != "rule-1" { + t.Fatalf("SourceRecordID = %q, want rule-1", drafts[0].SourceRecordID) + } +} + +func TestRuntime_RunColdPathOnce_RegeneratesAfterQuarantinedDraft(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + rule := evolution.LearningRecord{ + ID: "rule-1", + Kind: evolution.RecordKindRule, + WorkspaceID: root, + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather native-name path", + Status: evolution.RecordStatus("ready"), + EventCount: 4, + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{rule}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + if err := store.SaveDrafts([]evolution.SkillDraft{{ + ID: "draft-old", + WorkspaceID: root, + CreatedAt: time.Unix(1700000100, 0).UTC(), + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "broken attempt", + BodyOrPatch: "## Start Here\nBroken content.", + Status: evolution.DraftStatusQuarantined, + ScanFindings: []string{"apply failed"}, + }}); err != nil { + t.Fatalf("SaveDrafts: %v", err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "draft"}, + Store: store, + DraftGenerator: stubDraftGenerator{ + draft: evolution.SkillDraft{ + ID: "draft-new", + TargetSkillName: "weather", + DraftType: 
evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "fixed attempt", + BodyOrPatch: "## Start Here\nUse native-name query first.", + }, + }, + SkillsRecaller: evolution.NewSkillsRecaller(root), + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if runErr := rt.RunColdPathOnce(context.Background(), root); runErr != nil { + t.Fatalf("RunColdPathOnce: %v", runErr) + } + + drafts, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(drafts) != 2 { + t.Fatalf("len(drafts) = %d, want 2", len(drafts)) + } + if drafts[1].ID != "draft-new" { + t.Fatalf("drafts[1].ID = %q, want draft-new", drafts[1].ID) + } +} + +type llmDraftRuntimeProvider struct { + response *providers.LLMResponse + err error + calls int + defaultModel string +} + +func (p *llmDraftRuntimeProvider) Chat( + _ context.Context, + _ []providers.Message, + _ []providers.ToolDefinition, + _ string, + _ map[string]any, +) (*providers.LLMResponse, error) { + p.calls++ + return p.response, p.err +} + +func (p *llmDraftRuntimeProvider) GetDefaultModel() string { + if p.defaultModel != "" { + return p.defaultModel + } + return "runtime-test-model" +} diff --git a/pkg/evolution/runtime_test.go b/pkg/evolution/runtime_test.go new file mode 100644 index 000000000..533294ae5 --- /dev/null +++ b/pkg/evolution/runtime_test.go @@ -0,0 +1,672 @@ +package evolution_test + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + "time" + "unicode/utf8" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/evolution" +) + +func TestRuntime_FinalizeTurnDisabledDoesNothing(t *testing.T) { + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: false, Mode: "observe"}, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + workspace := t.TempDir() + err = rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: 
workspace, + TurnID: "turn-1", + Status: "completed", + }) + if err != nil { + t.Fatalf("FinalizeTurn: %v", err) + } + + paths := evolution.NewPaths(workspace, "") + if _, statErr := os.Stat(paths.TaskRecords); !os.IsNotExist(statErr) { + t.Fatalf("task records file should not exist, stat err = %v", statErr) + } +} + +func TestRuntime_FinalizeTurnWithEmptyWorkspaceDoesNothing(t *testing.T) { + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "observe"}, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + TurnID: "turn-1", + Status: "completed", + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } +} + +func TestRuntime_FinalizeTurnSkipsHeartbeat(t *testing.T) { + workspace := t.TempDir() + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "heartbeat-turn", + SessionKey: "heartbeat", + Status: "completed", + UserMessage: "# Heartbeat Check", + FinalContent: "HEARTBEAT_OK", + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + paths := evolution.NewPaths(workspace, "") + if _, statErr := os.Stat(paths.TaskRecords); !os.IsNotExist(statErr) { + t.Fatalf("heartbeat should not create task records, stat err = %v", statErr) + } +} + +func TestRuntime_FinalizeTurnWritesRecordWithOverride(t *testing.T) { + workspace := t.TempDir() + override := filepath.Join(t.TempDir(), "custom-state") + now := time.Unix(1700000000, 0).UTC() + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + StateDir: override, + }, + Now: func() time.Time { return now }, + }) 
+ if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-1", + SessionKey: "session-1", + AgentID: "agent-1", + Status: "completed", + UserMessage: "summarize the release notes", + FinalContent: "Here is the summary.", + ToolKinds: []string{"web", "read_file"}, + ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "web", Success: true}, + {Name: "read_file", Success: true}, + }, + ActiveSkillNames: []string{"skill-a"}, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn first call: %v", finalizeErr) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + WorkspaceID: "ws-explicit", + TurnID: "turn-2", + SessionKey: "session-2", + AgentID: "agent-2", + Status: "error", + UserMessage: "run the bash command", + FinalContent: "bash failed", + ToolKinds: []string{"bash"}, + ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "bash", Success: false, ErrorSummary: "exit status 1"}, + }, + ActiveSkillNames: []string{"skill-b"}, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn second call: %v", finalizeErr) + } + + paths := evolution.NewPaths(workspace, override) + data, err := os.ReadFile(paths.TaskRecords) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + if len(lines) != 2 { + t.Fatalf("record file line count = %d, want 2", len(lines)) + } + + var first evolution.LearningRecord + if err := json.Unmarshal([]byte(lines[0]), &first); err != nil { + t.Fatalf("Unmarshal first record: %v", err) + } + if first.WorkspaceID != workspace { + t.Fatalf("first WorkspaceID = %q, want %q", first.WorkspaceID, workspace) + } + if first.CreatedAt != now { + t.Fatalf("first CreatedAt = %v, want %v", first.CreatedAt, now) + } + if first.SessionKey != "session-1" { + t.Fatalf("first SessionKey = %q, want %q", first.SessionKey, 
"session-1") + } + if first.Summary != "summarize the release notes" { + t.Fatalf("first Summary = %q", first.Summary) + } + if first.FinalOutput != "Here is the summary." { + t.Fatalf("first FinalOutput = %q", first.FinalOutput) + } + if first.Success == nil || !*first.Success { + t.Fatalf("first Success = %v, want true", first.Success) + } + if len(first.AddedSkillNames) != 0 { + t.Fatalf("first AddedSkillNames = %v, want empty", first.AddedSkillNames) + } + if len(first.UsedSkillNames) != 0 { + t.Fatalf("first UsedSkillNames = %v, want empty", first.UsedSkillNames) + } + if len(first.ToolKinds) != 0 || len(first.ToolExecutions) != 0 || first.Source != nil || first.AttemptTrail != nil { + t.Fatalf("first record should be slimmed: %+v", first) + } + if first.TaskHash != "" || len(first.Signals) != 0 { + t.Fatalf("first record should not persist task_hash/signals: %+v", first) + } + + var second evolution.LearningRecord + if err := json.Unmarshal([]byte(lines[1]), &second); err != nil { + t.Fatalf("Unmarshal second record: %v", err) + } + if second.WorkspaceID != workspace { + t.Fatalf("second WorkspaceID = %q, want %q", second.WorkspaceID, workspace) + } + if second.SessionKey != "session-2" { + t.Fatalf("second SessionKey = %q, want %q", second.SessionKey, "session-2") + } + if second.Summary != "run the bash command" { + t.Fatalf("second Summary = %q", second.Summary) + } + if second.Success == nil || *second.Success { + t.Fatalf("second Success = %v, want false", second.Success) + } + if len(second.ToolExecutions) != 0 || second.Source != nil || second.AttemptTrail != nil { + t.Fatalf("second record should be slimmed: %+v", second) + } + if second.TaskHash != "" || len(second.Signals) != 0 { + t.Fatalf("second record should not persist task_hash/signals: %+v", second) + } +} + +func TestRuntime_FinalizeTurnGeneratesUniqueTaskRecordIDsAcrossRestartedTurnSequence(t *testing.T) { + workspace := t.TempDir() + createdAt := time.Unix(1700000000, 0).UTC() + rt, err := 
evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "observe"}, + Now: func() time.Time { + createdAt = createdAt.Add(time.Second) + return createdAt + }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + input := evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "main-turn-1", + SessionKey: "session-a", + AgentID: "main", + Status: "completed", + UserMessage: "summarize release notes", + FinalContent: "done", + } + if finalizeErr := rt.FinalizeTurn(context.Background(), input); finalizeErr != nil { + t.Fatalf("FinalizeTurn first: %v", finalizeErr) + } + input.SessionKey = "session-b" + if finalizeErr := rt.FinalizeTurn(context.Background(), input); finalizeErr != nil { + t.Fatalf("FinalizeTurn second: %v", finalizeErr) + } + + store := evolution.NewStore(evolution.NewPaths(workspace, "")) + records, err := store.LoadTaskRecords() + if err != nil { + t.Fatalf("LoadTaskRecords: %v", err) + } + if len(records) != 2 { + t.Fatalf("len(records) = %d, want 2: %#v", len(records), records) + } + if records[0].ID == records[1].ID { + t.Fatalf("record IDs should be unique across repeated turn IDs: %#v", records) + } + for _, record := range records { + if !strings.HasPrefix(record.ID, "main-turn-1-") { + t.Fatalf("record ID = %q, want main-turn-1-*", record.ID) + } + } +} + +func TestRuntime_FinalizeTurnSharedStateKeepsSkillProfilesScoped(t *testing.T) { + sharedState := t.TempDir() + workspaceA := t.TempDir() + workspaceB := t.TempDir() + now := time.Unix(1700000000, 0).UTC() + + storeA := evolution.NewStore(evolution.NewPaths(workspaceA, sharedState)) + if err := storeA.SaveProfile(evolution.SkillProfile{ + SkillName: "weather", + WorkspaceID: workspaceA, + CurrentVersion: "draft-a", + Status: evolution.SkillStatusActive, + Origin: "evolved", + HumanSummary: "workspace A weather helper", + LastUsedAt: now, + UseCount: 7, + RetentionScore: 0.9, + }); err != nil { + t.Fatalf("storeA.SaveProfile: %v", 
err) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + StateDir: sharedState, + }, + Now: func() time.Time { return now.Add(time.Minute) }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspaceA, + TurnID: "turn-a", + SessionKey: "session-a", + Status: "completed", + ActiveSkillNames: []string{"weather"}, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn(workspaceA): %v", finalizeErr) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspaceB, + TurnID: "turn-b", + SessionKey: "session-b", + Status: "completed", + ActiveSkillNames: []string{"weather"}, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn(workspaceB): %v", finalizeErr) + } + + loadedA, err := storeA.LoadProfile("weather") + if err != nil { + t.Fatalf("storeA.LoadProfile: %v", err) + } + if loadedA.WorkspaceID != workspaceA { + t.Fatalf("workspace A profile WorkspaceID = %q, want %q", loadedA.WorkspaceID, workspaceA) + } + if loadedA.UseCount != 8 { + t.Fatalf("workspace A profile UseCount = %d, want 8", loadedA.UseCount) + } + + storeB := evolution.NewStore(evolution.NewPaths(workspaceB, sharedState)) + loadedB, err := storeB.LoadProfile("weather") + if err != nil { + t.Fatalf("storeB.LoadProfile: %v", err) + } + if loadedB.WorkspaceID != workspaceB { + t.Fatalf("workspace B profile WorkspaceID = %q, want %q", loadedB.WorkspaceID, workspaceB) + } + if loadedB.UseCount != 1 { + t.Fatalf("workspace B profile UseCount = %d, want 1", loadedB.UseCount) + } + if loadedB.Origin != "manual" { + t.Fatalf("workspace B profile Origin = %q, want manual", loadedB.Origin) + } + if loadedB.CurrentVersion != "" { + t.Fatalf("workspace B profile CurrentVersion = %q, want empty", loadedB.CurrentVersion) + } +} + +func 
TestRuntime_FinalizeTurnWritesPotentiallyLearnableSignal(t *testing.T) { + workspace := t.TempDir() + now := time.Unix(1700003000, 0).UTC() + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "observe"}, + Now: func() time.Time { return now }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-learnable", + SessionKey: "session-learnable", + AgentID: "agent-1", + Status: "completed", + ToolKinds: []string{"web", "bash"}, + ActiveSkillNames: []string{"geocode", "weather"}, + FinalContent: "weather workflow completed", + FinalSuccessfulPath: []string{ + "weather", + }, + SkillContextSnapshots: []evolution.SkillContextSnapshot{ + {Sequence: 1, Trigger: "initial_build", SkillNames: []string{"geocode"}}, + {Sequence: 2, Trigger: "context_retry_rebuild", SkillNames: []string{"geocode", "weather"}}, + }, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + paths := evolution.NewPaths(workspace, "") + data, err := os.ReadFile(paths.TaskRecords) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + if len(lines) != 1 { + t.Fatalf("record file line count = %d, want 1", len(lines)) + } + + var record evolution.LearningRecord + if err := json.Unmarshal([]byte(lines[0]), &record); err != nil { + t.Fatalf("Unmarshal record: %v", err) + } + if len(record.Signals) != 0 { + t.Fatalf("Signals = %v, want empty", record.Signals) + } + if got := record.InitialSkillNames; len(got) != 0 { + t.Fatalf("InitialSkillNames = %v, want empty", got) + } + if got := record.AddedSkillNames; len(got) != 0 { + t.Fatalf("AddedSkillNames = %v, want empty", got) + } + if got := record.UsedSkillNames; len(got) != 1 || got[0] != "weather" { + t.Fatalf("UsedSkillNames = %v, want [weather]", got) + } + if got := 
record.AllLoadedSkillNames; len(got) != 0 { + t.Fatalf("AllLoadedSkillNames = %v, want empty", got) + } + if record.AttemptTrail != nil { + t.Fatalf("AttemptTrail = %+v, want nil", record.AttemptTrail) + } +} + +func TestRuntime_FinalizeTurnUsesSkillNamesFromToolExecutions(t *testing.T) { + workspace := t.TempDir() + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-skill-chain", + SessionKey: "session-skill-chain", + AgentID: "main", + Status: "completed", + UserMessage: "调用三一定理计算100", + FinalContent: "done", + ToolExecutions: []evolution.ToolExecutionRecord{ + {Name: "read_file", Success: true, SkillNames: []string{"three-one"}}, + {Name: "read_file", Success: true, SkillNames: []string{"four-two"}}, + {Name: "read_file", Success: true, SkillNames: []string{"five-three"}}, + }, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + paths := evolution.NewPaths(workspace, "") + data, err := os.ReadFile(paths.TaskRecords) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + if len(lines) != 1 { + t.Fatalf("record file line count = %d, want 1", len(lines)) + } + + var record evolution.LearningRecord + if err := json.Unmarshal([]byte(lines[0]), &record); err != nil { + t.Fatalf("Unmarshal record: %v", err) + } + if got := record.AddedSkillNames; len(got) != 0 { + t.Fatalf("AddedSkillNames = %v, want empty", got) + } + if got := record.UsedSkillNames; len(got) != 3 || got[0] != "three-one" || got[1] != "four-two" || + got[2] != "five-three" { + t.Fatalf("UsedSkillNames = %v, want [three-one four-two five-three]", got) + } + if got := record.AllLoadedSkillNames; len(got) != 0 { + t.Fatalf("AllLoadedSkillNames = %v, want 
empty", got) + } +} + +func TestRuntime_FinalizeTurnPreservesUTF8WhenTruncatingChineseOutput(t *testing.T) { + workspace := t.TempDir() + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "apply"}, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + longChinese := strings.Repeat("中文输出", 500) + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-utf8", + SessionKey: "session-utf8", + AgentID: "main", + Status: "completed", + UserMessage: "请处理这段中文输出", + FinalContent: longChinese, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + paths := evolution.NewPaths(workspace, "") + data, err := os.ReadFile(paths.TaskRecords) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + if len(lines) != 1 { + t.Fatalf("record file line count = %d, want 1", len(lines)) + } + + var record evolution.LearningRecord + if err := json.Unmarshal([]byte(lines[0]), &record); err != nil { + t.Fatalf("Unmarshal record: %v", err) + } + if !utf8.ValidString(record.FinalOutput) { + t.Fatalf("FinalOutput is not valid UTF-8: %q", record.FinalOutput) + } + if strings.ContainsRune(record.FinalOutput, '\uFFFD') { + t.Fatalf("FinalOutput contains replacement rune: %q", record.FinalOutput) + } + if !strings.HasSuffix(record.FinalOutput, "...") { + t.Fatalf("FinalOutput = %q, want truncated suffix ...", record.FinalOutput) + } +} + +func TestRuntime_FinalizeTurnPrefersExplicitAttemptTrail(t *testing.T) { + workspace := t.TempDir() + now := time.Unix(1700003500, 0).UTC() + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "observe"}, + Now: func() time.Time { return now }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), 
evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-explicit-trail", + SessionKey: "session-explicit-trail", + AgentID: "agent-1", + Status: "completed", + ToolKinds: []string{"web"}, + ActiveSkillNames: []string{"weather"}, + AttemptedSkillNames: []string{"geocode", "weather"}, + FinalSuccessfulPath: []string{"geocode", "weather"}, + SkillContextSnapshots: []evolution.SkillContextSnapshot{ + {Sequence: 1, Trigger: "initial_build", SkillNames: []string{"weather"}}, + {Sequence: 2, Trigger: "context_retry_rebuild", SkillNames: []string{"geocode", "weather"}}, + }, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + paths := evolution.NewPaths(workspace, "") + data, err := os.ReadFile(paths.TaskRecords) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + if len(lines) != 1 { + t.Fatalf("record file line count = %d, want 1", len(lines)) + } + + var record evolution.LearningRecord + if err := json.Unmarshal([]byte(lines[0]), &record); err != nil { + t.Fatalf("Unmarshal record: %v", err) + } + if record.AttemptTrail != nil { + t.Fatalf("AttemptTrail = %+v, want nil", record.AttemptTrail) + } + if got := record.UsedSkillNames; len(got) != 2 || got[0] != "geocode" || got[1] != "weather" { + t.Fatalf("UsedSkillNames = %v, want [geocode weather]", got) + } + if got := record.InitialSkillNames; len(got) != 0 { + t.Fatalf("InitialSkillNames = %v, want empty", got) + } + if got := record.AddedSkillNames; len(got) != 0 { + t.Fatalf("AddedSkillNames = %v, want empty", got) + } + if len(record.Signals) != 0 { + t.Fatalf("Signals = %v, want empty", record.Signals) + } +} + +func TestRuntime_FinalizeTurnUpdatesSkillProfileUsage(t *testing.T) { + workspace := t.TempDir() + now := time.Unix(1700000000, 0).UTC() + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{ + Enabled: true, + Mode: "observe", + }, + Now: func() time.Time { 
return now }, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-1", + SessionKey: "session-1", + AgentID: "agent-1", + Status: "completed", + ActiveSkillNames: []string{"skill-a", "skill-a"}, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + store := evolution.NewStore(evolution.NewPaths(workspace, "")) + profile, err := store.LoadProfile("skill-a") + if err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if profile.Origin != "manual" { + t.Fatalf("Origin = %q, want manual", profile.Origin) + } + if profile.UseCount != 1 { + t.Fatalf("UseCount = %d, want 1", profile.UseCount) + } + if profile.LastUsedAt != now { + t.Fatalf("LastUsedAt = %v, want %v", profile.LastUsedAt, now) + } + if profile.RetentionScore <= 0.2 { + t.Fatalf("RetentionScore = %v, want > 0.2", profile.RetentionScore) + } +} + +func TestRuntime_FinalizeTurnReactivatesColdSkill(t *testing.T) { + assertFinalizeTurnReactivatesSkill(t, "skill-cold", evolution.SkillStatusCold, 2, 0.2, 24*time.Hour) +} + +func TestRuntime_FinalizeTurnReactivatesArchivedSkill(t *testing.T) { + assertFinalizeTurnReactivatesSkill(t, "skill-archived", evolution.SkillStatusArchived, 5, 0.1, 48*time.Hour) +} + +func assertFinalizeTurnReactivatesSkill( + t *testing.T, + skillName string, + initialStatus evolution.SkillStatus, + useCount int, + retentionScore float64, + lastUsedAge time.Duration, +) { + t.Helper() + workspace := t.TempDir() + now := time.Unix(1700002000, 0).UTC() + store := evolution.NewStore(evolution.NewPaths(workspace, "")) + + if saveErr := store.SaveProfile(evolution.SkillProfile{ + SkillName: skillName, + WorkspaceID: workspace, + Status: initialStatus, + Origin: "evolved", + HumanSummary: string(initialStatus) + " skill", + LastUsedAt: now.Add(-lastUsedAge), + UseCount: useCount, + RetentionScore: retentionScore, + }); saveErr != nil { + 
t.Fatalf("SaveProfile: %v", saveErr) + } + + rt, err := evolution.NewRuntime(evolution.RuntimeOptions{ + Config: config.EvolutionConfig{Enabled: true, Mode: "observe"}, + Now: func() time.Time { return now }, + Store: store, + }) + if err != nil { + t.Fatalf("NewRuntime: %v", err) + } + + if finalizeErr := rt.FinalizeTurn(context.Background(), evolution.TurnCaseInput{ + Workspace: workspace, + TurnID: "turn-" + skillName, + Status: "completed", + ActiveSkillNames: []string{skillName}, + }); finalizeErr != nil { + t.Fatalf("FinalizeTurn: %v", finalizeErr) + } + + profile, err := store.LoadProfile(skillName) + if err != nil { + t.Fatalf("LoadProfile: %v", err) + } + if profile.Status != evolution.SkillStatusActive { + t.Fatalf("Status = %q, want %q", profile.Status, evolution.SkillStatusActive) + } +} diff --git a/pkg/evolution/skill_content.go b/pkg/evolution/skill_content.go new file mode 100644 index 000000000..3aad1fe30 --- /dev/null +++ b/pkg/evolution/skill_content.go @@ -0,0 +1,126 @@ +package evolution + +import ( + "fmt" + "os" + "strings" + + "github.com/sipeed/picoclaw/pkg/skills" +) + +const ( + maxMatchedSkillExcerptCount = 5 + maxMatchedSkillExcerptChars = 1400 + maxComponentGuidanceChars = 520 +) + +type matchedSkillExcerpt struct { + Name string + Description string + Body string +} + +func loadMatchedSkillExcerpts(matches []skills.SkillInfo) []matchedSkillExcerpt { + excerpts := make([]matchedSkillExcerpt, 0, minInt(len(matches), maxMatchedSkillExcerptCount)) + for _, match := range matches { + if len(excerpts) >= maxMatchedSkillExcerptCount { + break + } + body := readSkillBodyExcerpt(match.Path) + if body == "" { + continue + } + excerpts = append(excerpts, matchedSkillExcerpt{ + Name: strings.TrimSpace(match.Name), + Description: strings.TrimSpace(match.Description), + Body: body, + }) + } + return excerpts +} + +func readSkillBodyExcerpt(path string) string { + path = strings.TrimSpace(path) + if path == "" { + return "" + } + data, err := 
os.ReadFile(path) + if err != nil { + return "" + } + body := strings.TrimSpace(stripSkillFrontmatter(string(data))) + if body == "" { + return "" + } + body = strings.Join(strings.Fields(body), " ") + if len(body) <= maxMatchedSkillExcerptChars { + return body + } + return strings.TrimSpace(body[:maxMatchedSkillExcerptChars]) + "..." +} + +func summarizeMatchedSkillExcerpts(matches []skills.SkillInfo) string { + excerpts := loadMatchedSkillExcerpts(matches) + if len(excerpts) == 0 { + return "none" + } + + parts := make([]string, 0, len(excerpts)) + for _, excerpt := range excerpts { + header := excerpt.Name + if excerpt.Description != "" { + header += ": " + excerpt.Description + } + parts = append(parts, fmt.Sprintf("### %s\n%s", header, excerpt.Body)) + } + return strings.Join(parts, "\n\n") +} + +func synthesizedComponentBreakdown(matches []skills.SkillInfo) string { + excerpts := loadMatchedSkillExcerpts(matches) + if len(excerpts) == 0 { + return "- No component skill content was available when this shortcut was generated." + } + + lines := make([]string, 0, len(excerpts)) + for _, excerpt := range excerpts { + guidance := conciseComponentGuidance(excerpt) + if guidance == "" { + continue + } + lines = append(lines, fmt.Sprintf("- `%s`: %s", excerpt.Name, guidance)) + } + if len(lines) == 0 { + return "- Component skill content was available, but no concise guidance could be extracted." 
+ } + return strings.Join(lines, "\n") +} + +func conciseComponentGuidance(excerpt matchedSkillExcerpt) string { + description := strings.TrimSpace(excerpt.Description) + body := trimComponentGuidance(excerpt.Body) + switch { + case description != "" && body != "": + return trimComponentGuidance(description + " " + body) + case description != "": + return trimComponentGuidance(description) + default: + return body + } +} + +func trimComponentGuidance(content string) string { + content = strings.TrimSpace(content) + if content == "" { + return "" + } + content = strings.NewReplacer( + "#### ", "", + "### ", "", + "## ", "", + "# ", "", + "**", "", + ).Replace(content) + content = strings.TrimSpace(content) + return trimAtReadableBoundary(content, maxComponentGuidanceChars) +} diff --git a/pkg/evolution/skill_draft_policy.go b/pkg/evolution/skill_draft_policy.go new file mode 100644 index 000000000..b91493cec --- /dev/null +++ b/pkg/evolution/skill_draft_policy.go @@ -0,0 +1,174 @@ +package evolution + +import "strings" + +func skillDraftPromptInstructions() []string { + return []string{ + "body_or_patch must contain the complete draft body or patch content as plain text.", + "body_or_patch is an internal draft and review artifact, so it may include concise learning provenance, source task evidence, and source skill summaries when useful for human review.", + "If change_kind is create, body_or_patch must be a complete SKILL.md file with exactly two parts: YAML frontmatter and a Markdown body.", + "The YAML frontmatter must contain only name and description fields.", + "The description field must and only describe what this skill can do and when to use it.", + "The deployable Markdown body should only contain what the skill is useful for and how to use it.", + "The Markdown body is loaded only after the skill triggers, so focus on concise usage guidance and the execution steps needed to complete the task.", + "When describing an operation process in the body, do not 
// learningTraceReplacer builds the replacer that rewrites learning-provenance
// wording (headings such as "## Learned Pattern" and phrases such as
// "Learned from" or "task records") into neutral wording.
//
// The order of the pairs matters: when several old strings match at the same
// position the earliest pair wins, so "## Learned Shortcut Update" is listed
// ahead of "## Learned Shortcut".
func learningTraceReplacer() *strings.Replacer {
	rewrites := [][2]string{
		{"## Learned Shortcut Update", "## Shortcut Update"},
		{"## Learned Evolution", "## Usage Notes"},
		{"## Learned Pattern", "## Usage Pattern"},
		{"## Learned Context", "## Procedure Notes"},
		{"## Source Evidence", "## Validation"},
		{"## Source Skills", "## Procedure Details"},
		{"### Source Skills", "### Procedure Details"},
		{"## Learned Shortcut", "## Shortcut"},
		{"### Learned Shortcut", "### Shortcut"},
		{"Learned workflow for ", "Workflow for "},
		{"learned workflow for ", "workflow for "},
		{"from learned pattern: ", "for: "},
		{"Learned task:", "Task:"},
		{"learned task:", "task:"},
		{"Learned pattern:", "Pattern:"},
		{"learned pattern:", "pattern:"},
		{"Learned from", "Based on"},
		{"learned from", "based on"},
		{"Source evidence", "Validation"},
		{"source evidence", "validation"},
		{"task records", "validated examples"},
		{"Task records", "Validated examples"},
	}

	// Flatten to the old/new argument list strings.NewReplacer expects.
	oldnew := make([]string, 0, 2*len(rewrites))
	for _, pair := range rewrites {
		oldnew = append(oldnew, pair[0], pair[1])
	}
	return strings.NewReplacer(oldnew...)
}
splitSkillFrontmatter(body) + if frontmatter != "" { + body = "---\n" + frontmatter + "\n---\n" + learningTraceReplacer().Replace(strings.TrimLeft(markdownBody, "\n")) + } else { + body = learningTraceReplacer().Replace(body) + } + body = normalizeDeployableDescription(body) + return removeDeployOnlyProvenanceLines(body) +} + +func normalizeDeployableDescription(body string) string { + lines := strings.Split(body, "\n") + inFrontmatter := false + for i, line := range lines { + trimmed := strings.TrimSpace(line) + if i == 0 && trimmed == "---" { + inFrontmatter = true + continue + } + if inFrontmatter && trimmed == "---" { + break + } + if !inFrontmatter || !strings.HasPrefix(trimmed, "description:") { + continue + } + value := strings.TrimSpace(strings.TrimPrefix(trimmed, "description:")) + value = cleanDeployableDescription(value) + lines[i] = "description: " + value + break + } + return strings.Join(lines, "\n") +} + +func cleanDeployableDescription(description string) string { + description = strings.TrimSpace(strings.Trim(description, `"'`)) + for _, marker := range []string{ + " for: ", + " from learned pattern: ", + " for learned pattern: ", + } { + if idx := strings.Index(strings.ToLower(description), marker); idx >= 0 { + description = strings.TrimSpace(description[idx+len(marker):]) + break + } + } + description = strings.TrimPrefix(description, "Create combined shortcut ") + description = strings.TrimPrefix(description, "Refresh combined shortcut ") + description = strings.TrimPrefix(description, "Create shortcut ") + description = strings.TrimPrefix(description, "Refresh shortcut ") + description = strings.TrimSpace(description) + if description == "" { + return "Use this skill when the task matches its documented workflow." 
+ } + return description +} + +func sentenceFragment(text string) string { + text = strings.TrimSpace(text) + if text == "" { + return "complete the documented workflow" + } + runes := []rune(text) + if len(runes) > 0 && runes[0] >= 'A' && runes[0] <= 'Z' { + runes[0] = runes[0] + ('a' - 'A') + } + return string(runes) +} + +func trimAtReadableBoundary(content string, maxLen int) string { + content = strings.TrimSpace(content) + runes := []rune(content) + if content == "" || maxLen <= 0 || len(runes) <= maxLen { + return content + } + + cut := maxLen + searchStart := maxLen - minInt(maxLen/2, 240) + if searchStart < 0 { + searchStart = 0 + } + for i := maxLen; i >= searchStart; i-- { + switch runes[i-1] { + case '\n', '.', '!', '?', ';', ':', '。', '!', '?', ';', ':': + cut = i + goto done + } + } + for i := maxLen; i >= searchStart; i-- { + if runes[i-1] == ' ' || runes[i-1] == '\t' { + cut = i + goto done + } + } + +done: + return strings.TrimRight(strings.TrimSpace(string(runes[:cut])), ".,;:,。;:") + "..." 
+} + +func removeDeployOnlyProvenanceLines(body string) string { + lines := strings.Split(body, "\n") + out := make([]string, 0, len(lines)) + for _, line := range lines { + trimmed := strings.TrimSpace(line) + lower := strings.ToLower(trimmed) + if strings.HasPrefix(lower, "- evidence:") { + continue + } + if strings.HasPrefix(lower, "- validated examples:") { + continue + } + if strings.Contains(lower, "source_record_id") || strings.Contains(lower, "source record") { + continue + } + out = append(out, line) + } + return strings.TrimSpace(strings.Join(out, "\n")) +} diff --git a/pkg/evolution/skills_recall.go b/pkg/evolution/skills_recall.go new file mode 100644 index 000000000..fb7d2dfcc --- /dev/null +++ b/pkg/evolution/skills_recall.go @@ -0,0 +1,217 @@ +package evolution + +import ( + "os" + "path/filepath" + "sort" + "strings" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/skills" +) + +type SkillsRecaller struct { + workspace string + loader *skills.SkillsLoader +} + +func NewSkillsRecaller(workspace string) *SkillsRecaller { + builtinSkillsDir := strings.TrimSpace(os.Getenv(config.EnvBuiltinSkills)) + if builtinSkillsDir == "" { + wd, _ := os.Getwd() + builtinSkillsDir = filepath.Join(wd, "skills") + } + + globalSkillsDir := filepath.Join(config.GetHome(), "skills") + return &SkillsRecaller{ + workspace: workspace, + loader: skills.NewSkillsLoader(workspace, globalSkillsDir, builtinSkillsDir), + } +} + +func (r *SkillsRecaller) RecallSimilarSkills(rule LearningRecord) ([]skills.SkillInfo, error) { + if r == nil || r.loader == nil { + return nil, nil + } + + all := r.loader.ListSkills() + if names := explicitRecallSkillNames(rule); len(names) > 0 { + return filterSkillsByExplicitNames(all, names), nil + } + + type scored struct { + info skills.SkillInfo + score int + sourceRank int + } + + scoredList := make([]scored, 0, len(all)) + for _, skill := range all { + score := scoreSkillMatch(rule, skill) + if score <= 0 { + continue 
+ } + + if body, ok := r.loader.LoadSkill(skill.Name); ok { + score += scoreSkillBody(rule, body) + } + + scoredList = append(scoredList, scored{ + info: skill, + score: score, + sourceRank: skillSourceRank(skill.Source), + }) + } + + sort.Slice(scoredList, func(i, j int) bool { + if scoredList[i].score != scoredList[j].score { + return scoredList[i].score > scoredList[j].score + } + if scoredList[i].sourceRank != scoredList[j].sourceRank { + return scoredList[i].sourceRank < scoredList[j].sourceRank + } + return scoredList[i].info.Name < scoredList[j].info.Name + }) + + out := make([]skills.SkillInfo, 0, len(scoredList)) + for _, item := range scoredList { + out = append(out, item.info) + } + return out, nil +} + +func explicitRecallSkillNames(rule LearningRecord) []string { + names := make([]string, 0, len(rule.WinningPath)+len(rule.MatchedSkillNames)+len(rule.LateAddedSkills)) + names = append(names, normalizePath(rule.WinningPath)...) + names = append(names, normalizePath(rule.MatchedSkillNames)...) + names = append(names, normalizePath(rule.LateAddedSkills)...) 
// skillSourceRank maps a skill source label to its sort precedence; lower
// values sort first. "workspace" is 0, "global" is 1, "builtin" is 2, and any
// other label (matching is case-sensitive) is 3.
func skillSourceRank(source string) int {
	precedence := [...]string{"workspace", "global", "builtin"}
	for rank, label := range precedence {
		if source == label {
			return rank
		}
	}
	// Unknown sources sort after every known one.
	return len(precedence)
}
// tokenizeForEvolution lower-cases text and splits it into maximal runs of
// ASCII letters (a-z) and digits (0-9). Every other rune, including
// non-ASCII letters, acts as a separator. Tokens are returned in order of
// appearance and duplicates are kept.
func tokenizeForEvolution(text string) []string {
	isSeparator := func(r rune) bool {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9':
			return false
		default:
			return true
		}
	}
	// strings.FieldsFunc never yields empty fields and already returns a
	// freshly allocated slice, so no filtering or copying pass is needed.
	return strings.FieldsFunc(strings.ToLower(text), isSeparator)
}
[]byte(content), 0o644); err != nil { + t.Fatalf("WriteFile(%s): %v", name, err) + } + } + + mustWriteSkill( + filepath.Join(workspace, "skills"), + "weather", + "---\nname: weather\ndescription: weather lookup\n---\n# Weather\nUse weather queries.\n", + ) + mustWriteSkill( + filepath.Join(globalHome, ".picoclaw", "skills"), + "release", + "---\nname: release\ndescription: release flow\n---\n# Release\nRelease build.\n", + ) + mustWriteSkill( + builtinRoot, + "weather-fallback", + "---\nname: weather-fallback\ndescription: weather backup\n---\n# Weather Fallback\nBackup weather path.\n", + ) + + recaller := evolution.NewSkillsRecaller(workspace) + matches, err := recaller.RecallSimilarSkills(evolution.LearningRecord{ + Kind: evolution.RecordKindRule, + Summary: "weather native-name path", + EventCount: 4, + }) + if err != nil { + t.Fatalf("RecallSimilarSkills: %v", err) + } + if len(matches) == 0 { + t.Fatal("expected at least one match") + } + if matches[0].Name != "weather" { + t.Fatalf("first match = %q, want weather", matches[0].Name) + } +} + +func TestRecallSimilarSkills_UsesExplicitWinningPathOnly(t *testing.T) { + workspace := t.TempDir() + globalHome := t.TempDir() + builtinRoot := t.TempDir() + + t.Setenv("HOME", globalHome) + t.Setenv("PICOCLAW_BUILTIN_SKILLS", builtinRoot) + + mustWriteSkill := func(root, name, description string) { + t.Helper() + dir := filepath.Join(root, name) + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatalf("MkdirAll(%s): %v", dir, err) + } + content := "---\nname: " + name + "\ndescription: " + description + "\n---\n# " + name + "\nUse this skill.\n" + if err := os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte(content), 0o644); err != nil { + t.Fatalf("WriteFile(%s): %v", name, err) + } + } + + workspaceSkills := filepath.Join(workspace, "skills") + mustWriteSkill(workspaceSkills, "three-one-theorem", "Add 31 and continue theorem calculation.") + mustWriteSkill(workspaceSkills, "four-two-theorem", "Add 42 and 
continue theorem calculation.") + mustWriteSkill(workspaceSkills, "five-three-theorem", "Subtract 53 and finish theorem calculation.") + mustWriteSkill(workspaceSkills, "github", "Interact with GitHub using the gh CLI.") + mustWriteSkill(workspaceSkills, "tmux", "Remote-control tmux sessions by sending keystrokes.") + + recaller := evolution.NewSkillsRecaller(workspace) + matches, err := recaller.RecallSimilarSkills(evolution.LearningRecord{ + Kind: evolution.RecordKindPattern, + Summary: "Calculate a value by applying the Three-One Theorem rules", + WinningPath: []string{ + "three-one-theorem", + "four-two-theorem", + "five-three-theorem", + }, + MatchedSkillNames: []string{ + "three-one-theorem", + "four-two-theorem", + "five-three-theorem", + }, + }) + if err != nil { + t.Fatalf("RecallSimilarSkills: %v", err) + } + + got := make([]string, 0, len(matches)) + for _, match := range matches { + got = append(got, match.Name) + } + want := []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"} + if strings.Join(got, ",") != strings.Join(want, ",") { + t.Fatalf("matches = %v, want %v", got, want) + } +} diff --git a/pkg/evolution/store.go b/pkg/evolution/store.go new file mode 100644 index 000000000..2e7890799 --- /dev/null +++ b/pkg/evolution/store.go @@ -0,0 +1,672 @@ +package evolution + +import ( + "bufio" + "bytes" + "context" + "crypto/sha1" + "encoding/hex" + "encoding/json" + "errors" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/sipeed/picoclaw/pkg/fileutil" + "github.com/sipeed/picoclaw/pkg/skills" +) + +type Store struct { + paths Paths +} + +func NewStore(paths Paths) *Store { + return &Store{paths: paths} +} + +var storeFileLocks sync.Map + +func (s *Store) AppendLearningRecord(ctx context.Context, record LearningRecord) error { + switch record.Kind { + case RecordKindPattern, legacyRecordKindRule: + return s.AppendPatternRecords([]LearningRecord{record}) + default: + return s.AppendTaskRecord(ctx, record) + } +} 
+ +func (s *Store) AppendLearningRecords(records []LearningRecord) error { + taskRecords := make([]LearningRecord, 0, len(records)) + patternRecords := make([]LearningRecord, 0, len(records)) + for _, record := range records { + switch record.Kind { + case RecordKindPattern, legacyRecordKindRule: + patternRecords = append(patternRecords, record) + default: + taskRecords = append(taskRecords, record) + } + } + if err := s.AppendTaskRecords(context.Background(), taskRecords); err != nil { + return err + } + return s.AppendPatternRecords(patternRecords) +} + +func (s *Store) AppendTaskRecord(ctx context.Context, record LearningRecord) error { + return s.AppendTaskRecords(ctx, []LearningRecord{record}) +} + +func (s *Store) AppendTaskRecords(ctx context.Context, records []LearningRecord) error { + return s.appendJSONLRecords(ctx, s.paths.TaskRecords, records) +} + +func (s *Store) AppendPatternRecords(records []LearningRecord) error { + return s.appendJSONLRecords(context.Background(), s.paths.PatternRecords, records) +} + +func (s *Store) appendJSONLRecords(ctx context.Context, path string, records []LearningRecord) error { + if len(records) == 0 { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + unlock := lockStoreFile(path) + defer unlock() + + if mkdirErr := os.MkdirAll(filepath.Dir(path), 0o755); mkdirErr != nil { + return mkdirErr + } + + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer f.Close() + + enc := json.NewEncoder(f) + for _, record := range records { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + if err := enc.Encode(record); err != nil { + return err + } + } + return nil +} + +func (s *Store) LoadLearningRecords() ([]LearningRecord, error) { + taskRecords, err := s.LoadTaskRecords() + if err != nil { + return nil, err + } + patternRecords, err := s.LoadPatternRecords() + if err != nil { + return nil, err + } + return 
append(taskRecords, patternRecords...), nil +} + +func (s *Store) LoadTaskRecords() ([]LearningRecord, error) { + records, err := s.loadRecordsFromPath(s.paths.TaskRecords) + if err != nil { + return nil, err + } + legacy, err := s.loadLegacyTaskRecords() + if err != nil { + return nil, err + } + return mergeLearningRecordsByID(legacy, records), nil +} + +func (s *Store) LoadPatternRecords() ([]LearningRecord, error) { + records, err := s.loadRecordsFromPath(s.paths.PatternRecords) + if err != nil { + return nil, err + } + legacy, err := s.loadLegacyPatternRecords() + if err != nil { + return nil, err + } + return mergeLearningRecordsByID(legacy, records), nil +} + +func (s *Store) loadRecordsFromPath(path string) ([]LearningRecord, error) { + var records []LearningRecord + if err := decodeJSONLLines(path, func(line []byte) error { + var record LearningRecord + if err := json.Unmarshal(line, &record); err != nil { + return err + } + records = append(records, record) + return nil + }); err != nil { + return nil, err + } + return records, nil +} + +func (s *Store) loadLegacyTaskRecords() ([]LearningRecord, error) { + records, err := s.loadRecordsFromPath(s.paths.LearningRecords) + if err != nil { + return nil, err + } + out := make([]LearningRecord, 0, len(records)) + for _, record := range records { + if isTaskRecordKind(record.Kind) { + out = append(out, record) + } + } + return out, nil +} + +func (s *Store) loadLegacyPatternRecords() ([]LearningRecord, error) { + records, err := s.loadRecordsFromPath(s.paths.LearningRecords) + if err != nil { + return nil, err + } + out := make([]LearningRecord, 0, len(records)) + for _, record := range records { + if isPatternRecordKind(record.Kind) { + out = append(out, record) + } + } + return out, nil +} + +func (s *Store) SaveTaskRecords(records []LearningRecord) error { + return s.saveJSONLRecords(s.paths.TaskRecords, records) +} + +func (s *Store) MarkTaskRecordsClustered(ids []string) error { + if len(ids) == 0 { + return 
nil + } + target := make(map[string]struct{}, len(ids)) + for _, id := range ids { + id = strings.TrimSpace(id) + if id == "" { + continue + } + target[id] = struct{}{} + } + if len(target) == 0 { + return nil + } + + unlock := lockStoreFile(s.paths.TaskRecords) + defer unlock() + + current, err := s.loadRecordsFromPath(s.paths.TaskRecords) + if err != nil { + return err + } + legacy, err := s.loadLegacyTaskRecords() + if err != nil { + return err + } + records := mergeLearningRecordsByID(legacy, current) + + hasTargetRecordInWorkspace := make(map[string]bool, len(target)) + if strings.TrimSpace(s.paths.Workspace) != "" { + for _, record := range records { + if _, ok := target[record.ID]; !ok { + continue + } + if record.WorkspaceID == s.paths.Workspace { + hasTargetRecordInWorkspace[record.ID] = true + } + } + } + + changed := false + for i := range records { + if _, ok := target[records[i].ID]; !ok { + continue + } + if hasTargetRecordInWorkspace[records[i].ID] && records[i].WorkspaceID != s.paths.Workspace { + continue + } + records[i].Status = RecordStatus("clustered") + changed = true + } + if !changed { + return nil + } + return s.saveJSONLRecordsLocked(s.paths.TaskRecords, records) +} + +func (s *Store) SavePatternRecords(records []LearningRecord) error { + return s.saveJSONLRecords(s.paths.PatternRecords, records) +} + +func (s *Store) MergePatternRecords(records []LearningRecord) error { + if len(records) == 0 { + return nil + } + + unlock := lockStoreFile(s.paths.PatternRecords) + defer unlock() + + current, err := s.loadRecordsFromPath(s.paths.PatternRecords) + if err != nil { + return err + } + legacy, err := s.loadLegacyPatternRecords() + if err != nil { + return err + } + merged := mergeLearningRecordsByID(mergeLearningRecordsByID(legacy, current), records) + return s.saveJSONLRecordsLocked(s.paths.PatternRecords, merged) +} + +func (s *Store) saveJSONLRecords(path string, records []LearningRecord) error { + unlock := lockStoreFile(path) + defer 
unlock() + + return s.saveJSONLRecordsLocked(path, records) +} + +func (s *Store) saveJSONLRecordsLocked(path string, records []LearningRecord) error { + if mkdirErr := os.MkdirAll(filepath.Dir(path), 0o755); mkdirErr != nil { + return mkdirErr + } + + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + for _, record := range records { + if err := enc.Encode(record); err != nil { + return err + } + } + return fileutil.WriteFileAtomic(path, buf.Bytes(), 0o644) +} + +func mergeLearningRecordsByID(base, updates []LearningRecord) []LearningRecord { + out := append([]LearningRecord(nil), base...) + indexByID := make(map[string]int, len(out)+len(updates)) + for i, record := range out { + key := learningRecordMergeKey(record) + if key == "" { + continue + } + indexByID[key] = i + } + for _, record := range updates { + key := learningRecordMergeKey(record) + if key == "" { + out = append(out, record) + continue + } + if idx, ok := indexByID[key]; ok { + out[idx] = record + continue + } + indexByID[key] = len(out) + out = append(out, record) + } + return out +} + +func learningRecordMergeKey(record LearningRecord) string { + id := strings.TrimSpace(record.ID) + if id == "" { + return "" + } + return strings.TrimSpace(record.WorkspaceID) + "\x00" + id +} + +func (s *Store) SaveDrafts(drafts []SkillDraft) error { + unlock := lockStoreFile(s.paths.SkillDrafts) + defer unlock() + + existing, err := s.LoadDrafts() + if err != nil { + return err + } + + indexByKey := make(map[string]int, len(existing)) + for i, draft := range existing { + indexByKey[draftKey(draft.WorkspaceID, draft.ID)] = i + } + + for _, draft := range drafts { + key := draftKey(draft.WorkspaceID, draft.ID) + if idx, ok := indexByKey[key]; ok { + existing[idx] = draft + continue + } + indexByKey[key] = len(existing) + existing = append(existing, draft) + } + + data, err := json.MarshalIndent(existing, "", " ") + if err != nil { + return err + } + return fileutil.WriteFileAtomic(s.paths.SkillDrafts, data, 
0o644) +} + +func (s *Store) LoadDrafts() ([]SkillDraft, error) { + data, err := os.ReadFile(s.paths.SkillDrafts) + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + if err != nil { + return nil, err + } + if len(bytes.TrimSpace(data)) == 0 { + return nil, nil + } + + var drafts []SkillDraft + if err := json.Unmarshal(data, &drafts); err != nil { + return nil, err + } + return drafts, nil +} + +func (s *Store) SaveProfile(profile SkillProfile) error { + path, err := s.profilePath(profile.WorkspaceID, profile.SkillName) + if err != nil { + return err + } + unlock := lockStoreFile(path) + defer unlock() + + if mkdirErr := os.MkdirAll(filepath.Dir(path), 0o755); mkdirErr != nil { + return mkdirErr + } + + data, err := json.MarshalIndent(profile, "", " ") + if err != nil { + return err + } + return fileutil.WriteFileAtomic(path, data, 0o644) +} + +func (s *Store) LoadProfile(skillName string) (SkillProfile, error) { + return s.loadProfileForWorkspace(strings.TrimSpace(s.paths.Workspace), skillName) +} + +func (s *Store) UpdateProfile( + workspaceID, skillName string, + update func(profile *SkillProfile, exists bool) error, +) error { + targetPath, err := s.profilePath(workspaceID, skillName) + if err != nil { + return err + } + + unlock := lockStoreFile(targetPath) + defer unlock() + + profile, err := s.loadProfileForWorkspace(workspaceID, skillName) + exists := err == nil + if errors.Is(err, os.ErrNotExist) { + profile = SkillProfile{} + } else if err != nil { + return err + } + + if updateErr := update(&profile, exists); updateErr != nil { + return updateErr + } + if !exists && isZeroSkillProfile(profile) { + return nil + } + if mkdirErr := os.MkdirAll(filepath.Dir(targetPath), 0o755); mkdirErr != nil { + return mkdirErr + } + + data, err := json.MarshalIndent(profile, "", " ") + if err != nil { + return err + } + return fileutil.WriteFileAtomic(targetPath, data, 0o644) +} + +func (s *Store) loadProfileForWorkspace(workspaceID, skillName string) 
(SkillProfile, error) { + paths, err := s.profileLookupPaths(workspaceID, skillName) + if err != nil { + return SkillProfile{}, err + } + for _, path := range paths { + profile, loadErr := s.loadProfileFromPath(path) + if errors.Is(loadErr, os.ErrNotExist) { + continue + } + if loadErr != nil { + return SkillProfile{}, loadErr + } + return profile, nil + } + return SkillProfile{}, os.ErrNotExist +} + +func isZeroSkillProfile(profile SkillProfile) bool { + return profile.SkillName == "" && + profile.WorkspaceID == "" && + profile.CurrentVersion == "" && + profile.Status == "" && + profile.Origin == "" && + profile.HumanSummary == "" && + profile.ChangeReason == "" && + len(profile.IntendedUseCases) == 0 && + len(profile.PreferredEntryPath) == 0 && + len(profile.AvoidPatterns) == 0 && + profile.LastUsedAt.IsZero() && + profile.UseCount == 0 && + profile.RetentionScore == 0 && + len(profile.VersionHistory) == 0 +} + +func (s *Store) LoadProfiles() ([]SkillProfile, error) { + entries, err := os.ReadDir(s.paths.ProfilesDir) + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + if err != nil { + return nil, err + } + + profiles := make([]SkillProfile, 0, len(entries)) + for _, entry := range entries { + entryPath := filepath.Join(s.paths.ProfilesDir, entry.Name()) + if entry.IsDir() { + nestedProfiles, loadErr := s.loadProfilesFromDir(entryPath) + if loadErr != nil { + return nil, loadErr + } + profiles = append(profiles, nestedProfiles...) 
// decodeJSONLLines reads the JSON Lines file at path and calls decode once
// for every non-blank line, in file order, with surrounding whitespace
// removed.
//
// A missing file is treated as empty and returns nil. Lines may be of any
// length: the file is read whole instead of through a bufio.Scanner, whose
// token-size cap would make a file with one oversized record unreadable.
//
// If decode fails with a *json.SyntaxError on the last non-blank line, the
// line is skipped and nil is returned (presumably to tolerate a truncated
// trailing write). Any other decode error, or a syntax error on an earlier
// line, stops processing and is returned.
func decodeJSONLLines(path string, decode func(line []byte) error) error {
	data, err := os.ReadFile(path)
	if errors.Is(err, os.ErrNotExist) {
		return nil
	}
	if err != nil {
		return err
	}

	rawLines := bytes.Split(data, []byte{'\n'})
	lines := make([][]byte, 0, len(rawLines))
	for _, raw := range rawLines {
		// TrimSpace also strips the '\r' of CRLF line endings.
		if line := bytes.TrimSpace(raw); len(line) > 0 {
			lines = append(lines, line)
		}
	}

	for i, line := range lines {
		decodeErr := decode(line)
		if decodeErr == nil {
			continue
		}
		var syntaxErr *json.SyntaxError
		if i == len(lines)-1 && errors.As(decodeErr, &syntaxErr) {
			return nil
		}
		return decodeErr
	}
	return nil
}
string) ([]SkillProfile, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + + profiles := make([]SkillProfile, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != ".json" { + continue + } + profile, err := s.loadProfileFromPath(filepath.Join(dir, entry.Name())) + if err != nil { + return nil, err + } + profiles = append(profiles, profile) + } + return profiles, nil +} + +func (s *Store) loadProfileFromPath(path string) (SkillProfile, error) { + data, err := os.ReadFile(path) + if err != nil { + return SkillProfile{}, err + } + + var profile SkillProfile + if err := json.Unmarshal(data, &profile); err != nil { + return SkillProfile{}, err + } + return profile, nil +} + +func (s *Store) profileLookupPaths(workspaceID, skillName string) ([]string, error) { + if err := skills.ValidateSkillName(skillName); err != nil { + return nil, err + } + + paths := make([]string, 0, 4) + seen := make(map[string]struct{}, 4) + appendPath := func(path string) { + if path == "" { + return + } + if _, ok := seen[path]; ok { + return + } + paths = append(paths, path) + seen[path] = struct{}{} + } + + workspaceID = strings.TrimSpace(workspaceID) + if workspaceID != "" { + path, err := s.profilePath(workspaceID, skillName) + if err != nil { + return nil, err + } + appendPath(path) + if !usesDefaultWorkspaceState(s.paths, workspaceID) { + return paths, nil + } + } + + legacyPath, err := s.profilePath("", skillName) + if err != nil { + return nil, err + } + appendPath(legacyPath) + return paths, nil +} + +func workspaceScopeDir(workspaceID string) string { + sum := sha1.Sum([]byte(workspaceID)) + base := filepath.Base(filepath.Clean(workspaceID)) + base = sanitizeWorkspaceComponent(base) + if base == "" || base == "." 
// sanitizeWorkspaceComponent rewrites value so it contains only ASCII
// letters, ASCII digits, '-', '_' and '.'. Every other rune becomes a single
// '-', and leading and trailing '-' characters are then stripped.
func sanitizeWorkspaceComponent(value string) string {
	keepOrDash := func(r rune) rune {
		isLower := 'a' <= r && r <= 'z'
		isUpper := 'A' <= r && r <= 'Z'
		isDigit := '0' <= r && r <= '9'
		if isLower || isUpper || isDigit || r == '-' || r == '_' || r == '.' {
			return r
		}
		return '-'
	}
	return strings.Trim(strings.Map(keepOrDash, value), "-")
}
t.Fatalf("legacy learning records file should not be written, stat err = %v", statErr) + } + if _, statErr := os.Stat(paths.TaskRecords); statErr != nil { + t.Fatalf("task records file should exist: %v", statErr) + } + if _, statErr := os.Stat(paths.PatternRecords); statErr != nil { + t.Fatalf("pattern records file should exist: %v", statErr) + } +} + +func TestStore_LoadTaskRecordsMergesLegacyWhenSplitFileExists(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + legacy := evolution.LearningRecord{ + ID: "legacy-task", + Kind: evolution.RecordKindTask, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "legacy task", + Status: evolution.RecordStatus("new"), + } + data, err := json.Marshal(legacy) + if err != nil { + t.Fatalf("Marshal legacy: %v", err) + } + if mkdirErr := os.MkdirAll(paths.RootDir, 0o755); mkdirErr != nil { + t.Fatalf("MkdirAll: %v", mkdirErr) + } + if writeErr := os.WriteFile(paths.LearningRecords, append(data, '\n'), 0o644); writeErr != nil { + t.Fatalf("WriteFile legacy: %v", writeErr) + } + + current := evolution.LearningRecord{ + ID: "current-task", + Kind: evolution.RecordKindTask, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "current task", + Status: evolution.RecordStatus("new"), + } + if appendErr := store.AppendTaskRecord(context.Background(), current); appendErr != nil { + t.Fatalf("AppendTaskRecord: %v", appendErr) + } + + records, err := store.LoadTaskRecords() + if err != nil { + t.Fatalf("LoadTaskRecords: %v", err) + } + if len(records) != 2 { + t.Fatalf("len(records) = %d, want 2: %+v", len(records), records) + } + ids := records[0].ID + "," + records[1].ID + if !strings.Contains(ids, "legacy-task") || !strings.Contains(ids, "current-task") { + t.Fatalf("records should include legacy and current task IDs, got %q", ids) + } +} + +func TestStore_LoadPatternRecordsMergesLegacyWhenSplitFileExists(t 
*testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + legacy := evolution.LearningRecord{ + ID: "legacy-pattern", + Kind: evolution.RecordKindPattern, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "legacy pattern", + Status: evolution.RecordStatus("ready"), + } + data, err := json.Marshal(legacy) + if err != nil { + t.Fatalf("Marshal legacy: %v", err) + } + if mkdirErr := os.MkdirAll(paths.RootDir, 0o755); mkdirErr != nil { + t.Fatalf("MkdirAll: %v", mkdirErr) + } + if writeErr := os.WriteFile(paths.LearningRecords, append(data, '\n'), 0o644); writeErr != nil { + t.Fatalf("WriteFile legacy: %v", writeErr) + } + + current := evolution.LearningRecord{ + ID: "current-pattern", + Kind: evolution.RecordKindPattern, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "current pattern", + Status: evolution.RecordStatus("ready"), + } + if appendErr := store.AppendPatternRecords([]evolution.LearningRecord{current}); appendErr != nil { + t.Fatalf("AppendPatternRecords: %v", appendErr) + } + + records, err := store.LoadPatternRecords() + if err != nil { + t.Fatalf("LoadPatternRecords: %v", err) + } + if len(records) != 2 { + t.Fatalf("len(records) = %d, want 2: %+v", len(records), records) + } + ids := records[0].ID + "," + records[1].ID + if !strings.Contains(ids, "legacy-pattern") || !strings.Contains(ids, "current-pattern") { + t.Fatalf("records should include legacy and current pattern IDs, got %q", ids) + } +} + +func TestStore_MarkTaskRecordsClusteredPreservesNewerAppendedRecords(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + first := evolution.LearningRecord{ + ID: "task-1", + Kind: evolution.RecordKindTask, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "first task", + Status: evolution.RecordStatus("new"), + } + if err := 
store.AppendTaskRecord(context.Background(), first); err != nil { + t.Fatalf("AppendTaskRecord(first): %v", err) + } + if _, err := store.LoadTaskRecords(); err != nil { + t.Fatalf("LoadTaskRecords snapshot: %v", err) + } + + second := evolution.LearningRecord{ + ID: "task-2", + Kind: evolution.RecordKindTask, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "second task", + Status: evolution.RecordStatus("new"), + } + if err := store.AppendTaskRecord(context.Background(), second); err != nil { + t.Fatalf("AppendTaskRecord(second): %v", err) + } + + if err := store.MarkTaskRecordsClustered([]string{"task-1"}); err != nil { + t.Fatalf("MarkTaskRecordsClustered: %v", err) + } + + records, err := store.LoadTaskRecords() + if err != nil { + t.Fatalf("LoadTaskRecords: %v", err) + } + if len(records) != 2 { + t.Fatalf("len(records) = %d, want 2: %+v", len(records), records) + } + statusByID := map[string]evolution.RecordStatus{} + for _, record := range records { + statusByID[record.ID] = record.Status + } + if statusByID["task-1"] != evolution.RecordStatus("clustered") { + t.Fatalf("task-1 status = %q, want clustered", statusByID["task-1"]) + } + if statusByID["task-2"] != evolution.RecordStatus("new") { + t.Fatalf("task-2 status = %q, want new", statusByID["task-2"]) + } +} + +func TestStore_MergeKeepsSameRecordIDAcrossWorkspaces(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths("workspace-a", root)) + + records := []evolution.LearningRecord{ + { + ID: "main-turn-1", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-a", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "workspace a task", + Status: evolution.RecordStatus("new"), + }, + { + ID: "main-turn-1", + Kind: evolution.RecordKindTask, + WorkspaceID: "workspace-b", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "workspace b task", + Status: evolution.RecordStatus("new"), + }, + } + if err := 
store.AppendTaskRecords(context.Background(), records); err != nil { + t.Fatalf("AppendTaskRecords: %v", err) + } + + loaded, err := store.LoadTaskRecords() + if err != nil { + t.Fatalf("LoadTaskRecords: %v", err) + } + if len(loaded) != 2 { + t.Fatalf("len(loaded) = %d, want 2: %+v", len(loaded), loaded) + } + + if markErr := store.MarkTaskRecordsClustered([]string{"main-turn-1"}); markErr != nil { + t.Fatalf("MarkTaskRecordsClustered: %v", markErr) + } + loaded, err = store.LoadTaskRecords() + if err != nil { + t.Fatalf("LoadTaskRecords after clustered: %v", err) + } + statusByWorkspace := map[string]evolution.RecordStatus{} + for _, record := range loaded { + statusByWorkspace[record.WorkspaceID] = record.Status + } + if statusByWorkspace["workspace-a"] != evolution.RecordStatus("clustered") { + t.Fatalf("workspace-a status = %q, want clustered", statusByWorkspace["workspace-a"]) + } + if statusByWorkspace["workspace-b"] != evolution.RecordStatus("new") { + t.Fatalf("workspace-b status = %q, want new", statusByWorkspace["workspace-b"]) + } +} + +func TestStore_MergePatternRecordsPreservesNewerWorkspaceRecords(t *testing.T) { + root := t.TempDir() + store := evolution.NewStore(evolution.NewPaths(root, "")) + + first := evolution.LearningRecord{ + ID: "pattern-a", + Kind: evolution.RecordKindPattern, + WorkspaceID: "workspace-a", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "workspace a pattern", + Status: evolution.RecordStatus("ready"), + } + if err := store.SavePatternRecords([]evolution.LearningRecord{first}); err != nil { + t.Fatalf("SavePatternRecords(first): %v", err) + } + if _, err := store.LoadPatternRecords(); err != nil { + t.Fatalf("LoadPatternRecords snapshot: %v", err) + } + + second := evolution.LearningRecord{ + ID: "pattern-b", + Kind: evolution.RecordKindPattern, + WorkspaceID: "workspace-b", + CreatedAt: time.Unix(1700000100, 0).UTC(), + Summary: "workspace b pattern", + Status: evolution.RecordStatus("ready"), + } + if err := 
store.MergePatternRecords([]evolution.LearningRecord{second}); err != nil { + t.Fatalf("MergePatternRecords: %v", err) + } + + records, err := store.LoadPatternRecords() + if err != nil { + t.Fatalf("LoadPatternRecords: %v", err) + } + if len(records) != 2 { + t.Fatalf("len(records) = %d, want 2: %+v", len(records), records) + } + ids := records[0].ID + "," + records[1].ID + if !strings.Contains(ids, "pattern-a") || !strings.Contains(ids, "pattern-b") { + t.Fatalf("records should include both workspace patterns, got %q", ids) + } +} + +func TestStore_SaveDraftsOverwritesByID(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + first := evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "prefer native-name path first", + BodyOrPatch: "## Start Here", + Status: evolution.DraftStatusCandidate, + } + second := first + second.HumanSummary = "updated summary" + + if err := store.SaveDrafts([]evolution.SkillDraft{first}); err != nil { + t.Fatalf("SaveDrafts(first): %v", err) + } + if err := store.SaveDrafts([]evolution.SkillDraft{second}); err != nil { + t.Fatalf("SaveDrafts(second): %v", err) + } + + loaded, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(loaded) != 1 { + t.Fatalf("len(loaded) = %d, want 1", len(loaded)) + } + if loaded[0].HumanSummary != "updated summary" { + t.Fatalf("HumanSummary = %q, want %q", loaded[0].HumanSummary, "updated summary") + } +} + +func TestStore_SaveDraftsKeepsSameIDDifferentWorkspace(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + first := evolution.SkillDraft{ + ID: "draft-1", + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + 
SourceRecordID: "rule-1", + TargetSkillName: "weather", + DraftType: evolution.DraftTypeShortcut, + ChangeKind: evolution.ChangeKindAppend, + HumanSummary: "workspace one", + BodyOrPatch: "## Start Here", + Status: evolution.DraftStatusCandidate, + } + second := first + second.WorkspaceID = "ws-2" + second.HumanSummary = "workspace two" + + if err := store.SaveDrafts([]evolution.SkillDraft{first}); err != nil { + t.Fatalf("SaveDrafts(first): %v", err) + } + if err := store.SaveDrafts([]evolution.SkillDraft{second}); err != nil { + t.Fatalf("SaveDrafts(second): %v", err) + } + + loaded, err := store.LoadDrafts() + if err != nil { + t.Fatalf("LoadDrafts: %v", err) + } + if len(loaded) != 2 { + t.Fatalf("len(loaded) = %d, want 2", len(loaded)) + } + if loaded[0].WorkspaceID == loaded[1].WorkspaceID { + t.Fatalf("loaded drafts should keep distinct workspace IDs: %+v", loaded) + } +} + +func TestStore_LoadLearningRecordsIgnoresTruncatedTrailingLine(t *testing.T) { + root := t.TempDir() + paths := evolution.NewPaths(root, "") + store := evolution.NewStore(paths) + + record := evolution.LearningRecord{ + ID: "case-1", + Kind: evolution.RecordKindCase, + WorkspaceID: "ws-1", + CreatedAt: time.Unix(1700000000, 0).UTC(), + Summary: "weather task completed", + Status: evolution.RecordStatus("new"), + } + if err := store.AppendLearningRecords([]evolution.LearningRecord{record}); err != nil { + t.Fatalf("AppendLearningRecords: %v", err) + } + + f, err := os.OpenFile(paths.TaskRecords, os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + t.Fatalf("OpenFile: %v", err) + } + if _, writeErr := f.WriteString("{\"id\":\"broken\""); writeErr != nil { + f.Close() + t.Fatalf("WriteString: %v", writeErr) + } + if closeErr := f.Close(); closeErr != nil { + t.Fatalf("Close: %v", closeErr) + } + + loaded, err := store.LoadLearningRecords() + if err != nil { + t.Fatalf("LoadLearningRecords: %v", err) + } + if len(loaded) != 1 { + t.Fatalf("len(loaded) = %d, want 1", len(loaded)) + } + if 
loaded[0].ID != "case-1" { + t.Fatalf("loaded[0].ID = %q, want %q", loaded[0].ID, "case-1") + } + + data, err := os.ReadFile(paths.TaskRecords) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if !strings.Contains(string(data), "\"broken\"") { + t.Fatalf("expected test fixture to include broken trailing line") + } +} diff --git a/pkg/evolution/success_judge.go b/pkg/evolution/success_judge.go new file mode 100644 index 000000000..b230eb54c --- /dev/null +++ b/pkg/evolution/success_judge.go @@ -0,0 +1,138 @@ +package evolution + +import ( + "context" + "encoding/json" + "strings" + + "github.com/sipeed/picoclaw/pkg/providers" +) + +type TaskSuccessDecision struct { + Success bool + Reason string +} + +type SuccessJudge interface { + JudgeTaskRecord(ctx context.Context, record LearningRecord) (TaskSuccessDecision, error) +} + +type HeuristicSuccessJudge struct{} + +func (j *HeuristicSuccessJudge) JudgeTaskRecord( + _ context.Context, + record LearningRecord, +) (TaskSuccessDecision, error) { + if record.Success == nil || !*record.Success { + return TaskSuccessDecision{Success: false, Reason: "task not completed"}, nil + } + if strings.TrimSpace(record.Summary) == "" { + return TaskSuccessDecision{Success: false, Reason: "missing summary"}, nil + } + if strings.EqualFold(strings.TrimSpace(record.SessionKey), "heartbeat") { + return TaskSuccessDecision{Success: false, Reason: "heartbeat session"}, nil + } + if strings.EqualFold(strings.TrimSpace(record.FinalOutput), "HEARTBEAT_OK") { + return TaskSuccessDecision{Success: false, Reason: "heartbeat output"}, nil + } + if strings.TrimSpace(record.FinalOutput) == "" { + return TaskSuccessDecision{Success: false, Reason: "missing final output"}, nil + } + return TaskSuccessDecision{Success: true, Reason: "heuristic success"}, nil +} + +type LLMTaskSuccessJudge struct { + provider providers.LLMProvider + model string + fallback SuccessJudge +} + +type llmTaskSuccessResponse struct { + Success bool `json:"success"` + 
Reason string `json:"reason"` +} + +func NewLLMTaskSuccessJudge(provider providers.LLMProvider, model string, fallback SuccessJudge) *LLMTaskSuccessJudge { + if fallback == nil { + fallback = &HeuristicSuccessJudge{} + } + return &LLMTaskSuccessJudge{ + provider: provider, + model: strings.TrimSpace(model), + fallback: fallback, + } +} + +func (j *LLMTaskSuccessJudge) JudgeTaskRecord( + ctx context.Context, + record LearningRecord, +) (TaskSuccessDecision, error) { + if j == nil || j.provider == nil { + return j.fallbackDecision(ctx, record) + } + + model := strings.TrimSpace(j.model) + if model == "" { + model = strings.TrimSpace(j.provider.GetDefaultModel()) + } + if model == "" { + return j.fallbackDecision(ctx, record) + } + + callCtx, cancel := withLLMCallTimeout(ctx, llmTaskSuccessJudgeTimeout) + defer cancel() + resp, err := j.provider.Chat(callCtx, []providers.Message{ + { + Role: "system", + Content: "Return exactly one JSON object with fields success:boolean and reason:string. 
No markdown fences.", + }, + { + Role: "user", + Content: buildTaskSuccessJudgePrompt(record), + }, + }, nil, model, map[string]any{"temperature": 0}) + if err != nil || resp == nil { + return j.fallbackDecision(ctx, record) + } + + content := strings.TrimSpace(resp.Content) + content = strings.TrimPrefix(content, "```json") + content = strings.TrimPrefix(content, "```") + content = strings.TrimSuffix(content, "```") + content = strings.TrimSpace(content) + if content == "" { + return j.fallbackDecision(ctx, record) + } + + var payload llmTaskSuccessResponse + if err := json.Unmarshal([]byte(content), &payload); err != nil { + return j.fallbackDecision(ctx, record) + } + return TaskSuccessDecision{ + Success: payload.Success, + Reason: strings.TrimSpace(payload.Reason), + }, nil +} + +func (j *LLMTaskSuccessJudge) fallbackDecision( + ctx context.Context, + record LearningRecord, +) (TaskSuccessDecision, error) { + if j == nil || j.fallback == nil { + return TaskSuccessDecision{Success: false, Reason: "no success judge available"}, nil + } + return j.fallback.JudgeTaskRecord(ctx, record) +} + +func buildTaskSuccessJudgePrompt(record LearningRecord) string { + lines := []string{ + "Decide whether this agent task truly achieved the user's goal.", + "Reject tasks that are only partial reasoning, only describe future steps, or obviously did not complete the requested outcome.", + "Accept completed custom workspace skill/theorem tasks when the final output gives a concrete result or concrete completed procedure.", + "", + "Summary: " + fallbackString(record.Summary, "none"), + "Final output: " + fallbackString(record.FinalOutput, "none"), + "Used skills: " + joinOrFallback(record.UsedSkillNames, "none"), + } + return strings.Join(lines, "\n") +} diff --git a/pkg/evolution/types.go b/pkg/evolution/types.go new file mode 100644 index 000000000..0cb6ba792 --- /dev/null +++ b/pkg/evolution/types.go @@ -0,0 +1,153 @@ +package evolution + +import "time" + +type RecordKind 
string + +const ( + RecordKindTask RecordKind = "task" + RecordKindPattern RecordKind = "pattern" + legacyRecordKindCase RecordKind = "case" + legacyRecordKindRule RecordKind = "rule" + // Deprecated: use RecordKindTask. + RecordKindCase = RecordKindTask + // Deprecated: use RecordKindPattern. + RecordKindRule = RecordKindPattern +) + +type RecordStatus string + +type DraftType string + +const ( + DraftTypeWorkflow DraftType = "workflow" + DraftTypeShortcut DraftType = "shortcut" +) + +type ChangeKind string + +const ( + ChangeKindCreate ChangeKind = "create" + ChangeKindAppend ChangeKind = "append" + ChangeKindReplace ChangeKind = "replace" + ChangeKindMerge ChangeKind = "merge" +) + +type DraftStatus string + +const ( + DraftStatusCandidate DraftStatus = "candidate" + DraftStatusQuarantined DraftStatus = "quarantined" + DraftStatusAccepted DraftStatus = "accepted" +) + +type SkillStatus string + +const ( + SkillStatusActive SkillStatus = "active" + SkillStatusCold SkillStatus = "cold" + SkillStatusArchived SkillStatus = "archived" + SkillStatusDeleted SkillStatus = "deleted" +) + +type AttemptTrail struct { + AttemptedSkills []string `json:"attempted_skills,omitempty"` + FinalSuccessfulPath []string `json:"final_successful_path,omitempty"` + SkillContextSnapshots []SkillContextSnapshot `json:"skill_context_snapshots,omitempty"` +} + +type SkillContextSnapshot struct { + Sequence int `json:"sequence"` + Trigger string `json:"trigger"` + SkillNames []string `json:"skill_names,omitempty"` +} + +type ToolExecutionRecord struct { + Name string `json:"name"` + Success bool `json:"success"` + ErrorSummary string `json:"error_summary,omitempty"` + SkillNames []string `json:"skill_names,omitempty"` +} + +type LearningRecord struct { + ID string `json:"id"` + Kind RecordKind `json:"kind"` + WorkspaceID string `json:"workspace_id"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt *time.Time `json:"updated_at,omitempty"` + SessionKey string 
`json:"session_key,omitempty"` + TaskHash string `json:"task_hash,omitempty"` + Summary string `json:"summary"` + UserGoal string `json:"user_goal,omitempty"` + FinalOutput string `json:"final_output,omitempty"` + Source map[string]any `json:"source,omitempty"` + Status RecordStatus `json:"status"` + Success *bool `json:"success,omitempty"` + ToolKinds []string `json:"tool_kinds,omitempty"` + ToolExecutions []ToolExecutionRecord `json:"tool_executions,omitempty"` + InitialSkillNames []string `json:"initial_skill_names,omitempty"` + AddedSkillNames []string `json:"added_skill_names,omitempty"` + UsedSkillNames []string `json:"used_skill_names,omitempty"` + AllLoadedSkillNames []string `json:"all_loaded_skill_names,omitempty"` + ActiveSkillNames []string `json:"active_skill_names,omitempty"` + AttemptTrail *AttemptTrail `json:"attempt_trail,omitempty"` + Signals []string `json:"signals,omitempty"` + SourceRecordIDs []string `json:"source_record_ids,omitempty"` + TaskRecordIDs []string `json:"task_record_ids,omitempty"` + Label string `json:"label,omitempty"` + ClusterReason string `json:"cluster_reason,omitempty"` + EventCount int `json:"event_count,omitempty"` + SuccessRate float64 `json:"success_rate,omitempty"` + MaturityScore float64 `json:"maturity_score,omitempty"` + WinningPath []string `json:"winning_path,omitempty"` + LateAddedSkills []string `json:"late_added_skills,omitempty"` + FinalSnapshotTrigger string `json:"final_snapshot_trigger,omitempty"` + MatchedSkillNames []string `json:"matched_skill_names,omitempty"` +} + +type SkillDraft struct { + ID string `json:"id"` + WorkspaceID string `json:"workspace_id"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt *time.Time `json:"updated_at,omitempty"` + SourceRecordID string `json:"source_record_id"` + TargetSkillName string `json:"target_skill_name"` + MatchedSkillRefs []string `json:"matched_skill_refs,omitempty"` + DraftType DraftType `json:"draft_type"` + ChangeKind ChangeKind `json:"change_kind"` + 
HumanSummary string `json:"human_summary"` + IntendedUseCases []string `json:"intended_use_cases,omitempty"` + PreferredEntryPath []string `json:"preferred_entry_path,omitempty"` + AvoidPatterns []string `json:"avoid_patterns,omitempty"` + BodyOrPatch string `json:"body_or_patch"` + Status DraftStatus `json:"status"` + ReviewNotes []string `json:"review_notes,omitempty"` + ScanFindings []string `json:"scan_findings,omitempty"` +} + +type SkillVersionEntry struct { + Version string `json:"version"` + Action string `json:"action"` + Timestamp time.Time `json:"timestamp"` + DraftID string `json:"draft_id,omitempty"` + Summary string `json:"summary"` + Rollback bool `json:"rollback,omitempty"` + RollbackReason string `json:"rollback_reason,omitempty"` +} + +type SkillProfile struct { + SkillName string `json:"skill_name"` + WorkspaceID string `json:"workspace_id"` + CurrentVersion string `json:"current_version"` + Status SkillStatus `json:"status"` + Origin string `json:"origin"` + HumanSummary string `json:"human_summary"` + ChangeReason string `json:"change_reason,omitempty"` + IntendedUseCases []string `json:"intended_use_cases,omitempty"` + PreferredEntryPath []string `json:"preferred_entry_path,omitempty"` + AvoidPatterns []string `json:"avoid_patterns,omitempty"` + LastUsedAt time.Time `json:"last_used_at"` + UseCount int `json:"use_count"` + RetentionScore float64 `json:"retention_score"` + VersionHistory []SkillVersionEntry `json:"version_history"` +} diff --git a/pkg/gateway/gateway.go b/pkg/gateway/gateway.go index 6171fd65f..6ca7c6346 100644 --- a/pkg/gateway/gateway.go +++ b/pkg/gateway/gateway.go @@ -348,7 +348,11 @@ func createStartupProvider( return &startupBlockedProvider{reason: reason}, "", nil } - return providers.CreateProvider(cfg) + provider, modelID, err := providers.CreateProvider(cfg) + if err != nil { + return nil, "", err + } + return provider, modelID, nil } func setupAndStartServices( diff --git a/pkg/skills/loader.go b/pkg/skills/loader.go 
index f5985a662..e7a82329c 100644
--- a/pkg/skills/loader.go
+++ b/pkg/skills/loader.go
@@ -42,11 +42,8 @@ func (info SkillInfo) validate() error {
 	if info.Name == "" {
 		errs = errors.Join(errs, errors.New("name is required"))
 	} else {
-		if len(info.Name) > MaxNameLength {
-			errs = errors.Join(errs, fmt.Errorf("name exceeds %d characters", MaxNameLength))
-		}
-		if !namePattern.MatchString(info.Name) {
-			errs = errors.Join(errs, errors.New("name must be alphanumeric with hyphens"))
+		if err := ValidateSkillName(info.Name); err != nil {
+			errs = errors.Join(errs, err)
 		}
 	}
 
@@ -148,6 +145,10 @@ func (sl *SkillsLoader) ListSkills() []SkillInfo {
 }
 
 func (sl *SkillsLoader) LoadSkill(name string) (string, bool) {
+	if err := ValidateSkillName(name); err != nil {
+		return "", false
+	}
+
 	// 1. load from workspace skills first (project-level)
 	if sl.workspaceSkills != "" {
 		skillFile := filepath.Join(sl.workspaceSkills, name, "SKILL.md")
diff --git a/pkg/skills/validation.go b/pkg/skills/validation.go
new file mode 100644
index 000000000..504992b4a
--- /dev/null
+++ b/pkg/skills/validation.go
@@ -0,0 +1,40 @@
+package skills
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+
+	"github.com/sipeed/picoclaw/pkg/utils"
+)
+
+// ValidateSkillName returns an error describing why name cannot be used as a
+// skill name, or nil when it can.
+//
+// The name is checked exactly as given: SkillsLoader.LoadSkill joins it into a
+// filesystem path verbatim, so a name that only becomes valid after trimming
+// is rejected rather than silently accepted. A valid name is non-blank, has no
+// leading or trailing whitespace, is not an absolute path, passes
+// utils.ValidateSkillIdentifier, is at most MaxNameLength bytes long and
+// matches namePattern.
+func ValidateSkillName(name string) error {
+	trimmed := strings.TrimSpace(name)
+	if trimmed == "" {
+		return fmt.Errorf("skill name is required")
+	}
+	if trimmed != name {
+		return fmt.Errorf("skill name must not have leading or trailing whitespace")
+	}
+	if filepath.IsAbs(name) {
+		return fmt.Errorf("skill name must not be an absolute path")
+	}
+	if err := utils.ValidateSkillIdentifier(name); err != nil {
+		return fmt.Errorf("skill name is invalid: %w", err)
+	}
+	if len(name) > MaxNameLength {
+		return fmt.Errorf("skill name exceeds %d characters", MaxNameLength)
+	}
+	if !namePattern.MatchString(name) {
+		return fmt.Errorf("skill name must be alphanumeric with hyphens")
+	}
+	return nil
+}