Files
picoclaw/pkg/evolution/llm_draft_generator_test.go
T
lxowalle b3a7b7ad64 feat: agent self evolution (#2847)
* feat: add agent self-evolution

* fix ci

* delete unused doc

* fix lint

* fix evolution review issues
2026-05-11 16:13:27 +08:00

368 lines
12 KiB
Go

package evolution_test
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"github.com/sipeed/picoclaw/pkg/evolution"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/skills"
)
// recordingDraftGenerator is a stub draft generator used as the fallback in
// these tests. It hands back a canned draft/error pair and counts how many
// times it was invoked.
type recordingDraftGenerator struct {
	// draft is returned unchanged from every GenerateDraft call.
	draft evolution.SkillDraft
	// err is returned alongside draft from every GenerateDraft call.
	err error
	// calls is incremented once per GenerateDraft call.
	calls int
}
// GenerateDraft bumps the call counter and returns the configured draft and
// error. The context, learning record, and skill matches are all ignored.
func (g *recordingDraftGenerator) GenerateDraft(
	context.Context,
	evolution.LearningRecord,
	[]skills.SkillInfo,
) (evolution.SkillDraft, error) {
	g.calls++
	return g.draft, g.err
}
// llmDraftTestProvider is a stub LLM provider for these tests. It replays a
// canned response/error and remembers what the most recent Chat call was
// given.
type llmDraftTestProvider struct {
	// response is returned from every Chat call.
	response *providers.LLMResponse
	// err is returned alongside response from every Chat call.
	err error
	// defaultModel is what GetDefaultModel reports.
	defaultModel string
	// lastModel is the model argument of the most recent Chat call.
	lastModel string
	// lastMessages is a copy of the messages passed to the most recent Chat call.
	lastMessages []providers.Message
	// chatCallCount is the number of Chat calls made so far.
	chatCallCount int
}
// Chat records the call (count, model, and a copy of the messages) and then
// returns the canned response and error. The context, tool definitions, and
// options map are ignored.
func (p *llmDraftTestProvider) Chat(
	_ context.Context,
	messages []providers.Message,
	_ []providers.ToolDefinition,
	model string,
	_ map[string]any,
) (*providers.LLMResponse, error) {
	p.chatCallCount++
	p.lastModel = model

	// Copy the slice so later mutation by the caller cannot change what the
	// test observes. Appending to a nil slice keeps the result nil when
	// messages is empty.
	var snapshot []providers.Message
	snapshot = append(snapshot, messages...)
	p.lastMessages = snapshot

	return p.response, p.err
}
// GetDefaultModel reports the model name configured on the stub.
func (p *llmDraftTestProvider) GetDefaultModel() string {
	return p.defaultModel
}
// testLearningRule builds the "weather native-name path" learning record that
// most tests in this file feed to the generator.
func testLearningRule() evolution.LearningRecord {
	rule := evolution.LearningRecord{
		ID:                "rule-1",
		Summary:           "weather native-name path",
		EventCount:        7,
		SuccessRate:       0.86,
		WinningPath:       []string{"weather", "native-name"},
		MatchedSkillNames: []string{"weather"},
	}
	return rule
}
// testSkillMatches returns a one-element list holding the "weather" workspace
// skill, used as the matched-skills argument in most tests in this file.
func testSkillMatches() []skills.SkillInfo {
	weather := skills.SkillInfo{
		Name:        "weather",
		Path:        "/tmp/weather/SKILL.md",
		Source:      "workspace",
		Description: "Find weather details.",
	}
	return []skills.SkillInfo{weather}
}
// TestLLMDraftGenerator_GenerateDraft_ParsesJSONResponse checks the happy
// path: a well-formed JSON reply is decoded into the returned draft, the
// provider is called exactly once with its default model (no explicit model
// ID is configured), and the fallback generator is never consulted.
func TestLLMDraftGenerator_GenerateDraft_ParsesJSONResponse(t *testing.T) {
	const reply = `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name lookup first","body_or_patch":"## Start Here\nUse native-name first."}`

	llm := &llmDraftTestProvider{
		defaultModel: "test-model",
		response:     &providers.LLMResponse{Content: reply},
	}
	fb := &recordingDraftGenerator{
		draft: evolution.SkillDraft{TargetSkillName: "fallback"},
	}

	gen := evolution.NewLLMDraftGenerator(llm, "", fb)
	got, err := gen.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches())
	if err != nil {
		t.Fatalf("GenerateDraft: %v", err)
	}

	// Provider interaction.
	if n := llm.chatCallCount; n != 1 {
		t.Fatalf("chatCallCount = %d, want 1", n)
	}
	if model := llm.lastModel; model != "test-model" {
		t.Fatalf("lastModel = %q, want test-model", model)
	}
	if len(llm.lastMessages) == 0 {
		t.Fatal("expected prompt messages")
	}
	if n := fb.calls; n != 0 {
		t.Fatalf("fallback.calls = %d, want 0", n)
	}

	// Decoded draft contents.
	if name := got.TargetSkillName; name != "weather" {
		t.Fatalf("TargetSkillName = %q, want weather", name)
	}
	if got.DraftType != evolution.DraftTypeShortcut {
		t.Fatalf("DraftType = %q, want %q", got.DraftType, evolution.DraftTypeShortcut)
	}
	if got.ChangeKind != evolution.ChangeKindAppend {
		t.Fatalf("ChangeKind = %q, want %q", got.ChangeKind, evolution.ChangeKindAppend)
	}
	if got.HumanSummary == "" || got.BodyOrPatch == "" {
		t.Fatal("expected non-empty draft content")
	}
}
// TestLLMDraftGenerator_BuildPromptIncludesMatchedSkillContent writes a real
// SKILL.md into a temp directory, passes it as a matched skill, and checks
// that the second message sent to the provider (treated here as the user
// prompt) contains both the skill's body text and the expected set of
// authoring instructions.
func TestLLMDraftGenerator_BuildPromptIncludesMatchedSkillContent(t *testing.T) {
	dir := t.TempDir()
	skillPath := filepath.Join(dir, "skills", "three-one-theorem", "SKILL.md")
	if err := os.MkdirAll(filepath.Dir(skillPath), 0o755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}
	if err := os.WriteFile(
		skillPath,
		[]byte(
			"---\nname: three-one-theorem\ndescription: Add 31 then delegate\n---\n# Three One\nAdd 31 to the input, then continue with the next theorem.\n",
		),
		0o644,
	); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	provider := &llmDraftTestProvider{
		defaultModel: "test-model",
		response: &providers.LLMResponse{
			Content: `{"target_skill_name":"calculate-100-via-theorems","draft_type":"shortcut","change_kind":"create","human_summary":"Combine theorem chain","body_or_patch":"## Start Here\nAdd 31, then continue."}`,
		},
	}
	generator := evolution.NewLLMDraftGenerator(provider, "", &recordingDraftGenerator{})
	_, err := generator.GenerateDraft(context.Background(), evolution.LearningRecord{
		ID:          "rule-1",
		Summary:     "calculate 100",
		WinningPath: []string{"three-one-theorem", "four-two-theorem"},
		EventCount:  2,
		SuccessRate: 1,
	}, []skills.SkillInfo{{
		Name:        "three-one-theorem",
		Path:        skillPath,
		Source:      "workspace",
		Description: "Add 31 then delegate",
	}})
	if err != nil {
		t.Fatalf("GenerateDraft: %v", err)
	}
	if len(provider.lastMessages) < 2 {
		t.Fatal("expected user prompt")
	}
	prompt := provider.lastMessages[1].Content

	// Table-driven so each required fragment is one row, matching the loop
	// style of the task-evidence test in this file. Order is preserved from
	// the original sequence of checks, so the first failure reported is the
	// same.
	checks := []struct {
		label string // what the fragment represents, for the failure message
		want  string // substring that must appear in the prompt
	}{
		{"content section", "Matched skill content excerpts"},
		{"matched skill body", "Add 31 to the input"},
		{"synthesis instruction", "summarize the functional purpose and result"},
		{"complete skill instruction", "complete SKILL.md file with exactly two parts"},
		{"frontmatter instruction", "The YAML frontmatter must contain only name and description fields"},
		{
			"description field instruction",
			"The description field must and only describe what this skill can do and when to use it",
		},
		{
			"deployable body scope instruction",
			"The deployable Markdown body should only contain what the skill is useful for and how to use it",
		},
		{
			"step-by-step instruction",
			"provide detailed step-by-step instructions for the exact operation or execution process",
		},
		{"internal draft instruction", "body_or_patch is an internal draft and review artifact"},
		{"deploy-clean instruction", "the final deployed SKILL.md will be rendered without learning traces"},
		{"no-copy instruction", "do not copy or directly include other skills' instructions"},
	}
	for _, c := range checks {
		if !strings.Contains(prompt, c.want) {
			t.Fatalf("prompt missing %s (%q):\n%s", c.label, c.want, prompt)
		}
	}
}
// TestLLMDraftGenerator_BuildPromptIncludesTaskEvidence calls
// GenerateDraftWithEvidence with a single source task record and checks that
// the second message sent to the provider (treated here as the user prompt)
// mentions that record's ID, summary, final output, and the used-skill chain.
func TestLLMDraftGenerator_BuildPromptIncludesTaskEvidence(t *testing.T) {
	const reply = `{"target_skill_name":"calculate-with-three-one-theorem","draft_type":"shortcut","change_kind":"create","human_summary":"Calculate using theorem chain","body_or_patch":"---\nname: calculate-with-three-one-theorem\ndescription: Calculate with theorem chain.\n---\n# Calculate With Three One Theorem\n\n## Procedure\nAdd 31, add 42, then subtract 53."}`

	llm := &llmDraftTestProvider{
		defaultModel: "test-model",
		response:     &providers.LLMResponse{Content: reply},
	}

	rule := evolution.LearningRecord{
		ID:      "rule-1",
		Label:   "calculate-with-three-one-theorem",
		Summary: "调用三一定理计算",
	}
	evidence := evolution.DraftEvidence{
		TaskRecords: []evolution.LearningRecord{
			{
				ID:             "main-turn-6",
				Summary:        "调用三一定理计算100",
				FinalOutput:    "100 + 31 = 131; 131 + 42 = 173; 173 - 53 = 120",
				UsedSkillNames: []string{"three-one-theorem", "four-two-theorem", "five-three-theorem"},
			},
		},
	}

	gen := evolution.NewLLMDraftGenerator(llm, "", &recordingDraftGenerator{})
	if _, err := gen.GenerateDraftWithEvidence(context.Background(), rule, nil, evidence); err != nil {
		t.Fatalf("GenerateDraftWithEvidence: %v", err)
	}
	if len(llm.lastMessages) < 2 {
		t.Fatal("expected user prompt")
	}

	userPrompt := llm.lastMessages[1].Content
	required := []string{
		"Source task evidence",
		"main-turn-6",
		"调用三一定理计算100",
		"100 + 31 = 131",
		"three-one-theorem -> four-two-theorem -> five-three-theorem",
		"directly usable by a future agent",
	}
	for _, fragment := range required {
		if strings.Contains(userPrompt, fragment) {
			continue
		}
		t.Fatalf("prompt missing %q:\n%s", fragment, userPrompt)
	}
}
// TestLLMDraftGenerator_GenerateDraft_PrefersExplicitModelIDOverProviderDefault
// checks that a model ID passed to the constructor is the one handed to the
// provider's Chat call, even though the provider reports a different default.
func TestLLMDraftGenerator_GenerateDraft_PrefersExplicitModelIDOverProviderDefault(t *testing.T) {
	const reply = `{"target_skill_name":"weather","draft_type":"shortcut","change_kind":"append","human_summary":"Prefer native-name lookup first","body_or_patch":"## Start Here\nUse native-name first."}`

	llm := &llmDraftTestProvider{
		defaultModel: "provider-default-model",
		response:     &providers.LLMResponse{Content: reply},
	}

	gen := evolution.NewLLMDraftGenerator(llm, "explicit-model-id", &recordingDraftGenerator{})
	if _, err := gen.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches()); err != nil {
		t.Fatalf("GenerateDraft: %v", err)
	}

	if used := llm.lastModel; used != "explicit-model-id" {
		t.Fatalf("lastModel = %q, want explicit-model-id", used)
	}
}
// TestLLMDraftGenerator_GenerateDraft_FallsBackOnProviderError checks that
// when the provider's Chat call returns an error, GenerateDraft still
// succeeds by invoking the fallback generator exactly once and returning its
// draft.
func TestLLMDraftGenerator_GenerateDraft_FallsBackOnProviderError(t *testing.T) {
	fb := &recordingDraftGenerator{
		draft: evolution.SkillDraft{
			TargetSkillName: "weather-fallback",
			DraftType:       evolution.DraftTypeWorkflow,
			ChangeKind:      evolution.ChangeKindCreate,
			HumanSummary:    "fallback summary",
			BodyOrPatch:     "fallback body",
		},
	}
	failing := &llmDraftTestProvider{
		defaultModel: "test-model",
		err:          errors.New("provider unavailable"),
	}

	gen := evolution.NewLLMDraftGenerator(failing, "", fb)
	got, err := gen.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches())
	if err != nil {
		t.Fatalf("GenerateDraft: %v", err)
	}

	if n := fb.calls; n != 1 {
		t.Fatalf("fallback.calls = %d, want 1", n)
	}
	if name := got.TargetSkillName; name != "weather-fallback" {
		t.Fatalf("TargetSkillName = %q, want weather-fallback", name)
	}
}
// TestLLMDraftGenerator_GenerateDraft_FallsBackOnInvalidOrEmptyContent runs
// one subtest per unusable provider reply (non-JSON text, empty string) and
// checks that each one makes GenerateDraft return the fallback generator's
// draft after exactly one fallback call.
func TestLLMDraftGenerator_GenerateDraft_FallsBackOnInvalidOrEmptyContent(t *testing.T) {
	cases := []struct {
		name    string
		content string
	}{
		{name: "invalid json", content: `not-json`},
		{name: "empty content", content: ``},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Fresh stubs per subtest so call counts do not leak between cases.
			fb := &recordingDraftGenerator{
				draft: evolution.SkillDraft{
					TargetSkillName: "weather-fallback",
					DraftType:       evolution.DraftTypeWorkflow,
					ChangeKind:      evolution.ChangeKindCreate,
					HumanSummary:    "fallback summary",
					BodyOrPatch:     "fallback body",
				},
			}
			llm := &llmDraftTestProvider{
				defaultModel: "test-model",
				response:     &providers.LLMResponse{Content: tc.content},
			}

			gen := evolution.NewLLMDraftGenerator(llm, "", fb)
			got, err := gen.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches())
			if err != nil {
				t.Fatalf("GenerateDraft: %v", err)
			}

			if n := fb.calls; n != 1 {
				t.Fatalf("fallback.calls = %d, want 1", n)
			}
			if name := got.TargetSkillName; name != "weather-fallback" {
				t.Fatalf("TargetSkillName = %q, want weather-fallback", name)
			}
		})
	}
}
// TestLLMDraftGenerator_GenerateDraft_FallsBackOnNumericOnlyTargetSkillName
// checks that a provider reply whose target_skill_name is the purely numeric
// "100" is not used: the fallback generator is called once and its draft
// ("learned-100") is what GenerateDraft returns.
func TestLLMDraftGenerator_GenerateDraft_FallsBackOnNumericOnlyTargetSkillName(t *testing.T) {
	const reply = `{"target_skill_name":"100","draft_type":"shortcut","change_kind":"create","human_summary":"Calculate 100","body_or_patch":"## Start Here\nCalculate 100."}`

	fb := &recordingDraftGenerator{
		draft: evolution.SkillDraft{
			TargetSkillName: "learned-100",
			DraftType:       evolution.DraftTypeWorkflow,
			ChangeKind:      evolution.ChangeKindCreate,
			HumanSummary:    "fallback summary",
			BodyOrPatch:     "fallback body",
		},
	}
	llm := &llmDraftTestProvider{
		defaultModel: "test-model",
		response:     &providers.LLMResponse{Content: reply},
	}

	gen := evolution.NewLLMDraftGenerator(llm, "", fb)
	got, err := gen.GenerateDraft(context.Background(), testLearningRule(), testSkillMatches())
	if err != nil {
		t.Fatalf("GenerateDraft: %v", err)
	}

	if n := fb.calls; n != 1 {
		t.Fatalf("fallback.calls = %d, want 1", n)
	}
	if name := got.TargetSkillName; name != "learned-100" {
		t.Fatalf("TargetSkillName = %q, want learned-100", name)
	}
}