Files
picoclaw/cmd/membench/ingest_test.go
T
Liu Yuan 1175f4a62b feat(membench): add LOCOMO memory benchmark tool (#2353)
Benchmark tool comparing legacy session manager vs seahorse short memory
retrieval on the LOCOMO long-term conversational memory dataset.

- cmd/membench/: CLI with ingest/eval/report/run subcommands
- Mode A (legacy): recency-biased budget truncation baseline
- Mode B (seahorse): per-keyword trigram FTS5 search + expand
- Metrics: Token-Overlap F1 and Recall Hit Rate
- `make mem` builds, downloads data, runs benchmark end-to-end
2026-04-06 17:26:43 +08:00

80 lines
2.1 KiB
Go

package main
import (
"context"
"encoding/json"
"path/filepath"
"testing"
"github.com/sipeed/picoclaw/pkg/seahorse"
)
func TestIngestSeahorseIdempotent(t *testing.T) {
ctx := context.Background()
tmpDir := t.TempDir()
dbPath := filepath.Join(tmpDir, "test.db")
// Minimal test data
samples := []LocomoSample{
{
SampleID: "test-1",
Conversation: map[string]json.RawMessage{
"session_1": json.RawMessage(`[
{"speaker":"A","dia_id":"D1:1","text":"hello world this is a test message"},
{"speaker":"B","dia_id":"D1:2","text":"another message for testing purposes"}
]`),
},
},
}
// First ingestion
result1, err := IngestSeahorse(ctx, samples, dbPath)
if err != nil {
t.Fatalf("first ingest failed: %v", err)
}
convCount1 := len(result1.ConvMap)
result1.Engine.Close()
// Second ingestion on same DB — should reuse existing data
result2, err := IngestSeahorse(ctx, samples, dbPath)
if err != nil {
t.Fatalf("second ingest failed: %v", err)
}
defer result2.Engine.Close()
// ConvMap should have same number of entries (no duplicates)
if len(result2.ConvMap) != convCount1 {
t.Errorf("second ingest convMap has %d entries, want %d (same as first)",
len(result2.ConvMap), convCount1)
}
// Verify conversation IDs are the same (reused, not new ones)
for id, cid1 := range result1.ConvMap {
cid2, ok := result2.ConvMap[id]
if !ok {
t.Errorf("sample %s missing from second ConvMap", id)
continue
}
if cid2 != cid1 {
t.Errorf("sample %s: second ingest got convID %d, want %d (reused)", id, cid2, cid1)
}
}
// Verify no duplicate messages by counting
store := result2.Engine.GetRetrieval().Store()
for _, convID := range result2.ConvMap {
msgs, err := store.SearchMessages(ctx, seahorse.SearchInput{
Pattern: "test",
ConversationID: convID,
Limit: 100,
})
if err != nil {
t.Fatalf("search failed: %v", err)
}
// Should find exactly 1 message containing "test" (the first turn)
if len(msgs) > 2 {
t.Errorf("found %d messages for 'test' in conv %d, expected ≤2 (no duplicates)", len(msgs), convID)
}
}
}