mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
1175f4a62b
Benchmark tool comparing legacy session manager vs seahorse short memory retrieval on the LOCOMO long-term conversational memory dataset. - cmd/membench/: CLI with ingest/eval/report/run subcommands - Mode A (legacy): recency-biased budget truncation baseline - Mode B (seahorse): per-keyword trigram FTS5 search + expand - Metrics: Token-Overlap F1 and Recall Hit Rate - `make mem` builds, downloads data, runs benchmark end-to-end
80 lines
2.1 KiB
Go
80 lines
2.1 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/seahorse"
|
|
)
|
|
|
|
func TestIngestSeahorseIdempotent(t *testing.T) {
|
|
ctx := context.Background()
|
|
tmpDir := t.TempDir()
|
|
dbPath := filepath.Join(tmpDir, "test.db")
|
|
|
|
// Minimal test data
|
|
samples := []LocomoSample{
|
|
{
|
|
SampleID: "test-1",
|
|
Conversation: map[string]json.RawMessage{
|
|
"session_1": json.RawMessage(`[
|
|
{"speaker":"A","dia_id":"D1:1","text":"hello world this is a test message"},
|
|
{"speaker":"B","dia_id":"D1:2","text":"another message for testing purposes"}
|
|
]`),
|
|
},
|
|
},
|
|
}
|
|
|
|
// First ingestion
|
|
result1, err := IngestSeahorse(ctx, samples, dbPath)
|
|
if err != nil {
|
|
t.Fatalf("first ingest failed: %v", err)
|
|
}
|
|
convCount1 := len(result1.ConvMap)
|
|
result1.Engine.Close()
|
|
|
|
// Second ingestion on same DB — should reuse existing data
|
|
result2, err := IngestSeahorse(ctx, samples, dbPath)
|
|
if err != nil {
|
|
t.Fatalf("second ingest failed: %v", err)
|
|
}
|
|
defer result2.Engine.Close()
|
|
|
|
// ConvMap should have same number of entries (no duplicates)
|
|
if len(result2.ConvMap) != convCount1 {
|
|
t.Errorf("second ingest convMap has %d entries, want %d (same as first)",
|
|
len(result2.ConvMap), convCount1)
|
|
}
|
|
|
|
// Verify conversation IDs are the same (reused, not new ones)
|
|
for id, cid1 := range result1.ConvMap {
|
|
cid2, ok := result2.ConvMap[id]
|
|
if !ok {
|
|
t.Errorf("sample %s missing from second ConvMap", id)
|
|
continue
|
|
}
|
|
if cid2 != cid1 {
|
|
t.Errorf("sample %s: second ingest got convID %d, want %d (reused)", id, cid2, cid1)
|
|
}
|
|
}
|
|
|
|
// Verify no duplicate messages by counting
|
|
store := result2.Engine.GetRetrieval().Store()
|
|
for _, convID := range result2.ConvMap {
|
|
msgs, err := store.SearchMessages(ctx, seahorse.SearchInput{
|
|
Pattern: "test",
|
|
ConversationID: convID,
|
|
Limit: 100,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("search failed: %v", err)
|
|
}
|
|
// Should find exactly 1 message containing "test" (the first turn)
|
|
if len(msgs) > 2 {
|
|
t.Errorf("found %d messages for 'test' in conv %d, expected ≤2 (no duplicates)", len(msgs), convID)
|
|
}
|
|
}
|
|
}
|