mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
15a70ac45c
* feat(seahorse): implement short-term memory engine of seahorse Add pkg/seahorse/ module implementing a SQLite-backed DAG-based summary hierarchy for context management, ported from lossless-claw's LCM design: - types.go + short_constants.go: core types (Message, Summary, Conversation, ContextItem) and configuration constants (fanout, token targets, thresholds) - migration.go: idempotent DB schema with FTS5 trigram tokenizer for CJK - store.go: full SQLite CRUD (conversations, messages, summaries DAG, context_items with ordinal gap numbering, FTS5 search) - short_engine.go: Engine lifecycle (NewEngine, Ingest, Assemble, Compact), session pattern filtering (ignore/stateless glob→regex compilation), per-session mutex via sync.Map - short_assembler.go: budget-aware context assembly with fresh tail protection (32 messages), oldest-first eviction, summary XML formatting, RebuildContextItems - short_compaction.go: leaf compaction (messages→summary) and condensed compaction (summaries→higher-level summary), 3-level LLM escalation, CompactUntilUnder for emergency overflow - short_retrieval.go: lookupByID, FTS5/LIKE search, recursive expand with token cap - context_seahorse.go: agent.ContextManager adapter, registered as "seahorse", provider↔seahorse message type conversion (ToolCalls, tool_result) * fix(seahorse): correct 3 adapter bugs in context management - TokenCount: use full message (Content+ToolCalls+Media) instead of Content-only - Empty Content: rebuild Content from tool_result Parts when stored empty - Duplicate summaries: summaries only in Summary field, not in History messages - Grep: fix SearchResult.Snippet→Content for summaries - Schema: fix FTS5 SQL uses VIRTUAL TABLE not TEMP TABLE - TestFTS5SQLConstants: verify FTS5 SQL syntax correctness - Test: fix flaky TestCompactLeaf * fix(agent): ingest steering messages into seahorse SQLite Steering messages were only persisted to session JSONL but not ingested into seahorse SQLite, causing them to be missing from context assembly. Added `ts.ingestMessage(turnCtx, al, pm)` call in the steering message injection block alongside the existing JSONL persistence. Test: TestSeahorseSteeringMessageIngested verifies steering messages appear in seahorse SQLite DB after being processed. * fix(seahorse): address 3 blocking bugs from code review - Fix resequenceContextItemsTx scan error handling (store.go:850) Changed `return err` to `return scanErr` to properly propagate scan errors instead of returning nil (which silently corrupts data) - Fix sql.NullString for INTEGER column (store.go:847) Changed `mid` from sql.NullString to sql.NullInt64 since message_id is INTEGER in schema. Removed unnecessary strconv.ParseInt call. - Fix compactCondensed fallback deleting non-candidate items Added ReplaceContextItemsWithSummary method for per-item deletion when candidates are not contiguous in ordinal space. Optimized to use range deletion when candidates are consecutive. * fix(seahorse): pass Budget to Compact for correct condensed threshold Issue #4 from PR review: When Budget was not passed to seahorse.Compact, it defaulted to `tokensBefore * 0.75`, making `tokensBefore > budget` always true and causing condensed compaction to trigger unnecessarily. Changes: - context_seahorse.go: Forward Budget from CompactRequest to CompactInput - loop.go: Pass Budget (ContextWindow) in all 3 Compact calls - Add test verifying condensed is skipped when tokens < threshold - Fix lint issues in store.go and store_test.go * fix(seahorse): add mutex for assembler lazy initialization Issue #5 from PR review: The check-then-create pattern for e.assembler was a data race when multiple goroutines called Assemble() concurrently: if e.assembler == nil { e.assembler = &Assembler{...} } Changes: - Add assemblerMu sync.Mutex to Engine struct - Add initAssemblerOnce() using double-checked locking (same pattern as initCompactionOnce) - Add TestAssemblerLazyInitRace to verify thread-safety * fix(seahorse): handle non-consecutive depths in selectShallowestCondensationCandidate Issue #8 from PR review: the loop iterated depth 0, 1, 2... assuming consecutive keys, but break when key was missing caused deeper depths to never be checked. Fix: collect all existing depth keys, sort, then iterate in order. * fix(seahorse): wrap DeleteMessagesAfterID and appendContextItems in transactions - DeleteMessagesAfterID: wrap all DELETE operations in a transaction for atomicity, remove redundant manual FTS delete (handled by trigger) - appendContextItems: use transaction to fix read-then-write race condition - Add GetMaxOrdinalTx and resolveItemTokenCountTx for transaction-scoped queries - Remove unused resolveItemTokenCount function Fixes PR review issues 6 and 7. * fix(seahorse): derive readable content from Parts and cap CompactUntilUnder iterations - Derive readable content from MessageParts in AddMessageWithParts so FTS5 indexing and summary formatting can access tool call information - formatMessagesForSummary and truncateSummary now fall back to Parts when Content is empty, fixing blank summaries for Part-based messages - Add MaxCompactIterations (20) to prevent CompactUntilUnder infinite loops; exceeded iterations are logged as warnings
337 lines
8.0 KiB
Go
337 lines
8.0 KiB
Go
package seahorse
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"fmt"
|
|
"testing"
|
|
"time"
|
|
|
|
_ "modernc.org/sqlite"
|
|
)
|
|
|
|
// newBenchStore creates a test store for benchmarks.
|
|
func newBenchStore(b *testing.B) (*Store, func()) {
|
|
b.Helper()
|
|
db, err := sql.Open("sqlite", ":memory:")
|
|
if err != nil {
|
|
b.Fatalf("open test db: %v", err)
|
|
}
|
|
if err := runSchema(db); err != nil {
|
|
db.Close()
|
|
b.Fatalf("migration: %v", err)
|
|
}
|
|
return &Store{db: db}, func() { db.Close() }
|
|
}
|
|
|
|
// --- Ingest benchmarks ---
|
|
|
|
func BenchmarkIngest_SingleMessage(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:ingest")
|
|
convID := conv.ConversationID
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := s.AddMessage(ctx, convID, "user", "Test message content", 15)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkIngest_BatchMessages(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
conv, _ := s.GetOrCreateConversation(ctx, fmt.Sprintf("bench:ingest-batch:%d", i))
|
|
convID := conv.ConversationID
|
|
|
|
for j := 0; j < 10; j++ {
|
|
added, err := s.AddMessage(ctx, convID, "user",
|
|
fmt.Sprintf("Message %d in batch", j), 10)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
s.AppendContextMessage(ctx, convID, added.ID)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Assemble benchmarks ---
|
|
|
|
func BenchmarkAssemble_MessagesOnly(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:assemble-msgs")
|
|
convID := conv.ConversationID
|
|
|
|
// Add 100 messages
|
|
for i := 0; i < 100; i++ {
|
|
m, _ := s.AddMessage(ctx, convID, "user",
|
|
fmt.Sprintf("Message content %d with some text", i), 10)
|
|
s.AppendContextMessage(ctx, convID, m.ID)
|
|
}
|
|
|
|
a := &Assembler{store: s}
|
|
input := AssembleInput{Budget: 50000}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := a.Assemble(ctx, convID, input)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkAssemble_WithSummaries(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:assemble-sums")
|
|
convID := conv.ConversationID
|
|
|
|
now := time.Now().UTC()
|
|
|
|
// Add 10 leaf summaries
|
|
for i := 0; i < 10; i++ {
|
|
sum, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
|
ConversationID: convID,
|
|
Kind: SummaryKindLeaf,
|
|
Depth: 0,
|
|
Content: fmt.Sprintf("Leaf summary %d", i),
|
|
TokenCount: 500,
|
|
EarliestAt: &now,
|
|
LatestAt: &now,
|
|
})
|
|
s.AppendContextSummary(ctx, convID, sum.SummaryID)
|
|
}
|
|
|
|
// Add 20 fresh messages
|
|
for i := 0; i < 20; i++ {
|
|
m, _ := s.AddMessage(ctx, convID, "user", fmt.Sprintf("Fresh message %d", i), 10)
|
|
s.AppendContextMessage(ctx, convID, m.ID)
|
|
}
|
|
|
|
a := &Assembler{store: s}
|
|
input := AssembleInput{Budget: 10000}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := a.Assemble(ctx, convID, input)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkAssemble_BudgetEviction(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:assemble-evict")
|
|
convID := conv.ConversationID
|
|
|
|
now := time.Now().UTC()
|
|
|
|
// Add 50 leaf summaries (more than budget can hold)
|
|
for i := 0; i < 50; i++ {
|
|
sum, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
|
ConversationID: convID,
|
|
Kind: SummaryKindLeaf,
|
|
Depth: 0,
|
|
Content: fmt.Sprintf("Summary %d", i),
|
|
TokenCount: 300,
|
|
EarliestAt: &now,
|
|
LatestAt: &now,
|
|
})
|
|
s.AppendContextSummary(ctx, convID, sum.SummaryID)
|
|
}
|
|
|
|
// Add fresh tail
|
|
for i := 0; i < FreshTailCount; i++ {
|
|
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 10)
|
|
s.AppendContextMessage(ctx, convID, m.ID)
|
|
}
|
|
|
|
a := &Assembler{store: s}
|
|
input := AssembleInput{Budget: 5000} // Force eviction
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := a.Assemble(ctx, convID, input)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Search (FTS5) benchmarks ---
|
|
|
|
// benchSeedSummaries adds n summaries to a conversation for search benchmarks.
|
|
func benchSeedSummaries(b *testing.B, s *Store, convID int64, n int, contentTpl string) {
|
|
b.Helper()
|
|
now := time.Now().UTC()
|
|
for i := 0; i < n; i++ {
|
|
sum, err := s.CreateSummary(context.Background(), CreateSummaryInput{
|
|
ConversationID: convID,
|
|
Kind: SummaryKindLeaf,
|
|
Depth: 0,
|
|
Content: fmt.Sprintf(contentTpl, i),
|
|
TokenCount: 200,
|
|
EarliestAt: &now,
|
|
LatestAt: &now,
|
|
})
|
|
if err != nil {
|
|
b.Fatalf("create summary: %v", err)
|
|
}
|
|
s.AppendContextSummary(context.Background(), convID, sum.SummaryID)
|
|
}
|
|
}
|
|
|
|
func BenchmarkSearchSummaries_FTS5(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:search-fts")
|
|
convID := conv.ConversationID
|
|
|
|
benchSeedSummaries(b, s, convID, 100, "Summary about database configuration and API endpoints %d")
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := s.SearchSummaries(ctx, SearchInput{
|
|
Pattern: "database",
|
|
Mode: "full_text",
|
|
ConversationID: convID,
|
|
})
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkSearchSummaries_Like(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:search-like")
|
|
convID := conv.ConversationID
|
|
|
|
benchSeedSummaries(b, s, convID, 100, "Summary about configuration %d")
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := s.SearchSummaries(ctx, SearchInput{
|
|
Pattern: "config",
|
|
Mode: "like",
|
|
ConversationID: convID,
|
|
})
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkSearchMessages_FTS5(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
conv, _ := s.GetOrCreateConversation(ctx, "bench:search-msg-fts")
|
|
convID := conv.ConversationID
|
|
|
|
// Add 500 messages
|
|
for i := 0; i < 500; i++ {
|
|
m, _ := s.AddMessage(ctx, convID, "user",
|
|
fmt.Sprintf("User message about API and database integration %d", i), 20)
|
|
s.AppendContextMessage(ctx, convID, m.ID)
|
|
}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := s.SearchMessages(ctx, SearchInput{
|
|
Pattern: "API database",
|
|
Mode: "full_text",
|
|
ConversationID: convID,
|
|
})
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Bootstrap benchmarks ---
|
|
|
|
func BenchmarkBootstrap_Empty(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
conv, _ := s.GetOrCreateConversation(ctx, fmt.Sprintf("bench:bootstrap-empty:%d", i))
|
|
convID := conv.ConversationID
|
|
_ = convID // Bootstrap with empty history
|
|
}
|
|
}
|
|
|
|
func BenchmarkBootstrap_100Messages(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
|
|
// Prepare 100 messages
|
|
msgs := make([]Message, 100)
|
|
for i := 0; i < 100; i++ {
|
|
msgs[i] = Message{
|
|
Role: "user",
|
|
Content: fmt.Sprintf("Bootstrap message %d", i),
|
|
TokenCount: 15,
|
|
}
|
|
}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
conv, _ := s.GetOrCreateConversation(ctx, fmt.Sprintf("bench:bootstrap-100:%d", i))
|
|
convID := conv.ConversationID
|
|
|
|
for _, m := range msgs {
|
|
added, _ := s.AddMessage(ctx, convID, m.Role, m.Content, m.TokenCount)
|
|
s.AppendContextMessage(ctx, convID, added.ID)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkBootstrap_500Messages(b *testing.B) {
|
|
s, cleanup := newBenchStore(b)
|
|
defer cleanup()
|
|
ctx := context.Background()
|
|
|
|
msgs := make([]Message, 500)
|
|
for i := 0; i < 500; i++ {
|
|
msgs[i] = Message{
|
|
Role: "user",
|
|
Content: fmt.Sprintf("Bootstrap message %d", i),
|
|
TokenCount: 15,
|
|
}
|
|
}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
conv, _ := s.GetOrCreateConversation(ctx, fmt.Sprintf("bench:bootstrap-500:%d", i))
|
|
convID := conv.ConversationID
|
|
|
|
for _, m := range msgs {
|
|
added, _ := s.AddMessage(ctx, convID, m.Role, m.Content, m.TokenCount)
|
|
s.AppendContextMessage(ctx, convID, added.ID)
|
|
}
|
|
}
|
|
}
|