mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
15a70ac45c
* feat(seahorse): implement short-term memory engine of seahorse Add pkg/seahorse/ module implementing a SQLite-backed DAG-based summary hierarchy for context management, ported from lossless-claw's LCM design: - types.go + short_constants.go: core types (Message, Summary, Conversation, ContextItem) and configuration constants (fanout, token targets, thresholds) - migration.go: idempotent DB schema with FTS5 trigram tokenizer for CJK - store.go: full SQLite CRUD (conversations, messages, summaries DAG, context_items with ordinal gap numbering, FTS5 search) - short_engine.go: Engine lifecycle (NewEngine, Ingest, Assemble, Compact), session pattern filtering (ignore/stateless glob→regex compilation), per-session mutex via sync.Map - short_assembler.go: budget-aware context assembly with fresh tail protection (32 messages), oldest-first eviction, summary XML formatting, RebuildContextItems - short_compaction.go: leaf compaction (messages→summary) and condensed compaction (summaries→higher-level summary), 3-level LLM escalation, CompactUntilUnder for emergency overflow - short_retrieval.go: lookupByID, FTS5/LIKE search, recursive expand with token cap - context_seahorse.go: agent.ContextManager adapter, registered as "seahorse", provider↔seahorse message type conversion (ToolCalls, tool_result) * fix(seahorse): correct 3 adapter bugs in context management - TokenCount: use full message (Content+ToolCalls+Media) instead of Content-only - Empty Content: rebuild Content from tool_result Parts when stored empty - Duplicate summaries: summaries only in Summary field, not in History messages - Grep: fix SearchResult.Snippet→Content for summaries - Schema: fix FTS5 SQL uses VIRTUAL TABLE not TEMP TABLE - TestFTS5SQLConstants: verify FTS5 SQL syntax correctness - Test: fix flaky TestCompactLeaf * fix(agent): ingest steering messages into seahorse SQLite Steering messages were only persisted to session JSONL but not ingested into seahorse SQLite, causing them to be missing from context assembly. Added `ts.ingestMessage(turnCtx, al, pm)` call in the steering message injection block alongside the existing JSONL persistence. Test: TestSeahorseSteeringMessageIngested verifies steering messages appear in seahorse SQLite DB after being processed. * fix(seahorse): address 3 blocking bugs from code review - Fix resequenceContextItemsTx scan error handling (store.go:850) Changed `return err` to `return scanErr` to properly propagate scan errors instead of returning nil (which silently corrupts data) - Fix sql.NullString for INTEGER column (store.go:847) Changed `mid` from sql.NullString to sql.NullInt64 since message_id is INTEGER in schema. Removed unnecessary strconv.ParseInt call. - Fix compactCondensed fallback deleting non-candidate items Added ReplaceContextItemsWithSummary method for per-item deletion when candidates are not contiguous in ordinal space. Optimized to use range deletion when candidates are consecutive. * fix(seahorse): pass Budget to Compact for correct condensed threshold Issue #4 from PR review: When Budget was not passed to seahorse.Compact, it defaulted to `tokensBefore * 0.75`, making `tokensBefore > budget` always true and causing condensed compaction to trigger unnecessarily. Changes: - context_seahorse.go: Forward Budget from CompactRequest to CompactInput - loop.go: Pass Budget (ContextWindow) in all 3 Compact calls - Add test verifying condensed is skipped when tokens < threshold - Fix lint issues in store.go and store_test.go * fix(seahorse): add mutex for assembler lazy initialization Issue #5 from PR review: The check-then-create pattern for e.assembler was a data race when multiple goroutines called Assemble() concurrently: if e.assembler == nil { e.assembler = &Assembler{...} } Changes: - Add assemblerMu sync.Mutex to Engine struct - Add initAssemblerOnce() using double-checked locking (same pattern as initCompactionOnce) - Add TestAssemblerLazyInitRace to verify thread-safety * fix(seahorse): handle non-consecutive depths in selectShallowestCondensationCandidate Issue #8 from PR review: the loop iterated depth 0, 1, 2... assuming consecutive keys, but break when key was missing caused deeper depths to never be checked. Fix: collect all existing depth keys, sort, then iterate in order. * fix(seahorse): wrap DeleteMessagesAfterID and appendContextItems in transactions - DeleteMessagesAfterID: wrap all DELETE operations in a transaction for atomicity, remove redundant manual FTS delete (handled by trigger) - appendContextItems: use transaction to fix read-then-write race condition - Add GetMaxOrdinalTx and resolveItemTokenCountTx for transaction-scoped queries - Remove unused resolveItemTokenCount function Fixes PR review issues 6 and 7. * fix(seahorse): derive readable content from Parts and cap CompactUntilUnder iterations - Derive readable content from MessageParts in AddMessageWithParts so FTS5 indexing and summary formatting can access tool call information - formatMessagesForSummary and truncateSummary now fall back to Parts when Content is empty, fixing blank summaries for Part-based messages - Add MaxCompactIterations (20) to prevent CompactUntilUnder infinite loops; exceeded iterations are logged as warnings
975 lines
29 KiB
Go
975 lines
29 KiB
Go
package seahorse
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"sync"
|
||
"sync/atomic"
|
||
"testing"
|
||
"time"
|
||
)
|
||
|
||
// --- Test Helpers ---
|
||
|
||
// waitForCondensed blocks until the async condensed goroutine for convID finishes.
|
||
// Returns false if timeout is reached.
|
||
func waitForCondensed(ce *CompactionEngine, convID int64, timeout time.Duration) bool {
|
||
deadline := time.Now().Add(timeout)
|
||
for time.Now().Before(deadline) {
|
||
if _, exists := ce.condensing.Load(convID); !exists {
|
||
return true
|
||
}
|
||
time.Sleep(50 * time.Millisecond)
|
||
}
|
||
return false
|
||
}
|
||
|
||
// --- Compaction Tests ---
|
||
|
||
func newTestCompactionEngine(t *testing.T) (*CompactionEngine, *Store, int64) {
|
||
t.Helper()
|
||
db := openTestDB(t)
|
||
if err := runSchema(db); err != nil {
|
||
t.Fatalf("migration: %v", err)
|
||
}
|
||
s := &Store{db: db}
|
||
ctx := context.Background()
|
||
conv, _ := s.GetOrCreateConversation(ctx, "test:compact")
|
||
shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
|
||
ce := &CompactionEngine{
|
||
store: s,
|
||
config: Config{},
|
||
complete: mockCompleteFn,
|
||
shutdownCtx: shutdownCtx,
|
||
shutdownCancel: shutdownCancel,
|
||
}
|
||
convID := conv.ConversationID
|
||
// Ensure async goroutines are stopped before database is closed.
|
||
// Register cleanup here (after openTestDB) so it runs BEFORE openTestDB's db.Close().
|
||
t.Cleanup(func() {
|
||
shutdownCancel()
|
||
// Wait for async condensed goroutine to finish (poll condensing map)
|
||
deadline := time.Now().Add(2 * time.Second)
|
||
for time.Now().Before(deadline) {
|
||
if _, exists := ce.condensing.Load(convID); !exists {
|
||
break
|
||
}
|
||
time.Sleep(50 * time.Millisecond)
|
||
}
|
||
})
|
||
return ce, s, conv.ConversationID
|
||
}
|
||
|
||
// newTestCompactionEngineWithStore creates a CompactionEngine with existing store.
|
||
// Note: Caller is responsible for calling shutdownCancel when test ends.
|
||
func newTestCompactionEngineWithStore(
|
||
s *Store, complete CompleteFn,
|
||
) (ce *CompactionEngine, shutdownCancel context.CancelFunc) {
|
||
shutdownCtx, cancel := context.WithCancel(context.Background())
|
||
return &CompactionEngine{
|
||
store: s,
|
||
config: Config{},
|
||
complete: complete,
|
||
shutdownCtx: shutdownCtx,
|
||
shutdownCancel: cancel,
|
||
}, cancel
|
||
}
|
||
|
||
// mockCompleteFn returns a simple summary for testing
|
||
var mockCompleteFn CompleteFn = func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
return "Mock summary of the conversation segment.", nil
|
||
}
|
||
|
||
func TestNeedsCompaction(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Empty context — no compaction needed
|
||
needed, err := ce.NeedsCompaction(ctx, convID, 10000)
|
||
if err != nil {
|
||
t.Fatalf("NeedsCompaction: %v", err)
|
||
}
|
||
if needed {
|
||
t.Error("expected no compaction for empty context")
|
||
}
|
||
|
||
// Add messages to context, total tokens = 8000
|
||
for i := 0; i < 8; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "test message content", 1000)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Threshold = 0.75 × 10000 = 7500. We have 8000 tokens → needs compaction
|
||
needed, err = ce.NeedsCompaction(ctx, convID, 10000)
|
||
if err != nil {
|
||
t.Fatalf("NeedsCompaction: %v", err)
|
||
}
|
||
if !needed {
|
||
t.Error("expected compaction needed at 8000/10000 tokens (threshold 75%)")
|
||
}
|
||
|
||
// Below threshold: 5000 / 10000 → no compaction
|
||
s.UpsertContextItems(ctx, convID, nil) // clear
|
||
for i := 0; i < 5; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "test", 1000)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
needed, _ = ce.NeedsCompaction(ctx, convID, 10000)
|
||
if needed {
|
||
t.Error("expected no compaction at 5000/10000 tokens")
|
||
}
|
||
}
|
||
|
||
func TestCompactLeaf(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create enough messages to trigger leaf compaction:
|
||
// Need > FreshTailCount(32) evictable messages with >= LeafMinFanout(8) contiguous
|
||
for i := 0; i < 40; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "message content for compaction test", 100)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Compact
|
||
result, err := ce.Compact(ctx, convID, CompactInput{})
|
||
if err != nil {
|
||
t.Fatalf("Compact: %v", err)
|
||
}
|
||
if result == nil {
|
||
t.Fatal("expected non-nil result")
|
||
}
|
||
|
||
// Should have created at least one leaf summary
|
||
if result.LeafSummaries == 0 {
|
||
t.Error("expected at least 1 leaf summary")
|
||
}
|
||
|
||
// Context should now contain a summary item
|
||
items, _ := s.GetContextItems(ctx, convID)
|
||
foundSummary := false
|
||
for _, item := range items {
|
||
if item.ItemType == "summary" {
|
||
foundSummary = true
|
||
break
|
||
}
|
||
}
|
||
if !foundSummary {
|
||
t.Error("expected a summary in context_items after leaf compaction")
|
||
}
|
||
|
||
// Some messages should have been replaced
|
||
if len(result.SummariesCreated) == 0 {
|
||
t.Error("expected at least 1 summary created")
|
||
}
|
||
}
|
||
|
||
func TestCompactLeafNoCandidate(t *testing.T) {
|
||
ce, _, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Too few messages to trigger leaf compaction
|
||
m, _ := ce.store.AddMessage(ctx, convID, "user", "short", 10)
|
||
ce.store.AppendContextMessage(ctx, convID, m.ID)
|
||
|
||
result, err := ce.Compact(ctx, convID, CompactInput{})
|
||
if err != nil {
|
||
t.Fatalf("Compact: %v", err)
|
||
}
|
||
if result == nil {
|
||
t.Fatal("expected non-nil result even with no candidate")
|
||
}
|
||
if result.LeafSummaries != 0 {
|
||
t.Errorf("LeafSummaries = %d, want 0 (too few messages)", result.LeafSummaries)
|
||
}
|
||
}
|
||
|
||
func TestCompactCondensed(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create enough leaf summaries and fresh messages to enable condensation
|
||
leafIDs := make([]string, CondensedMinFanout)
|
||
for i := 0; i < CondensedMinFanout; i++ {
|
||
now := time.Now().UTC()
|
||
summary, err := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: "leaf summary content " + time.Now().String(),
|
||
TokenCount: 500,
|
||
EarliestAt: &now,
|
||
LatestAt: &now,
|
||
})
|
||
if err != nil {
|
||
t.Fatalf("CreateSummary %d: %v", i, err)
|
||
}
|
||
leafIDs[i] = summary.SummaryID
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
|
||
// Add enough fresh messages to have a fresh tail (>= FreshTailCount)
|
||
for i := 0; i < FreshTailCount; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh message", 10)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Compact with force to trigger condensation
|
||
_, err := ce.Compact(ctx, convID, CompactInput{Force: true})
|
||
if err != nil {
|
||
t.Fatalf("Compact: %v", err)
|
||
}
|
||
|
||
// Wait for async condensed goroutine to complete
|
||
if !waitForCondensed(ce, convID, 2*time.Second) {
|
||
t.Fatal("timeout waiting for condensed compaction")
|
||
}
|
||
|
||
// Should have created a condensed summary in the DB
|
||
summaries, _ := s.GetSummariesByConversation(ctx, convID)
|
||
foundCondensed := false
|
||
for _, sum := range summaries {
|
||
if sum.Kind == SummaryKindCondensed {
|
||
foundCondensed = true
|
||
break
|
||
}
|
||
}
|
||
if !foundCondensed {
|
||
t.Error("expected at least 1 condensed summary")
|
||
}
|
||
}
|
||
|
||
func TestCompactCondensedDoesNotOrphanSummaryWhenCandidatesRemovedConcurrently(t *testing.T) {
|
||
// Reproduce orphan bug: candidates found by selectOldestChunkAtDepth are removed
|
||
// from context_items between candidate selection and ordinal range scan.
|
||
// Use a slow CompleteFn with barrier sync to control timing.
|
||
s := openTestStore(t)
|
||
ctx := context.Background()
|
||
conv, _ := s.GetOrCreateConversation(ctx, "test:orphan-race")
|
||
convID := conv.ConversationID
|
||
|
||
// Create leaf summaries with enough tokens for condensation
|
||
var leafIDs []string
|
||
for i := 0; i < CondensedMinFanout; i++ {
|
||
now := time.Now().UTC()
|
||
sum, err := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf("leaf summary %d", i),
|
||
TokenCount: 500,
|
||
EarliestAt: &now,
|
||
LatestAt: &now,
|
||
})
|
||
if err != nil {
|
||
t.Fatalf("CreateSummary: %v", err)
|
||
}
|
||
leafIDs = append(leafIDs, sum.SummaryID)
|
||
s.AppendContextSummary(ctx, convID, sum.SummaryID)
|
||
}
|
||
|
||
// Add fresh tail so leaf summaries are in evictable range
|
||
for i := 0; i < FreshTailCount+1; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 10)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Barrier: CompleteFn waits until test removes context_items, then returns
|
||
var barrier1, barrier2 sync.WaitGroup
|
||
barrier1.Add(1) // CompleteFn signals when called
|
||
barrier2.Add(1) // test signals when context_items removed
|
||
|
||
slowComplete := func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
barrier1.Done() // signal: LLM called, candidates selected
|
||
barrier2.Wait() // wait: test removes context_items
|
||
return "Condensed summary.", nil
|
||
}
|
||
|
||
ce, cancel := newTestCompactionEngineWithStore(s, slowComplete)
|
||
t.Cleanup(func() {
|
||
cancel()
|
||
time.Sleep(100 * time.Millisecond)
|
||
})
|
||
|
||
// Run compactCondensed in background
|
||
type compactResult struct {
|
||
summaryID *string
|
||
err error
|
||
}
|
||
resultCh := make(chan compactResult, 1)
|
||
go func() {
|
||
sid, err := ce.compactCondensed(context.Background(), convID)
|
||
resultCh <- compactResult{summaryID: sid, err: err}
|
||
}()
|
||
|
||
// Wait for CompleteFn to be called (candidates selected)
|
||
barrier1.Wait()
|
||
|
||
// Remove leaf summaries from context_items (simulating concurrent replacement)
|
||
items, _ := s.GetContextItems(ctx, convID)
|
||
var preserved []ContextItem
|
||
for _, item := range items {
|
||
isLeaf := false
|
||
for _, lid := range leafIDs {
|
||
if item.SummaryID == lid {
|
||
isLeaf = true
|
||
break
|
||
}
|
||
}
|
||
if !isLeaf {
|
||
preserved = append(preserved, item)
|
||
}
|
||
}
|
||
s.UpsertContextItems(ctx, convID, preserved)
|
||
|
||
// Let CompleteFn return
|
||
barrier2.Done()
|
||
|
||
// Get result
|
||
res := <-resultCh
|
||
if res.err != nil {
|
||
t.Fatalf("compactCondensed: %v", res.err)
|
||
}
|
||
|
||
// With the bug: returns non-nil summaryID even though context_items has no matching ordinals
|
||
// The fix: should return nil when startOrd == -1
|
||
if res.summaryID != nil {
|
||
t.Errorf("compactCondensed returned summaryID=%s, want nil (orphan created)", *res.summaryID)
|
||
|
||
// Verify the orphan exists in DB
|
||
summary, _ := s.GetSummary(context.Background(), *res.summaryID)
|
||
if summary != nil && summary.Kind == SummaryKindCondensed {
|
||
// Check it's NOT in context_items (orphan)
|
||
items2, _ := s.GetContextItems(context.Background(), convID)
|
||
found := false
|
||
for _, item := range items2 {
|
||
if item.SummaryID == *res.summaryID {
|
||
found = true
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
t.Error("condensed summary exists in DB but not in context_items — orphan confirmed")
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestCompactUntilUnder(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create many leaf summaries to ensure we can condense
|
||
for i := 0; i < 8; i++ {
|
||
now := time.Now().UTC()
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: "leaf summary for condensation test",
|
||
TokenCount: 500,
|
||
EarliestAt: &now,
|
||
LatestAt: &now,
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
|
||
// Force compact until under budget
|
||
result, err := ce.CompactUntilUnder(ctx, convID, 2000)
|
||
if err != nil {
|
||
t.Fatalf("CompactUntilUnder: %v", err)
|
||
}
|
||
|
||
if result == nil {
|
||
t.Fatal("expected non-nil result")
|
||
}
|
||
}
|
||
|
||
func TestSelectShallowestCondensationCandidate(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create enough leaf summaries + fresh messages for candidates
|
||
for i := 0; i < LeafMinFanout; i++ {
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: "leaf",
|
||
TokenCount: 100,
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
|
||
// Add fresh tail messages so summaries are in evictable range
|
||
for i := 0; i < FreshTailCount+1; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 5)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
candidates, err := ce.selectShallowestCondensationCandidate(ctx, convID, false)
|
||
if err != nil {
|
||
t.Fatalf("selectShallowestCondensationCandidate: %v", err)
|
||
}
|
||
|
||
// Should find leaf summaries at depth 0
|
||
if len(candidates) < CondensedMinFanout {
|
||
t.Errorf("candidates = %d, want >= %d", len(candidates), CondensedMinFanout)
|
||
}
|
||
}
|
||
|
||
func TestSelectShallowestCondensationCandidateEmpty(t *testing.T) {
|
||
ce, _, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
candidates, err := ce.selectShallowestCondensationCandidate(ctx, convID, false)
|
||
if err != nil {
|
||
t.Fatalf("selectShallowestCondensationCandidate: %v", err)
|
||
}
|
||
if len(candidates) != 0 {
|
||
t.Errorf("candidates = %d, want 0 for empty context", len(candidates))
|
||
}
|
||
}
|
||
|
||
func TestCompactCondensedUsesSelectOldestChunk(t *testing.T) {
|
||
// Verify that compactCondensed prefers ordinal-ordered chunks via selectOldestChunkAtDepth
|
||
// rather than just grouping by depth without regard to order
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create interleaved summaries at depth 0 with a message in between:
|
||
// sum1 (ordinal 100), msg (ordinal 200), sum2 (ordinal 300)
|
||
|
||
for i := 0; i < LeafMinFanout+2; i++ {
|
||
now := time.Now().UTC()
|
||
|
||
s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf("leaf summary %d", i),
|
||
TokenCount: 100,
|
||
EarliestAt: &now,
|
||
LatestAt: &now,
|
||
})
|
||
}
|
||
|
||
// Insert a message between first two summaries to break contiguity
|
||
// for selectShallowestCondensationCandidate but would still find all 3
|
||
// but selectOldestChunkAtDepth should only find sum1 + sum2 (not sum3)
|
||
|
||
msg, _ := s.AddMessage(ctx, convID, "user", "interrupting message", 5)
|
||
s.AppendContextMessage(ctx, convID, msg.ID)
|
||
|
||
// Run compactCondensed
|
||
result, err := ce.compactCondensed(ctx, convID)
|
||
if err != nil {
|
||
t.Fatalf("compactCondensed: %v", err)
|
||
}
|
||
|
||
// The result should have merged the two summaries at the start
|
||
// (skipping the message in between), This proves ordinal-aware selection works.
|
||
|
||
_ = result // verify summary was created
|
||
|
||
if result != nil {
|
||
summaries, _ := s.GetSummariesByConversation(ctx, convID)
|
||
found := false
|
||
for _, sum := range summaries {
|
||
if sum.Kind == SummaryKindCondensed {
|
||
found = true
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
t.Error("expected condensed summary to be created via ordinal-aware selection")
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestCompactCondensedUsesOrdinalAwareSelection(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create leaf summaries at depth 0 (total tokens >= CondensedTargetTokens)
|
||
for i := 0; i < 5; i++ {
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf("leaf summary %d", i),
|
||
TokenCount: 500, // 5 × 500 = 2500 >= CondensedTargetTokens (2000)
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
|
||
// Add fresh tail
|
||
for i := 0; i < FreshTailCount+1; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 5)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
chunk, err := ce.selectOldestChunkAtDepth(ctx, convID, 0)
|
||
if err != nil {
|
||
t.Fatalf("selectOldestChunkAtDepth: %v", err)
|
||
}
|
||
if len(chunk) < 2 {
|
||
t.Errorf("chunk length = %d, want >= 2 contiguous summaries", len(chunk))
|
||
}
|
||
for _, s := range chunk {
|
||
if s.Depth != 0 {
|
||
t.Errorf("got depth %d, want 0", s.Depth)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestSelectOldestChunkAtDepthBreaksOnMessage(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create 3 summaries, then a message, then 3 more summaries
|
||
for i := 0; i < 3; i++ {
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf("leaf %d", i),
|
||
TokenCount: 100,
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
msg, _ := s.AddMessage(ctx, convID, "user", "break", 10)
|
||
s.AppendContextMessage(ctx, convID, msg.ID)
|
||
for i := 0; i < 3; i++ {
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf("leaf-after %d", i),
|
||
TokenCount: 100,
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
for i := 0; i < FreshTailCount+1; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 5)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
chunk, _ := ce.selectOldestChunkAtDepth(ctx, convID, 0)
|
||
if len(chunk) > 3 {
|
||
t.Errorf("chunk length = %d, want <= 3 (message breaks chain)", len(chunk))
|
||
}
|
||
}
|
||
|
||
func TestSelectOldestChunkAtDepthMinTokens(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create summaries with very low token counts (total < 2000)
|
||
for i := 0; i < 5; i++ {
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf("tiny summary %d", i),
|
||
TokenCount: 50, // very small
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
|
||
// Add fresh tail to protect from compaction
|
||
for i := 0; i < FreshTailCount+1; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", fmt.Sprintf("tail %d", i), 10)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Should return nil because total tokens (250) < 2000 minimum
|
||
chunk, err := ce.selectOldestChunkAtDepth(ctx, convID, 0)
|
||
if err != nil {
|
||
t.Fatalf("selectOldestChunkAtDepth: %v", err)
|
||
}
|
||
if len(chunk) > 0 {
|
||
t.Errorf("expected empty chunk when tokens < 2000, got %d summaries", len(chunk))
|
||
}
|
||
}
|
||
|
||
func TestSelectOldestChunkAtDepthPassesMinTokens(t *testing.T) {
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create summaries with enough tokens (total >= 2000)
|
||
for i := 0; i < 5; i++ {
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: fmt.Sprintf(
|
||
"substantial summary with enough content to meet minimum token threshold for condensation candidate %d",
|
||
i,
|
||
),
|
||
TokenCount: 500, // 5 × 500 = 2500 >= 2000
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
|
||
// Add fresh tail
|
||
for i := 0; i < FreshTailCount+1; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", fmt.Sprintf("tail %d", i), 10)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Should return chunk because total tokens (2500) >= 2000
|
||
chunk, err := ce.selectOldestChunkAtDepth(ctx, convID, 0)
|
||
if err != nil {
|
||
t.Fatalf("selectOldestChunkAtDepth: %v", err)
|
||
}
|
||
if len(chunk) == 0 {
|
||
t.Error("expected non-empty chunk when tokens >= 2000")
|
||
}
|
||
}
|
||
|
||
func TestGenerateLeafSummary(t *testing.T) {
|
||
ce, _, _ := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
msgs := []Message{
|
||
{Role: "user", Content: "hello world", TokenCount: 5},
|
||
{Role: "assistant", Content: "hi there", TokenCount: 5},
|
||
}
|
||
|
||
content, err := ce.generateLeafSummary(ctx, msgs, "")
|
||
if err != nil {
|
||
t.Fatalf("generateLeafSummary: %v", err)
|
||
}
|
||
if content == "" {
|
||
t.Error("expected non-empty summary content")
|
||
}
|
||
}
|
||
|
||
func TestGenerateLeafSummaryEscalationToAggressive(t *testing.T) {
|
||
// Level 1 returns summary that's too large (tokens >= input), should escalate to level 2
|
||
var calls []string
|
||
escalateComplete := func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
if contains(prompt, "Aggressive summary policy") {
|
||
calls = append(calls, "aggressive")
|
||
return "Short aggressive summary.", nil
|
||
}
|
||
calls = append(calls, "normal")
|
||
// Return a very long summary to trigger escalation
|
||
longContent := make([]byte, 5000)
|
||
for i := range longContent {
|
||
longContent[i] = 'x'
|
||
}
|
||
return string(longContent), nil
|
||
}
|
||
|
||
s := openTestStore(t)
|
||
ce, _ := newTestCompactionEngineWithStore(s, escalateComplete)
|
||
|
||
msgs := []Message{
|
||
{Role: "user", Content: "hello world", TokenCount: 10},
|
||
{Role: "assistant", Content: "response", TokenCount: 10},
|
||
}
|
||
|
||
content, err := ce.generateLeafSummary(context.Background(), msgs, "")
|
||
if err != nil {
|
||
t.Fatalf("generateLeafSummary: %v", err)
|
||
}
|
||
if content == "" {
|
||
t.Error("expected non-empty summary content")
|
||
}
|
||
// Should have called both normal and aggressive
|
||
foundNormal := false
|
||
foundAggressive := false
|
||
for _, c := range calls {
|
||
if c == "normal" {
|
||
foundNormal = true
|
||
}
|
||
if c == "aggressive" {
|
||
foundAggressive = true
|
||
}
|
||
}
|
||
if !foundNormal {
|
||
t.Error("expected normal LLM call")
|
||
}
|
||
if !foundAggressive {
|
||
t.Error("expected aggressive LLM call (level 2 escalation)")
|
||
}
|
||
}
|
||
|
||
func TestGenerateLeafSummaryEscalationToTruncation(t *testing.T) {
|
||
// Both normal and aggressive return empty, should escalate to level 3 truncation
|
||
emptyComplete := func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
return "", nil
|
||
}
|
||
|
||
s := openTestStore(t)
|
||
ce, _ := newTestCompactionEngineWithStore(s, emptyComplete)
|
||
|
||
msgs := []Message{
|
||
{Role: "user", Content: "hello world from test", TokenCount: 10},
|
||
{Role: "assistant", Content: "response text here", TokenCount: 10},
|
||
}
|
||
|
||
content, err := ce.generateLeafSummary(context.Background(), msgs, "")
|
||
if err != nil {
|
||
t.Fatalf("generateLeafSummary: %v", err)
|
||
}
|
||
// Level 3 truncation should have produced something
|
||
if content == "" {
|
||
t.Error("expected non-empty content from level 3 truncation fallback")
|
||
}
|
||
if !contains(content, "Truncated from") {
|
||
t.Errorf("expected truncation marker in content: %q", content)
|
||
}
|
||
}
|
||
|
||
func TestGenerateCondensedSummary(t *testing.T) {
|
||
ce, _, _ := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
summaries := []Summary{
|
||
{SummaryID: "sum_a", Content: "first summary", TokenCount: 100},
|
||
{SummaryID: "sum_b", Content: "second summary", TokenCount: 100},
|
||
}
|
||
|
||
content, err := ce.generateCondensedSummary(ctx, summaries)
|
||
if err != nil {
|
||
t.Fatalf("generateCondensedSummary: %v", err)
|
||
}
|
||
if content == "" {
|
||
t.Error("expected non-empty condensed summary content")
|
||
}
|
||
}
|
||
|
||
func TestGenerateCondensedSummaryEscalation(t *testing.T) {
|
||
// When LLM returns empty, should fall back to deterministic concatenation
|
||
emptyComplete := func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
return "", nil
|
||
}
|
||
|
||
s := openTestStore(t)
|
||
ce, _ := newTestCompactionEngineWithStore(s, emptyComplete)
|
||
|
||
summaries := []Summary{
|
||
{SummaryID: "sum_a", Content: "first summary text", TokenCount: 50},
|
||
{SummaryID: "sum_b", Content: "second summary text", TokenCount: 50},
|
||
}
|
||
|
||
content, err := ce.generateCondensedSummary(context.Background(), summaries)
|
||
if err != nil {
|
||
t.Fatalf("generateCondensedSummary: %v", err)
|
||
}
|
||
// Should fall back to concatenation
|
||
if content == "" {
|
||
t.Error("expected non-empty content from fallback")
|
||
}
|
||
}
|
||
|
||
// --- Async Condensed Compaction (Phase 2) ---
|
||
|
||
func TestCompactAsyncReturnsBeforeCondensed(t *testing.T) {
|
||
// Use a slow CompleteFn to verify Compact returns before condensed finishes
|
||
var callCount int32
|
||
slowComplete := func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
atomic.AddInt32(&callCount, 1)
|
||
time.Sleep(500 * time.Millisecond) // simulate slow LLM
|
||
return "Slow condensed summary.", nil
|
||
}
|
||
|
||
s := openTestStore(t)
|
||
ctx := context.Background()
|
||
conv, _ := s.GetOrCreateConversation(ctx, "test:async")
|
||
convID := conv.ConversationID
|
||
|
||
ce, cancel := newTestCompactionEngineWithStore(s, slowComplete)
|
||
t.Cleanup(func() {
|
||
cancel()
|
||
time.Sleep(100 * time.Millisecond)
|
||
})
|
||
|
||
// Create enough leaf summaries for condensation + fresh tail
|
||
for i := 0; i < CondensedMinFanout; i++ {
|
||
now := time.Now().UTC()
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: "leaf for async test",
|
||
TokenCount: 500,
|
||
EarliestAt: &now,
|
||
LatestAt: &now,
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
for i := 0; i < FreshTailCount; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 10)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Compact with force — should return quickly, condensed runs async
|
||
start := time.Now()
|
||
result, err := ce.Compact(ctx, convID, CompactInput{Force: true})
|
||
elapsed := time.Since(start)
|
||
|
||
if err != nil {
|
||
t.Fatalf("Compact: %v", err)
|
||
}
|
||
if result == nil {
|
||
t.Fatal("expected non-nil result")
|
||
}
|
||
|
||
// Should return well before the 500ms LLM call
|
||
if elapsed > 200*time.Millisecond {
|
||
t.Errorf("Compact took %v, should return before async condensed finishes", elapsed)
|
||
}
|
||
|
||
// Wait for async to complete
|
||
time.Sleep(800 * time.Millisecond)
|
||
|
||
// Verify condensed summary was created by background goroutine
|
||
summaries, _ := s.GetSummariesByConversation(ctx, convID)
|
||
foundCondensed := false
|
||
for _, sum := range summaries {
|
||
if sum.Kind == SummaryKindCondensed {
|
||
foundCondensed = true
|
||
break
|
||
}
|
||
}
|
||
if !foundCondensed {
|
||
t.Error("expected at least one condensed summary from async Phase 2")
|
||
}
|
||
}
|
||
|
||
func TestCompactAsyncDedup(t *testing.T) {
|
||
var callCount int32
|
||
slowComplete := func(ctx context.Context, prompt string, opts CompleteOptions) (string, error) {
|
||
atomic.AddInt32(&callCount, 1)
|
||
time.Sleep(300 * time.Millisecond)
|
||
return "Slow condensed summary.", nil
|
||
}
|
||
|
||
s := openTestStore(t)
|
||
ctx := context.Background()
|
||
conv, _ := s.GetOrCreateConversation(ctx, "test:dedup")
|
||
convID := conv.ConversationID
|
||
|
||
ce, cancel := newTestCompactionEngineWithStore(s, slowComplete)
|
||
t.Cleanup(func() {
|
||
cancel()
|
||
waitForCondensed(ce, convID, 2*time.Second)
|
||
})
|
||
|
||
// Create conditions for condensed compaction
|
||
for i := 0; i < CondensedMinFanout; i++ {
|
||
now := time.Now().UTC()
|
||
summary, _ := s.CreateSummary(ctx, CreateSummaryInput{
|
||
ConversationID: convID,
|
||
Kind: SummaryKindLeaf,
|
||
Depth: 0,
|
||
Content: "leaf for dedup",
|
||
TokenCount: 500,
|
||
EarliestAt: &now,
|
||
LatestAt: &now,
|
||
})
|
||
s.AppendContextSummary(ctx, convID, summary.SummaryID)
|
||
}
|
||
for i := 0; i < FreshTailCount; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", "fresh", 10)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Call Compact twice rapidly
|
||
ce.Compact(ctx, convID, CompactInput{Force: true})
|
||
ce.Compact(ctx, convID, CompactInput{Force: true})
|
||
|
||
// Wait for async to finish
|
||
time.Sleep(600 * time.Millisecond)
|
||
|
||
// LLM should only be called once for condensed (dedup)
|
||
// callCount may be 0 if no leaf was created (only condensed in goroutine)
|
||
// The key is that we don't get 2+ condensed calls
|
||
if atomic.LoadInt32(&callCount) > 1 {
|
||
t.Errorf("LLM called %d times, expected at most 1 (dedup)", callCount)
|
||
}
|
||
}
|
||
|
||
func TestCompactLeafForceBypassesFreshTail(t *testing.T) {
|
||
// Spec: compactLeaf with force=true should bypass FreshTailCount protection
|
||
// so CompactUntilUnder can compress messages inside the fresh tail
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create exactly FreshTailCount+4 messages (36 total)
|
||
// Without force: all messages are in fresh tail → no candidate
|
||
// With force: should compact the oldest messages
|
||
total := FreshTailCount + 4
|
||
for i := 0; i < total; i++ {
|
||
m, _ := s.AddMessage(ctx, convID, "user", fmt.Sprintf("message %d for force test", i), 100)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
// Without force: should return nil (all in fresh tail)
|
||
summaryID, err := ce.compactLeaf(ctx, convID)
|
||
if err != nil {
|
||
t.Fatalf("compactLeaf no-force: %v", err)
|
||
}
|
||
if summaryID != nil {
|
||
t.Error("expected nil without force (all messages in fresh tail)")
|
||
}
|
||
|
||
// With force: should compact despite fresh tail protection
|
||
summaryID, err = ce.compactLeaf(ctx, convID, true)
|
||
if err != nil {
|
||
t.Fatalf("compactLeaf force: %v", err)
|
||
}
|
||
if summaryID == nil {
|
||
t.Error("expected summary with force=true (bypasses fresh tail)")
|
||
}
|
||
}
|
||
|
||
func TestCompactLeafAccumulatesUpToLeafChunkTokens(t *testing.T) {
|
||
// Spec: compactLeaf should accumulate messages up to LeafChunkTokens before stopping
|
||
// It should NOT take the entire contiguous chunk regardless of token count
|
||
ce, s, convID := newTestCompactionEngine(t)
|
||
ctx := context.Background()
|
||
|
||
// Create messages totaling far more than LeafChunkTokens (20000)
|
||
// Each message is ~500 tokens, create 80 messages = 40000 tokens
|
||
for i := 0; i < 80; i++ {
|
||
m, _ := s.AddMessage(
|
||
ctx,
|
||
convID,
|
||
"user",
|
||
fmt.Sprintf(
|
||
"message %d with lots of content to make it big enough for token counting purposes and this should be a substantial message body that represents a meaningful conversation turn",
|
||
i,
|
||
),
|
||
500,
|
||
)
|
||
s.AppendContextMessage(ctx, convID, m.ID)
|
||
}
|
||
|
||
summaryID, err := ce.compactLeaf(ctx, convID)
|
||
if err != nil {
|
||
t.Fatalf("compactLeaf: %v", err)
|
||
}
|
||
if summaryID == nil {
|
||
t.Fatal("expected a summary to be created")
|
||
}
|
||
|
||
// The source messages that were compacted should total roughly LeafChunkTokens (20000),
|
||
// not the entire 40000 tokens worth of messages
|
||
summary, _ := s.GetSummary(ctx, *summaryID)
|
||
if summary == nil {
|
||
t.Fatal("summary not found")
|
||
}
|
||
|
||
// Source message tokens should be roughly <= LeafChunkTokens (20000)
|
||
// Spec says: "Stop when accumulated tokens >= LeafChunkTokens"
|
||
if summary.SourceMessageTokenCount > LeafChunkTokens {
|
||
t.Errorf("source tokens = %d, should be <= LeafChunkTokens (%d)",
|
||
summary.SourceMessageTokenCount, LeafChunkTokens)
|
||
}
|
||
}
|