mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-05-25 16:00:35 +00:00
15a70ac45c
* feat(seahorse): implement short-term memory engine of seahorse Add pkg/seahorse/ module implementing a SQLite-backed DAG-based summary hierarchy for context management, ported from lossless-claw's LCM design: - types.go + short_constants.go: core types (Message, Summary, Conversation, ContextItem) and configuration constants (fanout, token targets, thresholds) - migration.go: idempotent DB schema with FTS5 trigram tokenizer for CJK - store.go: full SQLite CRUD (conversations, messages, summaries DAG, context_items with ordinal gap numbering, FTS5 search) - short_engine.go: Engine lifecycle (NewEngine, Ingest, Assemble, Compact), session pattern filtering (ignore/stateless glob→regex compilation), per-session mutex via sync.Map - short_assembler.go: budget-aware context assembly with fresh tail protection (32 messages), oldest-first eviction, summary XML formatting, RebuildContextItems - short_compaction.go: leaf compaction (messages→summary) and condensed compaction (summaries→higher-level summary), 3-level LLM escalation, CompactUntilUnder for emergency overflow - short_retrieval.go: lookupByID, FTS5/LIKE search, recursive expand with token cap - context_seahorse.go: agent.ContextManager adapter, registered as "seahorse", provider↔seahorse message type conversion (ToolCalls, tool_result) * fix(seahorse): correct 3 adapter bugs in context management - TokenCount: use full message (Content+ToolCalls+Media) instead of Content-only - Empty Content: rebuild Content from tool_result Parts when stored empty - Duplicate summaries: summaries only in Summary field, not in History messages - Grep: fix SearchResult.Snippet→Content for summaries - Schema: fix FTS5 SQL uses VIRTUAL TABLE not TEMP TABLE - TestFTS5SQLConstants: verify FTS5 SQL syntax correctness - Test: fix flaky TestCompactLeaf * fix(agent): ingest steering messages into seahorse SQLite Steering messages were only persisted to session JSONL but not ingested into seahorse SQLite, causing them to be missing from 
context assembly. Added `ts.ingestMessage(turnCtx, al, pm)` call in the steering message injection block alongside the existing JSONL persistence. Test: TestSeahorseSteeringMessageIngested verifies steering messages appear in seahorse SQLite DB after being processed. * fix(seahorse): address 3 blocking bugs from code review - Fix resequenceContextItemsTx scan error handling (store.go:850) Changed `return err` to `return scanErr` to properly propagate scan errors instead of returning nil (which silently corrupts data) - Fix sql.NullString for INTEGER column (store.go:847) Changed `mid` from sql.NullString to sql.NullInt64 since message_id is INTEGER in schema. Removed unnecessary strconv.ParseInt call. - Fix compactCondensed fallback deleting non-candidate items Added ReplaceContextItemsWithSummary method for per-item deletion when candidates are not contiguous in ordinal space. Optimized to use range deletion when candidates are consecutive. * fix(seahorse): pass Budget to Compact for correct condensed threshold Issue #4 from PR review: When Budget was not passed to seahorse.Compact, it defaulted to `tokensBefore * 0.75`, making `tokensBefore > budget` always true and causing condensed compaction to trigger unnecessarily. 
Changes: - context_seahorse.go: Forward Budget from CompactRequest to CompactInput - loop.go: Pass Budget (ContextWindow) in all 3 Compact calls - Add test verifying condensed is skipped when tokens < threshold - Fix lint issues in store.go and store_test.go * fix(seahorse): add mutex for assembler lazy initialization Issue #5 from PR review: The check-then-create pattern for e.assembler was a data race when multiple goroutines called Assemble() concurrently: if e.assembler == nil { e.assembler = &Assembler{...} } Changes: - Add assemblerMu sync.Mutex to Engine struct - Add initAssemblerOnce() using double-checked locking (same pattern as initCompactionOnce) - Add TestAssemblerLazyInitRace to verify thread-safety * fix(seahorse): handle non-consecutive depths in selectShallowestCondensationCandidate Issue #8 from PR review: the loop iterated depth 0, 1, 2... assuming consecutive keys, but break when key was missing caused deeper depths to never be checked. Fix: collect all existing depth keys, sort, then iterate in order. * fix(seahorse): wrap DeleteMessagesAfterID and appendContextItems in transactions - DeleteMessagesAfterID: wrap all DELETE operations in a transaction for atomicity, remove redundant manual FTS delete (handled by trigger) - appendContextItems: use transaction to fix read-then-write race condition - Add GetMaxOrdinalTx and resolveItemTokenCountTx for transaction-scoped queries - Remove unused resolveItemTokenCount function Fixes PR review issues 6 and 7. * fix(seahorse): derive readable content from Parts and cap CompactUntilUnder iterations - Derive readable content from MessageParts in AddMessageWithParts so FTS5 indexing and summary formatting can access tool call information - formatMessagesForSummary and truncateSummary now fall back to Parts when Content is empty, fixing blank summaries for Part-based messages - Add MaxCompactIterations (20) to prevent CompactUntilUnder infinite loops; exceeded iterations are logged as warnings
186 lines
7.0 KiB
Go
186 lines
7.0 KiB
Go
package seahorse
|
|
|
|
import (
|
|
"database/sql"
|
|
"fmt"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/logger"
|
|
)
|
|
|
|
// SQL statements for the FTS5 virtual tables and for the probe tables used to
// detect FTS5 / trigram support. They are named constants so the exact
// statements can be referenced outside runSchema and checkFTS5Support.
const (
	// sqlCreateSummariesFTS creates the full-text index over summaries.
	// It is kept in sync with the summaries table by the triggers that
	// runSchema installs.
	sqlCreateSummariesFTS = `CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5(
		summary_id,
		content,
		tokenize="trigram"
	)`

	// sqlCreateMessagesFTS creates the full-text index over messages.
	// It is kept in sync with the messages table by the triggers that
	// runSchema installs.
	sqlCreateMessagesFTS = `CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
		message_id,
		content,
		tokenize="trigram"
	)`

	// sqlCheckFTS5Available creates a throwaway FTS5 table; success means the
	// FTS5 module is usable on this connection.
	sqlCheckFTS5Available = `CREATE VIRTUAL TABLE IF NOT EXISTS _fts5_check USING fts5(content)`

	// sqlCheckTrigramAvailable creates a throwaway FTS5 table that uses the
	// trigram tokenizer; success means the tokenizer is usable.
	sqlCheckTrigramAvailable = `CREATE VIRTUAL TABLE IF NOT EXISTS _trigram_check USING fts5(content, tokenize="trigram")`

	// sqlDropFTS5Check removes the table created by sqlCheckFTS5Available.
	sqlDropFTS5Check = `DROP TABLE IF EXISTS _fts5_check`

	// sqlDropTrigramCheck removes the table created by sqlCheckTrigramAvailable.
	sqlDropTrigramCheck = `DROP TABLE IF EXISTS _trigram_check`
)
|
|
|
|
// runSchema creates or upgrades the database schema.
|
|
// All schemas are idempotent (safe to run multiple times).
|
|
func runSchema(db *sql.DB) error {
|
|
// Check FTS5 support before creating tables
|
|
if err := checkFTS5Support(db); err != nil {
|
|
return fmt.Errorf("FTS5 check: %w", err)
|
|
}
|
|
|
|
stmts := []string{
|
|
`CREATE TABLE IF NOT EXISTS conversations (
|
|
conversation_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
session_key TEXT NOT NULL UNIQUE,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
)`,
|
|
|
|
`CREATE TABLE IF NOT EXISTS messages (
|
|
message_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
conversation_id INTEGER NOT NULL REFERENCES conversations(conversation_id),
|
|
role TEXT NOT NULL,
|
|
content TEXT NOT NULL DEFAULT '',
|
|
token_count INTEGER NOT NULL DEFAULT 0,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
)`,
|
|
|
|
`CREATE TABLE IF NOT EXISTS message_parts (
|
|
part_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
message_id INTEGER NOT NULL REFERENCES messages(message_id),
|
|
type TEXT NOT NULL,
|
|
text TEXT,
|
|
name TEXT,
|
|
arguments TEXT,
|
|
tool_call_id TEXT,
|
|
media_uri TEXT,
|
|
mime_type TEXT,
|
|
ordinal INTEGER NOT NULL DEFAULT 0
|
|
)`,
|
|
|
|
`CREATE TABLE IF NOT EXISTS summaries (
|
|
summary_id TEXT PRIMARY KEY,
|
|
conversation_id INTEGER NOT NULL REFERENCES conversations(conversation_id),
|
|
kind TEXT NOT NULL,
|
|
depth INTEGER NOT NULL DEFAULT 0,
|
|
content TEXT NOT NULL,
|
|
token_count INTEGER NOT NULL DEFAULT 0,
|
|
earliest_at TEXT,
|
|
latest_at TEXT,
|
|
descendant_count INTEGER NOT NULL DEFAULT 0,
|
|
descendant_token_count INTEGER NOT NULL DEFAULT 0,
|
|
source_message_token_count INTEGER NOT NULL DEFAULT 0,
|
|
model TEXT,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
)`,
|
|
|
|
`CREATE TABLE IF NOT EXISTS summary_parents (
|
|
summary_id TEXT NOT NULL,
|
|
parent_summary_id TEXT NOT NULL,
|
|
PRIMARY KEY (summary_id, parent_summary_id)
|
|
)`,
|
|
|
|
`CREATE TABLE IF NOT EXISTS summary_messages (
|
|
summary_id TEXT NOT NULL,
|
|
message_id INTEGER NOT NULL,
|
|
ordinal INTEGER NOT NULL DEFAULT 0,
|
|
PRIMARY KEY (summary_id, message_id)
|
|
)`,
|
|
|
|
`CREATE TABLE IF NOT EXISTS context_items (
|
|
conversation_id INTEGER NOT NULL,
|
|
ordinal INTEGER NOT NULL,
|
|
item_type TEXT NOT NULL,
|
|
summary_id TEXT,
|
|
message_id INTEGER,
|
|
token_count INTEGER NOT NULL DEFAULT 0,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
PRIMARY KEY (conversation_id, ordinal)
|
|
)`,
|
|
|
|
// FTS5 virtual table with trigram tokenizer for CJK support
|
|
sqlCreateSummariesFTS,
|
|
|
|
// FTS5 virtual table for message search with trigram tokenizer
|
|
sqlCreateMessagesFTS,
|
|
|
|
// Indexes for common query patterns
|
|
`CREATE INDEX IF NOT EXISTS idx_messages_conversation ON messages(conversation_id)`,
|
|
`CREATE INDEX IF NOT EXISTS idx_messages_created ON messages(conversation_id, created_at)`,
|
|
`CREATE INDEX IF NOT EXISTS idx_summaries_conversation ON summaries(conversation_id)`,
|
|
`CREATE INDEX IF NOT EXISTS idx_summaries_kind_depth ON summaries(conversation_id, kind, depth)`,
|
|
`CREATE INDEX IF NOT EXISTS idx_summary_parents_parent ON summary_parents(parent_summary_id)`,
|
|
`CREATE INDEX IF NOT EXISTS idx_summary_messages_message ON summary_messages(message_id)`,
|
|
`CREATE INDEX IF NOT EXISTS idx_context_items_conv ON context_items(conversation_id, ordinal)`,
|
|
|
|
// FTS5 triggers to keep summaries_fts in sync with summaries table
|
|
`CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON summaries BEGIN
|
|
INSERT INTO summaries_fts (summary_id, content) VALUES (new.summary_id, new.content);
|
|
END`,
|
|
`CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON summaries BEGIN
|
|
INSERT INTO summaries_fts (summaries_fts, summary_id, content) VALUES ('delete', old.summary_id, old.content);
|
|
END`,
|
|
`CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON summaries BEGIN
|
|
INSERT INTO summaries_fts (summaries_fts, summary_id, content) VALUES ('delete', old.summary_id, old.content);
|
|
INSERT INTO summaries_fts (summary_id, content) VALUES (new.summary_id, new.content);
|
|
END`,
|
|
|
|
// FTS5 triggers to keep messages_fts in sync with messages table
|
|
`CREATE TRIGGER IF NOT EXISTS messages_ai AFTER INSERT ON messages BEGIN
|
|
INSERT INTO messages_fts (message_id, content) VALUES (new.message_id, new.content);
|
|
END`,
|
|
`CREATE TRIGGER IF NOT EXISTS messages_ad AFTER DELETE ON messages BEGIN
|
|
DELETE FROM messages_fts WHERE message_id = old.message_id;
|
|
END`,
|
|
`CREATE TRIGGER IF NOT EXISTS messages_au AFTER UPDATE ON messages BEGIN
|
|
DELETE FROM messages_fts WHERE message_id = old.message_id;
|
|
INSERT INTO messages_fts (message_id, content) VALUES (new.message_id, new.content);
|
|
END`,
|
|
}
|
|
|
|
for _, s := range stmts {
|
|
if _, err := db.Exec(s); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// checkFTS5Support verifies that SQLite has FTS5 with trigram tokenizer enabled.
|
|
// This is required for full-text search with CJK (Chinese, Japanese, Korean) support.
|
|
func checkFTS5Support(db *sql.DB) error {
|
|
// Check if FTS5 is compiled in
|
|
var fts5Enabled int
|
|
err := db.QueryRow(`SELECT sqlite_compileoption_used('ENABLE_FTS5')`).Scan(&fts5Enabled)
|
|
if err != nil {
|
|
// sqlite_compileoption_used might not exist in older SQLite
|
|
// Try a different approach: create a test FTS5 table
|
|
_, testErr := db.Exec(sqlCheckFTS5Available)
|
|
if testErr != nil {
|
|
return fmt.Errorf("SQLite FTS5 not available: %w (required for full-text search)", testErr)
|
|
}
|
|
db.Exec(sqlDropFTS5Check)
|
|
} else if fts5Enabled == 0 {
|
|
return fmt.Errorf("SQLite was compiled without FTS5 support (required for full-text search)")
|
|
}
|
|
|
|
// Check if trigram tokenizer is available by trying to create a test table
|
|
// Not all SQLite builds include the trigram tokenizer
|
|
_, err = db.Exec(sqlCheckTrigramAvailable)
|
|
if err != nil {
|
|
logger.WarnCF("seahorse", "SQLite trigram tokenizer not available, CJK search may be limited",
|
|
map[string]any{"error": err.Error()})
|
|
// Trigram is not strictly required, just better for CJK
|
|
// Don't return error, just log warning
|
|
} else {
|
|
db.Exec(sqlDropTrigramCheck)
|
|
}
|
|
|
|
return nil
|
|
}
|