mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-05-25 16:00:35 +00:00
15a70ac45c
* feat(seahorse): implement short-term memory engine of seahorse Add pkg/seahorse/ module implementing a SQLite-backed DAG-based summary hierarchy for context management, ported from lossless-claw's LCM design: - types.go + short_constants.go: core types (Message, Summary, Conversation, ContextItem) and configuration constants (fanout, token targets, thresholds) - migration.go: idempotent DB schema with FTS5 trigram tokenizer for CJK - store.go: full SQLite CRUD (conversations, messages, summaries DAG, context_items with ordinal gap numbering, FTS5 search) - short_engine.go: Engine lifecycle (NewEngine, Ingest, Assemble, Compact), session pattern filtering (ignore/stateless glob→regex compilation), per-session mutex via sync.Map - short_assembler.go: budget-aware context assembly with fresh tail protection (32 messages), oldest-first eviction, summary XML formatting, RebuildContextItems - short_compaction.go: leaf compaction (messages→summary) and condensed compaction (summaries→higher-level summary), 3-level LLM escalation, CompactUntilUnder for emergency overflow - short_retrieval.go: lookupByID, FTS5/LIKE search, recursive expand with token cap - context_seahorse.go: agent.ContextManager adapter, registered as "seahorse", provider↔seahorse message type conversion (ToolCalls, tool_result) * fix(seahorse): correct 3 adapter bugs in context management - TokenCount: use full message (Content+ToolCalls+Media) instead of Content-only - Empty Content: rebuild Content from tool_result Parts when stored empty - Duplicate summaries: summaries only in Summary field, not in History messages - Grep: fix SearchResult.Snippet→Content for summaries - Schema: fix FTS5 SQL uses VIRTUAL TABLE not TEMP TABLE - TestFTS5SQLConstants: verify FTS5 SQL syntax correctness - Test: fix flaky TestCompactLeaf * fix(agent): ingest steering messages into seahorse SQLite Steering messages were only persisted to session JSONL but not ingested into seahorse SQLite, causing them to be missing from 
context assembly. Added `ts.ingestMessage(turnCtx, al, pm)` call in the steering message injection block alongside the existing JSONL persistence. Test: TestSeahorseSteeringMessageIngested verifies steering messages appear in seahorse SQLite DB after being processed. * fix(seahorse): address 3 blocking bugs from code review - Fix resequenceContextItemsTx scan error handling (store.go:850) Changed `return err` to `return scanErr` to properly propagate scan errors instead of returning nil (which silently corrupts data) - Fix sql.NullString for INTEGER column (store.go:847) Changed `mid` from sql.NullString to sql.NullInt64 since message_id is INTEGER in schema. Removed unnecessary strconv.ParseInt call. - Fix compactCondensed fallback deleting non-candidate items Added ReplaceContextItemsWithSummary method for per-item deletion when candidates are not contiguous in ordinal space. Optimized to use range deletion when candidates are consecutive. * fix(seahorse): pass Budget to Compact for correct condensed threshold Issue #4 from PR review: When Budget was not passed to seahorse.Compact, it defaulted to `tokensBefore * 0.75`, making `tokensBefore > budget` always true and causing condensed compaction to trigger unnecessarily. 
Changes: - context_seahorse.go: Forward Budget from CompactRequest to CompactInput - loop.go: Pass Budget (ContextWindow) in all 3 Compact calls - Add test verifying condensed is skipped when tokens < threshold - Fix lint issues in store.go and store_test.go * fix(seahorse): add mutex for assembler lazy initialization Issue #5 from PR review: The check-then-create pattern for e.assembler was a data race when multiple goroutines called Assemble() concurrently: if e.assembler == nil { e.assembler = &Assembler{...} } Changes: - Add assemblerMu sync.Mutex to Engine struct - Add initAssemblerOnce() using double-checked locking (same pattern as initCompactionOnce) - Add TestAssemblerLazyInitRace to verify thread-safety * fix(seahorse): handle non-consecutive depths in selectShallowestCondensationCandidate Issue #8 from PR review: the loop iterated depth 0, 1, 2... assuming consecutive keys, but break when key was missing caused deeper depths to never be checked. Fix: collect all existing depth keys, sort, then iterate in order. * fix(seahorse): wrap DeleteMessagesAfterID and appendContextItems in transactions - DeleteMessagesAfterID: wrap all DELETE operations in a transaction for atomicity, remove redundant manual FTS delete (handled by trigger) - appendContextItems: use transaction to fix read-then-write race condition - Add GetMaxOrdinalTx and resolveItemTokenCountTx for transaction-scoped queries - Remove unused resolveItemTokenCount function Fixes PR review issues 6 and 7. * fix(seahorse): derive readable content from Parts and cap CompactUntilUnder iterations - Derive readable content from MessageParts in AddMessageWithParts so FTS5 indexing and summary formatting can access tool call information - formatMessagesForSummary and truncateSummary now fall back to Parts when Content is empty, fixing blank summaries for Part-based messages - Add MaxCompactIterations (20) to prevent CompactUntilUnder infinite loops; exceeded iterations are logged as warnings
301 lines
6.4 KiB
Go
301 lines
6.4 KiB
Go
package session
|
|
|
|
import (
|
|
"encoding/json"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/providers"
|
|
)
|
|
|
|
type Session struct {
|
|
Key string `json:"key"`
|
|
Messages []providers.Message `json:"messages"`
|
|
Summary string `json:"summary,omitempty"`
|
|
Created time.Time `json:"created"`
|
|
Updated time.Time `json:"updated"`
|
|
}
|
|
|
|
type SessionManager struct {
|
|
sessions map[string]*Session
|
|
mu sync.RWMutex
|
|
storage string
|
|
}
|
|
|
|
func NewSessionManager(storage string) *SessionManager {
|
|
sm := &SessionManager{
|
|
sessions: make(map[string]*Session),
|
|
storage: storage,
|
|
}
|
|
|
|
if storage != "" {
|
|
os.MkdirAll(storage, 0o700)
|
|
sm.loadSessions()
|
|
}
|
|
|
|
return sm
|
|
}
|
|
|
|
func (sm *SessionManager) GetOrCreate(key string) *Session {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
session, ok := sm.sessions[key]
|
|
if ok {
|
|
return session
|
|
}
|
|
|
|
session = &Session{
|
|
Key: key,
|
|
Messages: []providers.Message{},
|
|
Created: time.Now(),
|
|
Updated: time.Now(),
|
|
}
|
|
sm.sessions[key] = session
|
|
|
|
return session
|
|
}
|
|
|
|
func (sm *SessionManager) AddMessage(sessionKey, role, content string) {
|
|
sm.AddFullMessage(sessionKey, providers.Message{
|
|
Role: role,
|
|
Content: content,
|
|
})
|
|
}
|
|
|
|
// AddFullMessage adds a complete message with tool calls and tool call ID to the session.
|
|
// This is used to save the full conversation flow including tool calls and tool results.
|
|
func (sm *SessionManager) AddFullMessage(sessionKey string, msg providers.Message) {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
session, ok := sm.sessions[sessionKey]
|
|
if !ok {
|
|
session = &Session{
|
|
Key: sessionKey,
|
|
Messages: []providers.Message{},
|
|
Created: time.Now(),
|
|
}
|
|
sm.sessions[sessionKey] = session
|
|
}
|
|
|
|
session.Messages = append(session.Messages, msg)
|
|
session.Updated = time.Now()
|
|
}
|
|
|
|
func (sm *SessionManager) GetHistory(key string) []providers.Message {
|
|
sm.mu.RLock()
|
|
defer sm.mu.RUnlock()
|
|
|
|
session, ok := sm.sessions[key]
|
|
if !ok {
|
|
return []providers.Message{}
|
|
}
|
|
|
|
history := make([]providers.Message, len(session.Messages))
|
|
copy(history, session.Messages)
|
|
return history
|
|
}
|
|
|
|
func (sm *SessionManager) GetSummary(key string) string {
|
|
sm.mu.RLock()
|
|
defer sm.mu.RUnlock()
|
|
|
|
session, ok := sm.sessions[key]
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return session.Summary
|
|
}
|
|
|
|
func (sm *SessionManager) SetSummary(key string, summary string) {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
session, ok := sm.sessions[key]
|
|
if ok {
|
|
session.Summary = summary
|
|
session.Updated = time.Now()
|
|
}
|
|
}
|
|
|
|
func (sm *SessionManager) TruncateHistory(key string, keepLast int) {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
session, ok := sm.sessions[key]
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
if keepLast <= 0 {
|
|
session.Messages = []providers.Message{}
|
|
session.Updated = time.Now()
|
|
return
|
|
}
|
|
|
|
if len(session.Messages) <= keepLast {
|
|
return
|
|
}
|
|
|
|
session.Messages = session.Messages[len(session.Messages)-keepLast:]
|
|
session.Updated = time.Now()
|
|
}
|
|
|
|
func (sm *SessionManager) ListSessions() []string {
|
|
sm.mu.RLock()
|
|
defer sm.mu.RUnlock()
|
|
keys := make([]string, 0, len(sm.sessions))
|
|
for k := range sm.sessions {
|
|
keys = append(keys, k)
|
|
}
|
|
return keys
|
|
}
|
|
|
|
// sanitizeFilename turns a session key into a filename that is safe on all
// platforms by replacing ':' (the session key separator), '/' and '\' with
// '_'. Composite IDs such as a Telegram forum "chatID/threadID" therefore
// neither create subdirectories nor break on Windows. The untouched key is
// still stored inside the JSON file, so loadSessions maps the file back to
// the right in-memory key.
func sanitizeFilename(key string) string {
	// Nothing to rewrite: hand the key back unchanged.
	if !strings.ContainsAny(key, `:/\`) {
		return key
	}

	// All three separators are single ASCII bytes, so a byte-wise pass is
	// equivalent to replacing each of them in turn.
	out := []byte(key)
	for i, b := range out {
		switch b {
		case ':', '/', '\\':
			out[i] = '_'
		}
	}
	return string(out)
}
|
|
|
|
func (sm *SessionManager) Save(key string) error {
|
|
if sm.storage == "" {
|
|
return nil
|
|
}
|
|
|
|
filename := sanitizeFilename(key)
|
|
|
|
// filepath.IsLocal rejects empty names, "..", absolute paths, and
|
|
// OS-reserved device names (NUL, COM1 … on Windows). sanitizeFilename
|
|
// already replaced '/' and '\' with '_', so no subdirs are created.
|
|
if filename == "." || !filepath.IsLocal(filename) {
|
|
return os.ErrInvalid
|
|
}
|
|
|
|
// Snapshot under read lock, then perform slow file I/O after unlock.
|
|
sm.mu.RLock()
|
|
stored, ok := sm.sessions[key]
|
|
if !ok {
|
|
sm.mu.RUnlock()
|
|
return nil
|
|
}
|
|
|
|
snapshot := Session{
|
|
Key: stored.Key,
|
|
Summary: stored.Summary,
|
|
Created: stored.Created,
|
|
Updated: stored.Updated,
|
|
}
|
|
if len(stored.Messages) > 0 {
|
|
snapshot.Messages = make([]providers.Message, len(stored.Messages))
|
|
copy(snapshot.Messages, stored.Messages)
|
|
} else {
|
|
snapshot.Messages = []providers.Message{}
|
|
}
|
|
sm.mu.RUnlock()
|
|
|
|
data, err := json.MarshalIndent(snapshot, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
sessionPath := filepath.Join(sm.storage, filename+".json")
|
|
tmpFile, err := os.CreateTemp(sm.storage, "session-*.tmp")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
tmpPath := tmpFile.Name()
|
|
cleanup := true
|
|
defer func() {
|
|
if cleanup {
|
|
_ = os.Remove(tmpPath)
|
|
}
|
|
}()
|
|
|
|
if _, err := tmpFile.Write(data); err != nil {
|
|
_ = tmpFile.Close()
|
|
return err
|
|
}
|
|
if err := tmpFile.Chmod(0o600); err != nil {
|
|
_ = tmpFile.Close()
|
|
return err
|
|
}
|
|
if err := tmpFile.Sync(); err != nil {
|
|
_ = tmpFile.Close()
|
|
return err
|
|
}
|
|
if err := tmpFile.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := os.Rename(tmpPath, sessionPath); err != nil {
|
|
return err
|
|
}
|
|
cleanup = false
|
|
return nil
|
|
}
|
|
|
|
func (sm *SessionManager) loadSessions() error {
|
|
files, err := os.ReadDir(sm.storage)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, file := range files {
|
|
if file.IsDir() {
|
|
continue
|
|
}
|
|
|
|
if filepath.Ext(file.Name()) != ".json" {
|
|
continue
|
|
}
|
|
|
|
sessionPath := filepath.Join(sm.storage, file.Name())
|
|
data, err := os.ReadFile(sessionPath)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
var session Session
|
|
if err := json.Unmarshal(data, &session); err != nil {
|
|
continue
|
|
}
|
|
|
|
sm.sessions[session.Key] = &session
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Close is a no-op for the in-memory SessionManager; it satisfies the
|
|
// SessionStore interface so callers can release resources uniformly.
|
|
func (sm *SessionManager) Close() error {
|
|
return nil
|
|
}
|
|
|
|
// SetHistory updates the messages of a session.
|
|
func (sm *SessionManager) SetHistory(key string, history []providers.Message) {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
session, ok := sm.sessions[key]
|
|
if ok {
|
|
// Create a deep copy to strictly isolate internal state
|
|
// from the caller's slice.
|
|
msgs := make([]providers.Message, len(history))
|
|
copy(msgs, history)
|
|
session.Messages = msgs
|
|
session.Updated = time.Now()
|
|
}
|
|
}
|