mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-05-25 16:00:35 +00:00
15a70ac45c
* feat(seahorse): implement short-term memory engine of seahorse Add pkg/seahorse/ module implementing a SQLite-backed DAG-based summary hierarchy for context management, ported from lossless-claw's LCM design: - types.go + short_constants.go: core types (Message, Summary, Conversation, ContextItem) and configuration constants (fanout, token targets, thresholds) - migration.go: idempotent DB schema with FTS5 trigram tokenizer for CJK - store.go: full SQLite CRUD (conversations, messages, summaries DAG, context_items with ordinal gap numbering, FTS5 search) - short_engine.go: Engine lifecycle (NewEngine, Ingest, Assemble, Compact), session pattern filtering (ignore/stateless glob→regex compilation), per-session mutex via sync.Map - short_assembler.go: budget-aware context assembly with fresh tail protection (32 messages), oldest-first eviction, summary XML formatting, RebuildContextItems - short_compaction.go: leaf compaction (messages→summary) and condensed compaction (summaries→higher-level summary), 3-level LLM escalation, CompactUntilUnder for emergency overflow - short_retrieval.go: lookupByID, FTS5/LIKE search, recursive expand with token cap - context_seahorse.go: agent.ContextManager adapter, registered as "seahorse", provider↔seahorse message type conversion (ToolCalls, tool_result) * fix(seahorse): correct 3 adapter bugs in context management - TokenCount: use full message (Content+ToolCalls+Media) instead of Content-only - Empty Content: rebuild Content from tool_result Parts when stored empty - Duplicate summaries: summaries only in Summary field, not in History messages - Grep: fix SearchResult.Snippet→Content for summaries - Schema: fix FTS5 SQL uses VIRTUAL TABLE not TEMP TABLE - TestFTS5SQLConstants: verify FTS5 SQL syntax correctness - Test: fix flaky TestCompactLeaf * fix(agent): ingest steering messages into seahorse SQLite Steering messages were only persisted to session JSONL but not ingested into seahorse SQLite, causing them to be missing from 
context assembly. Added `ts.ingestMessage(turnCtx, al, pm)` call in the steering message injection block alongside the existing JSONL persistence. Test: TestSeahorseSteeringMessageIngested verifies steering messages appear in seahorse SQLite DB after being processed. * fix(seahorse): address 3 blocking bugs from code review - Fix resequenceContextItemsTx scan error handling (store.go:850) Changed `return err` to `return scanErr` to properly propagate scan errors instead of returning nil (which silently corrupts data) - Fix sql.NullString for INTEGER column (store.go:847) Changed `mid` from sql.NullString to sql.NullInt64 since message_id is INTEGER in schema. Removed unnecessary strconv.ParseInt call. - Fix compactCondensed fallback deleting non-candidate items Added ReplaceContextItemsWithSummary method for per-item deletion when candidates are not contiguous in ordinal space. Optimized to use range deletion when candidates are consecutive. * fix(seahorse): pass Budget to Compact for correct condensed threshold Issue #4 from PR review: When Budget was not passed to seahorse.Compact, it defaulted to `tokensBefore * 0.75`, making `tokensBefore > budget` always true and causing condensed compaction to trigger unnecessarily. 
Changes: - context_seahorse.go: Forward Budget from CompactRequest to CompactInput - loop.go: Pass Budget (ContextWindow) in all 3 Compact calls - Add test verifying condensed is skipped when tokens < threshold - Fix lint issues in store.go and store_test.go * fix(seahorse): add mutex for assembler lazy initialization Issue #5 from PR review: The check-then-create pattern for e.assembler was a data race when multiple goroutines called Assemble() concurrently: if e.assembler == nil { e.assembler = &Assembler{...} } Changes: - Add assemblerMu sync.Mutex to Engine struct - Add initAssemblerOnce() using double-checked locking (same pattern as initCompactionOnce) - Add TestAssemblerLazyInitRace to verify thread-safety * fix(seahorse): handle non-consecutive depths in selectShallowestCondensationCandidate Issue #8 from PR review: the loop iterated depth 0, 1, 2... assuming consecutive keys, but break when key was missing caused deeper depths to never be checked. Fix: collect all existing depth keys, sort, then iterate in order. * fix(seahorse): wrap DeleteMessagesAfterID and appendContextItems in transactions - DeleteMessagesAfterID: wrap all DELETE operations in a transaction for atomicity, remove redundant manual FTS delete (handled by trigger) - appendContextItems: use transaction to fix read-then-write race condition - Add GetMaxOrdinalTx and resolveItemTokenCountTx for transaction-scoped queries - Remove unused resolveItemTokenCount function Fixes PR review issues 6 and 7. * fix(seahorse): derive readable content from Parts and cap CompactUntilUnder iterations - Derive readable content from MessageParts in AddMessageWithParts so FTS5 indexing and summary formatting can access tool call information - formatMessagesForSummary and truncateSummary now fall back to Parts when Content is empty, fixing blank summaries for Part-based messages - Add MaxCompactIterations (20) to prevent CompactUntilUnder infinite loops; exceeded iterations are logged as warnings
380 lines
9.9 KiB
Go
380 lines
9.9 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/logger"
|
|
"github.com/sipeed/picoclaw/pkg/providers"
|
|
)
|
|
|
|
// legacyContextManager wraps the existing summarization/compression logic
|
|
// as a ContextManager implementation. It is the default when no other
|
|
// ContextManager is configured.
|
|
type legacyContextManager struct {
|
|
al *AgentLoop
|
|
summarizing sync.Map // dedup for async Compact (post-turn)
|
|
}
|
|
|
|
func (m *legacyContextManager) Assemble(_ context.Context, req *AssembleRequest) (*AssembleResponse, error) {
|
|
// Legacy: read history from session, return as-is.
|
|
// Budget enforcement happens in BuildMessages caller via
|
|
// isOverContextBudget + forceCompression.
|
|
agent := m.al.registry.GetDefaultAgent()
|
|
if agent == nil {
|
|
return &AssembleResponse{}, nil
|
|
}
|
|
history := agent.Sessions.GetHistory(req.SessionKey)
|
|
summary := agent.Sessions.GetSummary(req.SessionKey)
|
|
return &AssembleResponse{
|
|
History: history,
|
|
Summary: summary,
|
|
}, nil
|
|
}
|
|
|
|
func (m *legacyContextManager) Compact(_ context.Context, req *CompactRequest) error {
|
|
switch req.Reason {
|
|
case ContextCompressReasonProactive, ContextCompressReasonRetry:
|
|
// Sync emergency compression — budget exceeded.
|
|
if result, ok := m.forceCompression(req.SessionKey); ok {
|
|
m.al.emitEvent(
|
|
EventKindContextCompress,
|
|
m.al.newTurnEventScope("", req.SessionKey).meta(0, "forceCompression", "turn.context.compress"),
|
|
ContextCompressPayload{
|
|
Reason: req.Reason,
|
|
DroppedMessages: result.DroppedMessages,
|
|
RemainingMessages: result.RemainingMessages,
|
|
},
|
|
)
|
|
}
|
|
case ContextCompressReasonSummarize:
|
|
m.maybeSummarize(req.SessionKey)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (m *legacyContextManager) Ingest(_ context.Context, _ *IngestRequest) error {
|
|
// Legacy: no-op. Messages are persisted by Sessions JSONL.
|
|
return nil
|
|
}
|
|
|
|
// maybeSummarize triggers summarization if the session history exceeds thresholds.
|
|
// It runs asynchronously in a goroutine.
|
|
func (m *legacyContextManager) maybeSummarize(sessionKey string) {
|
|
agent := m.al.registry.GetDefaultAgent()
|
|
if agent == nil {
|
|
return
|
|
}
|
|
|
|
newHistory := agent.Sessions.GetHistory(sessionKey)
|
|
tokenEstimate := m.estimateTokens(newHistory)
|
|
threshold := agent.ContextWindow * agent.SummarizeTokenPercent / 100
|
|
|
|
if len(newHistory) > agent.SummarizeMessageThreshold || tokenEstimate > threshold {
|
|
summarizeKey := agent.ID + ":" + sessionKey
|
|
if _, loading := m.summarizing.LoadOrStore(summarizeKey, true); !loading {
|
|
go func() {
|
|
defer m.summarizing.Delete(summarizeKey)
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
logger.WarnCF("agent", "Summarization panic recovered", map[string]any{
|
|
"session_key": sessionKey,
|
|
"panic": r,
|
|
})
|
|
}
|
|
}()
|
|
logger.Debug("Memory threshold reached. Optimizing conversation history...")
|
|
m.summarizeSession(agent, sessionKey)
|
|
}()
|
|
}
|
|
}
|
|
}
|
|
|
|
// compressionResult reports what a forceCompression call did.
// DroppedMessages is the number of messages removed from the session history
// and RemainingMessages is the number of messages kept.
type compressionResult struct {
	DroppedMessages, RemainingMessages int
}
|
|
|
|
// forceCompression aggressively reduces context when the limit is hit.
|
|
// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
|
|
// cycle, as defined in #1316), so tool-call sequences are never split.
|
|
func (m *legacyContextManager) forceCompression(sessionKey string) (compressionResult, bool) {
|
|
agent := m.al.registry.GetDefaultAgent()
|
|
if agent == nil {
|
|
return compressionResult{}, false
|
|
}
|
|
|
|
history := agent.Sessions.GetHistory(sessionKey)
|
|
if len(history) <= 2 {
|
|
return compressionResult{}, false
|
|
}
|
|
|
|
turns := parseTurnBoundaries(history)
|
|
var mid int
|
|
if len(turns) >= 2 {
|
|
mid = turns[len(turns)/2]
|
|
} else {
|
|
mid = findSafeBoundary(history, len(history)/2)
|
|
}
|
|
var keptHistory []providers.Message
|
|
if mid <= 0 {
|
|
for i := len(history) - 1; i >= 0; i-- {
|
|
if history[i].Role == "user" {
|
|
keptHistory = []providers.Message{history[i]}
|
|
break
|
|
}
|
|
}
|
|
} else {
|
|
keptHistory = history[mid:]
|
|
}
|
|
|
|
droppedCount := len(history) - len(keptHistory)
|
|
|
|
existingSummary := agent.Sessions.GetSummary(sessionKey)
|
|
compressionNote := fmt.Sprintf(
|
|
"[Emergency compression dropped %d oldest messages due to context limit]",
|
|
droppedCount,
|
|
)
|
|
if existingSummary != "" {
|
|
compressionNote = existingSummary + "\n\n" + compressionNote
|
|
}
|
|
agent.Sessions.SetSummary(sessionKey, compressionNote)
|
|
|
|
agent.Sessions.SetHistory(sessionKey, keptHistory)
|
|
agent.Sessions.Save(sessionKey)
|
|
|
|
logger.WarnCF("agent", "Forced compression executed", map[string]any{
|
|
"session_key": sessionKey,
|
|
"dropped_msgs": droppedCount,
|
|
"new_count": len(keptHistory),
|
|
})
|
|
|
|
return compressionResult{
|
|
DroppedMessages: droppedCount,
|
|
RemainingMessages: len(keptHistory),
|
|
}, true
|
|
}
|
|
|
|
func (m *legacyContextManager) summarizeSession(agent *AgentInstance, sessionKey string) {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
|
defer cancel()
|
|
|
|
history := agent.Sessions.GetHistory(sessionKey)
|
|
summary := agent.Sessions.GetSummary(sessionKey)
|
|
|
|
if len(history) <= 4 {
|
|
return
|
|
}
|
|
|
|
safeCut := findSafeBoundary(history, len(history)-4)
|
|
if safeCut <= 0 {
|
|
return
|
|
}
|
|
keepCount := len(history) - safeCut
|
|
toSummarize := history[:safeCut]
|
|
|
|
maxMessageTokens := agent.ContextWindow / 2
|
|
validMessages := make([]providers.Message, 0)
|
|
omitted := false
|
|
|
|
for _, msg := range toSummarize {
|
|
if msg.Role != "user" && msg.Role != "assistant" {
|
|
continue
|
|
}
|
|
msgTokens := len(msg.Content) / 2
|
|
if msgTokens > maxMessageTokens {
|
|
omitted = true
|
|
continue
|
|
}
|
|
validMessages = append(validMessages, msg)
|
|
}
|
|
|
|
if len(validMessages) == 0 {
|
|
return
|
|
}
|
|
|
|
const (
|
|
maxSummarizationMessages = 10
|
|
llmMaxRetries = 3
|
|
)
|
|
|
|
var finalSummary string
|
|
if len(validMessages) > maxSummarizationMessages {
|
|
mid := len(validMessages) / 2
|
|
mid = m.findNearestUserMessage(validMessages, mid)
|
|
|
|
part1 := validMessages[:mid]
|
|
part2 := validMessages[mid:]
|
|
|
|
s1, _ := m.summarizeBatch(ctx, agent, part1, "")
|
|
s2, _ := m.summarizeBatch(ctx, agent, part2, "")
|
|
|
|
mergePrompt := fmt.Sprintf(
|
|
"Merge these two conversation summaries into one cohesive summary:\n\n1: %s\n\n2: %s",
|
|
s1, s2,
|
|
)
|
|
|
|
resp, err := m.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries)
|
|
if err == nil && resp.Content != "" {
|
|
finalSummary = resp.Content
|
|
} else {
|
|
finalSummary = s1 + " " + s2
|
|
}
|
|
} else {
|
|
finalSummary, _ = m.summarizeBatch(ctx, agent, validMessages, summary)
|
|
}
|
|
|
|
if omitted && finalSummary != "" {
|
|
finalSummary += "\n[Note: Some oversized messages were omitted from this summary for efficiency.]"
|
|
}
|
|
|
|
if finalSummary != "" {
|
|
agent.Sessions.SetSummary(sessionKey, finalSummary)
|
|
agent.Sessions.TruncateHistory(sessionKey, keepCount)
|
|
agent.Sessions.Save(sessionKey)
|
|
m.al.emitEvent(
|
|
EventKindSessionSummarize,
|
|
m.al.newTurnEventScope(agent.ID, sessionKey).meta(0, "summarizeSession", "turn.session.summarize"),
|
|
SessionSummarizePayload{
|
|
SummarizedMessages: len(validMessages),
|
|
KeptMessages: keepCount,
|
|
SummaryLen: len(finalSummary),
|
|
OmittedOversized: omitted,
|
|
},
|
|
)
|
|
}
|
|
}
|
|
|
|
func (m *legacyContextManager) findNearestUserMessage(messages []providers.Message, mid int) int {
|
|
originalMid := mid
|
|
|
|
for mid > 0 && messages[mid].Role != "user" {
|
|
mid--
|
|
}
|
|
|
|
if messages[mid].Role == "user" {
|
|
return mid
|
|
}
|
|
|
|
mid = originalMid
|
|
for mid < len(messages) && messages[mid].Role != "user" {
|
|
mid++
|
|
}
|
|
|
|
if mid < len(messages) {
|
|
return mid
|
|
}
|
|
|
|
return originalMid
|
|
}
|
|
|
|
func (m *legacyContextManager) retryLLMCall(
|
|
ctx context.Context,
|
|
agent *AgentInstance,
|
|
prompt string,
|
|
maxRetries int,
|
|
) (*providers.LLMResponse, error) {
|
|
const llmTemperature = 0.3
|
|
|
|
var resp *providers.LLMResponse
|
|
var err error
|
|
|
|
for attempt := 0; attempt < maxRetries; attempt++ {
|
|
m.al.activeRequests.Add(1)
|
|
resp, err = func() (*providers.LLMResponse, error) {
|
|
defer m.al.activeRequests.Done()
|
|
return agent.Provider.Chat(
|
|
ctx,
|
|
[]providers.Message{{Role: "user", Content: prompt}},
|
|
nil,
|
|
agent.Model,
|
|
map[string]any{
|
|
"max_tokens": agent.MaxTokens,
|
|
"temperature": llmTemperature,
|
|
"prompt_cache_key": agent.ID,
|
|
},
|
|
)
|
|
}()
|
|
|
|
if err == nil && resp != nil && resp.Content != "" {
|
|
return resp, nil
|
|
}
|
|
if attempt < maxRetries-1 {
|
|
time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
return resp, err
|
|
}
|
|
|
|
func (m *legacyContextManager) summarizeBatch(
|
|
ctx context.Context,
|
|
agent *AgentInstance,
|
|
batch []providers.Message,
|
|
existingSummary string,
|
|
) (string, error) {
|
|
const (
|
|
llmMaxRetries = 3
|
|
fallbackMinContentLength = 200
|
|
fallbackMaxContentPercent = 10
|
|
)
|
|
|
|
var sb strings.Builder
|
|
sb.WriteString("Provide a concise summary of this conversation segment, preserving core context and key points.\n")
|
|
if existingSummary != "" {
|
|
sb.WriteString("Existing context: ")
|
|
sb.WriteString(existingSummary)
|
|
sb.WriteString("\n")
|
|
}
|
|
sb.WriteString("\nCONVERSATION:\n")
|
|
for _, msg := range batch {
|
|
fmt.Fprintf(&sb, "%s: %s\n", msg.Role, msg.Content)
|
|
}
|
|
prompt := sb.String()
|
|
|
|
response, err := m.retryLLMCall(ctx, agent, prompt, llmMaxRetries)
|
|
if err == nil && response.Content != "" {
|
|
return strings.TrimSpace(response.Content), nil
|
|
}
|
|
|
|
var fallback strings.Builder
|
|
fallback.WriteString("Conversation summary: ")
|
|
for i, msg := range batch {
|
|
if i > 0 {
|
|
fallback.WriteString(" | ")
|
|
}
|
|
content := strings.TrimSpace(msg.Content)
|
|
runes := []rune(content)
|
|
if len(runes) == 0 {
|
|
fallback.WriteString(fmt.Sprintf("%s: ", msg.Role))
|
|
continue
|
|
}
|
|
|
|
keepLength := len(runes) * fallbackMaxContentPercent / 100
|
|
if keepLength < fallbackMinContentLength {
|
|
keepLength = fallbackMinContentLength
|
|
}
|
|
if keepLength > len(runes) {
|
|
keepLength = len(runes)
|
|
}
|
|
|
|
content = string(runes[:keepLength])
|
|
if keepLength < len(runes) {
|
|
content += "..."
|
|
}
|
|
fallback.WriteString(fmt.Sprintf("%s: %s", msg.Role, content))
|
|
}
|
|
return fallback.String(), nil
|
|
}
|
|
|
|
func (m *legacyContextManager) estimateTokens(messages []providers.Message) int {
|
|
total := 0
|
|
for _, msg := range messages {
|
|
total += EstimateMessageTokens(msg)
|
|
}
|
|
return total
|
|
}
|