feat: add ContextManager abstraction for pluggable context management (#2203)

- Define ContextManager interface with Assemble/Compact/Ingest methods
- Implement legacyContextManager wrapping existing summarization logic
- Wire Assemble (before BuildMessages), Compact (post-turn + overflow),
  and Ingest (after message persistence) into agent loop
- Add ContextManager config field and factory registry with config passthrough
- Remove old maybeSummarize/summarizeSession/summarizeBatch/etc from loop.go
- All existing tests pass with default (legacy) config

Co-authored-by: Liu Yuan <namei.unix@gmail.com>
This commit is contained in:
Liu Yuan
2026-04-02 00:08:15 +08:00
committed by GitHub
parent 2973b30ad7
commit 7eba27c3c4
8 changed files with 1354 additions and 381 deletions
+81 -363
View File
@@ -48,7 +48,7 @@ type AgentLoop struct {
// Runtime state
running atomic.Bool
summarizing sync.Map
contextManager ContextManager
fallback *providers.FallbackChain
channelManager *channels.Manager
mediaStore media.MediaStore
@@ -137,13 +137,13 @@ func NewAgentLoop(
registry: registry,
state: stateManager,
eventBus: eventBus,
summarizing: sync.Map{},
fallback: fallbackChain,
cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()),
steering: newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)),
}
al.hooks = NewHookManager(eventBus)
configureHookManagerFromConfig(al.hooks, cfg)
al.contextManager = al.resolveContextManager()
// Register shared tools to all agents (now that al is created)
registerSharedTools(al, cfg, msgBus, registry, provider)
@@ -1690,8 +1690,15 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
var history []providers.Message
var summary string
if !ts.opts.NoHistory {
history = ts.agent.Sessions.GetHistory(ts.sessionKey)
summary = ts.agent.Sessions.GetSummary(ts.sessionKey)
// ContextManager assembles budget-aware history and summary.
if resp, err := al.contextManager.Assemble(turnCtx, &AssembleRequest{
SessionKey: ts.sessionKey,
Budget: ts.agent.ContextWindow,
MaxTokens: ts.agent.MaxTokens,
}); err == nil && resp != nil {
history = resp.History
summary = resp.Summary
}
}
ts.captureRestorePoint(history, summary)
@@ -1716,22 +1723,27 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
if isOverContextBudget(ts.agent.ContextWindow, messages, toolDefs, ts.agent.MaxTokens) {
logger.WarnCF("agent", "Proactive compression: context budget exceeded before LLM call",
map[string]any{"session_key": ts.sessionKey})
if compression, ok := al.forceCompression(ts.agent, ts.sessionKey); ok {
al.emitEvent(
EventKindContextCompress,
ts.eventMeta("runTurn", "turn.context.compress"),
ContextCompressPayload{
Reason: ContextCompressReasonProactive,
DroppedMessages: compression.DroppedMessages,
RemainingMessages: compression.RemainingMessages,
},
)
ts.refreshRestorePointFromSession(ts.agent)
if err := al.contextManager.Compact(turnCtx, &CompactRequest{
SessionKey: ts.sessionKey,
Reason: ContextCompressReasonProactive,
}); err != nil {
logger.WarnCF("agent", "Proactive compact failed", map[string]any{
"session_key": ts.sessionKey,
"error": err.Error(),
})
}
ts.refreshRestorePointFromSession(ts.agent)
// Re-assemble from CM after compact.
if resp, err := al.contextManager.Assemble(turnCtx, &AssembleRequest{
SessionKey: ts.sessionKey,
Budget: ts.agent.ContextWindow,
MaxTokens: ts.agent.MaxTokens,
}); err == nil && resp != nil {
history = resp.History
summary = resp.Summary
}
newHistory := ts.agent.Sessions.GetHistory(ts.sessionKey)
newSummary := ts.agent.Sessions.GetSummary(ts.sessionKey)
messages = ts.agent.ContextBuilder.BuildMessages(
newHistory, newSummary, ts.userMessage,
history, summary, ts.userMessage,
ts.media, ts.channel, ts.chatID,
ts.opts.SenderID, ts.opts.SenderDisplayName,
activeSkillNames(ts.agent, ts.opts)...,
@@ -1753,6 +1765,7 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
ts.agent.Sessions.AddMessage(ts.sessionKey, rootMsg.Role, rootMsg.Content)
}
ts.recordPersistedMessage(rootMsg)
ts.ingestMessage(turnCtx, al, rootMsg)
}
activeCandidates, activeModel, usedLight := al.selectCandidates(ts.agent, ts.userMessage, messages)
@@ -2096,23 +2109,27 @@ turnLoop:
})
}
if compression, ok := al.forceCompression(ts.agent, ts.sessionKey); ok {
al.emitEvent(
EventKindContextCompress,
ts.eventMeta("runTurn", "turn.context.compress"),
ContextCompressPayload{
Reason: ContextCompressReasonRetry,
DroppedMessages: compression.DroppedMessages,
RemainingMessages: compression.RemainingMessages,
},
)
ts.refreshRestorePointFromSession(ts.agent)
if compactErr := al.contextManager.Compact(turnCtx, &CompactRequest{
SessionKey: ts.sessionKey,
Reason: ContextCompressReasonRetry,
}); compactErr != nil {
logger.WarnCF("agent", "Context overflow compact failed", map[string]any{
"session_key": ts.sessionKey,
"error": compactErr.Error(),
})
}
ts.refreshRestorePointFromSession(ts.agent)
// Re-assemble from CM after compact.
if asmResp, asmErr := al.contextManager.Assemble(turnCtx, &AssembleRequest{
SessionKey: ts.sessionKey,
Budget: ts.agent.ContextWindow,
MaxTokens: ts.agent.MaxTokens,
}); asmErr == nil && asmResp != nil {
history = asmResp.History
summary = asmResp.Summary
}
newHistory := ts.agent.Sessions.GetHistory(ts.sessionKey)
newSummary := ts.agent.Sessions.GetSummary(ts.sessionKey)
messages = ts.agent.ContextBuilder.BuildMessages(
newHistory, newSummary, "",
history, summary, "",
nil, ts.channel, ts.chatID, ts.opts.SenderID, ts.opts.SenderDisplayName,
activeSkillNames(ts.agent, ts.opts)...,
)
@@ -2285,6 +2302,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddFullMessage(ts.sessionKey, assistantMsg)
ts.recordPersistedMessage(assistantMsg)
ts.ingestMessage(turnCtx, al, assistantMsg)
}
ts.setPhase(TurnPhaseTools)
@@ -2624,6 +2642,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddFullMessage(ts.sessionKey, toolResultMsg)
ts.recordPersistedMessage(toolResultMsg)
ts.ingestMessage(turnCtx, al, toolResultMsg)
}
if steerMsgs := al.dequeueSteeringMessagesForScope(ts.sessionKey); len(steerMsgs) > 0 {
@@ -2723,6 +2742,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddMessage(ts.sessionKey, summaryMsg.Role, summaryMsg.Content)
ts.recordPersistedMessage(summaryMsg)
ts.ingestMessage(turnCtx, al, summaryMsg)
if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
turnStatus = TurnEndStatusError
al.emitEvent(
@@ -2737,7 +2757,7 @@ turnLoop:
}
}
if ts.opts.EnableSummary {
al.maybeSummarize(ts.agent, ts.sessionKey, ts.scope)
al.contextManager.Compact(turnCtx, &CompactRequest{SessionKey: ts.sessionKey, Reason: ContextCompressReasonSummarize})
}
ts.setPhase(TurnPhaseCompleted)
@@ -2792,6 +2812,7 @@ turnLoop:
finalMsg := providers.Message{Role: "assistant", Content: finalContent}
ts.agent.Sessions.AddMessage(ts.sessionKey, finalMsg.Role, finalMsg.Content)
ts.recordPersistedMessage(finalMsg)
ts.ingestMessage(turnCtx, al, finalMsg)
if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
turnStatus = TurnEndStatusError
al.emitEvent(
@@ -2807,7 +2828,13 @@ turnLoop:
}
if ts.opts.EnableSummary {
al.maybeSummarize(ts.agent, ts.sessionKey, ts.scope)
al.contextManager.Compact(
turnCtx,
&CompactRequest{
SessionKey: ts.sessionKey,
Reason: ContextCompressReasonSummarize,
},
)
}
ts.setPhase(TurnPhaseCompleted)
@@ -2886,103 +2913,28 @@ func (al *AgentLoop) selectCandidates(
return agent.LightCandidates, resolvedCandidateModel(agent.LightCandidates, agent.Router.LightModel()), true
}
// maybeSummarize triggers summarization if the session history exceeds thresholds.
func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey string, turnScope turnEventScope) {
newHistory := agent.Sessions.GetHistory(sessionKey)
tokenEstimate := al.estimateTokens(newHistory)
threshold := agent.ContextWindow * agent.SummarizeTokenPercent / 100
if len(newHistory) > agent.SummarizeMessageThreshold || tokenEstimate > threshold {
summarizeKey := agent.ID + ":" + sessionKey
if _, loading := al.summarizing.LoadOrStore(summarizeKey, true); !loading {
go func() {
defer al.summarizing.Delete(summarizeKey)
logger.Debug("Memory threshold reached. Optimizing conversation history...")
al.summarizeSession(agent, sessionKey, turnScope)
}()
}
// resolveContextManager selects the ContextManager implementation based on config.
func (al *AgentLoop) resolveContextManager() ContextManager {
name := al.cfg.Agents.Defaults.ContextManager
if name == "" || name == "legacy" {
return &legacyContextManager{al: al}
}
}
type compressionResult struct {
DroppedMessages int
RemainingMessages int
}
// forceCompression aggressively reduces context when the limit is hit.
// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
// cycle, as defined in #1316), so tool-call sequences are never split.
//
// If the history is a single Turn with no safe split point, the function
// falls back to keeping only the most recent user message. This breaks
// Turn atomicity as a last resort to avoid a context-exceeded loop.
//
// Session history contains only user/assistant/tool messages — the system
// prompt is built dynamically by BuildMessages and is NOT stored here.
// The compression note is recorded in the session summary so that
// BuildMessages can include it in the next system prompt.
func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) (compressionResult, bool) {
history := agent.Sessions.GetHistory(sessionKey)
if len(history) <= 2 {
return compressionResult{}, false
factory, ok := lookupContextManager(name)
if !ok {
logger.WarnCF("agent", "Unknown context manager, falling back to legacy", map[string]any{
"name": name,
})
return &legacyContextManager{al: al}
}
// Split at a Turn boundary so no tool-call sequence is torn apart.
// parseTurnBoundaries gives us the start of each Turn; we drop the
// oldest half of Turns and keep the most recent ones.
turns := parseTurnBoundaries(history)
var mid int
if len(turns) >= 2 {
mid = turns[len(turns)/2]
} else {
// Fewer than 2 Turns — fall back to message-level midpoint
// aligned to the nearest Turn boundary.
mid = findSafeBoundary(history, len(history)/2)
cm, err := factory(al.cfg.Agents.Defaults.ContextManagerConfig, al)
if err != nil {
logger.WarnCF("agent", "Failed to create context manager, falling back to legacy", map[string]any{
"name": name,
"error": err.Error(),
})
return &legacyContextManager{al: al}
}
var keptHistory []providers.Message
if mid <= 0 {
// No safe Turn boundary — the entire history is a single Turn
// (e.g. one user message followed by a massive tool response).
// Keeping everything would leave the agent stuck in a context-
// exceeded loop, so fall back to keeping only the most recent
// user message. This breaks Turn atomicity as a last resort.
for i := len(history) - 1; i >= 0; i-- {
if history[i].Role == "user" {
keptHistory = []providers.Message{history[i]}
break
}
}
} else {
keptHistory = history[mid:]
}
droppedCount := len(history) - len(keptHistory)
// Record compression in the session summary so BuildMessages includes it
// in the system prompt. We do not modify history messages themselves.
existingSummary := agent.Sessions.GetSummary(sessionKey)
compressionNote := fmt.Sprintf(
"[Emergency compression dropped %d oldest messages due to context limit]",
droppedCount,
)
if existingSummary != "" {
compressionNote = existingSummary + "\n\n" + compressionNote
}
agent.Sessions.SetSummary(sessionKey, compressionNote)
agent.Sessions.SetHistory(sessionKey, keptHistory)
agent.Sessions.Save(sessionKey)
logger.WarnCF("agent", "Forced compression executed", map[string]any{
"session_key": sessionKey,
"dropped_msgs": droppedCount,
"new_count": len(keptHistory),
})
return compressionResult{
DroppedMessages: droppedCount,
RemainingMessages: len(keptHistory),
}, true
return cm
}
// GetStartupInfo returns information about loaded tools and skills for logging.
@@ -3074,247 +3026,13 @@ func formatToolsForLog(toolDefs []providers.ToolDefinition) string {
}
// summarizeSession summarizes the conversation history for a session.
func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string, turnScope turnEventScope) {
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
history := agent.Sessions.GetHistory(sessionKey)
summary := agent.Sessions.GetSummary(sessionKey)
// Keep the most recent Turns for continuity, aligned to a Turn boundary
// so that no tool-call sequence is split.
if len(history) <= 4 {
return
}
safeCut := findSafeBoundary(history, len(history)-4)
if safeCut <= 0 {
return
}
keepCount := len(history) - safeCut
toSummarize := history[:safeCut]
// Oversized Message Guard
maxMessageTokens := agent.ContextWindow / 2
validMessages := make([]providers.Message, 0)
omitted := false
for _, m := range toSummarize {
if m.Role != "user" && m.Role != "assistant" {
continue
}
msgTokens := len(m.Content) / 2
if msgTokens > maxMessageTokens {
omitted = true
continue
}
validMessages = append(validMessages, m)
}
if len(validMessages) == 0 {
return
}
const (
maxSummarizationMessages = 10
llmMaxRetries = 3
llmTemperature = 0.3
fallbackMaxContentLength = 200
)
// Multi-Part Summarization
var finalSummary string
if len(validMessages) > maxSummarizationMessages {
mid := len(validMessages) / 2
mid = al.findNearestUserMessage(validMessages, mid)
part1 := validMessages[:mid]
part2 := validMessages[mid:]
s1, _ := al.summarizeBatch(ctx, agent, part1, "")
s2, _ := al.summarizeBatch(ctx, agent, part2, "")
mergePrompt := fmt.Sprintf(
"Merge these two conversation summaries into one cohesive summary:\n\n1: %s\n\n2: %s",
s1,
s2,
)
resp, err := al.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries)
if err == nil && resp.Content != "" {
finalSummary = resp.Content
} else {
finalSummary = s1 + " " + s2
}
} else {
finalSummary, _ = al.summarizeBatch(ctx, agent, validMessages, summary)
}
if omitted && finalSummary != "" {
finalSummary += "\n[Note: Some oversized messages were omitted from this summary for efficiency.]"
}
if finalSummary != "" {
agent.Sessions.SetSummary(sessionKey, finalSummary)
agent.Sessions.TruncateHistory(sessionKey, keepCount)
agent.Sessions.Save(sessionKey)
al.emitEvent(
EventKindSessionSummarize,
turnScope.meta(0, "summarizeSession", "turn.session.summarize"),
SessionSummarizePayload{
SummarizedMessages: len(validMessages),
KeptMessages: keepCount,
SummaryLen: len(finalSummary),
OmittedOversized: omitted,
},
)
}
}
// findNearestUserMessage finds the nearest user message to the given index.
// It searches backward first, then forward if no user message is found.
func (al *AgentLoop) findNearestUserMessage(messages []providers.Message, mid int) int {
originalMid := mid
for mid > 0 && messages[mid].Role != "user" {
mid--
}
if messages[mid].Role == "user" {
return mid
}
mid = originalMid
for mid < len(messages) && messages[mid].Role != "user" {
mid++
}
if mid < len(messages) {
return mid
}
return originalMid
}
// retryLLMCall calls the LLM with retry logic.
func (al *AgentLoop) retryLLMCall(
ctx context.Context,
agent *AgentInstance,
prompt string,
maxRetries int,
) (*providers.LLMResponse, error) {
const (
llmTemperature = 0.3
)
var resp *providers.LLMResponse
var err error
for attempt := 0; attempt < maxRetries; attempt++ {
al.activeRequests.Add(1)
resp, err = func() (*providers.LLMResponse, error) {
defer al.activeRequests.Done()
return agent.Provider.Chat(
ctx,
[]providers.Message{{Role: "user", Content: prompt}},
nil,
agent.Model,
map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": llmTemperature,
"prompt_cache_key": agent.ID,
},
)
}()
if err == nil && resp != nil && resp.Content != "" {
return resp, nil
}
if attempt < maxRetries-1 {
time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond)
}
}
return resp, err
}
// summarizeBatch summarizes a batch of messages.
func (al *AgentLoop) summarizeBatch(
ctx context.Context,
agent *AgentInstance,
batch []providers.Message,
existingSummary string,
) (string, error) {
const (
llmMaxRetries = 3
llmTemperature = 0.3
fallbackMinContentLength = 200
fallbackMaxContentPercent = 10
)
var sb strings.Builder
sb.WriteString(
"Provide a concise summary of this conversation segment, preserving core context and key points.\n",
)
if existingSummary != "" {
sb.WriteString("Existing context: ")
sb.WriteString(existingSummary)
sb.WriteString("\n")
}
sb.WriteString("\nCONVERSATION:\n")
for _, m := range batch {
fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Content)
}
prompt := sb.String()
response, err := al.retryLLMCall(ctx, agent, prompt, llmMaxRetries)
if err == nil && response.Content != "" {
return strings.TrimSpace(response.Content), nil
}
var fallback strings.Builder
fallback.WriteString("Conversation summary: ")
for i, m := range batch {
if i > 0 {
fallback.WriteString(" | ")
}
content := strings.TrimSpace(m.Content)
runes := []rune(content)
if len(runes) == 0 {
fallback.WriteString(fmt.Sprintf("%s: ", m.Role))
continue
}
keepLength := len(runes) * fallbackMaxContentPercent / 100
if keepLength < fallbackMinContentLength {
keepLength = fallbackMinContentLength
}
if keepLength > len(runes) {
keepLength = len(runes)
}
content = string(runes[:keepLength])
if keepLength < len(runes) {
content += "..."
}
fallback.WriteString(fmt.Sprintf("%s: %s", m.Role, content))
}
return fallback.String(), nil
}
// estimateTokens estimates the number of tokens in a message list.
// Counts Content, ToolCalls arguments, and ToolCallID metadata so that
// tool-heavy conversations are not systematically undercounted.
func (al *AgentLoop) estimateTokens(messages []providers.Message) int {
total := 0
for _, m := range messages {
total += estimateMessageTokens(m)
}
return total
}
func (al *AgentLoop) handleCommand(
ctx context.Context,
msg bus.InboundMessage,