From c63c6449b4a3a9fbe15fb2a269eddddc8817084f Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Tue, 17 Mar 2026 10:23:16 +0800 Subject: [PATCH] fix(agent): forceCompression recovers from single oversized Turn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the entire session history is a single Turn (e.g. one user message followed by a massive tool response), findSafeBoundary returns 0 and forceCompression previously did nothing — leaving the agent stuck in a context-exceeded retry loop. forceCompression now falls back to keeping only the most recent user message when no safe Turn boundary exists. This breaks Turn atomicity as a last resort, but it lets the agent retry without the oversized Turn instead of looping. Also updates docs/agent-refactor/context.md to document this behavior. Ref #1490 --- docs/agent-refactor/context.md | 4 +++- pkg/agent/loop.go | 22 +++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/docs/agent-refactor/context.md b/docs/agent-refactor/context.md index 785fae2be..2269d9258 100644 --- a/docs/agent-refactor/context.md +++ b/docs/agent-refactor/context.md @@ -103,7 +103,9 @@ This prevents wasted (and billed) LLM calls that would otherwise fail with a con `forceCompression` runs when the LLM returns a context-window error despite the proactive check. -Drops the oldest ~50% of Turns. Stores a compression note in the session summary (not in history messages) so `BuildMessages` can include it in the next system prompt. +Drops the oldest ~50% of Turns. If the history is a single Turn with no safe split point (e.g. one user message followed by a massive tool response), falls back to keeping only the most recent user message — breaking Turn atomicity as a last resort to avoid a context-exceeded loop. + +Stores a compression note in the session summary (not in history messages) so `BuildMessages` can include it in the next system prompt. This is the fallback for when the token estimate undershoots reality. 
diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 688d0ed1d..c583f5ca5 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -1559,6 +1559,10 @@ func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey, channel, c // It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response // cycle, as defined in #1316), so tool-call sequences are never split. // +// If the history is a single Turn with no safe split point, the function +// falls back to keeping only the most recent user message. This breaks +// Turn atomicity as a last resort to avoid a context-exceeded loop. +// // Session history contains only user/assistant/tool messages — the system // prompt is built dynamically by BuildMessages and is NOT stored here. // The compression note is recorded in the session summary so that @@ -1581,12 +1585,24 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) { // aligned to the nearest Turn boundary. mid = findSafeBoundary(history, len(history)/2) } + var keptHistory []providers.Message if mid <= 0 { - return + // No safe Turn boundary — the entire history is a single Turn + // (e.g. one user message followed by a massive tool response). + // Keeping everything would leave the agent stuck in a context- + // exceeded loop, so fall back to keeping only the most recent + // user message. This breaks Turn atomicity as a last resort. + for i := len(history) - 1; i >= 0; i-- { + if history[i].Role == "user" { + keptHistory = []providers.Message{history[i]} + break + } + } + } else { + keptHistory = history[mid:] } - droppedCount := mid - keptHistory := history[mid:] + droppedCount := len(history) - len(keptHistory) // Record compression in the session summary so BuildMessages includes it // in the system prompt. We do not modify history messages themselves.