mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
fix(agent): forceCompression recovers from single oversized Turn
When the entire session history is a single Turn (e.g. one user message followed by a massive tool response), findSafeBoundary returns 0 and forceCompression previously did nothing — leaving the agent stuck in a context-exceeded retry loop. Now falls back to keeping only the most recent user message when no safe Turn boundary exists. This breaks Turn atomicity as a last resort but guarantees the agent can recover. Also updates docs/agent-refactor/context.md to document this behavior. Ref #1490
This commit is contained in:
@@ -103,7 +103,9 @@ This prevents wasted (and billed) LLM calls that would otherwise fail with a con
|
||||
|
||||
`forceCompression` runs when the LLM returns a context-window error despite the proactive check.
|
||||
|
||||
Drops the oldest ~50% of Turns. Stores a compression note in the session summary (not in history messages) so `BuildMessages` can include it in the next system prompt.
|
||||
Drops the oldest ~50% of Turns. If the history is a single Turn with no safe split point (e.g. one user message followed by a massive tool response), falls back to keeping only the most recent user message — breaking Turn atomicity as a last resort to avoid a context-exceeded loop.
|
||||
|
||||
Stores a compression note in the session summary (not in history messages) so `BuildMessages` can include it in the next system prompt.
|
||||
|
||||
This is the fallback for when the token estimate undershoots reality.
|
||||
|
||||
|
||||
+19
-3
@@ -1559,6 +1559,10 @@ func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey, channel, c
|
||||
// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
|
||||
// cycle, as defined in #1316), so tool-call sequences are never split.
|
||||
//
|
||||
// If the history is a single Turn with no safe split point, the function
|
||||
// falls back to keeping only the most recent user message. This breaks
|
||||
// Turn atomicity as a last resort to avoid a context-exceeded loop.
|
||||
//
|
||||
// Session history contains only user/assistant/tool messages — the system
|
||||
// prompt is built dynamically by BuildMessages and is NOT stored here.
|
||||
// The compression note is recorded in the session summary so that
|
||||
@@ -1581,12 +1585,24 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) {
|
||||
// aligned to the nearest Turn boundary.
|
||||
mid = findSafeBoundary(history, len(history)/2)
|
||||
}
|
||||
var keptHistory []providers.Message
|
||||
if mid <= 0 {
|
||||
return
|
||||
// No safe Turn boundary — the entire history is a single Turn
|
||||
// (e.g. one user message followed by a massive tool response).
|
||||
// Keeping everything would leave the agent stuck in a context-
|
||||
// exceeded loop, so fall back to keeping only the most recent
|
||||
// user message. This breaks Turn atomicity as a last resort.
|
||||
for i := len(history) - 1; i >= 0; i-- {
|
||||
if history[i].Role == "user" {
|
||||
keptHistory = []providers.Message{history[i]}
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
keptHistory = history[mid:]
|
||||
}
|
||||
|
||||
droppedCount := mid
|
||||
keptHistory := history[mid:]
|
||||
droppedCount := len(history) - len(keptHistory)
|
||||
|
||||
// Record compression in the session summary so BuildMessages includes it
|
||||
// in the system prompt. We do not modify history messages themselves.
|
||||
|
||||
Reference in New Issue
Block a user