From c63c6449b4a3a9fbe15fb2a269eddddc8817084f Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Tue, 17 Mar 2026 10:23:16 +0800
Subject: [PATCH] fix(agent): forceCompression recovers from single oversized
 Turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the entire session history is a single Turn (e.g. one user message
followed by a massive tool response), findSafeBoundary returns 0 and
forceCompression previously did nothing — leaving the agent stuck in
a context-exceeded retry loop.

forceCompression now falls back to keeping only the most recent user
message when no safe Turn boundary exists. This breaks Turn atomicity as
a last resort, but it lets the agent recover instead of retrying forever.

Also updates docs/agent-refactor/context.md to document this behavior.

Ref #1490
---
 docs/agent-refactor/context.md |  4 +++-
 pkg/agent/loop.go              | 22 +++++++++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/docs/agent-refactor/context.md b/docs/agent-refactor/context.md
index 785fae2be..2269d9258 100644
--- a/docs/agent-refactor/context.md
+++ b/docs/agent-refactor/context.md
@@ -103,7 +103,9 @@ This prevents wasted (and billed) LLM calls that would otherwise fail with a con
 
 `forceCompression` runs when the LLM returns a context-window error despite the proactive check.
 
-Drops the oldest ~50% of Turns. Stores a compression note in the session summary (not in history messages) so `BuildMessages` can include it in the next system prompt.
+Drops the oldest ~50% of Turns. If the history is a single Turn with no safe split point (e.g. one user message followed by a massive tool response), it falls back to keeping only the most recent user message — breaking Turn atomicity as a last resort to avoid a context-exceeded loop.
+
+Stores a compression note in the session summary (not in history messages) so `BuildMessages` can include it in the next system prompt.
 
 This is the fallback for when the token estimate undershoots reality.
 
diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go
index 688d0ed1d..c583f5ca5 100644
--- a/pkg/agent/loop.go
+++ b/pkg/agent/loop.go
@@ -1559,6 +1559,10 @@ func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey, channel, c
 // It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
 // cycle, as defined in #1316), so tool-call sequences are never split.
 //
+// If the history is a single Turn with no safe split point, the function
+// falls back to keeping only the most recent user message. This breaks
+// Turn atomicity as a last resort to avoid a context-exceeded loop.
+//
 // Session history contains only user/assistant/tool messages — the system
 // prompt is built dynamically by BuildMessages and is NOT stored here.
 // The compression note is recorded in the session summary so that
@@ -1581,12 +1585,24 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) {
 		// aligned to the nearest Turn boundary.
 		mid = findSafeBoundary(history, len(history)/2)
 	}
+	var keptHistory []providers.Message
 	if mid <= 0 {
-		return
+		// No safe Turn boundary — the entire history is a single Turn
+		// (e.g. one user message followed by a massive tool response).
+		// Keeping everything would leave the agent stuck in a
+		// context-exceeded loop, so fall back to keeping only the most
+		// recent user message. This breaks Turn atomicity as a last resort.
+		for i := len(history) - 1; i >= 0; i-- {
+			if history[i].Role == "user" {
+				keptHistory = []providers.Message{history[i]}
+				break
+			}
+		}
+	} else {
+		keptHistory = history[mid:]
 	}
 
-	droppedCount := mid
-	keptHistory := history[mid:]
+	droppedCount := len(history) - len(keptHistory)
 
 	// Record compression in the session summary so BuildMessages includes it
 	// in the system prompt. We do not modify history messages themselves.