refactor(agent): use Turn as the atomic unit for compression cut-off

Introduce parseTurnBoundaries() which identifies each Turn start index in the session history. A Turn is a complete "user input → LLM iterations → final response" cycle (as defined in the agent refactor design #1316). findSafeBoundary now uses Turn boundaries instead of raw role-scanning, making the intent explicit: "find the nearest Turn boundary." forceCompression drops the oldest half of Turns (not arbitrary messages), which is simpler and more intuitive. The Turn-based approach naturally prevents splitting tool-call sequences since each Turn is atomic.
2026-06-12 18:08:54 +00:00 · 2026-03-13 15:54:50 +08:00
parent efd403242e
commit 639739cb85
3 changed files with 136 additions and 24 deletions
@@ -12,14 +12,26 @@ import (
 	"github.com/sipeed/picoclaw/pkg/providers"
 )

-// isSafeBoundary reports whether index is a valid position to split a message
-// history for truncation or compression. Splitting at index means:
-//   - history[:index] is dropped or summarized
-//   - history[index:] is kept
+// parseTurnBoundaries returns the starting index of each Turn in the history.
+// A Turn is a complete "user input → LLM iterations → final response" cycle
+// (as defined in #1316). Each Turn begins at a user message and extends
+// through all subsequent assistant/tool messages until the next user message.
 //
-// A boundary is safe when the kept portion begins at a "user" message,
-// ensuring no tool-call sequence (assistant+ToolCalls → tool results)
-// is torn apart across the split.
+// Cutting at a Turn boundary guarantees that no tool-call sequence
+// (assistant+ToolCalls → tool results) is split across the cut.
+func parseTurnBoundaries(history []providers.Message) []int {
+	var starts []int
+	for i, msg := range history {
+		if msg.Role == "user" {
+			starts = append(starts, i)
+		}
+	}
+	return starts
+}
+
+// isSafeBoundary reports whether index is a valid Turn boundary — i.e.,
+// a position where the kept portion (history[index:]) begins at a user
+// message, so no tool-call sequence is torn apart.
 func isSafeBoundary(history []providers.Message, index int) bool {
 	if index <= 0 || index >= len(history) {
 		return true
@@ -27,9 +39,10 @@ func isSafeBoundary(history []providers.Message, index int) bool {
 	return history[index].Role == "user"
 }

-// findSafeBoundary locates the nearest safe split point to targetIndex.
-// It scans backward first (preserving more context), then forward.
-// Returns targetIndex unchanged only when no safe boundary exists.
+// findSafeBoundary locates the nearest Turn boundary to targetIndex.
+// It prefers the boundary at or before targetIndex (preserving more recent
+// context). Falls back to the nearest boundary after targetIndex, and
+// returns targetIndex unchanged only when no Turn boundary exists at all.
 func findSafeBoundary(history []providers.Message, targetIndex int) int {
 	if len(history) == 0 {
 		return 0
@@ -41,21 +54,28 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int {
 		return len(history)
 	}

-	if isSafeBoundary(history, targetIndex) {
+	turns := parseTurnBoundaries(history)
+	if len(turns) == 0 {
 		return targetIndex
 	}

-	// Backward scan: prefer keeping more messages.
-	for i := targetIndex - 1; i > 0; i-- {
-		if isSafeBoundary(history, i) {
-			return i
+	// Find the last Turn boundary at or before targetIndex.
+	// Prefer backward: keeps more recent messages.
+	backward := -1
+	for _, t := range turns {
+		if t <= targetIndex {
+			backward = t
 		}
 	}
+	if backward > 0 {
+		return backward
+	}

-	// Forward scan: fall back to keeping fewer messages.
-	for i := targetIndex + 1; i < len(history); i++ {
-		if isSafeBoundary(history, i) {
-			return i
+	// No valid Turn boundary before target (or only at index 0 which
+	// would keep everything). Use the first Turn after targetIndex.
+	for _, t := range turns {
+		if t > targetIndex {
+			return t
 		}
 	}

@@ -40,6 +40,88 @@ func msgTool(callID, content string) providers.Message {
 	return providers.Message{Role: "tool", ToolCallID: callID, Content: content}
 }

+func TestParseTurnBoundaries(t *testing.T) {
+	tests := []struct {
+		name    string
+		history []providers.Message
+		want    []int
+	}{
+		{
+			name:    "empty history",
+			history: nil,
+			want:    nil,
+		},
+		{
+			name: "simple exchange",
+			history: []providers.Message{
+				msgUser("q1"),
+				msgAssistant("a1"),
+				msgUser("q2"),
+				msgAssistant("a2"),
+			},
+			want: []int{0, 2},
+		},
+		{
+			name: "tool-call Turn",
+			history: []providers.Message{
+				msgUser("search"),
+				msgAssistantTC("tc1"),
+				msgTool("tc1", "result"),
+				msgAssistant("found it"),
+				msgUser("thanks"),
+				msgAssistant("welcome"),
+			},
+			want: []int{0, 4},
+		},
+		{
+			name: "chained tool calls in single Turn",
+			history: []providers.Message{
+				msgUser("save and notify"),
+				msgAssistantTC("tc_save"),
+				msgTool("tc_save", "saved"),
+				msgAssistantTC("tc_notify"),
+				msgTool("tc_notify", "notified"),
+				msgAssistant("done"),
+			},
+			want: []int{0},
+		},
+		{
+			name: "no user messages",
+			history: []providers.Message{
+				msgAssistant("a1"),
+				msgAssistant("a2"),
+			},
+			want: nil,
+		},
+		{
+			name: "leading non-user messages",
+			history: []providers.Message{
+				msgAssistantTC("tc1"),
+				msgTool("tc1", "r1"),
+				msgAssistant("greeting"),
+				msgUser("hello"),
+				msgAssistant("hi"),
+			},
+			want: []int{3},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := parseTurnBoundaries(tt.history)
+			if len(got) != len(tt.want) {
+				t.Errorf("parseTurnBoundaries() = %v, want %v", got, tt.want)
+				return
+			}
+			for i := range got {
+				if got[i] != tt.want[i] {
+					t.Errorf("parseTurnBoundaries()[%d] = %d, want %d", i, got[i], tt.want[i])
+				}
+			}
+		})
+	}
+}
+
 func TestIsSafeBoundary(t *testing.T) {
 	tests := []struct {
 		name    string
@@ -1556,8 +1556,8 @@ func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey, channel, c
 }

 // forceCompression aggressively reduces context when the limit is hit.
-// It drops the oldest ~50% of messages, aligning the split to a safe
-// boundary so tool-call sequences stay intact.
+// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
+// cycle, as defined in #1316), so tool-call sequences are never split.
 //
 // Session history contains only user/assistant/tool messages — the system
 // prompt is built dynamically by BuildMessages and is NOT stored here.
@@ -1569,8 +1569,18 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) {
 		return
 	}

-	// Find a safe mid-point that does not split a tool-call sequence.
-	mid := findSafeBoundary(history, len(history)/2)
+	// Split at a Turn boundary so no tool-call sequence is torn apart.
+	// parseTurnBoundaries gives us the start of each Turn; we drop the
+	// oldest half of Turns and keep the most recent ones.
+	turns := parseTurnBoundaries(history)
+	var mid int
+	if len(turns) >= 2 {
+		mid = turns[len(turns)/2]
+	} else {
+		// Fewer than 2 Turns — fall back to message-level midpoint
+		// aligned to the nearest Turn boundary.
+		mid = findSafeBoundary(history, len(history)/2)
+	}
 	if mid <= 0 {
 		return
 	}
@@ -1696,7 +1706,7 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
 	history := agent.Sessions.GetHistory(sessionKey)
 	summary := agent.Sessions.GetSummary(sessionKey)

-	// Keep last few messages for continuity, aligned to a safe boundary
+	// Keep the most recent Turns for continuity, aligned to a Turn boundary
 	// so that no tool-call sequence is split.
 	if len(history) <= 4 {
 		return