From 639739cb8512e7b3610015265f30197dbe421096 Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Fri, 13 Mar 2026 15:54:50 +0800 Subject: [PATCH] refactor(agent): use Turn as the atomic unit for compression cut-off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce parseTurnBoundaries() which identifies each Turn start index in the session history. A Turn is a complete "user input → LLM iterations → final response" cycle (as defined in the agent refactor design #1316). findSafeBoundary now uses Turn boundaries instead of raw role-scanning, making the intent explicit: "find the nearest Turn boundary." forceCompression drops the oldest half of Turns (not arbitrary messages), which is simpler and more intuitive. The Turn-based approach naturally prevents splitting tool-call sequences since each Turn is atomic. --- pkg/agent/context_budget.go | 58 ++++++++++++++-------- pkg/agent/context_budget_test.go | 82 ++++++++++++++++++++++++++++++++ pkg/agent/loop.go | 20 ++++++-- 3 files changed, 136 insertions(+), 24 deletions(-) diff --git a/pkg/agent/context_budget.go b/pkg/agent/context_budget.go index 71da5d8f7..05e27e18a 100644 --- a/pkg/agent/context_budget.go +++ b/pkg/agent/context_budget.go @@ -12,14 +12,26 @@ import ( "github.com/sipeed/picoclaw/pkg/providers" ) -// isSafeBoundary reports whether index is a valid position to split a message -// history for truncation or compression. Splitting at index means: -// - history[:index] is dropped or summarized -// - history[index:] is kept +// parseTurnBoundaries returns the starting index of each Turn in the history. +// A Turn is a complete "user input → LLM iterations → final response" cycle +// (as defined in #1316). Each Turn begins at a user message and extends +// through all subsequent assistant/tool messages until the next user message. // -// A boundary is safe when the kept portion begins at a "user" message, -// ensuring no tool-call sequence (assistant+ToolCalls → tool results) -// is torn apart across the split. +// Cutting at a Turn boundary guarantees that no tool-call sequence +// (assistant+ToolCalls → tool results) is split across the cut. +func parseTurnBoundaries(history []providers.Message) []int { + var starts []int + for i, msg := range history { + if msg.Role == "user" { + starts = append(starts, i) + } + } + return starts +} + +// isSafeBoundary reports whether index is a valid Turn boundary — i.e., +// a position where the kept portion (history[index:]) begins at a user +// message, so no tool-call sequence is torn apart. func isSafeBoundary(history []providers.Message, index int) bool { if index <= 0 || index >= len(history) { return true @@ -27,9 +39,10 @@ func isSafeBoundary(history []providers.Message, index int) bool { return history[index].Role == "user" } -// findSafeBoundary locates the nearest safe split point to targetIndex. -// It scans backward first (preserving more context), then forward. -// Returns targetIndex unchanged only when no safe boundary exists. +// findSafeBoundary locates the nearest Turn boundary to targetIndex. +// It prefers the boundary at or before targetIndex (preserving more recent +// context). Falls back to the nearest boundary after targetIndex, and +// returns targetIndex unchanged only when no Turn boundary exists at all. func findSafeBoundary(history []providers.Message, targetIndex int) int { if len(history) == 0 { return 0 @@ -41,21 +54,28 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int { return len(history) } - if isSafeBoundary(history, targetIndex) { + turns := parseTurnBoundaries(history) + if len(turns) == 0 { return targetIndex } - // Backward scan: prefer keeping more messages. - for i := targetIndex - 1; i > 0; i-- { - if isSafeBoundary(history, i) { - return i + // Find the last Turn boundary at or before targetIndex. + // Prefer backward: keeps more recent messages. + backward := -1 + for _, t := range turns { + if t <= targetIndex { + backward = t } } + if backward > 0 { + return backward + } - // Forward scan: fall back to keeping fewer messages. - for i := targetIndex + 1; i < len(history); i++ { - if isSafeBoundary(history, i) { - return i + // No valid Turn boundary before target (or only at index 0 which + // would keep everything). Use the first Turn after targetIndex. + for _, t := range turns { + if t > targetIndex { + return t } } diff --git a/pkg/agent/context_budget_test.go b/pkg/agent/context_budget_test.go index 4073506cf..15198d03b 100644 --- a/pkg/agent/context_budget_test.go +++ b/pkg/agent/context_budget_test.go @@ -40,6 +40,88 @@ func msgTool(callID, content string) providers.Message { return providers.Message{Role: "tool", ToolCallID: callID, Content: content} } +func TestParseTurnBoundaries(t *testing.T) { + tests := []struct { + name string + history []providers.Message + want []int + }{ + { + name: "empty history", + history: nil, + want: nil, + }, + { + name: "simple exchange", + history: []providers.Message{ + msgUser("q1"), + msgAssistant("a1"), + msgUser("q2"), + msgAssistant("a2"), + }, + want: []int{0, 2}, + }, + { + name: "tool-call Turn", + history: []providers.Message{ + msgUser("search"), + msgAssistantTC("tc1"), + msgTool("tc1", "result"), + msgAssistant("found it"), + msgUser("thanks"), + msgAssistant("welcome"), + }, + want: []int{0, 4}, + }, + { + name: "chained tool calls in single Turn", + history: []providers.Message{ + msgUser("save and notify"), + msgAssistantTC("tc_save"), + msgTool("tc_save", "saved"), + msgAssistantTC("tc_notify"), + msgTool("tc_notify", "notified"), + msgAssistant("done"), + }, + want: []int{0}, + }, + { + name: "no user messages", + history: []providers.Message{ + msgAssistant("a1"), + msgAssistant("a2"), + }, + want: nil, + }, + { + name: "leading non-user messages", + history: []providers.Message{ + msgAssistantTC("tc1"), + msgTool("tc1", "r1"), + msgAssistant("greeting"), + msgUser("hello"), + msgAssistant("hi"), + }, + want: []int{3}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseTurnBoundaries(tt.history) + if len(got) != len(tt.want) { + t.Errorf("parseTurnBoundaries() = %v, want %v", got, tt.want) + return + } + for i := range got { + if got[i] != tt.want[i] { + t.Errorf("parseTurnBoundaries()[%d] = %d, want %d", i, got[i], tt.want[i]) + } + } + }) + } +} + func TestIsSafeBoundary(t *testing.T) { tests := []struct { name string diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 14dc8c5ca..688d0ed1d 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -1556,8 +1556,8 @@ func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey, channel, c } // forceCompression aggressively reduces context when the limit is hit. -// It drops the oldest ~50% of messages, aligning the split to a safe -// boundary so tool-call sequences stay intact. +// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response +// cycle, as defined in #1316), so tool-call sequences are never split. // // Session history contains only user/assistant/tool messages — the system // prompt is built dynamically by BuildMessages and is NOT stored here. @@ -1569,8 +1569,18 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) { return } - // Find a safe mid-point that does not split a tool-call sequence. - mid := findSafeBoundary(history, len(history)/2) + // Split at a Turn boundary so no tool-call sequence is torn apart. + // parseTurnBoundaries gives us the start of each Turn; we drop the + // oldest half of Turns and keep the most recent ones. + turns := parseTurnBoundaries(history) + var mid int + if len(turns) >= 2 { + mid = turns[len(turns)/2] + } else { + // Fewer than 2 Turns — fall back to message-level midpoint + // aligned to the nearest Turn boundary. + mid = findSafeBoundary(history, len(history)/2) + } if mid <= 0 { return } @@ -1696,7 +1706,7 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) { history := agent.Sessions.GetHistory(sessionKey) summary := agent.Sessions.GetSummary(sessionKey) - // Keep last few messages for continuity, aligned to a safe boundary + // Keep the most recent Turns for continuity, aligned to a Turn boundary // so that no tool-call sequence is split. if len(history) <= 4 { return