From 93f391a6bf7a845c3d59dda2da032ee460118e6c Mon Sep 17 00:00:00 2001 From: Badgerbees Date: Thu, 26 Mar 2026 00:31:22 +0700 Subject: [PATCH 1/2] fix(agent): include SystemParts in token estimation and add reasoning guards --- pkg/agent/context_budget.go | 9 +++++---- pkg/agent/context_budget_test.go | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pkg/agent/context_budget.go b/pkg/agent/context_budget.go index c87695c7a..416ec4a5b 100644 --- a/pkg/agent/context_budget.go +++ b/pkg/agent/context_budget.go @@ -91,11 +91,12 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int { // metadata, and Media items. Uses a heuristic of 2.5 characters per token. func estimateMessageTokens(msg providers.Message) int { chars := utf8.RuneCountInString(msg.Content) + chars += utf8.RuneCountInString(msg.ReasoningContent) - // ReasoningContent (extended thinking / chain-of-thought) can be - // substantial and is stored in session history via AddFullMessage. - if msg.ReasoningContent != "" { - chars += utf8.RuneCountInString(msg.ReasoningContent) + // SystemParts are structured system blocks that can be substantial + // when using instruction-heavy agents or KV-cache-aware adapters. + for _, part := range msg.SystemParts { + chars += utf8.RuneCountInString(part.Text) } for _, tc := range msg.ToolCalls { diff --git a/pkg/agent/context_budget_test.go b/pkg/agent/context_budget_test.go index 870f0fbe6..22cbdc0db 100644 --- a/pkg/agent/context_budget_test.go +++ b/pkg/agent/context_budget_test.go @@ -529,6 +529,26 @@ func TestEstimateMessageTokens_MediaItems(t *testing.T) { } } +func TestEstimateMessageTokens_SystemParts(t *testing.T) { + plain := providers.Message{Role: "system", Content: "instructions"} + withParts := providers.Message{ + Role: "system", + Content: "instructions", + SystemParts: []providers.ContentBlock{ + {Type: "text", Text: "some more system context"}, + {Type: "text", Text: "even more cached blocks"}, + }, + } + + plainTokens := estimateMessageTokens(plain) + partsTokens := estimateMessageTokens(withParts) + + if partsTokens <= plainTokens { + t.Errorf("system message with SystemParts (%d) should exceed plain message (%d)", + partsTokens, plainTokens) + } +} + // --- estimateToolDefsTokens tests --- func TestEstimateToolDefsTokens(t *testing.T) { From 1a44752dc56675e5ed04edfcd7ed65a6f097774b Mon Sep 17 00:00:00 2001 From: Badgerbees Date: Tue, 31 Mar 2026 17:09:01 +0700 Subject: [PATCH 2/2] fix(agent): prevent double-counting system message tokens in estimator Treat SystemParts as an alternative representation of message Content rather than an additive one. This prevents systematic overestimation of system message tokens which could trigger premature context pruning or summarization. - Picks the maximum of Content vs. SystemParts to stay conservative. - Adds a per-part overhead (20 chars) to account for JSON metadata. - Streamlines the ReasoningContent counting logic. Fixes a deficiency where structured blocks for cache-aware adapters caused overestimated budgets or hidden overflows. --- pkg/agent/context_budget.go | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/agent/context_budget.go b/pkg/agent/context_budget.go index 416ec4a5b..3398d7863 100644 --- a/pkg/agent/context_budget.go +++ b/pkg/agent/context_budget.go @@ -90,15 +90,29 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int { // including Content, ReasoningContent, ToolCalls arguments, ToolCallID // metadata, and Media items. Uses a heuristic of 2.5 characters per token. func estimateMessageTokens(msg providers.Message) int { - chars := utf8.RuneCountInString(msg.Content) - chars += utf8.RuneCountInString(msg.ReasoningContent) + contentChars := utf8.RuneCountInString(msg.Content) - // SystemParts are structured system blocks that can be substantial - // when using instruction-heavy agents or KV-cache-aware adapters. - for _, part := range msg.SystemParts { - chars += utf8.RuneCountInString(part.Text) + // SystemParts are structured system blocks used for cache-aware adapters. + // They carry the same content as Content, but in multiple blocks. + // We estimate them as an alternative representation, not additive. + systemPartsChars := 0 + if len(msg.SystemParts) > 0 { + for _, part := range msg.SystemParts { + systemPartsChars += utf8.RuneCountInString(part.Text) + } + // Per-part overhead for JSON structure (type, text, cache_control). + const perPartOverhead = 20 + systemPartsChars += len(msg.SystemParts) * perPartOverhead } + // Use the larger of the two representations to stay conservative. + chars := contentChars + if systemPartsChars > chars { + chars = systemPartsChars + } + + chars += utf8.RuneCountInString(msg.ReasoningContent) + for _, tc := range msg.ToolCalls { chars += len(tc.ID) + len(tc.Type) if tc.Function != nil {