From 1a44752dc56675e5ed04edfcd7ed65a6f097774b Mon Sep 17 00:00:00 2001 From: Badgerbees Date: Tue, 31 Mar 2026 17:09:01 +0700 Subject: [PATCH] fix(agent): prevent double-counting system message tokens in estimator Treat SystemParts as an alternative representation of message Content rather than an additive one. This prevents systematic overestimation of system message tokens, which could trigger premature context pruning or summarization. - Picks the maximum of Content vs. SystemParts to stay conservative. - Adds a per-part overhead (20 chars) to account for JSON metadata. - Streamlines the ReasoningContent counting logic. Fixes a deficiency where structured blocks for cache-aware adapters caused inflated token estimates or hidden overflows. --- pkg/agent/context_budget.go | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/agent/context_budget.go b/pkg/agent/context_budget.go index 416ec4a5b..3398d7863 100644 --- a/pkg/agent/context_budget.go +++ b/pkg/agent/context_budget.go @@ -90,15 +90,29 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int { // including Content, ReasoningContent, ToolCalls arguments, ToolCallID // metadata, and Media items. Uses a heuristic of 2.5 characters per token. func estimateMessageTokens(msg providers.Message) int { - chars := utf8.RuneCountInString(msg.Content) - chars += utf8.RuneCountInString(msg.ReasoningContent) + contentChars := utf8.RuneCountInString(msg.Content) - // SystemParts are structured system blocks that can be substantial - // when using instruction-heavy agents or KV-cache-aware adapters. - for _, part := range msg.SystemParts { - chars += utf8.RuneCountInString(part.Text) + // SystemParts are structured system blocks used for cache-aware adapters. + // They carry the same content as Content, but in multiple blocks. + // We estimate them as an alternative representation, not additive. 
+ systemPartsChars := 0 + if len(msg.SystemParts) > 0 { + for _, part := range msg.SystemParts { + systemPartsChars += utf8.RuneCountInString(part.Text) + } + // Per-part overhead for JSON structure (type, text, cache_control). + const perPartOverhead = 20 + systemPartsChars += len(msg.SystemParts) * perPartOverhead } + // Use the larger of the two representations to stay conservative. + chars := contentChars + if systemPartsChars > chars { + chars = systemPartsChars + } + + chars += utf8.RuneCountInString(msg.ReasoningContent) + for _, tc := range msg.ToolCalls { chars += len(tc.ID) + len(tc.Type) if tc.Function != nil {