diff --git a/pkg/agent/context_budget.go b/pkg/agent/context_budget.go
index 2eec9c267..71da5d8f7 100644
--- a/pkg/agent/context_budget.go
+++ b/pkg/agent/context_budget.go
@@ -63,11 +63,17 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int {
 }
 
 // estimateMessageTokens estimates the token count for a single message,
-// including Content, ToolCalls arguments, and ToolCallID metadata.
-// Uses a heuristic of 2.5 characters per token.
+// including Content, ReasoningContent, ToolCalls arguments, ToolCallID
+// metadata, and Media items. Uses a heuristic of 2.5 characters per token.
 func estimateMessageTokens(msg providers.Message) int {
 	chars := utf8.RuneCountInString(msg.Content)
 
+	// ReasoningContent (extended thinking / chain-of-thought) can be
+	// substantial and is stored in session history via AddFullMessage.
+	if msg.ReasoningContent != "" {
+		chars += utf8.RuneCountInString(msg.ReasoningContent)
+	}
+
 	for _, tc := range msg.ToolCalls {
 		// Count tool call metadata: ID, type, function name
 		chars += len(tc.ID) + len(tc.Type) + len(tc.Name)
@@ -80,6 +86,12 @@ func estimateMessageTokens(msg providers.Message) int {
 		chars += len(msg.ToolCallID)
 	}
 
+	// Media items (images, files) are serialized by provider adapters into
+	// multipart or image_url payloads. Use a fixed per-item estimate since
+	// actual token cost depends on resolution and provider tokenization.
+	const mediaTokensPerItem = 256
+	chars += len(msg.Media) * mediaTokensPerItem
+
 	// Per-message overhead for role label, JSON structure, separators.
 	const messageOverhead = 12
 	chars += messageOverhead
diff --git a/pkg/agent/context_budget_test.go b/pkg/agent/context_budget_test.go
index c8a6b19c5..03ace82e2 100644
--- a/pkg/agent/context_budget_test.go
+++ b/pkg/agent/context_budget_test.go
@@ -389,6 +389,40 @@ func TestEstimateMessageTokens_LargeArguments(t *testing.T) {
 	}
 }
 
+func TestEstimateMessageTokens_ReasoningContent(t *testing.T) {
+	plain := msgAssistant("result")
+	withReasoning := providers.Message{
+		Role:             "assistant",
+		Content:          "result",
+		ReasoningContent: strings.Repeat("thinking step ", 200),
+	}
+
+	plainTokens := estimateMessageTokens(plain)
+	reasoningTokens := estimateMessageTokens(withReasoning)
+
+	if reasoningTokens <= plainTokens {
+		t.Errorf("message with ReasoningContent (%d tokens) should exceed plain message (%d tokens)",
+			reasoningTokens, plainTokens)
+	}
+}
+
+func TestEstimateMessageTokens_MediaItems(t *testing.T) {
+	plain := msgUser("describe this")
+	withMedia := providers.Message{
+		Role:    "user",
+		Content: "describe this",
+		Media:   []string{"media://img1.png", "media://img2.png"},
+	}
+
+	plainTokens := estimateMessageTokens(plain)
+	mediaTokens := estimateMessageTokens(withMedia)
+
+	if mediaTokens <= plainTokens {
+		t.Errorf("message with Media (%d tokens) should exceed plain message (%d tokens)",
+			mediaTokens, plainTokens)
+	}
+}
+
 // --- estimateToolDefsTokens tests ---
 
 func TestEstimateToolDefsTokens(t *testing.T) {