fix(agent): include ReasoningContent and Media in token estimation

estimateMessageTokens now counts ReasoningContent (extended thinking /
chain-of-thought), which can be substantial and is persisted in session
history. Each Media item adds a fixed estimate (mediaTokensPerItem =
256, added to the running chars total) because the actual cost depends
on provider-specific image tokenization.
This commit is contained in:
xiaoen
2026-03-13 15:14:00 +08:00
parent 9c65d78b07
commit d5fdd5ebd2
2 changed files with 48 additions and 2 deletions
+14 -2
View File
@@ -63,11 +63,17 @@ func findSafeBoundary(history []providers.Message, targetIndex int) int {
}
// estimateMessageTokens estimates the token count for a single message,
// including Content, ToolCalls arguments, and ToolCallID metadata.
// Uses a heuristic of 2.5 characters per token.
// including Content, ReasoningContent, ToolCalls arguments, ToolCallID
// metadata, and Media items. Uses a heuristic of 2.5 characters per token.
func estimateMessageTokens(msg providers.Message) int {
chars := utf8.RuneCountInString(msg.Content)
// ReasoningContent (extended thinking / chain-of-thought) can be
// substantial and is stored in session history via AddFullMessage.
if msg.ReasoningContent != "" {
chars += utf8.RuneCountInString(msg.ReasoningContent)
}
for _, tc := range msg.ToolCalls {
// Count tool call metadata: ID, type, function name
chars += len(tc.ID) + len(tc.Type) + len(tc.Name)
@@ -80,6 +86,12 @@ func estimateMessageTokens(msg providers.Message) int {
chars += len(msg.ToolCallID)
}
// Media items (images, files) are serialized by provider adapters into
// multipart or image_url payloads. Use a fixed per-item estimate since
// actual token cost depends on resolution and provider tokenization.
const mediaTokensPerItem = 256
chars += len(msg.Media) * mediaTokensPerItem
// Per-message overhead for role label, JSON structure, separators.
const messageOverhead = 12
chars += messageOverhead
+34
View File
@@ -389,6 +389,40 @@ func TestEstimateMessageTokens_LargeArguments(t *testing.T) {
}
}
func TestEstimateMessageTokens_ReasoningContent(t *testing.T) {
plain := msgAssistant("result")
withReasoning := providers.Message{
Role: "assistant",
Content: "result",
ReasoningContent: strings.Repeat("thinking step ", 200),
}
plainTokens := estimateMessageTokens(plain)
reasoningTokens := estimateMessageTokens(withReasoning)
if reasoningTokens <= plainTokens {
t.Errorf("message with ReasoningContent (%d tokens) should exceed plain message (%d tokens)",
reasoningTokens, plainTokens)
}
}
func TestEstimateMessageTokens_MediaItems(t *testing.T) {
plain := msgUser("describe this")
withMedia := providers.Message{
Role: "user",
Content: "describe this",
Media: []string{"media://img1.png", "media://img2.png"},
}
plainTokens := estimateMessageTokens(plain)
mediaTokens := estimateMessageTokens(withMedia)
if mediaTokens <= plainTokens {
t.Errorf("message with Media (%d tokens) should exceed plain message (%d tokens)",
mediaTokens, plainTokens)
}
}
// --- estimateToolDefsTokens tests ---
func TestEstimateToolDefsTokens(t *testing.T) {