mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
fix(agent): correct media token arithmetic and tool call double-counting
Two estimation bugs fixed:
1. Media tokens were added to the chars accumulator before the chars*2/5 conversion, so each media item contributed 256*2/5 = 102 tokens instead of 256. Fix: add media tokens directly to the final token count, bypassing the character-based heuristic.
2. estimateMessageTokens counted both tc.Name and tc.Function.Name for each tool call, but providers only send one of them (OpenAI-compatible providers use function.name; Anthropic uses tc.Name). Fix: count tc.Function.Name when Function is present, and fall back to tc.Name only otherwise.
Also fix the i18n hint text: "auto-detect" was misleading — the backend uses a 4x max_tokens heuristic, not actual model detection.
This commit is contained in:
@@ -95,10 +95,14 @@ func estimateMessageTokens(msg providers.Message) int {
|
||||
}
|
||||
|
||||
for _, tc := range msg.ToolCalls {
|
||||
// Count tool call metadata: ID, type, function name
|
||||
chars += len(tc.ID) + len(tc.Type) + len(tc.Name)
|
||||
chars += len(tc.ID) + len(tc.Type)
|
||||
if tc.Function != nil {
|
||||
// Count function name + arguments (the wire format for most providers).
|
||||
// tc.Name mirrors tc.Function.Name — count only once to avoid double-counting.
|
||||
chars += len(tc.Function.Name) + len(tc.Function.Arguments)
|
||||
} else {
|
||||
// Fallback: some provider formats use top-level Name without Function.
|
||||
chars += len(tc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,17 +110,20 @@ func estimateMessageTokens(msg providers.Message) int {
|
||||
chars += len(msg.ToolCallID)
|
||||
}
|
||||
|
||||
// Media items (images, files) are serialized by provider adapters into
|
||||
// multipart or image_url payloads. Use a fixed per-item estimate since
|
||||
// actual token cost depends on resolution and provider tokenization.
|
||||
const mediaTokensPerItem = 256
|
||||
chars += len(msg.Media) * mediaTokensPerItem
|
||||
|
||||
// Per-message overhead for role label, JSON structure, separators.
|
||||
const messageOverhead = 12
|
||||
chars += messageOverhead
|
||||
|
||||
return chars * 2 / 5
|
||||
tokens := chars * 2 / 5
|
||||
|
||||
// Media items (images, files) are serialized by provider adapters into
|
||||
// multipart or image_url payloads. Add a fixed per-item token estimate
|
||||
// directly (not through the chars heuristic) since actual cost depends
|
||||
// on resolution and provider-specific image tokenization.
|
||||
const mediaTokensPerItem = 256
|
||||
tokens += len(msg.Media) * mediaTokensPerItem
|
||||
|
||||
return tokens
|
||||
}
|
||||
|
||||
// estimateToolDefsTokens estimates the total token cost of tool definitions
|
||||
|
||||
@@ -503,6 +503,13 @@ func TestEstimateMessageTokens_MediaItems(t *testing.T) {
|
||||
t.Errorf("message with Media (%d tokens) should exceed plain message (%d tokens)",
|
||||
mediaTokens, plainTokens)
|
||||
}
|
||||
|
||||
// Each media item should add exactly 256 tokens (not run through chars*2/5).
|
||||
expectedDelta := 256 * 2
|
||||
actualDelta := mediaTokens - plainTokens
|
||||
if actualDelta != expectedDelta {
|
||||
t.Errorf("2 media items should add %d tokens, got delta %d", expectedDelta, actualDelta)
|
||||
}
|
||||
}
|
||||
|
||||
// --- estimateToolDefsTokens tests ---
|
||||
|
||||
Reference in New Issue
Block a user