From b84adacc2f302aa68c3ccd88bc5815ff51904273 Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Fri, 6 Mar 2026 13:10:20 +0800
Subject: [PATCH] fix(routing): address review feedback on CJK estimation and observability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. CJK token estimation: replace flat rune_count/3 with script-aware
   counting — CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF)
   count as 1 token each, and non-CJK runes count as 1/4 token each
   (the non-CJK rune count is integer-divided by 4). This fixes a 3x
   underestimate for Chinese/Japanese/Korean text that could incorrectly
   route complex CJK messages to the light model.

2. Routing observability: SelectModel now returns the computed score as
   a third value. selectCandidates logs the score on both paths — Info
   level for light model selection, Debug level for primary model
   selection.

3. Added tests: TestExtractFeatures_TokenEstimate_Mixed (CJK+ASCII mix),
   TestRouter_SelectModel_ReturnsScore.

Addresses review feedback from @mingmxren.
--- pkg/agent/loop.go | 9 ++++++- pkg/routing/features.go | 29 +++++++++++++------- pkg/routing/router.go | 15 ++++++----- pkg/routing/router_test.go | 54 ++++++++++++++++++++++++++------------ 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 5e68e4931..132bb3c98 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -1192,8 +1192,14 @@ func (al *AgentLoop) selectCandidates( return agent.Candidates, agent.Model } - _, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model) + _, usedLight, score := agent.Router.SelectModel(userMsg, history, agent.Model) if !usedLight { + logger.DebugCF("agent", "Model routing: primary model selected", + map[string]any{ + "agent_id": agent.ID, + "score": score, + "threshold": agent.Router.Threshold(), + }) return agent.Candidates, agent.Model } @@ -1201,6 +1207,7 @@ func (al *AgentLoop) selectCandidates( map[string]any{ "agent_id": agent.ID, "light_model": agent.Router.LightModel(), + "score": score, "threshold": agent.Router.Threshold(), }) return agent.LightCandidates, agent.Router.LightModel() diff --git a/pkg/routing/features.go b/pkg/routing/features.go index 4fa1c5b6c..c371e21aa 100644 --- a/pkg/routing/features.go +++ b/pkg/routing/features.go @@ -15,9 +15,9 @@ const lookbackWindow = 6 // Every dimension is language-agnostic by construction — no keyword or pattern matching // against natural-language content. This ensures consistent routing for all locales. type Features struct { - // TokenEstimate is a conservative proxy for token count. - // Computed as utf8.RuneCountInString(msg) / 3, which handles CJK characters - // (each rune ≈ 1 token for CJK, ≈ 0.25 tokens for ASCII) without any API call. + // TokenEstimate is a proxy for token count. + // CJK runes count as 1 token each; non-CJK runes as 0.25 tokens each. + // This avoids API calls while giving accurate estimates for all scripts. 
TokenEstimate int // CodeBlockCount is the number of fenced code blocks (``` pairs) in the message. @@ -50,14 +50,23 @@ func ExtractFeatures(msg string, history []providers.Message) Features { } } -// estimateTokens returns a conservative token count proxy. -// Using rune count / 3 rather than / 4 because CJK characters each map to -// roughly one token, while ASCII words average ~1.3 chars/token. Dividing -// by 3 is a safe middle ground that slightly over-estimates for Latin text -// (errs toward routing to the heavy model) and is accurate for CJK. +// estimateTokens returns a token count proxy that handles both CJK and Latin text. +// CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF) map to roughly one +// token each, while non-CJK runes average ~0.25 tokens/rune (≈4 chars per token +// for English). Splitting the count this way avoids the 3x underestimation that a +// flat rune_count/3 would produce for Chinese, Japanese, and Korean text. func estimateTokens(msg string) int { - rc := utf8.RuneCountInString(msg) - return rc / 3 + total := utf8.RuneCountInString(msg) + if total == 0 { + return 0 + } + cjk := 0 + for _, r := range msg { + if r >= 0x2E80 && r <= 0x9FFF || r >= 0xF900 && r <= 0xFAFF || r >= 0xAC00 && r <= 0xD7AF { + cjk++ + } + } + return cjk + (total-cjk)/4 } // countCodeBlocks counts the number of complete fenced code blocks. diff --git a/pkg/routing/router.go b/pkg/routing/router.go index 78092b106..b1fa347e9 100644 --- a/pkg/routing/router.go +++ b/pkg/routing/router.go @@ -50,10 +50,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router { return &Router{cfg: cfg, classifier: c} } -// SelectModel returns the model to use for this conversation turn. +// SelectModel returns the model to use for this conversation turn along with +// the computed complexity score (for logging and debugging). 
// -// - If score < cfg.Threshold: returns (cfg.LightModel, true) -// - Otherwise: returns (primaryModel, false) +// - If score < cfg.Threshold: returns (cfg.LightModel, true, score) +// - Otherwise: returns (primaryModel, false, score) // // The caller is responsible for resolving the returned model name into // provider candidates (see AgentInstance.LightCandidates). @@ -61,13 +62,13 @@ func (r *Router) SelectModel( msg string, history []providers.Message, primaryModel string, -) (model string, usedLight bool) { +) (model string, usedLight bool, score float64) { features := ExtractFeatures(msg, history) - score := r.classifier.Score(features) + score = r.classifier.Score(features) if score < r.cfg.Threshold { - return r.cfg.LightModel, true + return r.cfg.LightModel, true, score } - return primaryModel, false + return primaryModel, false, score } // LightModel returns the configured light model name. diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go index 267200c2e..2824d10ab 100644 --- a/pkg/routing/router_test.go +++ b/pkg/routing/router_test.go @@ -29,16 +29,16 @@ func TestExtractFeatures_EmptyMessage(t *testing.T) { } func TestExtractFeatures_TokenEstimate(t *testing.T) { - // 30 ASCII chars / 3 = 10 tokens + // 30 ASCII runes: 0 CJK + 30/4 = 7 tokens msg := strings.Repeat("a", 30) f := ExtractFeatures(msg, nil) - if f.TokenEstimate != 10 { - t.Errorf("TokenEstimate: got %d, want 10", f.TokenEstimate) + if f.TokenEstimate != 7 { + t.Errorf("TokenEstimate: got %d, want 7", f.TokenEstimate) } } func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) { - // 9 CJK runes (U+4F60 U+597D U+4E16 U+754C × 2 + U+4F60) / 3 = 3 tokens. + // 9 CJK runes → 9 tokens (each CJK rune ≈ 1 token). // Using a rune slice literal avoids CJK string literals in source. 
msg := string([]rune{ 0x4F60, 0x597D, 0x4E16, 0x754C, @@ -46,8 +46,17 @@ func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) { 0x4F60, }) f := ExtractFeatures(msg, nil) - if f.TokenEstimate != 3 { - t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate) + if f.TokenEstimate != 9 { + t.Errorf("CJK TokenEstimate: got %d, want 9", f.TokenEstimate) + } +} + +func TestExtractFeatures_TokenEstimate_Mixed(t *testing.T) { + // Mixed: 4 CJK runes + 8 ASCII runes → 4 + 8/4 = 6 tokens. + msg := string([]rune{0x4F60, 0x597D, 0x4E16, 0x754C}) + "hello ok" + f := ExtractFeatures(msg, nil) + if f.TokenEstimate != 6 { + t.Errorf("Mixed TokenEstimate: got %d, want 6", f.TokenEstimate) } } @@ -249,7 +258,7 @@ func TestRouter_NegativeThresholdFallsBackToDefault(t *testing.T) { func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) msg := "hi" - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if !usedLight { t.Error("simple message: expected light model to be selected") } @@ -261,7 +270,7 @@ func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) { func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) msg := "```go\nfmt.Println(\"hello\")\n```" - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("code block: expected primary model to be selected") } @@ -273,7 +282,7 @@ func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) { func TestRouter_SelectModel_AttachmentUsesPrimary(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) msg := "can you analyze this? 
data:image/png;base64,abc123" - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("attachment: expected primary model to be selected") } @@ -286,7 +295,7 @@ func TestRouter_SelectModel_LongMessageUsesPrimary(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) // >200 token estimate: 210 * 3 = 630 chars msg := strings.Repeat("word ", 210) - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("long message: expected primary model to be selected") } @@ -304,7 +313,7 @@ func TestRouter_SelectModel_DeepToolChainUsesLight(t *testing.T) { {Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}, {Name: "search"}}}, } msg := "ok" - _, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6") if !usedLight { t.Error("short message + moderate tool calls: expected light model (score 0.20 < 0.35)") } @@ -320,7 +329,7 @@ func TestRouter_SelectModel_ToolChainPlusMediumUsesHeavy(t *testing.T) { } // ~55 tokens * 3 = 165 chars msg := strings.Repeat("word ", 55) - _, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6") if usedLight { t.Error("tool chain + medium message: expected primary model (score >= 0.35)") } @@ -330,7 +339,7 @@ func TestRouter_SelectModel_CustomThreshold(t *testing.T) { // Very low threshold: even a short message triggers heavy model r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.05}) msg := strings.Repeat("word ", 55) // medium message → 0.15 >= 0.05 - _, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("low threshold: medium message should use 
primary model") } @@ -340,7 +349,7 @@ func TestRouter_SelectModel_HighThreshold(t *testing.T) { // Very high threshold: even code blocks route to light r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.99}) msg := "```go\nfmt.Println()\n```" - _, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if !usedLight { t.Error("very high threshold: code block (0.40) should route to light model") } @@ -364,7 +373,7 @@ func TestRouter_CustomClassifier_LowScore_SelectsLight(t *testing.T) { RouterConfig{LightModel: "light", Threshold: 0.5}, &fixedScoreClassifier{score: 0.2}, ) - _, usedLight := r.SelectModel("anything", nil, "heavy") + _, usedLight, _ := r.SelectModel("anything", nil, "heavy") if !usedLight { t.Error("low score with custom classifier: expected light model") } @@ -375,7 +384,7 @@ func TestRouter_CustomClassifier_HighScore_SelectsPrimary(t *testing.T) { RouterConfig{LightModel: "light", Threshold: 0.5}, &fixedScoreClassifier{score: 0.8}, ) - _, usedLight := r.SelectModel("anything", nil, "heavy") + _, usedLight, _ := r.SelectModel("anything", nil, "heavy") if usedLight { t.Error("high score with custom classifier: expected primary model") } @@ -387,8 +396,19 @@ func TestRouter_CustomClassifier_ExactThreshold_SelectsPrimary(t *testing.T) { RouterConfig{LightModel: "light", Threshold: 0.5}, &fixedScoreClassifier{score: 0.5}, ) - _, usedLight := r.SelectModel("anything", nil, "heavy") + _, usedLight, _ := r.SelectModel("anything", nil, "heavy") if usedLight { t.Error("score == threshold: expected primary model (>= threshold → primary)") } } + +func TestRouter_SelectModel_ReturnsScore(t *testing.T) { + r := newWithClassifier( + RouterConfig{LightModel: "light", Threshold: 0.5}, + &fixedScoreClassifier{score: 0.42}, + ) + _, _, score := r.SelectModel("anything", nil, "heavy") + if score != 0.42 { + t.Errorf("score: got %f, want 0.42", score) + } +}