fix(routing): address review feedback on CJK estimation and observability

1. CJK token estimation: replace flat rune_count/3 with script-aware
   counting — CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF)
   count as 1 token each, non-CJK runes at /4. This fixes a 3x
   underestimate for Chinese/Japanese/Korean text that could incorrectly
   route complex CJK messages to the light model.

2. Routing observability: SelectModel now returns the computed score as
   a third value. selectCandidates logs the score on both paths — Info
   level for light model selection, Debug level for primary model
   selection.

3. Added tests: TestExtractFeatures_TokenEstimate_Mixed (CJK+ASCII mix),
   TestRouter_SelectModel_ReturnsScore.

Addresses review feedback from @mingmxren.
This commit is contained in:
xiaoen
2026-03-06 13:10:20 +08:00
parent 04ddb6b472
commit b84adacc2f
4 changed files with 72 additions and 35 deletions
+8 -7
View File
@@ -50,10 +50,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router {
return &Router{cfg: cfg, classifier: c}
}
// SelectModel returns the model to use for this conversation turn.
// SelectModel returns the model to use for this conversation turn along with
// the computed complexity score (for logging and debugging).
//
// - If score < cfg.Threshold: returns (cfg.LightModel, true)
// - Otherwise: returns (primaryModel, false)
// - If score < cfg.Threshold: returns (cfg.LightModel, true, score)
// - Otherwise: returns (primaryModel, false, score)
//
// The caller is responsible for resolving the returned model name into
// provider candidates (see AgentInstance.LightCandidates).
@@ -61,13 +62,13 @@ func (r *Router) SelectModel(
msg string,
history []providers.Message,
primaryModel string,
) (model string, usedLight bool) {
) (model string, usedLight bool, score float64) {
features := ExtractFeatures(msg, history)
score := r.classifier.Score(features)
score = r.classifier.Score(features)
if score < r.cfg.Threshold {
return r.cfg.LightModel, true
return r.cfg.LightModel, true, score
}
return primaryModel, false
return primaryModel, false, score
}
// LightModel returns the configured light model name.