mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
fix(routing): address review feedback on CJK estimation and observability
1. CJK token estimation: replace flat rune_count/3 with script-aware counting — CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF) count as 1 token each, non-CJK runes at /4. This fixes a 3x underestimate for Chinese/Japanese/Korean text that could incorrectly route complex CJK messages to the light model. 2. Routing observability: SelectModel now returns the computed score as a third value. selectCandidates logs the score on both paths — Info level for light model selection, Debug level for primary model selection. 3. Added tests: TestExtractFeatures_TokenEstimate_Mixed (CJK+ASCII mix), TestRouter_SelectModel_ReturnsScore. Addresses review feedback from @mingmxren.
This commit is contained in:
+8
-1
@@ -1192,8 +1192,14 @@ func (al *AgentLoop) selectCandidates(
|
||||
return agent.Candidates, agent.Model
|
||||
}
|
||||
|
||||
_, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model)
|
||||
_, usedLight, score := agent.Router.SelectModel(userMsg, history, agent.Model)
|
||||
if !usedLight {
|
||||
logger.DebugCF("agent", "Model routing: primary model selected",
|
||||
map[string]any{
|
||||
"agent_id": agent.ID,
|
||||
"score": score,
|
||||
"threshold": agent.Router.Threshold(),
|
||||
})
|
||||
return agent.Candidates, agent.Model
|
||||
}
|
||||
|
||||
@@ -1201,6 +1207,7 @@ func (al *AgentLoop) selectCandidates(
|
||||
map[string]any{
|
||||
"agent_id": agent.ID,
|
||||
"light_model": agent.Router.LightModel(),
|
||||
"score": score,
|
||||
"threshold": agent.Router.Threshold(),
|
||||
})
|
||||
return agent.LightCandidates, agent.Router.LightModel()
|
||||
|
||||
+19
-10
@@ -15,9 +15,9 @@ const lookbackWindow = 6
|
||||
// Every dimension is language-agnostic by construction — no keyword or pattern matching
|
||||
// against natural-language content. This ensures consistent routing for all locales.
|
||||
type Features struct {
|
||||
// TokenEstimate is a conservative proxy for token count.
|
||||
// Computed as utf8.RuneCountInString(msg) / 3, which handles CJK characters
|
||||
// (each rune ≈ 1 token for CJK, ≈ 0.25 tokens for ASCII) without any API call.
|
||||
// TokenEstimate is a proxy for token count.
|
||||
// CJK runes count as 1 token each; non-CJK runes as 0.25 tokens each.
|
||||
// This avoids API calls; the split is a heuristic tuned for CJK and Latin text, so other scripts are only roughly approximated.
|
||||
TokenEstimate int
|
||||
|
||||
// CodeBlockCount is the number of fenced code blocks (``` pairs) in the message.
|
||||
@@ -50,14 +50,23 @@ func ExtractFeatures(msg string, history []providers.Message) Features {
|
||||
}
|
||||
}
|
||||
|
||||
// estimateTokens returns a conservative token count proxy.
|
||||
// Using rune count / 3 rather than / 4 because CJK characters each map to
|
||||
// roughly one token, while ASCII words average ~1.3 chars/token. Dividing
|
||||
// by 3 is a safe middle ground that slightly over-estimates for Latin text
|
||||
// (errs toward routing to the heavy model) and is accurate for CJK.
|
||||
// estimateTokens returns a token count proxy that handles both CJK and Latin text.
|
||||
// CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF) map to roughly one
|
||||
// token each, while non-CJK runes average ~0.25 tokens/rune (≈4 chars per token
|
||||
// for English). Splitting the count this way avoids the 3x underestimation that a
|
||||
// flat rune_count/3 would produce for Chinese, Japanese, and Korean text.
|
||||
func estimateTokens(msg string) int {
|
||||
rc := utf8.RuneCountInString(msg)
|
||||
return rc / 3
|
||||
total := utf8.RuneCountInString(msg)
|
||||
if total == 0 {
|
||||
return 0
|
||||
}
|
||||
cjk := 0
|
||||
for _, r := range msg {
|
||||
if r >= 0x2E80 && r <= 0x9FFF || r >= 0xF900 && r <= 0xFAFF || r >= 0xAC00 && r <= 0xD7AF {
|
||||
cjk++
|
||||
}
|
||||
}
|
||||
return cjk + (total-cjk)/4
|
||||
}
|
||||
|
||||
// countCodeBlocks counts the number of complete fenced code blocks.
|
||||
|
||||
@@ -50,10 +50,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router {
|
||||
return &Router{cfg: cfg, classifier: c}
|
||||
}
|
||||
|
||||
// SelectModel returns the model to use for this conversation turn.
|
||||
// SelectModel returns the model to use for this conversation turn along with
|
||||
// the computed complexity score (for logging and debugging).
|
||||
//
|
||||
// - If score < cfg.Threshold: returns (cfg.LightModel, true)
|
||||
// - Otherwise: returns (primaryModel, false)
|
||||
// - If score < cfg.Threshold: returns (cfg.LightModel, true, score)
|
||||
// - Otherwise: returns (primaryModel, false, score)
|
||||
//
|
||||
// The caller is responsible for resolving the returned model name into
|
||||
// provider candidates (see AgentInstance.LightCandidates).
|
||||
@@ -61,13 +62,13 @@ func (r *Router) SelectModel(
|
||||
msg string,
|
||||
history []providers.Message,
|
||||
primaryModel string,
|
||||
) (model string, usedLight bool) {
|
||||
) (model string, usedLight bool, score float64) {
|
||||
features := ExtractFeatures(msg, history)
|
||||
score := r.classifier.Score(features)
|
||||
score = r.classifier.Score(features)
|
||||
if score < r.cfg.Threshold {
|
||||
return r.cfg.LightModel, true
|
||||
return r.cfg.LightModel, true, score
|
||||
}
|
||||
return primaryModel, false
|
||||
return primaryModel, false, score
|
||||
}
|
||||
|
||||
// LightModel returns the configured light model name.
|
||||
|
||||
+37
-17
@@ -29,16 +29,16 @@ func TestExtractFeatures_EmptyMessage(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestExtractFeatures_TokenEstimate(t *testing.T) {
|
||||
// 30 ASCII chars / 3 = 10 tokens
|
||||
// 30 ASCII runes: 0 CJK + 30/4 = 7 tokens
|
||||
msg := strings.Repeat("a", 30)
|
||||
f := ExtractFeatures(msg, nil)
|
||||
if f.TokenEstimate != 10 {
|
||||
t.Errorf("TokenEstimate: got %d, want 10", f.TokenEstimate)
|
||||
if f.TokenEstimate != 7 {
|
||||
t.Errorf("TokenEstimate: got %d, want 7", f.TokenEstimate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) {
|
||||
// 9 CJK runes (U+4F60 U+597D U+4E16 U+754C × 2 + U+4F60) / 3 = 3 tokens.
|
||||
// 9 CJK runes → 9 tokens (each CJK rune ≈ 1 token).
|
||||
// Using a rune slice literal avoids CJK string literals in source.
|
||||
msg := string([]rune{
|
||||
0x4F60, 0x597D, 0x4E16, 0x754C,
|
||||
@@ -46,8 +46,17 @@ func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) {
|
||||
0x4F60,
|
||||
})
|
||||
f := ExtractFeatures(msg, nil)
|
||||
if f.TokenEstimate != 3 {
|
||||
t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate)
|
||||
if f.TokenEstimate != 9 {
|
||||
t.Errorf("CJK TokenEstimate: got %d, want 9", f.TokenEstimate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractFeatures_TokenEstimate_Mixed(t *testing.T) {
|
||||
// Mixed: 4 CJK runes + 8 ASCII runes → 4 + 8/4 = 6 tokens.
|
||||
msg := string([]rune{0x4F60, 0x597D, 0x4E16, 0x754C}) + "hello ok"
|
||||
f := ExtractFeatures(msg, nil)
|
||||
if f.TokenEstimate != 6 {
|
||||
t.Errorf("Mixed TokenEstimate: got %d, want 6", f.TokenEstimate)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,7 +258,7 @@ func TestRouter_NegativeThresholdFallsBackToDefault(t *testing.T) {
|
||||
func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) {
|
||||
r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
|
||||
msg := "hi"
|
||||
model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
if !usedLight {
|
||||
t.Error("simple message: expected light model to be selected")
|
||||
}
|
||||
@@ -261,7 +270,7 @@ func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) {
|
||||
func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) {
|
||||
r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
|
||||
msg := "```go\nfmt.Println(\"hello\")\n```"
|
||||
model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
if usedLight {
|
||||
t.Error("code block: expected primary model to be selected")
|
||||
}
|
||||
@@ -273,7 +282,7 @@ func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) {
|
||||
func TestRouter_SelectModel_AttachmentUsesPrimary(t *testing.T) {
|
||||
r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
|
||||
msg := "can you analyze this? data:image/png;base64,abc123"
|
||||
model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
if usedLight {
|
||||
t.Error("attachment: expected primary model to be selected")
|
||||
}
|
||||
@@ -286,7 +295,7 @@ func TestRouter_SelectModel_LongMessageUsesPrimary(t *testing.T) {
|
||||
r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
|
||||
// >200 token estimate: 210 * 5 = 1050 ASCII chars, 1050 / 4 = 262 tokens
|
||||
msg := strings.Repeat("word ", 210)
|
||||
model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
if usedLight {
|
||||
t.Error("long message: expected primary model to be selected")
|
||||
}
|
||||
@@ -304,7 +313,7 @@ func TestRouter_SelectModel_DeepToolChainUsesLight(t *testing.T) {
|
||||
{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}, {Name: "search"}}},
|
||||
}
|
||||
msg := "ok"
|
||||
_, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6")
|
||||
_, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6")
|
||||
if !usedLight {
|
||||
t.Error("short message + moderate tool calls: expected light model (score 0.20 < 0.35)")
|
||||
}
|
||||
@@ -320,7 +329,7 @@ func TestRouter_SelectModel_ToolChainPlusMediumUsesHeavy(t *testing.T) {
|
||||
}
|
||||
// medium message: 55 * 5 = 275 ASCII chars, 275 / 4 = 68 tokens
|
||||
msg := strings.Repeat("word ", 55)
|
||||
_, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6")
|
||||
_, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6")
|
||||
if usedLight {
|
||||
t.Error("tool chain + medium message: expected primary model (score >= 0.35)")
|
||||
}
|
||||
@@ -330,7 +339,7 @@ func TestRouter_SelectModel_CustomThreshold(t *testing.T) {
|
||||
// Very low threshold: even a short message triggers heavy model
|
||||
r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.05})
|
||||
msg := strings.Repeat("word ", 55) // medium message → 0.15 >= 0.05
|
||||
_, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
_, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
if usedLight {
|
||||
t.Error("low threshold: medium message should use primary model")
|
||||
}
|
||||
@@ -340,7 +349,7 @@ func TestRouter_SelectModel_HighThreshold(t *testing.T) {
|
||||
// Very high threshold: even code blocks route to light
|
||||
r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.99})
|
||||
msg := "```go\nfmt.Println()\n```"
|
||||
_, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
_, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
|
||||
if !usedLight {
|
||||
t.Error("very high threshold: code block (0.40) should route to light model")
|
||||
}
|
||||
@@ -364,7 +373,7 @@ func TestRouter_CustomClassifier_LowScore_SelectsLight(t *testing.T) {
|
||||
RouterConfig{LightModel: "light", Threshold: 0.5},
|
||||
&fixedScoreClassifier{score: 0.2},
|
||||
)
|
||||
_, usedLight := r.SelectModel("anything", nil, "heavy")
|
||||
_, usedLight, _ := r.SelectModel("anything", nil, "heavy")
|
||||
if !usedLight {
|
||||
t.Error("low score with custom classifier: expected light model")
|
||||
}
|
||||
@@ -375,7 +384,7 @@ func TestRouter_CustomClassifier_HighScore_SelectsPrimary(t *testing.T) {
|
||||
RouterConfig{LightModel: "light", Threshold: 0.5},
|
||||
&fixedScoreClassifier{score: 0.8},
|
||||
)
|
||||
_, usedLight := r.SelectModel("anything", nil, "heavy")
|
||||
_, usedLight, _ := r.SelectModel("anything", nil, "heavy")
|
||||
if usedLight {
|
||||
t.Error("high score with custom classifier: expected primary model")
|
||||
}
|
||||
@@ -387,8 +396,19 @@ func TestRouter_CustomClassifier_ExactThreshold_SelectsPrimary(t *testing.T) {
|
||||
RouterConfig{LightModel: "light", Threshold: 0.5},
|
||||
&fixedScoreClassifier{score: 0.5},
|
||||
)
|
||||
_, usedLight := r.SelectModel("anything", nil, "heavy")
|
||||
_, usedLight, _ := r.SelectModel("anything", nil, "heavy")
|
||||
if usedLight {
|
||||
t.Error("score == threshold: expected primary model (>= threshold → primary)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRouter_SelectModel_ReturnsScore(t *testing.T) {
|
||||
r := newWithClassifier(
|
||||
RouterConfig{LightModel: "light", Threshold: 0.5},
|
||||
&fixedScoreClassifier{score: 0.42},
|
||||
)
|
||||
_, _, score := r.SelectModel("anything", nil, "heavy")
|
||||
if score != 0.42 {
|
||||
t.Errorf("score: got %f, want 0.42", score)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user