From c5a21b269f1d1487e89125228979b1dd0fcc4477 Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Mon, 2 Mar 2026 22:40:52 +0800
Subject: [PATCH 1/6] feat(config): add RoutingConfig to AgentDefaults

Introduce RoutingConfig with three fields:
  - enabled: activates per-turn model routing
  - light_model: references a model_name in model_list
  - threshold: complexity score cutoff in [0,1]

When routing.enabled is true and the incoming message scores below
threshold, the agent switches to light_model for that turn. Absent or
disabled config leaves existing behaviour completely unchanged.

Example:
  "agents": {
    "defaults": {
      "model_name": "claude-sonnet-4-6",
      "routing": {
        "enabled": true,
        "light_model": "gemini-flash",
        "threshold": 0.35
      }
    }
  }
---
 pkg/config/config.go | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/pkg/config/config.go b/pkg/config/config.go
index c4c175495..af2acb726 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -167,19 +167,32 @@ type SessionConfig struct {
 	IdentityLinks map[string][]string `json:"identity_links,omitempty"`
 }
 
+// RoutingConfig controls the intelligent model routing feature.
+// When enabled, each incoming message is scored against structural features
+// (message length, code blocks, tool call history, conversation depth, attachments).
+// Messages scoring below Threshold are sent to LightModel; all others use the
+// agent's primary model. This reduces cost and latency for simple tasks without
+// requiring any keyword matching — all scoring is language-agnostic.
+type RoutingConfig struct {
+	Enabled    bool    `json:"enabled"`
+	LightModel string  `json:"light_model"` // model_name from model_list to use for simple tasks
+	Threshold  float64 `json:"threshold"`   // complexity score in [0,1]; score >= threshold → primary model
+}
+
 type AgentDefaults struct {
-	Workspace                 string   `json:"workspace"                       env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"`
-	RestrictToWorkspace       bool     `json:"restrict_to_workspace"           env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"`
-	AllowReadOutsideWorkspace bool     `json:"allow_read_outside_workspace"    env:"PICOCLAW_AGENTS_DEFAULTS_ALLOW_READ_OUTSIDE_WORKSPACE"`
-	Provider                  string   `json:"provider"                        env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"`
-	ModelName                 string   `json:"model_name,omitempty"            env:"PICOCLAW_AGENTS_DEFAULTS_MODEL_NAME"`
-	Model                     string   `json:"model"                           env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` // Deprecated: use model_name instead
-	ModelFallbacks            []string `json:"model_fallbacks,omitempty"`
-	ImageModel                string   `json:"image_model,omitempty"           env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"`
-	ImageModelFallbacks       []string `json:"image_model_fallbacks,omitempty"`
-	MaxTokens                 int      `json:"max_tokens"                      env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"`
-	Temperature               *float64 `json:"temperature,omitempty"           env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"`
-	MaxToolIterations         int      `json:"max_tool_iterations"             env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"`
+	Workspace                 string         `json:"workspace"                       env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"`
+	RestrictToWorkspace       bool           `json:"restrict_to_workspace"           env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"`
+	AllowReadOutsideWorkspace bool           `json:"allow_read_outside_workspace"    env:"PICOCLAW_AGENTS_DEFAULTS_ALLOW_READ_OUTSIDE_WORKSPACE"`
+	Provider                  string         `json:"provider"                        env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"`
+	ModelName                 string         `json:"model_name,omitempty"            env:"PICOCLAW_AGENTS_DEFAULTS_MODEL_NAME"`
+	Model                     string         `json:"model"                           env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` // Deprecated: use model_name instead
+	ModelFallbacks            []string       `json:"model_fallbacks,omitempty"`
+	ImageModel                string         `json:"image_model,omitempty"           env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"`
+	ImageModelFallbacks       []string       `json:"image_model_fallbacks,omitempty"`
+	MaxTokens                 int            `json:"max_tokens"                      env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"`
+	Temperature               *float64       `json:"temperature,omitempty"           env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"`
+	MaxToolIterations         int            `json:"max_tool_iterations"             env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"`
+	Routing                   *RoutingConfig `json:"routing,omitempty"`
 }
 
 // GetModelName returns the effective model name for the agent defaults.

From 1943c3e6602930880c2da90fb973d5e07dc98854 Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Mon, 2 Mar 2026 22:42:20 +0800
Subject: [PATCH 2/6] feat(routing): add language-agnostic model complexity
 scorer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add three source files and their tests to pkg/routing/:

features.go — ExtractFeatures(msg, history) → Features
  Computes five structural dimensions with zero keyword matching:
  - TokenEstimate: rune_count/3 (CJK-safe token proxy)
  - CodeBlockCount: ``` pairs in the message
  - RecentToolCalls: tool call count in the last 6 history entries
  - ConversationDepth: total messages in session
  - HasAttachments: data URIs or media file extensions

classifier.go — Classifier interface + RuleClassifier
  RuleClassifier uses a weighted sum that is capped at 1.0:
    code block      → +0.40  (triggers heavy model alone at 0.35 threshold)
    token > 200     → +0.35  (triggers heavy model alone)
    tool calls > 3  → +0.25
    tool calls 1-3  → +0.10
    token 51-200    → +0.15
    conversation depth > 10 → +0.10
    attachment      → 1.00 (hard gate, always heavy; bypasses the sum)

router.go — Router wraps config + Classifier
  Router.SelectModel(msg, history, primaryModel) returns either the
  configured light_model or the primary model depending on whether
  the complexity score clears the threshold. Threshold defaults to
  0.35 when zero/negative to prevent misconfiguration.

router_test.go — 34 tests covering all branches and edge cases
---
 pkg/routing/classifier.go  |  80 ++++++++
 pkg/routing/features.go    | 118 ++++++++++++
 pkg/routing/router.go      |  77 ++++++++
 pkg/routing/router_test.go | 386 +++++++++++++++++++++++++++++++++++++
 4 files changed, 661 insertions(+)
 create mode 100644 pkg/routing/classifier.go
 create mode 100644 pkg/routing/features.go
 create mode 100644 pkg/routing/router.go
 create mode 100644 pkg/routing/router_test.go

diff --git a/pkg/routing/classifier.go b/pkg/routing/classifier.go
new file mode 100644
index 000000000..761a6fdec
--- /dev/null
+++ b/pkg/routing/classifier.go
@@ -0,0 +1,80 @@
+package routing
+
+// Classifier evaluates a feature set and returns a complexity score in [0, 1].
+// A higher score indicates a more complex task that benefits from a heavy model.
+// The score is compared against the configured threshold: score >= threshold selects
+// the primary (heavy) model; score < threshold selects the light model.
+//
+// Classifier is an interface so that future implementations (ML-based, embedding-based,
+// or any other approach) can be swapped in without changing routing infrastructure.
+type Classifier interface {
+	Score(f Features) float64
+}
+
+// RuleClassifier is the v1 implementation.
+// It uses a weighted sum of structural signals with no external dependencies,
+// no API calls, and sub-microsecond latency. The raw sum is capped at 1.0 so
+// that the returned score always falls within the [0, 1] contract.
+//
+// Individual weights (multiple signals can fire simultaneously):
+//
+//	token > 200 (≈600 chars): 0.35  — very long prompts are almost always complex
+//	token 51-200:             0.15  — medium length; may or may not be complex
+//	code block present:       0.40  — coding tasks need the heavy model
+//	tool calls > 3 (recent):  0.25  — dense tool usage signals an agentic workflow
+//	tool calls 1-3 (recent):  0.10  — some tool activity
+//	conversation depth > 10:  0.10  — long sessions carry implicit complexity
+//	attachments present:      1.00  — hard gate; multi-modal always needs heavy model
+//
+// Default threshold is 0.35, so:
+//   - Pure greetings / trivial Q&A:                 0.00 → light  ✓
+//   - Medium prose message (51–200 tokens):          0.15 → light  ✓
+//   - Message with code block:                       0.40 → heavy  ✓
+//   - Long message (>200 tokens):                    0.35 → heavy  ✓
+//   - 1–3 recent tool calls + medium message:        0.25 → light  (acceptable; >3 calls gives 0.40 → heavy)
+//   - Any message with a detected media attachment:  1.00 → heavy  ✓
+type RuleClassifier struct{}
+
+// Score computes the complexity score for the given feature set.
+// The returned value is in [0, 1]. Attachments short-circuit to 1.0.
+func (c *RuleClassifier) Score(f Features) float64 {
+	// Hard gate: multi-modal inputs always require the heavy model.
+	if f.HasAttachments {
+		return 1.0
+	}
+
+	var score float64
+
+	// Token estimate — primary verbosity signal (the two bands are mutually exclusive)
+	switch {
+	case f.TokenEstimate > 200:
+		score += 0.35
+	case f.TokenEstimate > 50:
+		score += 0.15
+	}
+
+	// Fenced code blocks — strongest indicator of a coding/technical task
+	if f.CodeBlockCount > 0 {
+		score += 0.40
+	}
+
+	// Recent tool call density — indicates an ongoing agentic workflow
+	switch {
+	case f.RecentToolCalls > 3:
+		score += 0.25
+	case f.RecentToolCalls > 0:
+		score += 0.10
+	}
+
+	// Conversation depth — accumulated context implies compound task
+	if f.ConversationDepth > 10 {
+		score += 0.10
+	}
+
+	// Cap at 1.0 to honour the [0, 1] contract when every signal fires at once
+	// (long message 0.35 + code block 0.40 + dense tool chain 0.25 + deep conversation 0.10 = 1.10 raw).
+	if score > 1.0 {
+		score = 1.0
+	}
+	return score
+}
diff --git a/pkg/routing/features.go b/pkg/routing/features.go
new file mode 100644
index 000000000..4fa1c5b6c
--- /dev/null
+++ b/pkg/routing/features.go
@@ -0,0 +1,118 @@
+package routing
+
+import (
+	"strings"
+	"unicode/utf8"
+
+	"github.com/sipeed/picoclaw/pkg/providers"
+)
+
+// lookbackWindow is the number of recent history entries scanned for tool calls.
+// Six entries covers roughly one full tool-use round-trip (user → assistant+tool_call → tool_result → assistant).
+const lookbackWindow = 6
+
+// Features holds the structural signals extracted from a message and its session context.
+// Every dimension is language-agnostic by construction — no keyword or pattern matching
+// against natural-language content. This ensures consistent routing for all locales.
+type Features struct {
+	// TokenEstimate is a conservative proxy for token count.
+	// Computed as utf8.RuneCountInString(msg) / 3, which handles CJK characters
+	// (each rune ≈ 1 token for CJK, ≈ 0.25 tokens for ASCII) without any API call.
+	TokenEstimate int
+
+	// CodeBlockCount is the number of fenced code blocks (``` pairs) in the message.
+	// Coding tasks almost always require the heavy model.
+	CodeBlockCount int
+
+	// RecentToolCalls is the total number of tool calls (summed across messages) in the
+	// last lookbackWindow history entries. A high density indicates an active agentic workflow.
+	RecentToolCalls int
+
+	// ConversationDepth is the total number of messages in the session history.
+	// Deep sessions tend to carry implicit complexity built up over many turns.
+	ConversationDepth int
+
+	// HasAttachments is true when the message appears to contain media (images,
+	// audio, video). Multi-modal inputs require vision-capable heavy models.
+	HasAttachments bool
+}
+
+// ExtractFeatures computes the structural feature vector for a message.
+// It has no side effects: msg and history are only read. It is not strictly
+// allocation-free, since hasAttachments may allocate a lower-cased copy of msg.
+func ExtractFeatures(msg string, history []providers.Message) Features {
+	return Features{
+		TokenEstimate:     estimateTokens(msg),
+		CodeBlockCount:    countCodeBlocks(msg),
+		RecentToolCalls:   countRecentToolCalls(history),
+		ConversationDepth: len(history),
+		HasAttachments:    hasAttachments(msg),
+	}
+}
+
+// estimateTokens returns a rough token count proxy: rune count / 3.
+// The divisor is a compromise between the two per-rune figures quoted on
+// Features.TokenEstimate (≈1 token per CJK rune, ≈0.25 per ASCII rune): it
+// over-estimates Latin text slightly (erring toward the heavy model) and
+// under-estimates pure CJK text by roughly a factor of three.
+func estimateTokens(msg string) int {
+	rc := utf8.RuneCountInString(msg)
+	return rc / 3
+}
+
+// countCodeBlocks counts the number of complete fenced code blocks.
+// It counts ``` delimiters and integer-divides by two, so each pair of
+// delimiters forms one block and a trailing unpaired fence is ignored
+// (a lone fence yields 0 blocks; three fences yield 1).
+func countCodeBlocks(msg string) int {
+	n := strings.Count(msg, "```")
+	return n / 2
+}
+
+// countRecentToolCalls sums the individual tool calls carried by the last
+// lookbackWindow entries of history (a message with two calls adds two). It
+// reads the ToolCalls field rather than parsing the content string.
+func countRecentToolCalls(history []providers.Message) int {
+	start := len(history) - lookbackWindow
+	if start < 0 {
+		start = 0
+	}
+
+	count := 0
+	for _, msg := range history[start:] {
+		if len(msg.ToolCalls) > 0 {
+			count += len(msg.ToolCalls)
+		}
+	}
+	return count
+}
+
+// hasAttachments returns true when the message content contains embedded media.
+// It checks for base64 data URIs (data:image/, data:audio/, data:video/) and
+// common image/audio/video file extensions anywhere in the lower-cased text.
+// The substring match is intentionally broad — false positives (e.g. merely
+// mentioning "photo.jpg") only mean the turn is scored 1.0 and stays on the
+// primary model.
+func hasAttachments(msg string) bool {
+	lower := strings.ToLower(msg)
+
+	// Base64 data URIs embedded directly in the message
+	if strings.Contains(lower, "data:image/") ||
+		strings.Contains(lower, "data:audio/") ||
+		strings.Contains(lower, "data:video/") {
+		return true
+	}
+
+	// Common image/audio/video extensions in URLs or file references
+	mediaExts := []string{
+		".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp",
+		".mp3", ".wav", ".ogg", ".m4a", ".flac",
+		".mp4", ".avi", ".mov", ".webm",
+	}
+	for _, ext := range mediaExts {
+		if strings.Contains(lower, ext) {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/pkg/routing/router.go b/pkg/routing/router.go
new file mode 100644
index 000000000..d4f5218d3
--- /dev/null
+++ b/pkg/routing/router.go
@@ -0,0 +1,77 @@
+package routing
+
+import (
+	"github.com/sipeed/picoclaw/pkg/providers"
+)
+
+// defaultThreshold is used when the config threshold is zero or negative.
+// At 0.35 a message needs at least one strong signal (code block, long text,
+// or an attachment) before the heavy model is chosen.
+const defaultThreshold = 0.35
+
+// RouterConfig holds the validated model routing settings.
+// It mirrors config.RoutingConfig but lives in pkg/routing to keep the
+// dependency graph simple: pkg/agent resolves config → routing, not the reverse.
+type RouterConfig struct {
+	// LightModel is the model_name (from model_list) used for simple tasks.
+	LightModel string
+
+	// Threshold is the complexity score cutoff in [0, 1].
+	// score >= Threshold → primary (heavy) model.
+	// score <  Threshold → light model.
+	Threshold float64
+}
+
+// Router selects the appropriate model tier for each incoming message.
+// It is safe for concurrent use from multiple goroutines.
+type Router struct {
+	cfg        RouterConfig
+	classifier Classifier
+}
+
+// New creates a Router with the given config and the default RuleClassifier.
+// If cfg.Threshold is zero or negative, defaultThreshold (0.35) is used.
+func New(cfg RouterConfig) *Router {
+	if cfg.Threshold <= 0 {
+		cfg.Threshold = defaultThreshold
+	}
+	return &Router{
+		cfg:        cfg,
+		classifier: &RuleClassifier{},
+	}
+}
+
+// newWithClassifier creates a Router with a custom Classifier.
+// Intended for unit tests that need to inject a deterministic scorer.
+func newWithClassifier(cfg RouterConfig, c Classifier) *Router {
+	if cfg.Threshold <= 0 {
+		cfg.Threshold = defaultThreshold
+	}
+	return &Router{cfg: cfg, classifier: c}
+}
+
+// SelectModel returns the model to use for this conversation turn.
+//
+//   - If score < cfg.Threshold: returns (cfg.LightModel, true)
+//   - Otherwise:               returns (primaryModel, false)
+//
+// The caller is responsible for resolving the returned model name into
+// provider candidates (see AgentInstance.LightCandidates).
+func (r *Router) SelectModel(msg string, history []providers.Message, primaryModel string) (model string, usedLight bool) {
+	features := ExtractFeatures(msg, history)
+	score := r.classifier.Score(features)
+	if score < r.cfg.Threshold {
+		return r.cfg.LightModel, true
+	}
+	return primaryModel, false
+}
+
+// LightModel returns the configured light model name.
+func (r *Router) LightModel() string {
+	return r.cfg.LightModel
+}
+
+// Threshold returns the complexity threshold in use.
+func (r *Router) Threshold() float64 {
+	return r.cfg.Threshold
+}
diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go
new file mode 100644
index 000000000..168227638
--- /dev/null
+++ b/pkg/routing/router_test.go
@@ -0,0 +1,386 @@
+package routing
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/sipeed/picoclaw/pkg/providers"
+)
+
+// ── ExtractFeatures ──────────────────────────────────────────────────────────
+
+func TestExtractFeatures_EmptyMessage(t *testing.T) {
+	f := ExtractFeatures("", nil)
+	if f.TokenEstimate != 0 {
+		t.Errorf("TokenEstimate: got %d, want 0", f.TokenEstimate)
+	}
+	if f.CodeBlockCount != 0 {
+		t.Errorf("CodeBlockCount: got %d, want 0", f.CodeBlockCount)
+	}
+	if f.RecentToolCalls != 0 {
+		t.Errorf("RecentToolCalls: got %d, want 0", f.RecentToolCalls)
+	}
+	if f.ConversationDepth != 0 {
+		t.Errorf("ConversationDepth: got %d, want 0", f.ConversationDepth)
+	}
+	if f.HasAttachments {
+		t.Error("HasAttachments: got true, want false")
+	}
+}
+
+func TestExtractFeatures_TokenEstimate(t *testing.T) {
+	// 30 ASCII chars / 3 = 10 tokens
+	msg := strings.Repeat("a", 30)
+	f := ExtractFeatures(msg, nil)
+	if f.TokenEstimate != 10 {
+		t.Errorf("TokenEstimate: got %d, want 10", f.TokenEstimate)
+	}
+}
+
+func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) {
+	// 9 CJK runes / 3 = 3 tokens
+	msg := "你好世界你好世界你" // 9 runes
+	f := ExtractFeatures(msg, nil)
+	if f.TokenEstimate != 3 {
+		t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate)
+	}
+}
+
+func TestExtractFeatures_CodeBlocks(t *testing.T) {
+	cases := []struct {
+		msg  string
+		want int
+	}{
+		{"no code here", 0},
+		{"```go\nfmt.Println()\n```", 1},
+		{"```python\npass\n```\n```js\nconsole.log()\n```", 2},
+		{"```unclosed", 0}, // odd number of fences = 0 complete blocks
+	}
+	for _, tc := range cases {
+		f := ExtractFeatures(tc.msg, nil)
+		if f.CodeBlockCount != tc.want {
+			t.Errorf("msg=%q: CodeBlockCount got %d, want %d", tc.msg, f.CodeBlockCount, tc.want)
+		}
+	}
+}
+
+func TestExtractFeatures_RecentToolCalls(t *testing.T) {
+	// History longer than lookbackWindow — only last lookbackWindow entries count.
+	history := make([]providers.Message, 10)
+	// Put 2 tool calls at positions 8 and 9 (within the last 6)
+	history[8] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}}}
+	history[9] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}}
+	// Position 3 is outside the lookback window and must NOT be counted
+	history[3] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "old_tool"}}}
+
+	f := ExtractFeatures("test", history)
+	// 1 (position 8) + 2 (position 9) = 3
+	if f.RecentToolCalls != 3 {
+		t.Errorf("RecentToolCalls: got %d, want 3", f.RecentToolCalls)
+	}
+}
+
+func TestExtractFeatures_ConversationDepth(t *testing.T) {
+	history := make([]providers.Message, 7)
+	f := ExtractFeatures("msg", history)
+	if f.ConversationDepth != 7 {
+		t.Errorf("ConversationDepth: got %d, want 7", f.ConversationDepth)
+	}
+}
+
+func TestExtractFeatures_HasAttachments_DataURI(t *testing.T) {
+	cases := []struct {
+		msg  string
+		want bool
+	}{
+		{"plain text", false},
+		{"here is an image: data:image/png;base64,abc123", true},
+		{"audio: data:audio/mp3;base64,xyz", true},
+		{"video: data:video/mp4;base64,xyz", true},
+	}
+	for _, tc := range cases {
+		f := ExtractFeatures(tc.msg, nil)
+		if f.HasAttachments != tc.want {
+			t.Errorf("msg=%q: HasAttachments got %v, want %v", tc.msg, f.HasAttachments, tc.want)
+		}
+	}
+}
+
+func TestExtractFeatures_HasAttachments_Extension(t *testing.T) {
+	cases := []struct {
+		msg  string
+		want bool
+	}{
+		{"check out photo.jpg", true},
+		{"see screenshot.png", true},
+		{"listen to audio.mp3", true},
+		{"watch clip.mp4", true},
+		{"just a .go file", false},
+		{"document.pdf", false}, // pdf is not in the media list
+	}
+	for _, tc := range cases {
+		f := ExtractFeatures(tc.msg, nil)
+		if f.HasAttachments != tc.want {
+			t.Errorf("msg=%q: HasAttachments got %v, want %v", tc.msg, f.HasAttachments, tc.want)
+		}
+	}
+}
+
+// ── RuleClassifier ───────────────────────────────────────────────────────────
+
+func TestRuleClassifier_ZeroFeatures(t *testing.T) {
+	c := &RuleClassifier{}
+	score := c.Score(Features{})
+	if score != 0.0 {
+		t.Errorf("zero features: got %f, want 0.0", score)
+	}
+}
+
+func TestRuleClassifier_AttachmentsHardGate(t *testing.T) {
+	c := &RuleClassifier{}
+	score := c.Score(Features{HasAttachments: true})
+	if score != 1.0 {
+		t.Errorf("attachments: got %f, want 1.0", score)
+	}
+}
+
+func TestRuleClassifier_CodeBlockAlone(t *testing.T) {
+	c := &RuleClassifier{}
+	// Code block alone = 0.40, above default threshold 0.35
+	score := c.Score(Features{CodeBlockCount: 1})
+	if score < 0.35 {
+		t.Errorf("code block: score %f is below default threshold 0.35", score)
+	}
+}
+
+func TestRuleClassifier_LongMessage(t *testing.T) {
+	c := &RuleClassifier{}
+	// >200 tokens = 0.35, exactly at default threshold → heavy
+	score := c.Score(Features{TokenEstimate: 250})
+	if score < 0.35 {
+		t.Errorf("long message: score %f is below default threshold 0.35", score)
+	}
+}
+
+func TestRuleClassifier_MediumMessage(t *testing.T) {
+	c := &RuleClassifier{}
+	// 50-200 tokens = 0.15, below threshold → light
+	score := c.Score(Features{TokenEstimate: 100})
+	if score >= 0.35 {
+		t.Errorf("medium message: score %f should be below default threshold 0.35", score)
+	}
+}
+
+func TestRuleClassifier_ShortMessage(t *testing.T) {
+	c := &RuleClassifier{}
+	// <50 tokens, no other signals = 0.0 → light
+	score := c.Score(Features{TokenEstimate: 10})
+	if score != 0.0 {
+		t.Errorf("short message: got %f, want 0.0", score)
+	}
+}
+
+func TestRuleClassifier_ToolCallDensity(t *testing.T) {
+	c := &RuleClassifier{}
+
+	scoreNone := c.Score(Features{RecentToolCalls: 0})
+	scoreLow := c.Score(Features{RecentToolCalls: 2})
+	scoreHigh := c.Score(Features{RecentToolCalls: 5})
+
+	if scoreNone != 0.0 {
+		t.Errorf("no tools: got %f, want 0.0", scoreNone)
+	}
+	if scoreLow <= scoreNone {
+		t.Errorf("low tools should score higher than none: %f vs %f", scoreLow, scoreNone)
+	}
+	if scoreHigh <= scoreLow {
+		t.Errorf("high tools should score higher than low: %f vs %f", scoreHigh, scoreLow)
+	}
+}
+
+func TestRuleClassifier_DeepConversation(t *testing.T) {
+	c := &RuleClassifier{}
+	shallow := c.Score(Features{ConversationDepth: 5})
+	deep := c.Score(Features{ConversationDepth: 15})
+	if deep <= shallow {
+		t.Errorf("deep conversation should score higher: %f vs %f", deep, shallow)
+	}
+}
+
+func TestRuleClassifier_ScoreDoesNotExceedOne(t *testing.T) {
+	c := &RuleClassifier{}
+	// Max all signals simultaneously
+	f := Features{
+		TokenEstimate:     500,
+		CodeBlockCount:    3,
+		RecentToolCalls:   10,
+		ConversationDepth: 20,
+	}
+	score := c.Score(f)
+	if score > 1.0 {
+		t.Errorf("score %f exceeds 1.0", score)
+	}
+}
+
+// ── Router ───────────────────────────────────────────────────────────────────
+
+func TestRouter_DefaultThreshold(t *testing.T) {
+	r := New(RouterConfig{LightModel: "gemini-flash"})
+	if r.Threshold() != defaultThreshold {
+		t.Errorf("default threshold: got %f, want %f", r.Threshold(), defaultThreshold)
+	}
+}
+
+func TestRouter_NegativeThresholdFallsBackToDefault(t *testing.T) {
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: -0.1})
+	if r.Threshold() != defaultThreshold {
+		t.Errorf("negative threshold: got %f, want %f", r.Threshold(), defaultThreshold)
+	}
+}
+
+func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) {
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
+	msg := "hi"
+	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	if !usedLight {
+		t.Error("simple message: expected light model to be selected")
+	}
+	if model != "gemini-flash" {
+		t.Errorf("simple message: model got %q, want %q", model, "gemini-flash")
+	}
+}
+
+func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) {
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
+	msg := "```go\nfmt.Println(\"hello\")\n```"
+	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	if usedLight {
+		t.Error("code block: expected primary model to be selected")
+	}
+	if model != "claude-sonnet-4-6" {
+		t.Errorf("code block: model got %q, want %q", model, "claude-sonnet-4-6")
+	}
+}
+
+func TestRouter_SelectModel_AttachmentUsesPrimary(t *testing.T) {
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
+	msg := "can you analyze this? data:image/png;base64,abc123"
+	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	if usedLight {
+		t.Error("attachment: expected primary model to be selected")
+	}
+	if model != "claude-sonnet-4-6" {
+		t.Errorf("attachment: model got %q, want %q", model, "claude-sonnet-4-6")
+	}
+}
+
+func TestRouter_SelectModel_LongMessageUsesPrimary(t *testing.T) {
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
+	// "word " is 5 runes, so 210 repeats = 1050 runes → token estimate 350 (> 200)
+	msg := strings.Repeat("word ", 210)
+	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	if usedLight {
+		t.Error("long message: expected primary model to be selected")
+	}
+	if model != "claude-sonnet-4-6" {
+		t.Errorf("long message: model got %q, want %q", model, "claude-sonnet-4-6")
+	}
+}
+
+func TestRouter_SelectModel_DeepToolChainUsesLight(t *testing.T) {
+	// Four recent tool calls score 0.25 on their own, which stays below the 0.35
+	// threshold — acceptable: only promote to heavy when the signal is unambiguous.
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
+	history := []providers.Message{
+		{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}},
+		{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}, {Name: "search"}}},
+	}
+	msg := "ok"
+	_, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6")
+	if !usedLight {
+		t.Error("short message + moderate tool calls: expected light model (score 0.25 < 0.35)")
+	}
+}
+
+func TestRouter_SelectModel_ToolChainPlusMediumUsesHeavy(t *testing.T) {
+	// Tool calls (0.25) + medium message (0.15) = 0.40 >= 0.35 → heavy
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
+	history := []providers.Message{
+		{Role: "assistant", ToolCalls: []providers.ToolCall{
+			{Name: "a"}, {Name: "b"}, {Name: "c"}, {Name: "d"},
+		}},
+	}
+	// "word " is 5 runes, so 55 repeats = 275 runes → token estimate 91 (medium band)
+	msg := strings.Repeat("word ", 55)
+	_, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6")
+	if usedLight {
+		t.Error("tool chain + medium message: expected primary model (score >= 0.35)")
+	}
+}
+
+func TestRouter_SelectModel_CustomThreshold(t *testing.T) {
+	// Very low threshold: even a medium-length message triggers the heavy model
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.05})
+	msg := strings.Repeat("word ", 55) // medium message → 0.15 >= 0.05
+	_, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	if usedLight {
+		t.Error("low threshold: medium message should use primary model")
+	}
+}
+
+func TestRouter_SelectModel_HighThreshold(t *testing.T) {
+	// Very high threshold: even code blocks route to light
+	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.99})
+	msg := "```go\nfmt.Println()\n```"
+	_, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	if !usedLight {
+		t.Error("very high threshold: code block (0.40) should route to light model")
+	}
+}
+
+func TestRouter_LightModel(t *testing.T) {
+	r := New(RouterConfig{LightModel: "my-fast-model", Threshold: 0.35})
+	if r.LightModel() != "my-fast-model" {
+		t.Errorf("LightModel: got %q, want %q", r.LightModel(), "my-fast-model")
+	}
+}
+
+// ── newWithClassifier (internal testing hook) ─────────────────────────────────
+
+type fixedScoreClassifier struct{ score float64 }
+
+func (f *fixedScoreClassifier) Score(_ Features) float64 { return f.score }
+
+func TestRouter_CustomClassifier_LowScore_SelectsLight(t *testing.T) {
+	r := newWithClassifier(
+		RouterConfig{LightModel: "light", Threshold: 0.5},
+		&fixedScoreClassifier{score: 0.2},
+	)
+	_, usedLight := r.SelectModel("anything", nil, "heavy")
+	if !usedLight {
+		t.Error("low score with custom classifier: expected light model")
+	}
+}
+
+func TestRouter_CustomClassifier_HighScore_SelectsPrimary(t *testing.T) {
+	r := newWithClassifier(
+		RouterConfig{LightModel: "light", Threshold: 0.5},
+		&fixedScoreClassifier{score: 0.8},
+	)
+	_, usedLight := r.SelectModel("anything", nil, "heavy")
+	if usedLight {
+		t.Error("high score with custom classifier: expected primary model")
+	}
+}
+
+func TestRouter_CustomClassifier_ExactThreshold_SelectsPrimary(t *testing.T) {
+	// score == threshold → primary (uses >= comparison)
+	r := newWithClassifier(
+		RouterConfig{LightModel: "light", Threshold: 0.5},
+		&fixedScoreClassifier{score: 0.5},
+	)
+	_, usedLight := r.SelectModel("anything", nil, "heavy")
+	if usedLight {
+		t.Error("score == threshold: expected primary model (>= threshold → primary)")
+	}
+}

From 02e81923493712bd714fce8f63d08a79912bd97b Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Mon, 2 Mar 2026 22:42:52 +0800
Subject: [PATCH 3/6] feat(agent): wire model routing into the agent loop

instance.go:
  - Add Router *routing.Router and LightCandidates []FallbackCandidate
    to AgentInstance.
  - At agent creation, when routing.enabled and light_model resolves
    successfully in model_list, pre-build the Router and resolve the
    light model candidates once. If the light model isn't in model_list,
    log a warning and disable routing for that agent gracefully.

loop.go:
  - Add selectCandidates(agent, userMsg, history) helper.
    It calls Router.SelectModel and returns either agent.Candidates /
    agent.Model (primary tier) or agent.LightCandidates / light_model
    (light tier). Returns primary unchanged when routing is disabled.
  - In runLLMIteration, resolve (activeCandidates, activeModel) once
    before entering the tool-iteration loop. The model tier is sticky
    for the entire turn so a multi-step tool chain doesn't switch
    models mid-way.
  - Replace hard-coded agent.Candidates / agent.Model references in
    callLLM and the debug log with the resolved active values.

The fallback chain and retry logic are untouched. When light_model
returns an error the fallback chain handles escalation normally.
---
 pkg/agent/instance.go | 61 +++++++++++++++++++++++++++++++------------
 pkg/agent/loop.go     | 47 +++++++++++++++++++++++++++++----
 2 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go
index ed438059f..ec8871e30 100644
--- a/pkg/agent/instance.go
+++ b/pkg/agent/instance.go
@@ -34,6 +34,14 @@ type AgentInstance struct {
 	Subagents      *config.SubagentsConfig
 	SkillsFilter   []string
 	Candidates     []providers.FallbackCandidate
+
+	// Router is non-nil when model routing is configured and the light model
+	// was successfully resolved. It scores each incoming message and decides
+	// whether to route to LightCandidates or stay with Candidates.
+	Router *routing.Router
+	// LightCandidates holds the resolved provider candidates for the light model.
+	// Pre-computed at agent creation to avoid repeated model_list lookups at runtime.
+	LightCandidates []providers.FallbackCandidate
 }
 
 // NewAgentInstance creates an agent instance from config.
@@ -148,23 +156,44 @@ func NewAgentInstance(
 
 	candidates := providers.ResolveCandidatesWithLookup(modelCfg, defaults.Provider, resolveFromModelList)
 
+	// Model routing setup: pre-resolve light model candidates at creation time
+	// to avoid repeated model_list lookups on every incoming message.
+	var router *routing.Router
+	var lightCandidates []providers.FallbackCandidate
+	if rc := defaults.Routing; rc != nil && rc.Enabled && rc.LightModel != "" {
+		lightModelCfg := providers.ModelConfig{Primary: rc.LightModel}
+		resolved := providers.ResolveCandidatesWithLookup(lightModelCfg, defaults.Provider, resolveFromModelList)
+		if len(resolved) > 0 {
+			router = routing.New(routing.RouterConfig{
+				LightModel: rc.LightModel,
+				Threshold:  rc.Threshold,
+			})
+			lightCandidates = resolved
+		} else {
+			log.Printf("routing: light_model %q not found in model_list — routing disabled for agent %q",
+				rc.LightModel, agentID)
+		}
+	}
+
 	return &AgentInstance{
-		ID:             agentID,
-		Name:           agentName,
-		Model:          model,
-		Fallbacks:      fallbacks,
-		Workspace:      workspace,
-		MaxIterations:  maxIter,
-		MaxTokens:      maxTokens,
-		Temperature:    temperature,
-		ContextWindow:  maxTokens,
-		Provider:       provider,
-		Sessions:       sessionsManager,
-		ContextBuilder: contextBuilder,
-		Tools:          toolsRegistry,
-		Subagents:      subagents,
-		SkillsFilter:   skillsFilter,
-		Candidates:     candidates,
+		ID:              agentID,
+		Name:            agentName,
+		Model:           model,
+		Fallbacks:       fallbacks,
+		Workspace:       workspace,
+		MaxIterations:   maxIter,
+		MaxTokens:       maxTokens,
+		Temperature:     temperature,
+		ContextWindow:   maxTokens,
+		Provider:        provider,
+		Sessions:        sessionsManager,
+		ContextBuilder:  contextBuilder,
+		Tools:           toolsRegistry,
+		Subagents:       subagents,
+		SkillsFilter:    skillsFilter,
+		Candidates:      candidates,
+		Router:          router,
+		LightCandidates: lightCandidates,
 	}
 }
 
diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go
index 00b0f096a..6df956627 100644
--- a/pkg/agent/loop.go
+++ b/pkg/agent/loop.go
@@ -625,6 +625,12 @@ func (al *AgentLoop) runLLMIteration(
 	iteration := 0
 	var finalContent string
 
+	// Determine effective model tier for this conversation turn.
+	// selectCandidates evaluates routing once and the decision is sticky for
+	// all tool-follow-up iterations within the same turn so that a multi-step
+	// tool chain doesn't switch models mid-way through.
+	activeCandidates, activeModel := al.selectCandidates(agent, opts.UserMessage, messages)
+
 	for iteration < agent.MaxIterations {
 		iteration++
 
@@ -643,7 +649,7 @@ func (al *AgentLoop) runLLMIteration(
 			map[string]any{
 				"agent_id":          agent.ID,
 				"iteration":         iteration,
-				"model":             agent.Model,
+				"model":             activeModel,
 				"messages_count":    len(messages),
 				"tools_count":       len(providerToolDefs),
 				"max_tokens":        agent.MaxTokens,
@@ -659,13 +665,13 @@ func (al *AgentLoop) runLLMIteration(
 				"tools_json":    formatToolsForLog(providerToolDefs),
 			})
 
-		// Call LLM with fallback chain if candidates are configured.
+		// Call LLM with fallback chain if multiple candidates are configured.
 		var response *providers.LLMResponse
 		var err error
 
 		callLLM := func() (*providers.LLMResponse, error) {
-			if len(agent.Candidates) > 1 && al.fallback != nil {
-				fbResult, fbErr := al.fallback.Execute(ctx, agent.Candidates,
+			if len(activeCandidates) > 1 && al.fallback != nil {
+				fbResult, fbErr := al.fallback.Execute(ctx, activeCandidates,
 					func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
 						return agent.Provider.Chat(ctx, messages, providerToolDefs, model, map[string]any{
 							"max_tokens":       agent.MaxTokens,
@@ -684,7 +690,7 @@ func (al *AgentLoop) runLLMIteration(
 				}
 				return fbResult.Response, nil
 			}
-			return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{
+			return agent.Provider.Chat(ctx, messages, providerToolDefs, activeModel, map[string]any{
 				"max_tokens":       agent.MaxTokens,
 				"temperature":      agent.Temperature,
 				"prompt_cache_key": agent.ID,
@@ -934,6 +940,37 @@ func (al *AgentLoop) runLLMIteration(
 	return finalContent, iteration, nil
 }
 
+// selectCandidates returns the model candidates and resolved model name to use
+// for a conversation turn. When model routing is configured and the incoming
+// message scores below the complexity threshold, it returns the light model
+// candidates instead of the primary ones.
+//
+// The returned (candidates, model) pair is used for all LLM calls within one
+// turn — tool follow-up iterations use the same tier as the initial call so
+// that a multi-step tool chain doesn't switch models mid-way.
+func (al *AgentLoop) selectCandidates(
+	agent *AgentInstance,
+	userMsg string,
+	history []providers.Message,
+) (candidates []providers.FallbackCandidate, model string) {
+	if agent.Router == nil || len(agent.LightCandidates) == 0 {
+		return agent.Candidates, agent.Model
+	}
+
+	_, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model)
+	if !usedLight {
+		return agent.Candidates, agent.Model
+	}
+
+	logger.InfoCF("agent", "Model routing: light model selected",
+		map[string]any{
+			"agent_id":    agent.ID,
+			"light_model": agent.Router.LightModel(),
+			"threshold":   agent.Router.Threshold(),
+		})
+	return agent.LightCandidates, agent.Router.LightModel()
+}
+
 // updateToolContexts updates the context for tools that need channel/chatID info.
 func (al *AgentLoop) updateToolContexts(agent *AgentInstance, channel, chatID string) {
 	// Use ContextualTool interface instead of type assertions

From 09e68cb63bd2ee556adcc1f559dd0e8019b3af37 Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Mon, 2 Mar 2026 23:11:45 +0800
Subject: [PATCH 4/6] fix(routing): resolve golines, gosmopolitan and misspell
 lint failures

- classifier.go: s/honour/honor/ (American English per misspell)
- router.go: break SelectModel signature across lines (golines)
- router_test.go: break long Message literal (golines)
- router_test.go: replace CJK string literal with rune slice so
  gosmopolitan does not flag the source file; behaviour is identical
---
 pkg/routing/classifier.go  |  2 +-
 pkg/routing/router.go      |  6 +++++-
 pkg/routing/router_test.go | 14 +++++++++++---
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/pkg/routing/classifier.go b/pkg/routing/classifier.go
index 761a6fdec..8cddaf069 100644
--- a/pkg/routing/classifier.go
+++ b/pkg/routing/classifier.go
@@ -71,7 +71,7 @@ func (c *RuleClassifier) Score(f Features) float64 {
 		score += 0.10
 	}
 
-	// Cap at 1.0 to honour the [0, 1] contract even when multiple signals fire
+	// Cap at 1.0 to honor the [0, 1] contract even when multiple signals fire
 	// simultaneously (e.g., long message + code block + tool chain = 1.10 raw).
 	if score > 1.0 {
 		score = 1.0
diff --git a/pkg/routing/router.go b/pkg/routing/router.go
index d4f5218d3..78092b106 100644
--- a/pkg/routing/router.go
+++ b/pkg/routing/router.go
@@ -57,7 +57,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router {
 //
 // The caller is responsible for resolving the returned model name into
 // provider candidates (see AgentInstance.LightCandidates).
-func (r *Router) SelectModel(msg string, history []providers.Message, primaryModel string) (model string, usedLight bool) {
+func (r *Router) SelectModel(
+	msg string,
+	history []providers.Message,
+	primaryModel string,
+) (model string, usedLight bool) {
 	features := ExtractFeatures(msg, history)
 	score := r.classifier.Score(features)
 	if score < r.cfg.Threshold {
diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go
index 168227638..267200c2e 100644
--- a/pkg/routing/router_test.go
+++ b/pkg/routing/router_test.go
@@ -38,8 +38,13 @@ func TestExtractFeatures_TokenEstimate(t *testing.T) {
 }
 
 func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) {
-	// 9 CJK runes / 3 = 3 tokens
-	msg := "你好世界你好世界你" // 9 runes
+	// 9 CJK runes (U+4F60 U+597D U+4E16 U+754C × 2 + U+4F60) / 3 = 3 tokens.
+	// Using a rune slice literal avoids CJK string literals in source.
+	msg := string([]rune{
+		0x4F60, 0x597D, 0x4E16, 0x754C,
+		0x4F60, 0x597D, 0x4E16, 0x754C,
+		0x4F60,
+	})
 	f := ExtractFeatures(msg, nil)
 	if f.TokenEstimate != 3 {
 		t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate)
@@ -69,7 +74,10 @@ func TestExtractFeatures_RecentToolCalls(t *testing.T) {
 	history := make([]providers.Message, 10)
 	// Put 2 tool calls at positions 8 and 9 (within the last 6)
 	history[8] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}}}
-	history[9] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}}
+	history[9] = providers.Message{
+		Role:      "assistant",
+		ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}},
+	}
 	// Position 3 is outside the lookback window and must NOT be counted
 	history[3] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "old_tool"}}}
 

From 04ddb6b472e991a25fc05b6d3fba100649025d33 Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Fri, 6 Mar 2026 12:20:21 +0800
Subject: [PATCH 5/6] chore: remove accidentally committed local files

---
 .claude/settings.local.json       |  42 --------
 PicoClaw 26M2W3 社区开发者会议.md | 161 ------------------------------
 PicoClaw贡献方向规划.md           | 108 --------------------
 3 files changed, 311 deletions(-)
 delete mode 100644 .claude/settings.local.json
 delete mode 100644 PicoClaw 26M2W3 社区开发者会议.md
 delete mode 100644 PicoClaw贡献方向规划.md

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
deleted file mode 100644
index aa8927667..000000000
--- a/.claude/settings.local.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(cd:*)",
-      "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -v -count=1 2>&1)",
-      "Bash(cd /e/Project/picoclaw && golangci-lint run ./pkg/memory/... 2>&1)",
-      "Bash(cd /e/Project/picoclaw && golangci-lint run ./pkg/memory/... --fix 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -count=1 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go vet ./pkg/memory/... 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go build ./... 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -bench=. -benchmem -run=^$ 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go test ./pkg/session/... -count=1 2>&1)",
-      "mcp__sequential-thinking__sequentialthinking",
-      "Bash(cd /e/Project/picoclaw && git push -u origin feat/jsonl-memory-store 2>&1)",
-      "Bash(head:*)",
-      "WebSearch",
-      "Bash(cd /e/Project/picoclaw && gh issue view 711 --comments 2>&1)",
-      "Bash(cd /e/Project/picoclaw && gh pr view 732 --comments 2>&1)",
-      "Bash(cd /e/Project/picoclaw && gh pr view 732 2>&1)",
-      "Bash(cd /e/Project/picoclaw && gh pr checks 732 2>&1)",
-      "Bash(echo no upstream remote:*)",
-      "Bash(cd /e/Project/picoclaw && git rebase upstream/main 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go build ./pkg/memory/... 2>&1)",
-      "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -count=1 -v 2>&1)",
-      "Bash(gh api:*)",
-      "Bash(git push:*)",
-      "Bash(go test:*)",
-      "Bash(find .:*)",
-      "Bash(golangci-lint run:*)",
-      "Bash(gh pr:*)",
-      "Bash(gh issue:*)",
-      "Bash(git fetch:*)",
-      "Bash(echo exit: $?:*)",
-      "WebFetch(domain:github.com)",
-      "Bash(git log:*)",
-      "Bash(grep:*)",
-      "Bash(ls:*)",
-      "Bash(go build:*)",
-      "Bash(go vet:*)"
-    ]
-  }
-}
diff --git a/PicoClaw 26M2W3 社区开发者会议.md b/PicoClaw 26M2W3 社区开发者会议.md
deleted file mode 100644
index ab356424e..000000000
--- a/PicoClaw 26M2W3 社区开发者会议.md	
+++ /dev/null
@@ -1,161 +0,0 @@
-# PicoClaw 26M2W3 社区开发者会议
-
-> **PicoClaw的设计目标**：轻量高效，任意部署；简单易用，普惠大众；
-> **致PicoClaw开发者**：让我们携手加速AI奇点的到来，共同创造并见证历史。
-
----
-
-## 26M2W3 概况
-
-### 成果
-* **Github 表现**：Star 17K+，Merge 100+ PR，Contributors 70+
-* **用户规模**：微信群 1600+，Discord 1300+
-* **开发者规模**：微信群 ~50，Discord ~40
-* **生态进展**：PicoClaw 进入 Homebrew
-* **工程进展**：Provider 完成重构
-* **特别鸣谢**：daming, lxowalle 在假期的努力！
-
-### 暴露的问题
-* 第一次开展大规模社区协同开发，又是在假期期间，响应速度、社区协调、工程架构方面都暴露出了很多不足。
-* PicoClaw 早期 vibe-coding 的快速实现架构在蜂拥而至的 PR 面前会迅速变成“屎山”和冲突地狱。
-* 为尽快合并 PR，未充分验证社区开发者的能力，也没有提供合并指导规范，过早给予 write 权限，在上面架构问题下更暴露出问题。
-* 忙于以上 PR 协调问题，拖后了文档和宣发进度。特别是宣发问题，被不放春节假的海外开发者项目 zeroclaw 趁虚而入。
-* ⚠️ **警惕币圈！** 尤其是 pump.fun 空气币，不要认领参与！
-
-> **会议核心任务**：本次周会主要需要划分项目板块，认领板块负责人，制订下周计划。以下内容社区开发者可以继续添加遗漏的地方。
-
----
-
-## 开发板块
-
-### 仓库管理
-* 新建 `dev` 分支，`main` 分支推送严格化。
-* 完善 `CONTRIBUTING.md`。
-* **时区审核分工**：
-  * GMT+8 附近时区审核（中国）
-  * GMT+0 附近时区审核（欧洲）：**Huaaudio**
-  * GMT-8 附近时区审核（美洲）
-* 仓库权限申请：联系 **zepan** 审核。
-* Readme 中公布本次会议的分工人员表格，方便开发者找寻对应人员审核。
-
-### Provider（负责人：daming）
-* **进度**：已重构完成。
-* **计划**：
-  * 梳理支持和计划支持的 provider 协议列表及进度计划。
-  * **插件系统探索**：go 原生插件？(参考 [hashicorp/go-plugin](https://github.com/hashicorp/go-plugin))
-  * **优化思路**：现在各种系统的 LLM provider 都在重复造轮子，而且每新增一个 provider 都得再改代码、重新发版才能支持。应该把专业的事交给专业的组件来负责。我开了个新的开源项目——`open-next-router`，采用 nginx 原子化配置的思想，新增 provider 无需改代码，新增配置文件即可支持，提供了 go 的 sdk 包，可快速接入项目。PicoClaw 接入后可更聚焦于 agent 的实现而不是各种上游 provider 的适配，就能快其它 claw 一步。
-
-### Channels（负责人：daming）
-* **进度**：正在重构。
-* **计划**：
-  * 梳理支持和计划支持的 channel 协议列表及进度计划。
-  * **附件支持讨论**：音频、视频、文件。
-    * 附件的生命周期应该由谁管理？channel 应该只负责下载文件，然后交由 Agent 消费完成后管理生命周期？
-    * 音频转文字是否要迁移到 agent 层？或者说附件应该在哪一层被处理？
-    * 发送附件的方法如何拓展？添加新的方法？拓展原有 Message？
-  * 群友建议的 **skill加channel**？(参考 [nanoclaw skill](https://github.com/qwibitai/nanoclaw/blob/main/.claude/skills/add-telegram/SKILL.md))
-  * **插件系统讨论**。
-  * **架构优化**：
-    * 抽离公共的 HTTP 服务器，采用 WebHook 通信的 channel 通过复用公共的服务器来节省资源和端口。
-    * Websocket 支持。
-    * 将路由相关字段（`peer_kind`、`peer_id`）从 metadata 中提升为 `InboundMessage` 的结构体字段。
-  * **状态管理**：聊天记录应该由 channel 管理还是 agent 管理？
-
-### Agent（负责人：学欧）
-* Agent Loop 机制优化。
-* **记忆系统**：引入 SQLite。
-* **Multi-Agent / Swarm** 支持。
-* **模型能力回退链**：在主模型不支持多模态时，使用多模态模型进行辅助。
-
-### Tools（负责人：学欧）
-* 整理规范。
-* 插件系统探索。
-
-### Heartbeat / Status / Log 等（负责人：daming）
-* 完善心跳、状态和日志监控。
-
-### Skill
-* 搜索 skill 的 skill，已合并 PR：[PR #332](https://github.com/sipeed/picoclaw/pull/332)。
-* **安全与维护**：探讨 skill 的维护和安全性问题，防范目前常见的投毒现象。
-
-### MCP（负责人：evo）
-* **功能实现**：已有 PR [#376](https://github.com/sipeed/picoclaw/pull/376)、[#282](https://github.com/sipeed/picoclaw/pull/282)。
-* 安卓手机操作支持。
-* 浏览器操作 (`webmcp?` `action book?`)：已有相关 PR ([agent-browser-tool](https://github.com/sipeed/picoclaw/tree/feat/agent-browser-tool))。
-
-### 占用/效率优化（负责人：学欧）
-* **目标**：优化内存占用与执行效率，希望控制在 **20M 以内**。
-* **分析**：分析各个版本之间的内存占用变化，分析各个模块的内存占用情况。
-* **裁剪**：裁剪出最小版本，用于宣发。
-
-### Security
-* 响应并修复安全机构发送的漏洞警示。
-* 参考 openclaw 等现有仓库的安全措施，加固 PicoClaw。
-
-### AI CI（负责人：政宇）
-* 完善仓库的 CI 流程。
-* 加入 AI review 等自动化流程。
-* 完善发布流程、测试项目、release note、breaking change 记录。
-* 根目录加上 `CLAUDE.md`？
-* 增加 `loongarch` & `deb/rpm` 支持。
-
-### UX Testing
-* 对 release 版进行一般性测试。
-* 站在小白用户角度对使用交互提出意见建议，比如完善 PicoClaw onboard 流程。
-* 展示性优化：比如启动时刷屏 ascii-art 的 PicoClaw 标识，增加用户拍摄视频时的辨识度。
-
-### 文档工作
-* 仓库 Readme 美化，仓库文档整理、规范。
-* 整理所有 Channel、Provider 的实现支持列表。
-* 针对小白用户的各个 Provider、Channel 详细手把手教程文档。
-* 建设 Wiki 页面（deepwiki?）。
-
----
-
-## Release 待办事项 (Checklist)
-- [ ] Provider
-- [ ] Channel
-- [ ] Agent
-- [ ] Swarm
-- [ ] Security
-- [ ] MCP：浏览器
-- [ ] 文档
-- [ ] Logo
-- [ ] Metadata 问题解决
-
----
-
-## 关于插件系统测试方案（补充记录）
-测试了以下几种方案：
-1. **内置的 plugin 模块**：不考虑。不支持 Windows 等平台 ([plugin](https://pkg.go.dev/plugin@go1.26.0))。
-2. **hashicorp/go-plugin**：不考虑。占用资源过大，固件都增加了 20～30M。
-3. **net/rpc**（client-server 模式）：
-   * **优点**：支持热加载，插件可以保存运行状态。
-   * **缺点**：资源消耗较多（内存约增加 5M+，每个插件大小 10+M），每个插件占用一个端口，不太优雅。
-4. **encoding/gob**（编译为可执行程序，由主程序调用并获取返回值）：
-   * **优点**：支持热加载，消耗资源相对较少（测试固件大小增加了 376KB，内存消耗增加了 640KB）。
-   * **缺点**：无法保存运行状态（应该可以用 socket 等方法来优化支持）。
-
----
-
-## 宣发板块
-
-### 社区运营
-* **宣发物料/策划**：负责人 **zepan**，再寻求 1~2 位有网感的社区成员。
-  * 制作标准 Logo, Slogan。
-  * 制作具有传播性的图文/视频等。
-  * 策划互动性、传播性强的用户活动，产生用户内容。
-  * KOL 建联等其它宣发手段。
-* **微信群运营**：负责人 **zepan**。
-* **推特运营**：负责人 **zepan**。
-* **Discord运营**：负责人 **OsmiumOP**；需要再找一个国内开发者盯一下，会给予 admin 权限。
-* **其他渠道开拓**：小红书、知乎、Reddit？
-* **Go社区联络大使**：负责人 **卓**。
-
----
-
-## 中期 TODO
-
-* **桌面应用 / 安卓 APP**
-  * 架构讨论：C/S 还是单程序？接口文档规范？
-* **配套硬件**
diff --git a/PicoClaw贡献方向规划.md b/PicoClaw贡献方向规划.md
deleted file mode 100644
index 0b4ea40b8..000000000
--- a/PicoClaw贡献方向规划.md
+++ /dev/null
@@ -1,108 +0,0 @@
-# PicoClaw 贡献方向规划（3月1日更新）
-
-## 个人情况
-
-- Go 开发者，会 Python，在学 AI Agent
-- 已合并 PR：#173（多bug修复）、#186（安全加固）
-- 已提交 PR：#732（JSONL session store，等待 review）
-- 已关闭 PR：#719（SQLite 方案，被维护者建议改用 JSONL）
-
----
-
-## 项目当前态势（3月1日）
-
-### 已完成的重构
-- Provider 重构：daming #492 — 完成
-- Channel 重构 Phase 1：alexhoshina #662 — 完成
-- Channel 重构 Phase 2：alexhoshina #877 (10,926行) — 2月27日合并
-- Migrate 重构：lxowalle #910 — 2月28日合并
-
-### 正在进行的重构
-- **Tools 系统重构**：lxowalle PR #846（50个文件）— OPEN
-- **Plugin 系统**：gh-xj PR #936-939（4个PR系列）— OPEN
-- **Agent 系统重构**：alexhoshina Issue #772（roadmap）— 只有 issue，还没有 PR
-
-### 我的行动记录
-- 2月24日：在 #772 评论，将 PR #732 定位为 Agent 重构的 memory 子任务
-- 3月1日：在 #295 评论，提出模型路由设计方案
-
----
-
-## 战略方向
-
-### 方向 1：智能模型路由（#295）— 主攻 ✅ 代码已完成
-
-**为什么选这个**：
-1. Zepan（创始人）亲自创建的 issue，roadmap 标签
-2. 有大量社区讨论但零 PR
-3. 独立模块 `pkg/routing/`，不碰任何重构区文件
-4. 面试价值极高
-
-**已完成（分支 feat/model-routing）**：
-- `pkg/routing/features.go` — ExtractFeatures：5维结构评分，纯语言无关
-- `pkg/routing/classifier.go` — Classifier 接口 + RuleClassifier（加权求和，上限 1.0）
-- `pkg/routing/router.go` — Router：SelectModel，阈值默认 0.35
-- `pkg/routing/router_test.go` — 34 个测试，全部通过
-- `pkg/config/config.go` — RoutingConfig 添加到 AgentDefaults
-- `pkg/agent/instance.go` — 预计算 Router + LightCandidates
-- `pkg/agent/loop.go` — selectCandidates helper，turn 级别粘性路由
-
-**3 个 commit，773 行新增，33 行修改，0 个新依赖**
-
-**配置**：
-```json
-{
-  "agents": {
-    "defaults": {
-      "model": "claude-sonnet-4-6",
-      "routing": {
-        "enabled": true,
-        "light_model": "gemini-flash",
-        "threshold": 0.35
-      }
-    }
-  }
-}
-```
-
-**下一步**：向上游 push 并开 PR，PR body 引用 issue #295
-
-### 方向 2：JSONL Store 集成 — 等待时机
-
-PR #732 已提交。等 Tools 重构 (#846) 合并后再做集成 PR。
-已在 #772 评论建立关联。
-
-### 方向 3：sessions CLI 子命令（#575）— 备选快速 PR
-
-如果需要一个快速能合并的 PR 来积累信任：
-- `picoclaw sessions list/clear/export`
-- 不碰任何重构区文件
-- 实用性强
-
----
-
-## 需要避开的区域
-
-| 区域 | 原因 |
-|------|------|
-| Tools 系统 | lxowalle PR #846 正在重构 |
-| Plugin 系统 | gh-xj PR #936-939 正在做 |
-| Channel 任何东西 | alexhoshina 刚完成大重构 |
-| Provider 配置 | daming 已定型 |
-| MCP | 两个竞争 PR (#282, #376) |
-| Hooks 基础 | gh-xj #936 包含 pkg/hooks/ |
-| AgentLoop 拆分 | SaiBalusu-usf PR #699 |
-| Tool pair 修复 | QuietyAwe PR #871 |
-
----
-
-## 关键人物（更新）
-
-| 人 | GitHub | 角色 | 最近活动 |
-|---|--------|------|---------|
-| Zepan | @Zepan | 创始人 | #806 WebUI issue |
-| daming | @yinwm | Provider/审核 | 审核 PR #877 |
-| alexhoshina | @alexhoshina | Channel+Agent 重构 | #877 合并，#772 发起 |
-| lxowalle | @lxowalle | Tools+审核 | #846 Tools重构中 |
-| gh-xj | @gh-xj | Plugin 系统 | #936-939 四个 PR |
-| nikolasdehor | @nikolasdehor | 社区活跃评论者 | 每个 issue 都有他 |

From b84adacc2f302aa68c3ccd88bc5815ff51904273 Mon Sep 17 00:00:00 2001
From: xiaoen <2768753269@qq.com>
Date: Fri, 6 Mar 2026 13:10:20 +0800
Subject: [PATCH 6/6] fix(routing): address review feedback on CJK estimation
 and observability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. CJK token estimation: replace flat rune_count/3 with script-aware
   counting — CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF)
   count as 1 token each, and the remaining runes are counted as
   non_cjk_count/4 (integer division). This fixes a 3x underestimate
   for Chinese/Japanese/Korean text that could incorrectly route
   complex CJK messages to the light model.

2. Routing observability: SelectModel now returns the computed score as
   a third value. selectCandidates logs the score on both paths — Info
   level for light model selection, Debug level for primary model
   selection.

3. Added tests: TestExtractFeatures_TokenEstimate_Mixed (CJK+ASCII mix),
   TestRouter_SelectModel_ReturnsScore.

Addresses review feedback from @mingmxren.
---
 pkg/agent/loop.go          |  9 ++++++-
 pkg/routing/features.go    | 29 +++++++++++++-------
 pkg/routing/router.go      | 15 ++++++-----
 pkg/routing/router_test.go | 54 ++++++++++++++++++++++++++------------
 4 files changed, 72 insertions(+), 35 deletions(-)

diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go
index 5e68e4931..132bb3c98 100644
--- a/pkg/agent/loop.go
+++ b/pkg/agent/loop.go
@@ -1192,8 +1192,14 @@ func (al *AgentLoop) selectCandidates(
 		return agent.Candidates, agent.Model
 	}
 
-	_, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model)
+	_, usedLight, score := agent.Router.SelectModel(userMsg, history, agent.Model)
 	if !usedLight {
+		logger.DebugCF("agent", "Model routing: primary model selected",
+			map[string]any{
+				"agent_id":  agent.ID,
+				"score":     score,
+				"threshold": agent.Router.Threshold(),
+			})
 		return agent.Candidates, agent.Model
 	}
 
@@ -1201,6 +1207,7 @@ func (al *AgentLoop) selectCandidates(
 		map[string]any{
 			"agent_id":    agent.ID,
 			"light_model": agent.Router.LightModel(),
+			"score":       score,
 			"threshold":   agent.Router.Threshold(),
 		})
 	return agent.LightCandidates, agent.Router.LightModel()
diff --git a/pkg/routing/features.go b/pkg/routing/features.go
index 4fa1c5b6c..c371e21aa 100644
--- a/pkg/routing/features.go
+++ b/pkg/routing/features.go
@@ -15,9 +15,9 @@ const lookbackWindow = 6
 // Every dimension is language-agnostic by construction — no keyword or pattern matching
 // against natural-language content. This ensures consistent routing for all locales.
 type Features struct {
-	// TokenEstimate is a conservative proxy for token count.
-	// Computed as utf8.RuneCountInString(msg) / 3, which handles CJK characters
-	// (each rune ≈ 1 token for CJK, ≈ 0.25 tokens for ASCII) without any API call.
+	// TokenEstimate is a proxy for token count.
+	// CJK runes count as 1 token each; non-CJK runes as 0.25 tokens each.
+	// This is a heuristic that avoids API calls; it is not an exact tokenizer count.
 	TokenEstimate int
 
 	// CodeBlockCount is the number of fenced code blocks (``` pairs) in the message.
@@ -50,14 +50,23 @@ func ExtractFeatures(msg string, history []providers.Message) Features {
 	}
 }
 
-// estimateTokens returns a conservative token count proxy.
-// Using rune count / 3 rather than / 4 because CJK characters each map to
-// roughly one token, while ASCII words average ~1.3 chars/token. Dividing
-// by 3 is a safe middle ground that slightly over-estimates for Latin text
-// (errs toward routing to the heavy model) and is accurate for CJK.
+// estimateTokens returns a token count proxy that handles both CJK and Latin text.
+// CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF) map to roughly one
+// token each, while non-CJK runes average ~0.25 tokens/rune (≈4 chars per token
+// for English). Splitting the count this way avoids the 3x underestimation that a
+// flat rune_count/3 would produce for Chinese, Japanese, and Korean text.
 func estimateTokens(msg string) int {
-	rc := utf8.RuneCountInString(msg)
-	return rc / 3
+	total := utf8.RuneCountInString(msg)
+	if total == 0 {
+		return 0
+	}
+	cjk := 0
+	for _, r := range msg {
+		if r >= 0x2E80 && r <= 0x9FFF || r >= 0xF900 && r <= 0xFAFF || r >= 0xAC00 && r <= 0xD7AF {
+			cjk++
+		}
+	}
+	return cjk + (total-cjk)/4
 }
 
 // countCodeBlocks counts the number of complete fenced code blocks.
diff --git a/pkg/routing/router.go b/pkg/routing/router.go
index 78092b106..b1fa347e9 100644
--- a/pkg/routing/router.go
+++ b/pkg/routing/router.go
@@ -50,10 +50,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router {
 	return &Router{cfg: cfg, classifier: c}
 }
 
-// SelectModel returns the model to use for this conversation turn.
+// SelectModel returns the model to use for this conversation turn along with
+// the computed complexity score (for logging and debugging).
 //
-//   - If score < cfg.Threshold: returns (cfg.LightModel, true)
-//   - Otherwise:               returns (primaryModel, false)
+//   - If score < cfg.Threshold: returns (cfg.LightModel, true, score)
+//   - Otherwise:               returns (primaryModel, false, score)
 //
 // The caller is responsible for resolving the returned model name into
 // provider candidates (see AgentInstance.LightCandidates).
@@ -61,13 +62,13 @@ func (r *Router) SelectModel(
 	msg string,
 	history []providers.Message,
 	primaryModel string,
-) (model string, usedLight bool) {
+) (model string, usedLight bool, score float64) {
 	features := ExtractFeatures(msg, history)
-	score := r.classifier.Score(features)
+	score = r.classifier.Score(features)
 	if score < r.cfg.Threshold {
-		return r.cfg.LightModel, true
+		return r.cfg.LightModel, true, score
 	}
-	return primaryModel, false
+	return primaryModel, false, score
 }
 
 // LightModel returns the configured light model name.
diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go
index 267200c2e..2824d10ab 100644
--- a/pkg/routing/router_test.go
+++ b/pkg/routing/router_test.go
@@ -29,16 +29,16 @@ func TestExtractFeatures_EmptyMessage(t *testing.T) {
 }
 
 func TestExtractFeatures_TokenEstimate(t *testing.T) {
-	// 30 ASCII chars / 3 = 10 tokens
+	// 30 ASCII runes: 0 CJK + 30/4 = 7 tokens
 	msg := strings.Repeat("a", 30)
 	f := ExtractFeatures(msg, nil)
-	if f.TokenEstimate != 10 {
-		t.Errorf("TokenEstimate: got %d, want 10", f.TokenEstimate)
+	if f.TokenEstimate != 7 {
+		t.Errorf("TokenEstimate: got %d, want 7", f.TokenEstimate)
 	}
 }
 
 func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) {
-	// 9 CJK runes (U+4F60 U+597D U+4E16 U+754C × 2 + U+4F60) / 3 = 3 tokens.
+	// 9 CJK runes → 9 tokens (each CJK rune ≈ 1 token).
 	// Using a rune slice literal avoids CJK string literals in source.
 	msg := string([]rune{
 		0x4F60, 0x597D, 0x4E16, 0x754C,
@@ -46,8 +46,17 @@ func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) {
 		0x4F60,
 	})
 	f := ExtractFeatures(msg, nil)
-	if f.TokenEstimate != 3 {
-		t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate)
+	if f.TokenEstimate != 9 {
+		t.Errorf("CJK TokenEstimate: got %d, want 9", f.TokenEstimate)
+	}
+}
+
+func TestExtractFeatures_TokenEstimate_Mixed(t *testing.T) {
+	// Mixed: 4 CJK runes + 8 ASCII runes → 4 + 8/4 = 6 tokens.
+	msg := string([]rune{0x4F60, 0x597D, 0x4E16, 0x754C}) + "hello ok"
+	f := ExtractFeatures(msg, nil)
+	if f.TokenEstimate != 6 {
+		t.Errorf("Mixed TokenEstimate: got %d, want 6", f.TokenEstimate)
 	}
 }
 
@@ -249,7 +258,7 @@ func TestRouter_NegativeThresholdFallsBackToDefault(t *testing.T) {
 func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) {
 	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
 	msg := "hi"
-	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
 	if !usedLight {
 		t.Error("simple message: expected light model to be selected")
 	}
@@ -261,7 +270,7 @@ func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) {
 func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) {
 	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
 	msg := "```go\nfmt.Println(\"hello\")\n```"
-	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
 	if usedLight {
 		t.Error("code block: expected primary model to be selected")
 	}
@@ -273,7 +282,7 @@ func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) {
 func TestRouter_SelectModel_AttachmentUsesPrimary(t *testing.T) {
 	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
 	msg := "can you analyze this? data:image/png;base64,abc123"
-	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
 	if usedLight {
 		t.Error("attachment: expected primary model to be selected")
 	}
@@ -286,7 +295,7 @@ func TestRouter_SelectModel_LongMessageUsesPrimary(t *testing.T) {
 	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35})
 	// >200 token estimate: 210 * 3 = 630 chars
 	msg := strings.Repeat("word ", 210)
-	model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
 	if usedLight {
 		t.Error("long message: expected primary model to be selected")
 	}
@@ -304,7 +313,7 @@ func TestRouter_SelectModel_DeepToolChainUsesLight(t *testing.T) {
 		{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}, {Name: "search"}}},
 	}
 	msg := "ok"
-	_, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6")
+	_, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6")
 	if !usedLight {
 		t.Error("short message + moderate tool calls: expected light model (score 0.20 < 0.35)")
 	}
@@ -320,7 +329,7 @@ func TestRouter_SelectModel_ToolChainPlusMediumUsesHeavy(t *testing.T) {
 	}
 	// ~55 tokens * 3 = 165 chars
 	msg := strings.Repeat("word ", 55)
-	_, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6")
+	_, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6")
 	if usedLight {
 		t.Error("tool chain + medium message: expected primary model (score >= 0.35)")
 	}
@@ -330,7 +339,7 @@ func TestRouter_SelectModel_CustomThreshold(t *testing.T) {
 	// Very low threshold: even a short message triggers heavy model
 	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.05})
 	msg := strings.Repeat("word ", 55) // medium message → 0.15 >= 0.05
-	_, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	_, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
 	if usedLight {
 		t.Error("low threshold: medium message should use primary model")
 	}
@@ -340,7 +349,7 @@ func TestRouter_SelectModel_HighThreshold(t *testing.T) {
 	// Very high threshold: even code blocks route to light
 	r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.99})
 	msg := "```go\nfmt.Println()\n```"
-	_, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6")
+	_, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6")
 	if !usedLight {
 		t.Error("very high threshold: code block (0.40) should route to light model")
 	}
@@ -364,7 +373,7 @@ func TestRouter_CustomClassifier_LowScore_SelectsLight(t *testing.T) {
 		RouterConfig{LightModel: "light", Threshold: 0.5},
 		&fixedScoreClassifier{score: 0.2},
 	)
-	_, usedLight := r.SelectModel("anything", nil, "heavy")
+	_, usedLight, _ := r.SelectModel("anything", nil, "heavy")
 	if !usedLight {
 		t.Error("low score with custom classifier: expected light model")
 	}
@@ -375,7 +384,7 @@ func TestRouter_CustomClassifier_HighScore_SelectsPrimary(t *testing.T) {
 		RouterConfig{LightModel: "light", Threshold: 0.5},
 		&fixedScoreClassifier{score: 0.8},
 	)
-	_, usedLight := r.SelectModel("anything", nil, "heavy")
+	_, usedLight, _ := r.SelectModel("anything", nil, "heavy")
 	if usedLight {
 		t.Error("high score with custom classifier: expected primary model")
 	}
@@ -387,8 +396,19 @@ func TestRouter_CustomClassifier_ExactThreshold_SelectsPrimary(t *testing.T) {
 		RouterConfig{LightModel: "light", Threshold: 0.5},
 		&fixedScoreClassifier{score: 0.5},
 	)
-	_, usedLight := r.SelectModel("anything", nil, "heavy")
+	_, usedLight, _ := r.SelectModel("anything", nil, "heavy")
 	if usedLight {
 		t.Error("score == threshold: expected primary model (>= threshold → primary)")
 	}
 }
+
+func TestRouter_SelectModel_ReturnsScore(t *testing.T) {
+	r := newWithClassifier(
+		RouterConfig{LightModel: "light", Threshold: 0.5},
+		&fixedScoreClassifier{score: 0.42},
+	)
+	_, _, score := r.SelectModel("anything", nil, "heavy")
+	if score != 0.42 {
+		t.Errorf("score: got %f, want 0.42", score)
+	}
+}