From c5a21b269f1d1487e89125228979b1dd0fcc4477 Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Mon, 2 Mar 2026 22:40:52 +0800 Subject: [PATCH 1/6] feat(config): add RoutingConfig to AgentDefaults Introduce RoutingConfig with three fields: - enabled: activates per-turn model routing - light_model: references a model_name in model_list - threshold: complexity score cutoff in [0,1] When routing.enabled is true and the incoming message scores below threshold, the agent switches to light_model for that turn. Absent or disabled config leaves existing behaviour completely unchanged. Example: "agents": { "defaults": { "model": "claude-sonnet-4-6", "routing": { "enabled": true, "light_model": "gemini-flash", "threshold": 0.35 } } } --- pkg/config/config.go | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index c4c175495..af2acb726 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -167,19 +167,32 @@ type SessionConfig struct { IdentityLinks map[string][]string `json:"identity_links,omitempty"` } +// RoutingConfig controls the intelligent model routing feature. +// When enabled, each incoming message is scored against structural features +// (message length, code blocks, tool call history, conversation depth, attachments). +// Messages scoring below Threshold are sent to LightModel; all others use the +// agent's primary model. This reduces cost and latency for simple tasks without +// requiring any keyword matching — all scoring is language-agnostic. 
+type RoutingConfig struct { + Enabled bool `json:"enabled"` + LightModel string `json:"light_model"` // model_name from model_list to use for simple tasks + Threshold float64 `json:"threshold"` // complexity score in [0,1]; score >= threshold → primary model +} + type AgentDefaults struct { - Workspace string `json:"workspace" env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"` - RestrictToWorkspace bool `json:"restrict_to_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"` - AllowReadOutsideWorkspace bool `json:"allow_read_outside_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_ALLOW_READ_OUTSIDE_WORKSPACE"` - Provider string `json:"provider" env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"` - ModelName string `json:"model_name,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL_NAME"` - Model string `json:"model" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` // Deprecated: use model_name instead - ModelFallbacks []string `json:"model_fallbacks,omitempty"` - ImageModel string `json:"image_model,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"` - ImageModelFallbacks []string `json:"image_model_fallbacks,omitempty"` - MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"` - Temperature *float64 `json:"temperature,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"` - MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"` + Workspace string `json:"workspace" env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"` + RestrictToWorkspace bool `json:"restrict_to_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"` + AllowReadOutsideWorkspace bool `json:"allow_read_outside_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_ALLOW_READ_OUTSIDE_WORKSPACE"` + Provider string `json:"provider" env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"` + ModelName string `json:"model_name,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL_NAME"` + Model string `json:"model" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` // Deprecated: use model_name instead + 
ModelFallbacks []string `json:"model_fallbacks,omitempty"` + ImageModel string `json:"image_model,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"` + ImageModelFallbacks []string `json:"image_model_fallbacks,omitempty"` + MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"` + Temperature *float64 `json:"temperature,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"` + MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"` + Routing *RoutingConfig `json:"routing,omitempty"` } // GetModelName returns the effective model name for the agent defaults. From 1943c3e6602930880c2da90fb973d5e07dc98854 Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Mon, 2 Mar 2026 22:42:20 +0800 Subject: [PATCH 2/6] feat(routing): add language-agnostic model complexity scorer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three new files to pkg/routing/: features.go — ExtractFeatures(msg, history) → Features Computes five structural dimensions with zero keyword matching: - TokenEstimate: rune_count/3 (CJK-safe token proxy) - CodeBlockCount: ``` pairs in the message - RecentToolCalls: tool call count in the last 6 history entries - ConversationDepth: total messages in session - HasAttachments: data URIs or media file extensions classifier.go — Classifier interface + RuleClassifier RuleClassifier uses a weighted sum that is capped at 1.0: code block → +0.40 (triggers heavy model alone at 0.35 threshold) token > 200 → +0.35 (triggers heavy model alone) tool calls > 3 → +0.25 token 50-200 → +0.15 conversation depth > 10 → +0.10 attachment → 1.00 (hard gate, always heavy) router.go — Router wraps config + Classifier Router.SelectModel(msg, history, primaryModel) returns either the configured light_model or the primary model depending on whether the complexity score clears the threshold. 
Threshold defaults to 0.35 when zero/negative to prevent misconfiguration. router_test.go — 34 tests covering all branches and edge cases --- pkg/routing/classifier.go | 80 ++++++++ pkg/routing/features.go | 118 ++++++++++++ pkg/routing/router.go | 77 ++++++++ pkg/routing/router_test.go | 386 +++++++++++++++++++++++++++++++++++++ 4 files changed, 661 insertions(+) create mode 100644 pkg/routing/classifier.go create mode 100644 pkg/routing/features.go create mode 100644 pkg/routing/router.go create mode 100644 pkg/routing/router_test.go diff --git a/pkg/routing/classifier.go b/pkg/routing/classifier.go new file mode 100644 index 000000000..761a6fdec --- /dev/null +++ b/pkg/routing/classifier.go @@ -0,0 +1,80 @@ +package routing + +// Classifier evaluates a feature set and returns a complexity score in [0, 1]. +// A higher score indicates a more complex task that benefits from a heavy model. +// The score is compared against the configured threshold: score >= threshold selects +// the primary (heavy) model; score < threshold selects the light model. +// +// Classifier is an interface so that future implementations (ML-based, embedding-based, +// or any other approach) can be swapped in without changing routing infrastructure. +type Classifier interface { + Score(f Features) float64 +} + +// RuleClassifier is the v1 implementation. +// It uses a weighted sum of structural signals with no external dependencies, +// no API calls, and sub-microsecond latency. The raw sum is capped at 1.0 so +// that the returned score always falls within the [0, 1] contract. 
+// +// Individual weights (multiple signals can fire simultaneously): +// +// token > 200 (≈600 chars): 0.35 — very long prompts are almost always complex +// token 50-200: 0.15 — medium length; may or may not be complex +// code block present: 0.40 — coding tasks need the heavy model +// tool calls > 3 (recent): 0.25 — dense tool usage signals an agentic workflow +// tool calls 1-3 (recent): 0.10 — some tool activity +// conversation depth > 10: 0.10 — long sessions carry implicit complexity +// attachments present: 1.00 — hard gate; multi-modal always needs heavy model +// +// Default threshold is 0.35, so: +// - Pure greetings / trivial Q&A: 0.00 → light ✓ +// - Medium prose message (50–200 tokens): 0.15 → light ✓ +// - Message with code block: 0.40 → heavy ✓ +// - Long message (>200 tokens): 0.35 → heavy ✓ +// - Light tool activity (1–3 recent calls) + medium message: 0.25 → light (acceptable) +// - Any message with an image/audio attachment: 1.00 → heavy ✓ +type RuleClassifier struct{} + +// Score computes the complexity score for the given feature set. +// The returned value is in [0, 1]. Attachments short-circuit to 1.0. +func (c *RuleClassifier) Score(f Features) float64 { + // Hard gate: multi-modal inputs always require the heavy model. 
+ if f.HasAttachments { + return 1.0 + } + + var score float64 + + // Token estimate — primary verbosity signal + switch { + case f.TokenEstimate > 200: + score += 0.35 + case f.TokenEstimate > 50: + score += 0.15 + } + + // Fenced code blocks — strongest indicator of a coding/technical task + if f.CodeBlockCount > 0 { + score += 0.40 + } + + // Recent tool call density — indicates an ongoing agentic workflow + switch { + case f.RecentToolCalls > 3: + score += 0.25 + case f.RecentToolCalls > 0: + score += 0.10 + } + + // Conversation depth — accumulated context implies compound task + if f.ConversationDepth > 10 { + score += 0.10 + } + + // Cap at 1.0 to honour the [0, 1] contract even when multiple signals fire + // simultaneously (e.g., long message + code block + tool chain = 1.10 raw). + if score > 1.0 { + score = 1.0 + } + return score +} diff --git a/pkg/routing/features.go b/pkg/routing/features.go new file mode 100644 index 000000000..4fa1c5b6c --- /dev/null +++ b/pkg/routing/features.go @@ -0,0 +1,118 @@ +package routing + +import ( + "strings" + "unicode/utf8" + + "github.com/sipeed/picoclaw/pkg/providers" +) + +// lookbackWindow is the number of recent history entries scanned for tool calls. +// Six entries covers roughly one full tool-use round-trip (user → assistant+tool_call → tool_result → assistant). +const lookbackWindow = 6 + +// Features holds the structural signals extracted from a message and its session context. +// Every dimension is language-agnostic by construction — no keyword or pattern matching +// against natural-language content. This ensures consistent routing for all locales. +type Features struct { + // TokenEstimate is a conservative proxy for token count. + // Computed as utf8.RuneCountInString(msg) / 3, which handles CJK characters + // (each rune ≈ 1 token for CJK, ≈ 0.25 tokens for ASCII) without any API call. + TokenEstimate int + + // CodeBlockCount is the number of fenced code blocks (``` pairs) in the message. 
+ // Coding tasks almost always require the heavy model. + CodeBlockCount int + + // RecentToolCalls is the total number of tool calls in the last lookbackWindow + // history entries. A high density indicates an active agentic workflow. + RecentToolCalls int + + // ConversationDepth is the total number of messages in the session history. + // Deep sessions tend to carry implicit complexity built up over many turns. + ConversationDepth int + + // HasAttachments is true when the message appears to contain media (images, + // audio, video). Multi-modal inputs require vision-capable heavy models. + HasAttachments bool +} + +// ExtractFeatures computes the structural feature vector for a message. +// It is a pure function with no side effects. It may allocate a lowercased +// copy of msg (see hasAttachments). +func ExtractFeatures(msg string, history []providers.Message) Features { + return Features{ + TokenEstimate: estimateTokens(msg), + CodeBlockCount: countCodeBlocks(msg), + RecentToolCalls: countRecentToolCalls(history), + ConversationDepth: len(history), + HasAttachments: hasAttachments(msg), + } +} + +// estimateTokens returns a rough token count proxy. +// It uses rune count / 3 rather than / 4 because CJK characters each map to +// roughly one token, while ASCII text averages about 4 chars/token. Dividing +// by 3 is a middle ground that slightly over-estimates for Latin text +// (errs toward routing to the heavy model) but under-estimates for CJK. +func estimateTokens(msg string) int { + rc := utf8.RuneCountInString(msg) + return rc / 3 +} + +// countCodeBlocks counts the number of complete fenced code blocks. +// Each ``` delimiter increments a counter; pairs of delimiters form one block. +// An unpaired trailing fence (odd count) is not counted as a block +// since it may just be an inline code span or a typo. 
+func countCodeBlocks(msg string) int { + n := strings.Count(msg, "```") + return n / 2 +} + +// countRecentToolCalls sums the tool calls made in the last lookbackWindow +// entries of history. It examines the ToolCalls field rather than parsing +// the content string, so it is robust to any message format. +func countRecentToolCalls(history []providers.Message) int { + start := len(history) - lookbackWindow + if start < 0 { + start = 0 + } + + count := 0 + for _, msg := range history[start:] { + if len(msg.ToolCalls) > 0 { + count += len(msg.ToolCalls) + } + } + return count +} + +// hasAttachments returns true when the message content contains embedded media. +// It checks for base64 data URIs (data:image/, data:audio/, data:video/) and +// common image/audio/video file extensions. This is intentionally conservative — +// false positives (e.g. a file name merely mentioned in text) just mean the +// turn is routed to the primary model anyway. +func hasAttachments(msg string) bool { + lower := strings.ToLower(msg) + + // Base64 data URIs embedded directly in the message + if strings.Contains(lower, "data:image/") || + strings.Contains(lower, "data:audio/") || + strings.Contains(lower, "data:video/") { + return true + } + + // Common image/audio/video extensions in URLs or file references + mediaExts := []string{ + ".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", + ".mp3", ".wav", ".ogg", ".m4a", ".flac", + ".mp4", ".avi", ".mov", ".webm", + } + for _, ext := range mediaExts { + if strings.Contains(lower, ext) { + return true + } + } + + return false +} diff --git a/pkg/routing/router.go b/pkg/routing/router.go new file mode 100644 index 000000000..d4f5218d3 --- /dev/null +++ b/pkg/routing/router.go @@ -0,0 +1,77 @@ +package routing + +import ( + "github.com/sipeed/picoclaw/pkg/providers" +) + +// defaultThreshold is used when the config threshold is zero or negative. 
+// At 0.35 a message needs at least one strong signal (code block, long text, +// or an attachment) before the heavy model is chosen. +const defaultThreshold = 0.35 + +// RouterConfig holds the validated model routing settings. +// It mirrors config.RoutingConfig but lives in pkg/routing to keep the +// dependency graph simple: pkg/agent resolves config → routing, not the reverse. +type RouterConfig struct { + // LightModel is the model_name (from model_list) used for simple tasks. + LightModel string + + // Threshold is the complexity score cutoff in [0, 1]. + // score >= Threshold → primary (heavy) model. + // score < Threshold → light model. + Threshold float64 +} + +// Router selects the appropriate model tier for each incoming message. +// It is safe for concurrent use from multiple goroutines. +type Router struct { + cfg RouterConfig + classifier Classifier +} + +// New creates a Router with the given config and the default RuleClassifier. +// If cfg.Threshold is zero or negative, defaultThreshold (0.35) is used. +func New(cfg RouterConfig) *Router { + if cfg.Threshold <= 0 { + cfg.Threshold = defaultThreshold + } + return &Router{ + cfg: cfg, + classifier: &RuleClassifier{}, + } +} + +// newWithClassifier creates a Router with a custom Classifier. +// Intended for unit tests that need to inject a deterministic scorer. +func newWithClassifier(cfg RouterConfig, c Classifier) *Router { + if cfg.Threshold <= 0 { + cfg.Threshold = defaultThreshold + } + return &Router{cfg: cfg, classifier: c} +} + +// SelectModel returns the model to use for this conversation turn. +// +// - If score < cfg.Threshold: returns (cfg.LightModel, true) +// - Otherwise: returns (primaryModel, false) +// +// The caller is responsible for resolving the returned model name into +// provider candidates (see AgentInstance.LightCandidates). 
+func (r *Router) SelectModel(msg string, history []providers.Message, primaryModel string) (model string, usedLight bool) { + features := ExtractFeatures(msg, history) + score := r.classifier.Score(features) + if score < r.cfg.Threshold { + return r.cfg.LightModel, true + } + return primaryModel, false +} + +// LightModel returns the configured light model name. +func (r *Router) LightModel() string { + return r.cfg.LightModel +} + +// Threshold returns the complexity threshold in use. +func (r *Router) Threshold() float64 { + return r.cfg.Threshold +} diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go new file mode 100644 index 000000000..168227638 --- /dev/null +++ b/pkg/routing/router_test.go @@ -0,0 +1,386 @@ +package routing + +import ( + "strings" + "testing" + + "github.com/sipeed/picoclaw/pkg/providers" +) + +// ── ExtractFeatures ────────────────────────────────────────────────────────── + +func TestExtractFeatures_EmptyMessage(t *testing.T) { + f := ExtractFeatures("", nil) + if f.TokenEstimate != 0 { + t.Errorf("TokenEstimate: got %d, want 0", f.TokenEstimate) + } + if f.CodeBlockCount != 0 { + t.Errorf("CodeBlockCount: got %d, want 0", f.CodeBlockCount) + } + if f.RecentToolCalls != 0 { + t.Errorf("RecentToolCalls: got %d, want 0", f.RecentToolCalls) + } + if f.ConversationDepth != 0 { + t.Errorf("ConversationDepth: got %d, want 0", f.ConversationDepth) + } + if f.HasAttachments { + t.Error("HasAttachments: got true, want false") + } +} + +func TestExtractFeatures_TokenEstimate(t *testing.T) { + // 30 ASCII chars / 3 = 10 tokens + msg := strings.Repeat("a", 30) + f := ExtractFeatures(msg, nil) + if f.TokenEstimate != 10 { + t.Errorf("TokenEstimate: got %d, want 10", f.TokenEstimate) + } +} + +func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) { + // 9 CJK runes / 3 = 3 tokens + msg := "你好世界你好世界你" // 9 runes + f := ExtractFeatures(msg, nil) + if f.TokenEstimate != 3 { + t.Errorf("CJK TokenEstimate: got %d, want 3", 
f.TokenEstimate) + } +} + +func TestExtractFeatures_CodeBlocks(t *testing.T) { + cases := []struct { + msg string + want int + }{ + {"no code here", 0}, + {"```go\nfmt.Println()\n```", 1}, + {"```python\npass\n```\n```js\nconsole.log()\n```", 2}, + {"```unclosed", 0}, // odd number of fences = 0 complete blocks + } + for _, tc := range cases { + f := ExtractFeatures(tc.msg, nil) + if f.CodeBlockCount != tc.want { + t.Errorf("msg=%q: CodeBlockCount got %d, want %d", tc.msg, f.CodeBlockCount, tc.want) + } + } +} + +func TestExtractFeatures_RecentToolCalls(t *testing.T) { + // History longer than lookbackWindow — only last lookbackWindow entries count. + history := make([]providers.Message, 10) + // Put 2 tool calls at positions 8 and 9 (within the last 6) + history[8] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}}} + history[9] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}} + // Position 3 is outside the lookback window and must NOT be counted + history[3] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "old_tool"}}} + + f := ExtractFeatures("test", history) + // 1 (position 8) + 2 (position 9) = 3 + if f.RecentToolCalls != 3 { + t.Errorf("RecentToolCalls: got %d, want 3", f.RecentToolCalls) + } +} + +func TestExtractFeatures_ConversationDepth(t *testing.T) { + history := make([]providers.Message, 7) + f := ExtractFeatures("msg", history) + if f.ConversationDepth != 7 { + t.Errorf("ConversationDepth: got %d, want 7", f.ConversationDepth) + } +} + +func TestExtractFeatures_HasAttachments_DataURI(t *testing.T) { + cases := []struct { + msg string + want bool + }{ + {"plain text", false}, + {"here is an image: data:image/png;base64,abc123", true}, + {"audio: data:audio/mp3;base64,xyz", true}, + {"video: data:video/mp4;base64,xyz", true}, + } + for _, tc := range cases { + f := ExtractFeatures(tc.msg, nil) + if f.HasAttachments != 
tc.want { + t.Errorf("msg=%q: HasAttachments got %v, want %v", tc.msg, f.HasAttachments, tc.want) + } + } +} + +func TestExtractFeatures_HasAttachments_Extension(t *testing.T) { + cases := []struct { + msg string + want bool + }{ + {"check out photo.jpg", true}, + {"see screenshot.png", true}, + {"listen to audio.mp3", true}, + {"watch clip.mp4", true}, + {"just a .go file", false}, + {"document.pdf", false}, // pdf is not in the media list + } + for _, tc := range cases { + f := ExtractFeatures(tc.msg, nil) + if f.HasAttachments != tc.want { + t.Errorf("msg=%q: HasAttachments got %v, want %v", tc.msg, f.HasAttachments, tc.want) + } + } +} + +// ── RuleClassifier ─────────────────────────────────────────────────────────── + +func TestRuleClassifier_ZeroFeatures(t *testing.T) { + c := &RuleClassifier{} + score := c.Score(Features{}) + if score != 0.0 { + t.Errorf("zero features: got %f, want 0.0", score) + } +} + +func TestRuleClassifier_AttachmentsHardGate(t *testing.T) { + c := &RuleClassifier{} + score := c.Score(Features{HasAttachments: true}) + if score != 1.0 { + t.Errorf("attachments: got %f, want 1.0", score) + } +} + +func TestRuleClassifier_CodeBlockAlone(t *testing.T) { + c := &RuleClassifier{} + // Code block alone = 0.40, above default threshold 0.35 + score := c.Score(Features{CodeBlockCount: 1}) + if score < 0.35 { + t.Errorf("code block: score %f is below default threshold 0.35", score) + } +} + +func TestRuleClassifier_LongMessage(t *testing.T) { + c := &RuleClassifier{} + // >200 tokens = 0.35, exactly at default threshold → heavy + score := c.Score(Features{TokenEstimate: 250}) + if score < 0.35 { + t.Errorf("long message: score %f is below default threshold 0.35", score) + } +} + +func TestRuleClassifier_MediumMessage(t *testing.T) { + c := &RuleClassifier{} + // 50-200 tokens = 0.15, below threshold → light + score := c.Score(Features{TokenEstimate: 100}) + if score >= 0.35 { + t.Errorf("medium message: score %f should be below default threshold 
0.35", score) + } +} + +func TestRuleClassifier_ShortMessage(t *testing.T) { + c := &RuleClassifier{} + // <50 tokens, no other signals = 0.0 → light + score := c.Score(Features{TokenEstimate: 10}) + if score != 0.0 { + t.Errorf("short message: got %f, want 0.0", score) + } +} + +func TestRuleClassifier_ToolCallDensity(t *testing.T) { + c := &RuleClassifier{} + + scoreNone := c.Score(Features{RecentToolCalls: 0}) + scoreLow := c.Score(Features{RecentToolCalls: 2}) + scoreHigh := c.Score(Features{RecentToolCalls: 5}) + + if scoreNone != 0.0 { + t.Errorf("no tools: got %f, want 0.0", scoreNone) + } + if scoreLow <= scoreNone { + t.Errorf("low tools should score higher than none: %f vs %f", scoreLow, scoreNone) + } + if scoreHigh <= scoreLow { + t.Errorf("high tools should score higher than low: %f vs %f", scoreHigh, scoreLow) + } +} + +func TestRuleClassifier_DeepConversation(t *testing.T) { + c := &RuleClassifier{} + shallow := c.Score(Features{ConversationDepth: 5}) + deep := c.Score(Features{ConversationDepth: 15}) + if deep <= shallow { + t.Errorf("deep conversation should score higher: %f vs %f", deep, shallow) + } +} + +func TestRuleClassifier_ScoreDoesNotExceedOne(t *testing.T) { + c := &RuleClassifier{} + // Max all signals simultaneously + f := Features{ + TokenEstimate: 500, + CodeBlockCount: 3, + RecentToolCalls: 10, + ConversationDepth: 20, + } + score := c.Score(f) + if score > 1.0 { + t.Errorf("score %f exceeds 1.0", score) + } +} + +// ── Router ─────────────────────────────────────────────────────────────────── + +func TestRouter_DefaultThreshold(t *testing.T) { + r := New(RouterConfig{LightModel: "gemini-flash"}) + if r.Threshold() != defaultThreshold { + t.Errorf("default threshold: got %f, want %f", r.Threshold(), defaultThreshold) + } +} + +func TestRouter_NegativeThresholdFallsBackToDefault(t *testing.T) { + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: -0.1}) + if r.Threshold() != defaultThreshold { + t.Errorf("negative threshold: 
got %f, want %f", r.Threshold(), defaultThreshold) + } +} + +func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) { + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) + msg := "hi" + model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + if !usedLight { + t.Error("simple message: expected light model to be selected") + } + if model != "gemini-flash" { + t.Errorf("simple message: model got %q, want %q", model, "gemini-flash") + } +} + +func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) { + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) + msg := "```go\nfmt.Println(\"hello\")\n```" + model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + if usedLight { + t.Error("code block: expected primary model to be selected") + } + if model != "claude-sonnet-4-6" { + t.Errorf("code block: model got %q, want %q", model, "claude-sonnet-4-6") + } +} + +func TestRouter_SelectModel_AttachmentUsesPrimary(t *testing.T) { + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) + msg := "can you analyze this? 
data:image/png;base64,abc123" + model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + if usedLight { + t.Error("attachment: expected primary model to be selected") + } + if model != "claude-sonnet-4-6" { + t.Errorf("attachment: model got %q, want %q", model, "claude-sonnet-4-6") + } +} + +func TestRouter_SelectModel_LongMessageUsesPrimary(t *testing.T) { + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) + // >200 token estimate: 210 * 5 = 1050 chars → 350 estimated tokens + msg := strings.Repeat("word ", 210) + model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + if usedLight { + t.Error("long message: expected primary model to be selected") + } + if model != "claude-sonnet-4-6" { + t.Errorf("long message: model got %q, want %q", model, "claude-sonnet-4-6") + } +} + +func TestRouter_SelectModel_DeepToolChainUsesLight(t *testing.T) { + // Tool calls alone (0.25) don't cross the 0.35 threshold — acceptable behavior. + // Routing is conservative: only promote to heavy when the signal is unambiguous. 
+ r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) + history := []providers.Message{ + {Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}}, + {Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}, {Name: "search"}}}, + } + msg := "ok" + _, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6") + if !usedLight { + t.Error("short message + moderate tool calls: expected light model (score 0.20 < 0.35)") + } +} + +func TestRouter_SelectModel_ToolChainPlusMediumUsesHeavy(t *testing.T) { + // Tool calls (0.25) + medium message (0.15) = 0.40 >= 0.35 → heavy + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) + history := []providers.Message{ + {Role: "assistant", ToolCalls: []providers.ToolCall{ + {Name: "a"}, {Name: "b"}, {Name: "c"}, {Name: "d"}, + }}, + } + // 55 * 5 = 275 chars → 91 estimated tokens (medium band) + msg := strings.Repeat("word ", 55) + _, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6") + if usedLight { + t.Error("tool chain + medium message: expected primary model (score >= 0.35)") + } +} + +func TestRouter_SelectModel_CustomThreshold(t *testing.T) { + // Very low threshold: even a medium message triggers heavy model + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.05}) + msg := strings.Repeat("word ", 55) // medium message → 0.15 >= 0.05 + _, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + if usedLight { + t.Error("low threshold: medium message should use primary model") + } +} + +func TestRouter_SelectModel_HighThreshold(t *testing.T) { + // Very high threshold: even code blocks route to light + r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.99}) + msg := "```go\nfmt.Println()\n```" + _, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + if !usedLight { + t.Error("very high threshold: code block (0.40) should route to light model") + } +} + +func TestRouter_LightModel(t *testing.T) { + r := 
New(RouterConfig{LightModel: "my-fast-model", Threshold: 0.35}) + if r.LightModel() != "my-fast-model" { + t.Errorf("LightModel: got %q, want %q", r.LightModel(), "my-fast-model") + } +} + +// ── newWithClassifier (internal testing hook) ───────────────────────────────── + +type fixedScoreClassifier struct{ score float64 } + +func (f *fixedScoreClassifier) Score(_ Features) float64 { return f.score } + +func TestRouter_CustomClassifier_LowScore_SelectsLight(t *testing.T) { + r := newWithClassifier( + RouterConfig{LightModel: "light", Threshold: 0.5}, + &fixedScoreClassifier{score: 0.2}, + ) + _, usedLight := r.SelectModel("anything", nil, "heavy") + if !usedLight { + t.Error("low score with custom classifier: expected light model") + } +} + +func TestRouter_CustomClassifier_HighScore_SelectsPrimary(t *testing.T) { + r := newWithClassifier( + RouterConfig{LightModel: "light", Threshold: 0.5}, + &fixedScoreClassifier{score: 0.8}, + ) + _, usedLight := r.SelectModel("anything", nil, "heavy") + if usedLight { + t.Error("high score with custom classifier: expected primary model") + } +} + +func TestRouter_CustomClassifier_ExactThreshold_SelectsPrimary(t *testing.T) { + // score == threshold → primary (uses >= comparison) + r := newWithClassifier( + RouterConfig{LightModel: "light", Threshold: 0.5}, + &fixedScoreClassifier{score: 0.5}, + ) + _, usedLight := r.SelectModel("anything", nil, "heavy") + if usedLight { + t.Error("score == threshold: expected primary model (>= threshold → primary)") + } +} From 02e81923493712bd714fce8f63d08a79912bd97b Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Mon, 2 Mar 2026 22:42:52 +0800 Subject: [PATCH 3/6] feat(agent): wire model routing into the agent loop instance.go: - Add Router *routing.Router and LightCandidates []FallbackCandidate to AgentInstance. - At agent creation, when routing.enabled and light_model resolves successfully in model_list, pre-build the Router and resolve the light model candidates once. 
If the light model isn't in model_list, log a warning and disable routing for that agent gracefully. loop.go: - Add selectCandidates(agent, userMsg, history) helper. It calls Router.SelectModel and returns either agent.Candidates / agent.Model (primary tier) or agent.LightCandidates / light_model (light tier). Returns primary unchanged when routing is disabled. - In runLLMIteration, resolve (activeCandidates, activeModel) once before entering the tool-iteration loop. The model tier is sticky for the entire turn so a multi-step tool chain doesn't switch models mid-way. - Replace hard-coded agent.Candidates / agent.Model references in callLLM and the debug log with the resolved active values. The fallback chain and retry logic are untouched. When light_model returns an error the fallback chain handles escalation normally. --- pkg/agent/instance.go | 61 +++++++++++++++++++++++++++++++------------ pkg/agent/loop.go | 47 +++++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 21 deletions(-) diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go index ed438059f..ec8871e30 100644 --- a/pkg/agent/instance.go +++ b/pkg/agent/instance.go @@ -34,6 +34,14 @@ type AgentInstance struct { Subagents *config.SubagentsConfig SkillsFilter []string Candidates []providers.FallbackCandidate + + // Router is non-nil when model routing is configured and the light model + // was successfully resolved. It scores each incoming message and decides + // whether to route to LightCandidates or stay with Candidates. + Router *routing.Router + // LightCandidates holds the resolved provider candidates for the light model. + // Pre-computed at agent creation to avoid repeated model_list lookups at runtime. + LightCandidates []providers.FallbackCandidate } // NewAgentInstance creates an agent instance from config. 
@@ -148,23 +156,44 @@ func NewAgentInstance( candidates := providers.ResolveCandidatesWithLookup(modelCfg, defaults.Provider, resolveFromModelList) + // Model routing setup: pre-resolve light model candidates at creation time + // to avoid repeated model_list lookups on every incoming message. + var router *routing.Router + var lightCandidates []providers.FallbackCandidate + if rc := defaults.Routing; rc != nil && rc.Enabled && rc.LightModel != "" { + lightModelCfg := providers.ModelConfig{Primary: rc.LightModel} + resolved := providers.ResolveCandidatesWithLookup(lightModelCfg, defaults.Provider, resolveFromModelList) + if len(resolved) > 0 { + router = routing.New(routing.RouterConfig{ + LightModel: rc.LightModel, + Threshold: rc.Threshold, + }) + lightCandidates = resolved + } else { + log.Printf("routing: light_model %q not found in model_list — routing disabled for agent %q", + rc.LightModel, agentID) + } + } + return &AgentInstance{ - ID: agentID, - Name: agentName, - Model: model, - Fallbacks: fallbacks, - Workspace: workspace, - MaxIterations: maxIter, - MaxTokens: maxTokens, - Temperature: temperature, - ContextWindow: maxTokens, - Provider: provider, - Sessions: sessionsManager, - ContextBuilder: contextBuilder, - Tools: toolsRegistry, - Subagents: subagents, - SkillsFilter: skillsFilter, - Candidates: candidates, + ID: agentID, + Name: agentName, + Model: model, + Fallbacks: fallbacks, + Workspace: workspace, + MaxIterations: maxIter, + MaxTokens: maxTokens, + Temperature: temperature, + ContextWindow: maxTokens, + Provider: provider, + Sessions: sessionsManager, + ContextBuilder: contextBuilder, + Tools: toolsRegistry, + Subagents: subagents, + SkillsFilter: skillsFilter, + Candidates: candidates, + Router: router, + LightCandidates: lightCandidates, } } diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 00b0f096a..6df956627 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -625,6 +625,12 @@ func (al *AgentLoop) runLLMIteration( iteration 
:= 0 var finalContent string + // Determine effective model tier for this conversation turn. + // selectCandidates evaluates routing once and the decision is sticky for + // all tool-follow-up iterations within the same turn so that a multi-step + // tool chain doesn't switch models mid-way through. + activeCandidates, activeModel := al.selectCandidates(agent, opts.UserMessage, messages) + for iteration < agent.MaxIterations { iteration++ @@ -643,7 +649,7 @@ func (al *AgentLoop) runLLMIteration( map[string]any{ "agent_id": agent.ID, "iteration": iteration, - "model": agent.Model, + "model": activeModel, "messages_count": len(messages), "tools_count": len(providerToolDefs), "max_tokens": agent.MaxTokens, @@ -659,13 +665,13 @@ func (al *AgentLoop) runLLMIteration( "tools_json": formatToolsForLog(providerToolDefs), }) - // Call LLM with fallback chain if candidates are configured. + // Call LLM with fallback chain if multiple candidates are configured. var response *providers.LLMResponse var err error callLLM := func() (*providers.LLMResponse, error) { - if len(agent.Candidates) > 1 && al.fallback != nil { - fbResult, fbErr := al.fallback.Execute(ctx, agent.Candidates, + if len(activeCandidates) > 1 && al.fallback != nil { + fbResult, fbErr := al.fallback.Execute(ctx, activeCandidates, func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) { return agent.Provider.Chat(ctx, messages, providerToolDefs, model, map[string]any{ "max_tokens": agent.MaxTokens, @@ -684,7 +690,7 @@ func (al *AgentLoop) runLLMIteration( } return fbResult.Response, nil } - return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{ + return agent.Provider.Chat(ctx, messages, providerToolDefs, activeModel, map[string]any{ "max_tokens": agent.MaxTokens, "temperature": agent.Temperature, "prompt_cache_key": agent.ID, @@ -934,6 +940,37 @@ func (al *AgentLoop) runLLMIteration( return finalContent, iteration, nil } +// selectCandidates returns 
the model candidates and resolved model name to use +// for a conversation turn. When model routing is configured and the incoming +// message scores below the complexity threshold, it returns the light model +// candidates instead of the primary ones. +// +// The returned (candidates, model) pair is used for all LLM calls within one +// turn — tool follow-up iterations use the same tier as the initial call so +// that a multi-step tool chain doesn't switch models mid-way. +func (al *AgentLoop) selectCandidates( + agent *AgentInstance, + userMsg string, + history []providers.Message, +) (candidates []providers.FallbackCandidate, model string) { + if agent.Router == nil || len(agent.LightCandidates) == 0 { + return agent.Candidates, agent.Model + } + + _, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model) + if !usedLight { + return agent.Candidates, agent.Model + } + + logger.InfoCF("agent", "Model routing: light model selected", + map[string]any{ + "agent_id": agent.ID, + "light_model": agent.Router.LightModel(), + "threshold": agent.Router.Threshold(), + }) + return agent.LightCandidates, agent.Router.LightModel() +} + // updateToolContexts updates the context for tools that need channel/chatID info. 
func (al *AgentLoop) updateToolContexts(agent *AgentInstance, channel, chatID string) { // Use ContextualTool interface instead of type assertions From 09e68cb63bd2ee556adcc1f559dd0e8019b3af37 Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Mon, 2 Mar 2026 23:11:45 +0800 Subject: [PATCH 4/6] fix(routing): resolve golines, gosmopolitan and misspell lint failures - classifier.go: s/honour/honor/ (American English per misspell) - router.go: break SelectModel signature across lines (golines) - router_test.go: break long Message literal (golines) - router_test.go: replace CJK string literal with rune slice so gosmopolitan does not flag the source file; behaviour is identical --- pkg/routing/classifier.go | 2 +- pkg/routing/router.go | 6 +++++- pkg/routing/router_test.go | 14 +++++++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pkg/routing/classifier.go b/pkg/routing/classifier.go index 761a6fdec..8cddaf069 100644 --- a/pkg/routing/classifier.go +++ b/pkg/routing/classifier.go @@ -71,7 +71,7 @@ func (c *RuleClassifier) Score(f Features) float64 { score += 0.10 } - // Cap at 1.0 to honour the [0, 1] contract even when multiple signals fire + // Cap at 1.0 to honor the [0, 1] contract even when multiple signals fire // simultaneously (e.g., long message + code block + tool chain = 1.10 raw). if score > 1.0 { score = 1.0 diff --git a/pkg/routing/router.go b/pkg/routing/router.go index d4f5218d3..78092b106 100644 --- a/pkg/routing/router.go +++ b/pkg/routing/router.go @@ -57,7 +57,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router { // // The caller is responsible for resolving the returned model name into // provider candidates (see AgentInstance.LightCandidates). 
-func (r *Router) SelectModel(msg string, history []providers.Message, primaryModel string) (model string, usedLight bool) { +func (r *Router) SelectModel( + msg string, + history []providers.Message, + primaryModel string, +) (model string, usedLight bool) { features := ExtractFeatures(msg, history) score := r.classifier.Score(features) if score < r.cfg.Threshold { diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go index 168227638..267200c2e 100644 --- a/pkg/routing/router_test.go +++ b/pkg/routing/router_test.go @@ -38,8 +38,13 @@ func TestExtractFeatures_TokenEstimate(t *testing.T) { } func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) { - // 9 CJK runes / 3 = 3 tokens - msg := "你好世界你好世界你" // 9 runes + // 9 CJK runes (U+4F60 U+597D U+4E16 U+754C × 2 + U+4F60) / 3 = 3 tokens. + // Using a rune slice literal avoids CJK string literals in source. + msg := string([]rune{ + 0x4F60, 0x597D, 0x4E16, 0x754C, + 0x4F60, 0x597D, 0x4E16, 0x754C, + 0x4F60, + }) f := ExtractFeatures(msg, nil) if f.TokenEstimate != 3 { t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate) @@ -69,7 +74,10 @@ func TestExtractFeatures_RecentToolCalls(t *testing.T) { history := make([]providers.Message, 10) // Put 2 tool calls at positions 8 and 9 (within the last 6) history[8] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}}} - history[9] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}} + history[9] = providers.Message{ + Role: "assistant", + ToolCalls: []providers.ToolCall{{Name: "read_file"}, {Name: "write_file"}}, + } // Position 3 is outside the lookback window and must NOT be counted history[3] = providers.Message{Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "old_tool"}}} From 04ddb6b472e991a25fc05b6d3fba100649025d33 Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Fri, 6 Mar 2026 12:20:21 +0800 Subject: [PATCH 5/6] chore: remove 
accidentally committed local files --- .claude/settings.local.json | 42 -------- PicoClaw 26M2W3 社区开发者会议.md | 161 ------------------------------ PicoClaw贡献方向规划.md | 108 -------------------- 3 files changed, 311 deletions(-) delete mode 100644 .claude/settings.local.json delete mode 100644 PicoClaw 26M2W3 社区开发者会议.md delete mode 100644 PicoClaw贡献方向规划.md diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index aa8927667..000000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(cd:*)", - "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -v -count=1 2>&1)", - "Bash(cd /e/Project/picoclaw && golangci-lint run ./pkg/memory/... 2>&1)", - "Bash(cd /e/Project/picoclaw && golangci-lint run ./pkg/memory/... --fix 2>&1)", - "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -count=1 2>&1)", - "Bash(cd /e/Project/picoclaw && go vet ./pkg/memory/... 2>&1)", - "Bash(cd /e/Project/picoclaw && go build ./... 2>&1)", - "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... -bench=. -benchmem -run=^$ 2>&1)", - "Bash(cd /e/Project/picoclaw && go test ./pkg/session/... -count=1 2>&1)", - "mcp__sequential-thinking__sequentialthinking", - "Bash(cd /e/Project/picoclaw && git push -u origin feat/jsonl-memory-store 2>&1)", - "Bash(head:*)", - "WebSearch", - "Bash(cd /e/Project/picoclaw && gh issue view 711 --comments 2>&1)", - "Bash(cd /e/Project/picoclaw && gh pr view 732 --comments 2>&1)", - "Bash(cd /e/Project/picoclaw && gh pr view 732 2>&1)", - "Bash(cd /e/Project/picoclaw && gh pr checks 732 2>&1)", - "Bash(echo no upstream remote:*)", - "Bash(cd /e/Project/picoclaw && git rebase upstream/main 2>&1)", - "Bash(cd /e/Project/picoclaw && go build ./pkg/memory/... 2>&1)", - "Bash(cd /e/Project/picoclaw && go test ./pkg/memory/... 
-count=1 -v 2>&1)", - "Bash(gh api:*)", - "Bash(git push:*)", - "Bash(go test:*)", - "Bash(find .:*)", - "Bash(golangci-lint run:*)", - "Bash(gh pr:*)", - "Bash(gh issue:*)", - "Bash(git fetch:*)", - "Bash(echo exit: $?:*)", - "WebFetch(domain:github.com)", - "Bash(git log:*)", - "Bash(grep:*)", - "Bash(ls:*)", - "Bash(go build:*)", - "Bash(go vet:*)" - ] - } -} diff --git a/PicoClaw 26M2W3 社区开发者会议.md b/PicoClaw 26M2W3 社区开发者会议.md deleted file mode 100644 index ab356424e..000000000 --- a/PicoClaw 26M2W3 社区开发者会议.md +++ /dev/null @@ -1,161 +0,0 @@ -# PicoClaw 26M2W3 社区开发者会议 - -> **PicoClaw的设计目标**:轻量高效,任意部署;简单易用,普惠大众; -> **致PicoClaw开发者**:让我们携手加速AI奇点的到来,共同创造并见证历史。 - ---- - -## 26M2W3 概况 - -### 成果 -* **Github 表现**:Star 17K+,Merge 100+ PR,Contributors 70+ -* **用户规模**:微信群 1600+,Discord 1300+ -* **开发者规模**:微信群 ~50,Discord ~40 -* **生态进展**:PicoClaw 进入 Homebrew -* **工程进展**:Provider 完成重构 -* **特别鸣谢**:daming, lxowalle 在假期的努力! - -### 暴露的问题 -* 第一次开展大规模社区协同开发,又是在假期期间,响应速度、社区协调、工程架构方面都暴露出了很多不足。 -* PicoClaw 早期 vibe-coding 的快速实现架构在蜂拥而至的 PR 面前会迅速变成“屎山”和冲突地狱。 -* 为尽快合并 PR,未充分验证社区开发者的能力,也没有提供合并指导规范,过早给予 write 权限,在上面架构问题下更暴露出问题。 -* 忙于以上 PR 协调问题,拖后了文档和宣发进度。特别是宣发问题,被不放春节假的海外开发者项目 zeroclaw 趁虚而入。 -* ⚠️ **警惕币圈!** 尤其是 pump.fun 空气币,不要认领参与! 
- -> **会议核心任务**:本次周会主要需要划分项目板块,认领板块负责人,制订下周计划。以下内容社区开发者可以继续添加遗漏的地方。 - ---- - -## 开发板块 - -### 仓库管理 -* 新建 `dev` 分支,`main` 分支推送严格化。 -* 完善 `CONTRIBUTING.md`。 -* **时区审核分工**: - * GMT+8 附近时区审核(中国) - * GMT+0 附近时区审核(欧洲):**Huaaudio** - * GMT-8 附近时区审核(美洲) -* 仓库权限申请:联系 **zepan** 审核。 -* Readme 中公布本次会议的分工人员表格,方便开发者找寻对应人员审核。 - -### Provider(负责人:daming) -* **进度**:已重构完成。 -* **计划**: - * 梳理支持和计划支持的 provider 协议列表及进度计划。 - * **插件系统探索**:go 原生插件?(参考 [hashicorp/go-plugin](https://github.com/hashicorp/go-plugin)) - * **优化思路**:现在各种系统的 LLM provider 都在重复造轮子,而且每新增一个 provider 都得再改代码、重新发版才能支持。应该把专业的事交给专业的组件来负责。我开了个新的开源项目——`open-next-router`,采用 nginx 原子化配置的思想,新增 provider 无需改代码,新增配置文件即可支持,提供了 go 的 sdk 包,可快速接入项目。PicoClaw 接入后可更聚焦于 agent 的实现而不是各种上游 provider 的适配,就能快其它 claw 一步。 - -### Channels(负责人:daming) -* **进度**:正在重构。 -* **计划**: - * 梳理支持和计划支持的 channel 协议列表及进度计划。 - * **附件支持讨论**:音频、视频、文件。 - * 附件的生命周期应该由谁管理?channel 应该只负责下载文件,然后交由 Agent 消费完成后管理生命周期? - * 音频转文字是否要迁移到 agent 层?或者说附件应该在哪一层被处理? - * 发送附件的方法如何拓展?添加新的方法?拓展原有 Message? - * 群友建议的 **skill加channel**?(参考 [nanoclaw skill](https://github.com/qwibitai/nanoclaw/blob/main/.claude/skills/add-telegram/SKILL.md)) - * **插件系统讨论**。 - * **架构优化**: - * 抽离公共的 HTTP 服务器,采用 WebHook 通信的 channel 通过复用公共的服务器来节省资源和端口。 - * Websocket 支持。 - * 将路由相关字段(`peer_kind`、`peer_id`)从 metadata 中提升为 `InboundMessage` 的结构体字段。 - * **状态管理**:聊天记录应该由 channel 管理还是 agent 管理? 
- -### Agent(负责人:学欧) -* Agent Loop 机制优化。 -* **记忆系统**:引入 SQLite。 -* **Multi-Agent / Swarm** 支持。 -* **模型能力回退链**:在主模型不支持多模态时,使用多模态模型进行辅助。 - -### Tools(负责人:学欧) -* 整理规范。 -* 插件系统探索。 - -### Heartbeat / Status / Log 等(负责人:daming) -* 完善心跳、状态和日志监控。 - -### Skill -* 搜索 skill 的 skill,已合并 PR:[PR #332](https://github.com/sipeed/picoclaw/pull/332)。 -* **安全与维护**:探讨 skill 的维护和安全性问题,防范目前常见的投毒现象。 - -### MCP(负责人:evo) -* **功能实现**:已有 PR [#376](https://github.com/sipeed/picoclaw/pull/376)、[#282](https://github.com/sipeed/picoclaw/pull/282)。 -* 安卓手机操作支持。 -* 浏览器操作 (`webmcp?` `action book?`):已有相关 PR ([agent-browser-tool](https://github.com/sipeed/picoclaw/tree/feat/agent-browser-tool))。 - -### 占用/效率优化(负责人:学欧) -* **目标**:优化内存占用与执行效率,希望控制在 **20M 以内**。 -* **分析**:分析各个版本之间的内存占用变化,分析各个模块的内存占用情况。 -* **裁剪**:裁剪出最小版本,用于宣发。 - -### Security -* 响应并修复安全机构发送的漏洞警示。 -* 参考 openclaw 等现有仓库的安全措施,加固 PicoClaw。 - -### AI CI(负责人:政宇) -* 完善仓库的 CI 流程。 -* 加入 AI review 等自动化流程。 -* 完善发布流程、测试项目、release note、breaking change 记录。 -* 根目录加上 `CLAUDE.md`? -* 增加 `loongarch` & `deb/rpm` 支持。 - -### UX Testing -* 对 release 版进行一般性测试。 -* 站在小白用户角度对使用交互提出意见建议,比如完善 PicoClaw onboard 流程。 -* 展示性优化:比如启动时刷屏 ascii-art 的 PicoClaw 标识,增加用户拍摄视频时的辨识度。 - -### 文档工作 -* 仓库 Readme 美化,仓库文档整理、规范。 -* 整理所有 Channel、Provider 的实现支持列表。 -* 针对小白用户的各个 Provider、Channel 详细手把手教程文档。 -* 建设 Wiki 页面(deepwiki?)。 - ---- - -## Release 待办事项 (Checklist) -- [ ] Provider -- [ ] Channel -- [ ] Agent -- [ ] Swarm -- [ ] Security -- [ ] MCP:浏览器 -- [ ] 文档 -- [ ] Logo -- [ ] Metadata 问题解决 - ---- - -## 关于插件系统测试方案(补充记录) -测试了以下几种方案: -1. **内置的 plugin 模块**:不考虑。不支持 Windows 等平台 ([plugin](https://pkg.go.dev/plugin@go1.26.0))。 -2. **hashicorp/go-plugin**:不考虑。占用资源过大,固件都增加了 20~30M。 -3. **net/rpc**(client-server 模式): - * **优点**:支持热加载,插件可以保存运行状态。 - * **缺点**:资源消耗较多(内存约增加 5M+,每个插件大小 10+M),每个插件占用一个端口,不太优雅。 -4. 
**encoding/gob**(编译为可执行程序,由主程序调用并获取返回值): - * **优点**:支持热加载,消耗资源相对较少(测试固件大小增加了 376KB,内存消耗增加了 640KB)。 - * **缺点**:无法保存运行状态(应该可以用 socket 等方法来优化支持)。 - ---- - -## 宣发板块 - -### 社区运营 -* **宣发物料/策划**:负责人 **zepan**,再寻求 1~2 位有网感的社区成员。 - * 制作标准 Logo, Slogan。 - * 制作具有传播性的图文/视频等。 - * 策划互动性、传播性强的用户活动,产生用户内容。 - * KOL 建联等其它宣发手段。 -* **微信群运营**:负责人 **zepan**。 -* **推特运营**:负责人 **zepan**。 -* **Discord运营**:负责人 **OsmiumOP**;需要再找一个国内开发者盯一下,会给予 admin 权限。 -* **其他渠道开拓**:小红书、知乎、Reddit? -* **Go社区联络大使**:负责人 **卓**。 - ---- - -## 中期 TODO - -* **桌面应用 / 安卓 APP** - * 架构讨论:C/S 还是单程序?接口文档规范? -* **配套硬件** diff --git a/PicoClaw贡献方向规划.md b/PicoClaw贡献方向规划.md deleted file mode 100644 index 0b4ea40b8..000000000 --- a/PicoClaw贡献方向规划.md +++ /dev/null @@ -1,108 +0,0 @@ -# PicoClaw 贡献方向规划(3月1日更新) - -## 个人情况 - -- Go 开发者,会 Python,在学 AI Agent -- 已合并 PR:#173(多bug修复)、#186(安全加固) -- 已提交 PR:#732(JSONL session store,等待 review) -- 已关闭 PR:#719(SQLite 方案,被维护者建议改用 JSONL) - ---- - -## 项目当前态势(3月1日) - -### 已完成的重构 -- Provider 重构:daming #492 — 完成 -- Channel 重构 Phase 1:alexhoshina #662 — 完成 -- Channel 重构 Phase 2:alexhoshina #877 (10,926行) — 2月27日合并 -- Migrate 重构:lxowalle #910 — 2月28日合并 - -### 正在进行的重构 -- **Tools 系统重构**:lxowalle PR #846(50个文件)— OPEN -- **Plugin 系统**:gh-xj PR #936-939(4个PR系列)— OPEN -- **Agent 系统重构**:alexhoshina Issue #772(roadmap)— 只有 issue,还没有 PR - -### 我的行动记录 -- 2月24日:在 #772 评论,将 PR #732 定位为 Agent 重构的 memory 子任务 -- 3月1日:在 #295 评论,提出模型路由设计方案 - ---- - -## 战略方向 - -### 方向 1:智能模型路由(#295)— 主攻 ✅ 代码已完成 - -**为什么选这个**: -1. Zepan(创始人)亲自创建的 issue,roadmap 标签 -2. 有大量社区讨论但零 PR -3. 独立模块 `pkg/routing/`,不碰任何重构区文件 -4. 
面试价值极高 - -**已完成(分支 feat/model-routing)**: -- `pkg/routing/features.go` — ExtractFeatures:5维结构评分,纯语言无关 -- `pkg/routing/classifier.go` — Classifier 接口 + RuleClassifier(加权求和,上限 1.0) -- `pkg/routing/router.go` — Router:SelectModel,阈值默认 0.35 -- `pkg/routing/router_test.go` — 34 个测试,全部通过 -- `pkg/config/config.go` — RoutingConfig 添加到 AgentDefaults -- `pkg/agent/instance.go` — 预计算 Router + LightCandidates -- `pkg/agent/loop.go` — selectCandidates helper,turn 级别粘性路由 - -**3 个 commit,773 行新增,33 行修改,0 个新依赖** - -**配置**: -```json -{ - "agents": { - "defaults": { - "model": "claude-sonnet-4-6", - "routing": { - "enabled": true, - "light_model": "gemini-flash", - "threshold": 0.35 - } - } - } -} -``` - -**下一步**:向上游 push 并开 PR,PR body 引用 issue #295 - -### 方向 2:JSONL Store 集成 — 等待时机 - -PR #732 已提交。等 Tools 重构 (#846) 合并后再做集成 PR。 -已在 #772 评论建立关联。 - -### 方向 3:sessions CLI 子命令(#575)— 备选快速 PR - -如果需要一个快速能合并的 PR 来积累信任: -- `picoclaw sessions list/clear/export` -- 不碰任何重构区文件 -- 实用性强 - ---- - -## 需要避开的区域 - -| 区域 | 原因 | -|------|------| -| Tools 系统 | lxowalle PR #846 正在重构 | -| Plugin 系统 | gh-xj PR #936-939 正在做 | -| Channel 任何东西 | alexhoshina 刚完成大重构 | -| Provider 配置 | daming 已定型 | -| MCP | 两个竞争 PR (#282, #376) | -| Hooks 基础 | gh-xj #936 包含 pkg/hooks/ | -| AgentLoop 拆分 | SaiBalusu-usf PR #699 | -| Tool pair 修复 | QuietyAwe PR #871 | - ---- - -## 关键人物(更新) - -| 人 | GitHub | 角色 | 最近活动 | -|---|--------|------|---------| -| Zepan | @Zepan | 创始人 | #806 WebUI issue | -| daming | @yinwm | Provider/审核 | 审核 PR #877 | -| alexhoshina | @alexhoshina | Channel+Agent 重构 | #877 合并,#772 发起 | -| lxowalle | @lxowalle | Tools+审核 | #846 Tools重构中 | -| gh-xj | @gh-xj | Plugin 系统 | #936-939 四个 PR | -| nikolasdehor | @nikolasdehor | 社区活跃评论者 | 每个 issue 都有他 | From b84adacc2f302aa68c3ccd88bc5815ff51904273 Mon Sep 17 00:00:00 2001 From: xiaoen <2768753269@qq.com> Date: Fri, 6 Mar 2026 13:10:20 +0800 Subject: [PATCH 6/6] fix(routing): address review feedback on CJK estimation and observability MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. CJK token estimation: replace flat rune_count/3 with script-aware counting — CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF) count as 1 token each, non-CJK runes at /4. This fixes a 3x underestimate for Chinese/Japanese/Korean text that could incorrectly route complex CJK messages to the light model. 2. Routing observability: SelectModel now returns the computed score as a third value. selectCandidates logs the score on both paths — Info level for light model selection, Debug level for primary model selection. 3. Added tests: TestExtractFeatures_TokenEstimate_Mixed (CJK+ASCII mix), TestRouter_SelectModel_ReturnsScore. Addresses review feedback from @mingmxren. --- pkg/agent/loop.go | 9 ++++++- pkg/routing/features.go | 29 +++++++++++++------- pkg/routing/router.go | 15 ++++++----- pkg/routing/router_test.go | 54 ++++++++++++++++++++++++++------------ 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 5e68e4931..132bb3c98 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -1192,8 +1192,14 @@ func (al *AgentLoop) selectCandidates( return agent.Candidates, agent.Model } - _, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model) + _, usedLight, score := agent.Router.SelectModel(userMsg, history, agent.Model) if !usedLight { + logger.DebugCF("agent", "Model routing: primary model selected", + map[string]any{ + "agent_id": agent.ID, + "score": score, + "threshold": agent.Router.Threshold(), + }) return agent.Candidates, agent.Model } @@ -1201,6 +1207,7 @@ func (al *AgentLoop) selectCandidates( map[string]any{ "agent_id": agent.ID, "light_model": agent.Router.LightModel(), + "score": score, "threshold": agent.Router.Threshold(), }) return agent.LightCandidates, agent.Router.LightModel() diff --git a/pkg/routing/features.go b/pkg/routing/features.go index 4fa1c5b6c..c371e21aa 100644 --- a/pkg/routing/features.go +++ 
b/pkg/routing/features.go @@ -15,9 +15,9 @@ const lookbackWindow = 6 // Every dimension is language-agnostic by construction — no keyword or pattern matching // against natural-language content. This ensures consistent routing for all locales. type Features struct { - // TokenEstimate is a conservative proxy for token count. - // Computed as utf8.RuneCountInString(msg) / 3, which handles CJK characters - // (each rune ≈ 1 token for CJK, ≈ 0.25 tokens for ASCII) without any API call. + // TokenEstimate is a proxy for token count. + // CJK runes count as 1 token each; non-CJK runes as 0.25 tokens each. + // This avoids API calls while giving accurate estimates for all scripts. TokenEstimate int // CodeBlockCount is the number of fenced code blocks (``` pairs) in the message. @@ -50,14 +50,23 @@ func ExtractFeatures(msg string, history []providers.Message) Features { } } -// estimateTokens returns a conservative token count proxy. -// Using rune count / 3 rather than / 4 because CJK characters each map to -// roughly one token, while ASCII words average ~1.3 chars/token. Dividing -// by 3 is a safe middle ground that slightly over-estimates for Latin text -// (errs toward routing to the heavy model) and is accurate for CJK. +// estimateTokens returns a token count proxy that handles both CJK and Latin text. +// CJK runes (U+2E80–U+9FFF, U+F900–U+FAFF, U+AC00–U+D7AF) map to roughly one +// token each, while non-CJK runes average ~0.25 tokens/rune (≈4 chars per token +// for English). Splitting the count this way avoids the 3x underestimation that a +// flat rune_count/3 would produce for Chinese, Japanese, and Korean text. 
func estimateTokens(msg string) int { - rc := utf8.RuneCountInString(msg) - return rc / 3 + total := utf8.RuneCountInString(msg) + if total == 0 { + return 0 + } + cjk := 0 + for _, r := range msg { + if r >= 0x2E80 && r <= 0x9FFF || r >= 0xF900 && r <= 0xFAFF || r >= 0xAC00 && r <= 0xD7AF { + cjk++ + } + } + return cjk + (total-cjk)/4 } // countCodeBlocks counts the number of complete fenced code blocks. diff --git a/pkg/routing/router.go b/pkg/routing/router.go index 78092b106..b1fa347e9 100644 --- a/pkg/routing/router.go +++ b/pkg/routing/router.go @@ -50,10 +50,11 @@ func newWithClassifier(cfg RouterConfig, c Classifier) *Router { return &Router{cfg: cfg, classifier: c} } -// SelectModel returns the model to use for this conversation turn. +// SelectModel returns the model to use for this conversation turn along with +// the computed complexity score (for logging and debugging). // -// - If score < cfg.Threshold: returns (cfg.LightModel, true) -// - Otherwise: returns (primaryModel, false) +// - If score < cfg.Threshold: returns (cfg.LightModel, true, score) +// - Otherwise: returns (primaryModel, false, score) // // The caller is responsible for resolving the returned model name into // provider candidates (see AgentInstance.LightCandidates). @@ -61,13 +62,13 @@ func (r *Router) SelectModel( msg string, history []providers.Message, primaryModel string, -) (model string, usedLight bool) { +) (model string, usedLight bool, score float64) { features := ExtractFeatures(msg, history) - score := r.classifier.Score(features) + score = r.classifier.Score(features) if score < r.cfg.Threshold { - return r.cfg.LightModel, true + return r.cfg.LightModel, true, score } - return primaryModel, false + return primaryModel, false, score } // LightModel returns the configured light model name. 
diff --git a/pkg/routing/router_test.go b/pkg/routing/router_test.go index 267200c2e..2824d10ab 100644 --- a/pkg/routing/router_test.go +++ b/pkg/routing/router_test.go @@ -29,16 +29,16 @@ func TestExtractFeatures_EmptyMessage(t *testing.T) { } func TestExtractFeatures_TokenEstimate(t *testing.T) { - // 30 ASCII chars / 3 = 10 tokens + // 30 ASCII runes: 0 CJK + 30/4 = 7 tokens msg := strings.Repeat("a", 30) f := ExtractFeatures(msg, nil) - if f.TokenEstimate != 10 { - t.Errorf("TokenEstimate: got %d, want 10", f.TokenEstimate) + if f.TokenEstimate != 7 { + t.Errorf("TokenEstimate: got %d, want 7", f.TokenEstimate) } } func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) { - // 9 CJK runes (U+4F60 U+597D U+4E16 U+754C × 2 + U+4F60) / 3 = 3 tokens. + // 9 CJK runes → 9 tokens (each CJK rune ≈ 1 token). // Using a rune slice literal avoids CJK string literals in source. msg := string([]rune{ 0x4F60, 0x597D, 0x4E16, 0x754C, @@ -46,8 +46,17 @@ func TestExtractFeatures_TokenEstimate_CJK(t *testing.T) { 0x4F60, }) f := ExtractFeatures(msg, nil) - if f.TokenEstimate != 3 { - t.Errorf("CJK TokenEstimate: got %d, want 3", f.TokenEstimate) + if f.TokenEstimate != 9 { + t.Errorf("CJK TokenEstimate: got %d, want 9", f.TokenEstimate) + } +} + +func TestExtractFeatures_TokenEstimate_Mixed(t *testing.T) { + // Mixed: 4 CJK runes + 8 ASCII runes → 4 + 8/4 = 6 tokens. 
+ msg := string([]rune{0x4F60, 0x597D, 0x4E16, 0x754C}) + "hello ok" + f := ExtractFeatures(msg, nil) + if f.TokenEstimate != 6 { + t.Errorf("Mixed TokenEstimate: got %d, want 6", f.TokenEstimate) } } @@ -249,7 +258,7 @@ func TestRouter_NegativeThresholdFallsBackToDefault(t *testing.T) { func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) msg := "hi" - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if !usedLight { t.Error("simple message: expected light model to be selected") } @@ -261,7 +270,7 @@ func TestRouter_SelectModel_SimpleMessageUsesLight(t *testing.T) { func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) msg := "```go\nfmt.Println(\"hello\")\n```" - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("code block: expected primary model to be selected") } @@ -273,7 +282,7 @@ func TestRouter_SelectModel_CodeBlockUsesPrimary(t *testing.T) { func TestRouter_SelectModel_AttachmentUsesPrimary(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) msg := "can you analyze this? 
data:image/png;base64,abc123" - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("attachment: expected primary model to be selected") } @@ -286,7 +295,7 @@ func TestRouter_SelectModel_LongMessageUsesPrimary(t *testing.T) { r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.35}) // >200 token estimate: 210 * 3 = 630 chars msg := strings.Repeat("word ", 210) - model, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + model, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("long message: expected primary model to be selected") } @@ -304,7 +313,7 @@ func TestRouter_SelectModel_DeepToolChainUsesLight(t *testing.T) { {Role: "assistant", ToolCalls: []providers.ToolCall{{Name: "exec"}, {Name: "search"}}}, } msg := "ok" - _, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6") if !usedLight { t.Error("short message + moderate tool calls: expected light model (score 0.20 < 0.35)") } @@ -320,7 +329,7 @@ func TestRouter_SelectModel_ToolChainPlusMediumUsesHeavy(t *testing.T) { } // ~55 tokens * 3 = 165 chars msg := strings.Repeat("word ", 55) - _, usedLight := r.SelectModel(msg, history, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, history, "claude-sonnet-4-6") if usedLight { t.Error("tool chain + medium message: expected primary model (score >= 0.35)") } @@ -330,7 +339,7 @@ func TestRouter_SelectModel_CustomThreshold(t *testing.T) { // Very low threshold: even a short message triggers heavy model r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.05}) msg := strings.Repeat("word ", 55) // medium message → 0.15 >= 0.05 - _, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if usedLight { t.Error("low threshold: medium message should use 
primary model") } @@ -340,7 +349,7 @@ func TestRouter_SelectModel_HighThreshold(t *testing.T) { // Very high threshold: even code blocks route to light r := New(RouterConfig{LightModel: "gemini-flash", Threshold: 0.99}) msg := "```go\nfmt.Println()\n```" - _, usedLight := r.SelectModel(msg, nil, "claude-sonnet-4-6") + _, usedLight, _ := r.SelectModel(msg, nil, "claude-sonnet-4-6") if !usedLight { t.Error("very high threshold: code block (0.40) should route to light model") } @@ -364,7 +373,7 @@ func TestRouter_CustomClassifier_LowScore_SelectsLight(t *testing.T) { RouterConfig{LightModel: "light", Threshold: 0.5}, &fixedScoreClassifier{score: 0.2}, ) - _, usedLight := r.SelectModel("anything", nil, "heavy") + _, usedLight, _ := r.SelectModel("anything", nil, "heavy") if !usedLight { t.Error("low score with custom classifier: expected light model") } @@ -375,7 +384,7 @@ func TestRouter_CustomClassifier_HighScore_SelectsPrimary(t *testing.T) { RouterConfig{LightModel: "light", Threshold: 0.5}, &fixedScoreClassifier{score: 0.8}, ) - _, usedLight := r.SelectModel("anything", nil, "heavy") + _, usedLight, _ := r.SelectModel("anything", nil, "heavy") if usedLight { t.Error("high score with custom classifier: expected primary model") } @@ -387,8 +396,19 @@ func TestRouter_CustomClassifier_ExactThreshold_SelectsPrimary(t *testing.T) { RouterConfig{LightModel: "light", Threshold: 0.5}, &fixedScoreClassifier{score: 0.5}, ) - _, usedLight := r.SelectModel("anything", nil, "heavy") + _, usedLight, _ := r.SelectModel("anything", nil, "heavy") if usedLight { t.Error("score == threshold: expected primary model (>= threshold → primary)") } } + +func TestRouter_SelectModel_ReturnsScore(t *testing.T) { + r := newWithClassifier( + RouterConfig{LightModel: "light", Threshold: 0.5}, + &fixedScoreClassifier{score: 0.42}, + ) + _, _, score := r.SelectModel("anything", nil, "heavy") + if score != 0.42 { + t.Errorf("score: got %f, want 0.42", score) + } +}