mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
1943c3e660
Add three new files to pkg/routing/:
features.go — ExtractFeatures(msg, history) → Features
Computes five structural dimensions with zero keyword matching:
- TokenEstimate: rune_count/3 (CJK-safe token proxy)
- CodeBlockCount: ``` pairs in the message
- RecentToolCalls: tool call count in the last 6 history entries
- ConversationDepth: total messages in session
- HasAttachments: data URIs or media file extensions
classifier.go — Classifier interface + RuleClassifier
RuleClassifier uses a weighted sum that is capped at 1.0:
code block → +0.40 (triggers heavy model alone at 0.35 threshold)
token > 200 → +0.35 (triggers heavy model alone)
tool calls > 3 → +0.25
tool calls 1-3 → +0.10
token 50-200 → +0.15
conversation depth > 10 → +0.10
attachment → 1.00 (hard gate, always heavy)
router.go — Router wraps config + Classifier
Router.SelectModel(msg, history, primaryModel) returns either the
configured light_model or the primary model depending on whether
the complexity score clears the threshold. Threshold defaults to
0.35 when zero/negative to prevent misconfiguration.
router_test.go — 34 tests covering all branches and edge cases
81 lines
3.0 KiB
Go
81 lines
3.0 KiB
Go
package routing
|
||
|
||
// Classifier evaluates a feature set and returns a complexity score in [0, 1].
|
||
// A higher score indicates a more complex task that benefits from a heavy model.
|
||
// The score is compared against the configured threshold: score >= threshold selects
|
||
// the primary (heavy) model; score < threshold selects the light model.
|
||
//
|
||
// Classifier is an interface so that future implementations (ML-based, embedding-based,
|
||
// or any other approach) can be swapped in without changing routing infrastructure.
|
||
type Classifier interface {
|
||
Score(f Features) float64
|
||
}
|
||
|
||
// RuleClassifier is the v1 implementation.
// It uses a weighted sum of structural signals with no external dependencies
// and no API calls; scoring is a fixed handful of comparisons. The raw sum is
// capped at 1.0 so that the returned score always falls within the [0, 1]
// contract.
//
// Individual weights (multiple signals can fire simultaneously):
//
//	token > 200 (≈600 chars):  0.35 — very long prompts are almost always complex
//	token 50-200:              0.15 — medium length; may or may not be complex
//	code block present:        0.40 — coding tasks need the heavy model
//	tool calls > 3 (recent):   0.25 — dense tool usage signals an agentic workflow
//	tool calls 1-3 (recent):   0.10 — some tool activity
//	conversation depth > 10:   0.10 — long sessions carry implicit complexity
//	attachments present:       1.00 — hard gate; multi-modal always needs heavy model
//
// Default threshold is 0.35, so:
//
//   - Pure greetings / trivial Q&A: 0.00 → light ✓
//   - Medium prose message (50–200 tokens): 0.15 → light ✓
//   - Message with code block: 0.40 → heavy ✓
//   - Long message (>200 tokens): 0.35 → heavy ✓
//   - Light tool activity (1–3 recent calls) + medium message: 0.25 → light (acceptable)
//   - Dense tool usage (>3 recent calls) + medium message: 0.40 → heavy ✓
//   - Any message with an image/audio attachment: 1.00 → heavy ✓
type RuleClassifier struct{}
|
||
|
||
// Score computes the complexity score for the given feature set.
|
||
// The returned value is in [0, 1]. Attachments short-circuit to 1.0.
|
||
func (c *RuleClassifier) Score(f Features) float64 {
|
||
// Hard gate: multi-modal inputs always require the heavy model.
|
||
if f.HasAttachments {
|
||
return 1.0
|
||
}
|
||
|
||
var score float64
|
||
|
||
// Token estimate — primary verbosity signal
|
||
switch {
|
||
case f.TokenEstimate > 200:
|
||
score += 0.35
|
||
case f.TokenEstimate > 50:
|
||
score += 0.15
|
||
}
|
||
|
||
// Fenced code blocks — strongest indicator of a coding/technical task
|
||
if f.CodeBlockCount > 0 {
|
||
score += 0.40
|
||
}
|
||
|
||
// Recent tool call density — indicates an ongoing agentic workflow
|
||
switch {
|
||
case f.RecentToolCalls > 3:
|
||
score += 0.25
|
||
case f.RecentToolCalls > 0:
|
||
score += 0.10
|
||
}
|
||
|
||
// Conversation depth — accumulated context implies compound task
|
||
if f.ConversationDepth > 10 {
|
||
score += 0.10
|
||
}
|
||
|
||
// Cap at 1.0 to honour the [0, 1] contract even when multiple signals fire
|
||
// simultaneously (e.g., long message + code block + tool chain = 1.10 raw).
|
||
if score > 1.0 {
|
||
score = 1.0
|
||
}
|
||
return score
|
||
}
|