mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
09e68cb63b
- classifier.go: s/honour/honor/ (American English per misspell)
- router.go: break SelectModel signature across lines (golines)
- router_test.go: break long Message literal (golines)
- router_test.go: replace CJK string literal with rune slice so gosmopolitan does not flag the source file; behaviour is identical
81 lines
3.0 KiB
Go
81 lines
3.0 KiB
Go
package routing
|
||
|
||
// Classifier evaluates a feature set and returns a complexity score in [0, 1].
|
||
// A higher score indicates a more complex task that benefits from a heavy model.
|
||
// The score is compared against the configured threshold: score >= threshold selects
|
||
// the primary (heavy) model; score < threshold selects the light model.
|
||
//
|
||
// Classifier is an interface so that future implementations (ML-based, embedding-based,
|
||
// or any other approach) can be swapped in without changing routing infrastructure.
|
||
type Classifier interface {
|
||
Score(f Features) float64
|
||
}
|
||
|
||
// RuleClassifier is the v1 Classifier implementation.
//
// It scores a request as a weighted sum of structural signals, with no
// external dependencies and no API calls: scoring is a handful of comparisons
// and additions. The raw sum is capped at 1.0 so that the returned score
// always falls within the [0, 1] contract.
//
// Individual weights (signals from different rows can fire simultaneously;
// the two token tiers are mutually exclusive, as are the two tool-call tiers):
//
//	token estimate > 200 (≈600 chars): 0.35 — very long prompts are almost always complex
//	token estimate 51–200:             0.15 — medium length; may or may not be complex
//	code block present:                0.40 — coding tasks need the heavy model
//	tool calls > 3 (recent):           0.25 — dense tool usage signals an agentic workflow
//	tool calls 1–3 (recent):           0.10 — some tool activity
//	conversation depth > 10:           0.10 — long sessions carry implicit complexity
//	attachments present:               1.00 — hard gate; multi-modal always needs heavy model
//
// Assuming the default threshold of 0.35 (configured outside this file):
//   - Pure greetings / trivial Q&A: 0.00 → light ✓
//   - Medium prose message (51–200 tokens): 0.15 → light ✓
//   - Message with code block: 0.40 → heavy ✓
//   - Long message (>200 tokens): 0.35 → heavy ✓
//   - 1–3 recent tool calls + medium message: 0.25 → light (acceptable)
//   - Any message with an attachment (e.g. image/audio): 1.00 → heavy ✓
type RuleClassifier struct{}
|
||
|
||
// Score computes the complexity score for the given feature set.
|
||
// The returned value is in [0, 1]. Attachments short-circuit to 1.0.
|
||
func (c *RuleClassifier) Score(f Features) float64 {
|
||
// Hard gate: multi-modal inputs always require the heavy model.
|
||
if f.HasAttachments {
|
||
return 1.0
|
||
}
|
||
|
||
var score float64
|
||
|
||
// Token estimate — primary verbosity signal
|
||
switch {
|
||
case f.TokenEstimate > 200:
|
||
score += 0.35
|
||
case f.TokenEstimate > 50:
|
||
score += 0.15
|
||
}
|
||
|
||
// Fenced code blocks — strongest indicator of a coding/technical task
|
||
if f.CodeBlockCount > 0 {
|
||
score += 0.40
|
||
}
|
||
|
||
// Recent tool call density — indicates an ongoing agentic workflow
|
||
switch {
|
||
case f.RecentToolCalls > 3:
|
||
score += 0.25
|
||
case f.RecentToolCalls > 0:
|
||
score += 0.10
|
||
}
|
||
|
||
// Conversation depth — accumulated context implies compound task
|
||
if f.ConversationDepth > 10 {
|
||
score += 0.10
|
||
}
|
||
|
||
// Cap at 1.0 to honor the [0, 1] contract even when multiple signals fire
|
||
// simultaneously (e.g., long message + code block + tool chain = 1.10 raw).
|
||
if score > 1.0 {
|
||
score = 1.0
|
||
}
|
||
return score
|
||
}
|