diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go index ed438059f..ec8871e30 100644 --- a/pkg/agent/instance.go +++ b/pkg/agent/instance.go @@ -34,6 +34,14 @@ type AgentInstance struct { Subagents *config.SubagentsConfig SkillsFilter []string Candidates []providers.FallbackCandidate + + // Router is non-nil when model routing is configured and the light model + // was successfully resolved. It scores each incoming message and decides + // whether to route to LightCandidates or stay with Candidates. + Router *routing.Router + // LightCandidates holds the resolved provider candidates for the light model. + // Pre-computed at agent creation to avoid repeated model_list lookups at runtime. + LightCandidates []providers.FallbackCandidate } // NewAgentInstance creates an agent instance from config. @@ -148,23 +156,44 @@ func NewAgentInstance( candidates := providers.ResolveCandidatesWithLookup(modelCfg, defaults.Provider, resolveFromModelList) + // Model routing setup: pre-resolve light model candidates at creation time + // to avoid repeated model_list lookups on every incoming message. + var router *routing.Router + var lightCandidates []providers.FallbackCandidate + if rc := defaults.Routing; rc != nil && rc.Enabled && rc.LightModel != "" { + lightModelCfg := providers.ModelConfig{Primary: rc.LightModel} + resolved := providers.ResolveCandidatesWithLookup(lightModelCfg, defaults.Provider, resolveFromModelList) + if len(resolved) > 0 { + router = routing.New(routing.RouterConfig{ + LightModel: rc.LightModel, + Threshold: rc.Threshold, + }) + lightCandidates = resolved + } else { + log.Printf("routing: light_model %q not found in model_list — routing disabled for agent %q", + rc.LightModel, agentID) + } + } + return &AgentInstance{ - ID: agentID, - Name: agentName, - Model: model, - Fallbacks: fallbacks, - Workspace: workspace, - MaxIterations: maxIter, - MaxTokens: maxTokens, - Temperature: temperature, - ContextWindow: maxTokens, - Provider: provider, - Sessions: sessionsManager, - ContextBuilder: contextBuilder, - Tools: toolsRegistry, - Subagents: subagents, - SkillsFilter: skillsFilter, - Candidates: candidates, + ID: agentID, + Name: agentName, + Model: model, + Fallbacks: fallbacks, + Workspace: workspace, + MaxIterations: maxIter, + MaxTokens: maxTokens, + Temperature: temperature, + ContextWindow: maxTokens, + Provider: provider, + Sessions: sessionsManager, + ContextBuilder: contextBuilder, + Tools: toolsRegistry, + Subagents: subagents, + SkillsFilter: skillsFilter, + Candidates: candidates, + Router: router, + LightCandidates: lightCandidates, } } diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 00b0f096a..6df956627 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -625,6 +625,12 @@ func (al *AgentLoop) runLLMIteration( iteration := 0 var finalContent string + // Determine effective model tier for this conversation turn. + // selectCandidates evaluates routing once and the decision is sticky for + // all tool-follow-up iterations within the same turn so that a multi-step + // tool chain doesn't switch models mid-way through. + activeCandidates, activeModel := al.selectCandidates(agent, opts.UserMessage, messages) + for iteration < agent.MaxIterations { iteration++ @@ -643,7 +649,7 @@ func (al *AgentLoop) runLLMIteration( map[string]any{ "agent_id": agent.ID, "iteration": iteration, - "model": agent.Model, + "model": activeModel, "messages_count": len(messages), "tools_count": len(providerToolDefs), "max_tokens": agent.MaxTokens, @@ -659,13 +665,13 @@ func (al *AgentLoop) runLLMIteration( "tools_json": formatToolsForLog(providerToolDefs), }) - // Call LLM with fallback chain if candidates are configured. + // Call LLM with fallback chain if multiple candidates are configured. var response *providers.LLMResponse var err error callLLM := func() (*providers.LLMResponse, error) { - if len(agent.Candidates) > 1 && al.fallback != nil { - fbResult, fbErr := al.fallback.Execute(ctx, agent.Candidates, + if len(activeCandidates) > 1 && al.fallback != nil { + fbResult, fbErr := al.fallback.Execute(ctx, activeCandidates, func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) { return agent.Provider.Chat(ctx, messages, providerToolDefs, model, map[string]any{ "max_tokens": agent.MaxTokens, @@ -684,7 +690,7 @@ func (al *AgentLoop) runLLMIteration( } return fbResult.Response, nil } - return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{ + return agent.Provider.Chat(ctx, messages, providerToolDefs, activeModel, map[string]any{ "max_tokens": agent.MaxTokens, "temperature": agent.Temperature, "prompt_cache_key": agent.ID, @@ -934,6 +940,37 @@ func (al *AgentLoop) runLLMIteration( return finalContent, iteration, nil } +// selectCandidates returns the model candidates and resolved model name to use +// for a conversation turn. When model routing is configured and the incoming +// message scores below the complexity threshold, it returns the light model +// candidates instead of the primary ones. +// +// The returned (candidates, model) pair is used for all LLM calls within one +// turn — tool follow-up iterations use the same tier as the initial call so +// that a multi-step tool chain doesn't switch models mid-way. +func (al *AgentLoop) selectCandidates( + agent *AgentInstance, + userMsg string, + history []providers.Message, +) (candidates []providers.FallbackCandidate, model string) { + if agent.Router == nil || len(agent.LightCandidates) == 0 { + return agent.Candidates, agent.Model + } + + _, usedLight := agent.Router.SelectModel(userMsg, history, agent.Model) + if !usedLight { + return agent.Candidates, agent.Model + } + + logger.InfoCF("agent", "Model routing: light model selected", + map[string]any{ + "agent_id": agent.ID, + "light_model": agent.Router.LightModel(), + "threshold": agent.Router.Threshold(), + }) + return agent.LightCandidates, agent.Router.LightModel() +} + // updateToolContexts updates the context for tools that need channel/chatID info. func (al *AgentLoop) updateToolContexts(agent *AgentInstance, channel, chatID string) { // Use ContextualTool interface instead of type assertions