Merge branch 'main' into refactor-inbound-context-routing-session

# Conflicts: # pkg/agent/eventbus_test.go # pkg/agent/loop.go # pkg/bus/bus.go # pkg/bus/types.go # pkg/channels/pico/pico.go # pkg/channels/telegram/telegram.go # pkg/config/config.go # web/backend/api/session.go # web/backend/api/session_test.go
2026-06-12 18:08:54 +00:00 · 2026-04-07 21:41:02 +08:00
parent 528c57dda0 6ce0306c66
commit e32a209683
282 changed files with 33064 additions and 3251 deletions
@@ -18,6 +18,8 @@ import (
 	"sync/atomic"
 	"time"

+	"github.com/sipeed/picoclaw/pkg/audio/asr"
+	"github.com/sipeed/picoclaw/pkg/audio/tts"
 	"github.com/sipeed/picoclaw/pkg/bus"
 	"github.com/sipeed/picoclaw/pkg/channels"
 	"github.com/sipeed/picoclaw/pkg/commands"
@@ -32,7 +34,6 @@ import (
 	"github.com/sipeed/picoclaw/pkg/state"
 	"github.com/sipeed/picoclaw/pkg/tools"
 	"github.com/sipeed/picoclaw/pkg/utils"
-	"github.com/sipeed/picoclaw/pkg/voice"
 )

 type AgentLoop struct {
@@ -48,11 +49,11 @@ type AgentLoop struct {

 	// Runtime state
 	running        atomic.Bool
-	summarizing    sync.Map
+	contextManager ContextManager
 	fallback       *providers.FallbackChain
 	channelManager *channels.Manager
 	mediaStore     media.MediaStore
-	transcriber    voice.Transcriber
+	transcriber    asr.Transcriber
 	cmdRegistry    *commands.Registry
 	mcp            mcpRuntime
 	hookRuntime    hookRuntime
@@ -116,9 +117,18 @@ func NewAgentLoop(
 ) *AgentLoop {
 	registry := NewAgentRegistry(cfg, provider)

-	// Set up shared fallback chain
+	// Set up shared fallback chain with rate limiting.
 	cooldown := providers.NewCooldownTracker()
-	fallbackChain := providers.NewFallbackChain(cooldown)
+	rl := providers.NewRateLimiterRegistry()
+	// Register rate limiters for all agents' candidates so that RPM limits
+	// configured in ModelConfig are enforced before each LLM call.
+	for _, agentID := range registry.ListAgentIDs() {
+		if agent, ok := registry.GetAgent(agentID); ok {
+			rl.RegisterCandidates(agent.Candidates)
+			rl.RegisterCandidates(agent.LightCandidates)
+		}
+	}
+	fallbackChain := providers.NewFallbackChain(cooldown, rl)

 	// Create state manager using default agent's workspace for channel recording
 	defaultAgent := registry.GetDefaultAgent()
@@ -134,13 +144,13 @@ func NewAgentLoop(
 		registry:    registry,
 		state:       stateManager,
 		eventBus:    eventBus,
-		summarizing: sync.Map{},
 		fallback:    fallbackChain,
 		cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()),
 		steering:    newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)),
 	}
 	al.hooks = NewHookManager(eventBus)
 	configureHookManagerFromConfig(al.hooks, cfg)
+	al.contextManager = al.resolveContextManager()

 	// Register shared tools to all agents (now that al is created)
 	registerSharedTools(al, cfg, msgBus, registry, provider)
@@ -157,6 +167,13 @@ func registerSharedTools(
 	provider providers.LLMProvider,
 ) {
 	allowReadPaths := buildAllowReadPatterns(cfg)
+	var ttsProvider tts.TTSProvider
+	if cfg.Tools.IsToolEnabled("send_tts") {
+		ttsProvider = tts.DetectTTS(cfg)
+		if ttsProvider == nil {
+			logger.WarnCF("voice-tts", "send_tts enabled but no TTS provider configured", nil)
+		}
+	}

 	for _, agentID := range registry.ListAgentIDs() {
 		agent, ok := registry.GetAgent(agentID)
@@ -267,6 +284,21 @@ func registerSharedTools(
 			agent.Tools.Register(sendFileTool)
 		}

+		if ttsProvider != nil {
+			agent.Tools.Register(tools.NewSendTTSTool(ttsProvider, nil))
+		}
+
+		if cfg.Tools.IsToolEnabled("load_image") {
+			loadImageTool := tools.NewLoadImageTool(
+				agent.Workspace,
+				cfg.Agents.Defaults.RestrictToWorkspace,
+				cfg.Agents.Defaults.GetMaxMediaSize(),
+				nil,
+				allowReadPaths,
+			)
+			agent.Tools.Register(loadImageTool)
+		}
+
 		// Skill discovery and installation tools
 		skills_enabled := cfg.Tools.IsToolEnabled("skills")
 		find_skills_enable := cfg.Tools.IsToolEnabled("find_skills")
@@ -309,6 +341,14 @@ func registerSharedTools(
 			subagentManager := tools.NewSubagentManager(provider, agent.Model, agent.Workspace)
 			subagentManager.SetLLMOptions(agent.MaxTokens, agent.Temperature)

+			// Inject a media resolver so the legacy RunToolLoop fallback path can
+			// resolve media:// refs in the same way the main AgentLoop does.
+			// This keeps subagent vision support working even when the optimized
+			// sub-turn spawner path is unavailable.
+			subagentManager.SetMediaResolver(func(msgs []providers.Message) []providers.Message {
+				return resolveMediaRefs(msgs, al.mediaStore, cfg.Agents.Defaults.GetMaxMediaSize())
+			})
+
 			// Set the spawner that links into AgentLoop's turnState
 			subagentManager.SetSpawner(func(
 				ctx context.Context,
@@ -1075,6 +1115,7 @@ func (al *AgentLoop) ReloadProviderAndConfig(
 	go func() {
 		defer func() {
 			if r := recover(); r != nil {
+				logger.RecoverPanicNoExit(r)
 				panicErr = fmt.Errorf("panic during registry creation: %v", r)
 				logger.ErrorCF("agent", "Panic during registry creation",
 					map[string]any{"panic": r})
@@ -1115,8 +1156,15 @@ func (al *AgentLoop) ReloadProviderAndConfig(
 	al.cfg = cfg
 	al.registry = registry

-	// Also update fallback chain with new config
-	al.fallback = providers.NewFallbackChain(providers.NewCooldownTracker())
+	// Also update fallback chain with new config; rebuild rate limiter registry.
+	newRL := providers.NewRateLimiterRegistry()
+	for _, agentID := range registry.ListAgentIDs() {
+		if agent, ok := registry.GetAgent(agentID); ok {
+			newRL.RegisterCandidates(agent.Candidates)
+			newRL.RegisterCandidates(agent.LightCandidates)
+		}
+	}
+	al.fallback = providers.NewFallbackChain(providers.NewCooldownTracker(), newRL)

 	al.mu.Unlock()

@@ -1174,10 +1222,15 @@ func (al *AgentLoop) SetMediaStore(s media.MediaStore) {
 			agent.Tools.SetMediaStore(s)
 		}
 	}
+	registry.ForEachTool("send_tts", func(t tools.Tool) {
+		if st, ok := t.(*tools.SendTTSTool); ok {
+			st.SetMediaStore(s)
+		}
+	})
 }

 // SetTranscriber injects a voice transcriber for agent-level audio transcription.
-func (al *AgentLoop) SetTranscriber(t voice.Transcriber) {
+func (al *AgentLoop) SetTranscriber(t asr.Transcriber) {
 	al.transcriber = t
 }

@@ -1198,19 +1251,23 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou

 	// Transcribe each audio media ref in order.
 	var transcriptions []string
+	var keptMedia []string
 	for _, ref := range msg.Media {
 		path, meta, err := al.mediaStore.ResolveWithMeta(ref)
 		if err != nil {
 			logger.WarnCF("voice", "Failed to resolve media ref", map[string]any{"ref": ref, "error": err})
+			keptMedia = append(keptMedia, ref)
 			continue
 		}
 		if !utils.IsAudioFile(meta.Filename, meta.ContentType) {
+			keptMedia = append(keptMedia, ref)
 			continue
 		}
 		result, err := al.transcriber.Transcribe(ctx, path)
 		if err != nil {
 			logger.WarnCF("voice", "Transcription failed", map[string]any{"ref": ref, "error": err})
 			transcriptions = append(transcriptions, "")
+			keptMedia = append(keptMedia, ref)
 			continue
 		}
 		transcriptions = append(transcriptions, result.Text)
@@ -1230,15 +1287,21 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou
 		}
 		text := transcriptions[idx]
 		idx++
+		if text == "" {
+			return match
+		}
 		return "[voice: " + text + "]"
 	})

 	// Append any remaining transcriptions not matched by an annotation.
 	for ; idx < len(transcriptions); idx++ {
-		newContent += "\n[voice: " + transcriptions[idx] + "]"
+		if transcriptions[idx] != "" {
+			newContent += "\n[voice: " + transcriptions[idx] + "]"
+		}
 	}

 	msg.Content = newContent
+	msg.Media = keptMedia
 	return msg, true
 }

@@ -1825,8 +1888,15 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
 	var history []providers.Message
 	var summary string
 	if !ts.opts.NoHistory {
-		history = ts.agent.Sessions.GetHistory(ts.sessionKey)
-		summary = ts.agent.Sessions.GetSummary(ts.sessionKey)
+		// ContextManager assembles budget-aware history and summary.
+		if resp, err := al.contextManager.Assemble(turnCtx, &AssembleRequest{
+			SessionKey: ts.sessionKey,
+			Budget:     ts.agent.ContextWindow,
+			MaxTokens:  ts.agent.MaxTokens,
+		}); err == nil && resp != nil {
+			history = resp.History
+			summary = resp.Summary
+		}
 	}
 	ts.captureRestorePoint(history, summary)

@@ -1851,22 +1921,28 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
 		if isOverContextBudget(ts.agent.ContextWindow, messages, toolDefs, ts.agent.MaxTokens) {
 			logger.WarnCF("agent", "Proactive compression: context budget exceeded before LLM call",
 				map[string]any{"session_key": ts.sessionKey})
-			if compression, ok := al.forceCompression(ts.agent, ts.sessionKey); ok {
-				al.emitEvent(
-					EventKindContextCompress,
-					ts.eventMeta("runTurn", "turn.context.compress"),
-					ContextCompressPayload{
-						Reason:            ContextCompressReasonProactive,
-						DroppedMessages:   compression.DroppedMessages,
-						RemainingMessages: compression.RemainingMessages,
-					},
-				)
-				ts.refreshRestorePointFromSession(ts.agent)
+			if err := al.contextManager.Compact(turnCtx, &CompactRequest{
+				SessionKey: ts.sessionKey,
+				Reason:     ContextCompressReasonProactive,
+				Budget:     ts.agent.ContextWindow,
+			}); err != nil {
+				logger.WarnCF("agent", "Proactive compact failed", map[string]any{
+					"session_key": ts.sessionKey,
+					"error":       err.Error(),
+				})
+			}
+			ts.refreshRestorePointFromSession(ts.agent)
+			// Re-assemble from CM after compact.
+			if resp, err := al.contextManager.Assemble(turnCtx, &AssembleRequest{
+				SessionKey: ts.sessionKey,
+				Budget:     ts.agent.ContextWindow,
+				MaxTokens:  ts.agent.MaxTokens,
+			}); err == nil && resp != nil {
+				history = resp.History
+				summary = resp.Summary
 			}
-			newHistory := ts.agent.Sessions.GetHistory(ts.sessionKey)
-			newSummary := ts.agent.Sessions.GetSummary(ts.sessionKey)
 			messages = ts.agent.ContextBuilder.BuildMessages(
-				newHistory, newSummary, ts.userMessage,
+				history, summary, ts.userMessage,
 				ts.media, ts.channel, ts.chatID,
 				ts.opts.SenderID, ts.opts.SenderDisplayName,
 				activeSkillNames(ts.agent, ts.opts)...,
@@ -1888,6 +1964,7 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
 			ts.agent.Sessions.AddMessage(ts.sessionKey, rootMsg.Role, rootMsg.Content)
 		}
 		ts.recordPersistedMessage(rootMsg)
+		ts.ingestMessage(turnCtx, al, rootMsg)
 	}

 	activeCandidates, activeModel, usedLight := al.selectCandidates(ts.agent, ts.userMessage, messages)
@@ -1963,6 +2040,7 @@ turnLoop:
 				if !ts.opts.NoHistory {
 					ts.agent.Sessions.AddFullMessage(ts.sessionKey, pm)
 					ts.recordPersistedMessage(pm)
+					ts.ingestMessage(turnCtx, al, pm)
 				}
 				logger.InfoCF("agent", "Injected steering message into context",
 					map[string]any{
@@ -2016,6 +2094,14 @@ turnLoop:
 			providerToolDefs = filtered
 		}

+		// Resolve media:// refs produced by tool results (e.g. load_image).
+		// Skipped on iteration 1 because inbound user media is already resolved
+		// before entering the loop; only subsequent iterations can contain new
+		// tool-generated media refs that need base64 encoding.
+		if iteration > 1 {
+			messages = resolveMediaRefs(messages, al.mediaStore, maxMediaSize)
+		}
+
 		callMessages := messages
 		if gracefulTerminal {
 			callMessages = append(append([]providers.Message(nil), messages...), ts.interruptHintMessage())
@@ -2115,7 +2201,11 @@ turnLoop:
 					providerCtx,
 					activeCandidates,
 					func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
-						return activeProvider.Chat(ctx, messagesForCall, toolDefsForCall, model, llmOpts)
+						candidateProvider := activeProvider
+						if cp, ok := ts.agent.CandidateProviders[providers.ModelKey(provider, model)]; ok {
+							candidateProvider = cp
+						}
+						return candidateProvider.Chat(ctx, messagesForCall, toolDefsForCall, model, llmOpts)
 					},
 				)
 				if fbErr != nil {
@@ -2221,23 +2311,28 @@ turnLoop:
 					))
 				}

-				if compression, ok := al.forceCompression(ts.agent, ts.sessionKey); ok {
-					al.emitEvent(
-						EventKindContextCompress,
-						ts.eventMeta("runTurn", "turn.context.compress"),
-						ContextCompressPayload{
-							Reason:            ContextCompressReasonRetry,
-							DroppedMessages:   compression.DroppedMessages,
-							RemainingMessages: compression.RemainingMessages,
-						},
-					)
-					ts.refreshRestorePointFromSession(ts.agent)
+				if compactErr := al.contextManager.Compact(turnCtx, &CompactRequest{
+					SessionKey: ts.sessionKey,
+					Reason:     ContextCompressReasonRetry,
+					Budget:     ts.agent.ContextWindow,
+				}); compactErr != nil {
+					logger.WarnCF("agent", "Context overflow compact failed", map[string]any{
+						"session_key": ts.sessionKey,
+						"error":       compactErr.Error(),
+					})
+				}
+				ts.refreshRestorePointFromSession(ts.agent)
+				// Re-assemble from CM after compact.
+				if asmResp, asmErr := al.contextManager.Assemble(turnCtx, &AssembleRequest{
+					SessionKey: ts.sessionKey,
+					Budget:     ts.agent.ContextWindow,
+					MaxTokens:  ts.agent.MaxTokens,
+				}); asmErr == nil && asmResp != nil {
+					history = asmResp.History
+					summary = asmResp.Summary
 				}
-
-				newHistory := ts.agent.Sessions.GetHistory(ts.sessionKey)
-				newSummary := ts.agent.Sessions.GetSummary(ts.sessionKey)
 				messages = ts.agent.ContextBuilder.BuildMessages(
-					newHistory, newSummary, "",
+					history, summary, "",
 					nil, ts.channel, ts.chatID, ts.opts.SenderID, ts.opts.SenderDisplayName,
 					activeSkillNames(ts.agent, ts.opts)...,
 				)
@@ -2409,6 +2504,7 @@ turnLoop:
 		if !ts.opts.NoHistory {
 			ts.agent.Sessions.AddFullMessage(ts.sessionKey, assistantMsg)
 			ts.recordPersistedMessage(assistantMsg)
+			ts.ingestMessage(turnCtx, al, assistantMsg)
 		}

 		ts.setPhase(TurnPhaseTools)
@@ -2633,6 +2729,7 @@ turnLoop:
 			if toolResult == nil {
 				toolResult = tools.ErrorResult("hook returned nil tool result")
 			}
+
 			if len(toolResult.Media) > 0 && toolResult.ResponseHandled {
 				parts := make([]bus.MediaPart, 0, len(toolResult.Media))
 				for _, ref := range toolResult.Media {
@@ -2675,6 +2772,13 @@ turnLoop:
 			}

 			if len(toolResult.Media) > 0 && !toolResult.ResponseHandled {
+				// For tools like load_image that produce media refs without sending them
+				// to the user channel (ResponseHandled == false), both Media and ArtifactTags
+				// coexist on the result:
+				//   - Media: carries media:// refs that resolveMediaRefs will base64-encode
+				//     into image_url parts in the next LLM iteration (enabling vision).
+				//   - ArtifactTags: exposes the local file path as a structured [file:…] tag
+				//     in the tool result text, so the LLM knows an artifact was produced.
 				toolResult.ArtifactTags = buildArtifactTags(al.mediaStore, toolResult.Media)
 			}

@@ -2693,7 +2797,6 @@ turnLoop:
 						"content_len": len(toolResult.ForUser),
 					})
 			}
-
 			contentForLLM := toolResult.ContentForLLM()

 			// Filter sensitive data (API keys, tokens, secrets) before sending to LLM
@@ -2706,6 +2809,9 @@ turnLoop:
 				Content:    contentForLLM,
 				ToolCallID: toolCallID,
 			}
+			if len(toolResult.Media) > 0 && !toolResult.ResponseHandled {
+				toolResultMsg.Media = append(toolResultMsg.Media, toolResult.Media...)
+			}
 			al.emitEvent(
 				EventKindToolExecEnd,
 				ts.eventMeta("runTurn", "turn.tool.end"),
@@ -2722,6 +2828,7 @@ turnLoop:
 			if !ts.opts.NoHistory {
 				ts.agent.Sessions.AddFullMessage(ts.sessionKey, toolResultMsg)
 				ts.recordPersistedMessage(toolResultMsg)
+				ts.ingestMessage(turnCtx, al, toolResultMsg)
 			}

 			if steerMsgs := al.dequeueSteeringMessagesForScope(ts.sessionKey); len(steerMsgs) > 0 {
@@ -2821,6 +2928,7 @@ turnLoop:
 			if !ts.opts.NoHistory {
 				ts.agent.Sessions.AddMessage(ts.sessionKey, summaryMsg.Role, summaryMsg.Content)
 				ts.recordPersistedMessage(summaryMsg)
+				ts.ingestMessage(turnCtx, al, summaryMsg)
 				if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
 					turnStatus = TurnEndStatusError
 					al.emitEvent(
@@ -2835,7 +2943,7 @@ turnLoop:
 				}
 			}
 			if ts.opts.EnableSummary {
-				al.maybeSummarize(ts.agent, ts.sessionKey, ts.scope)
+				al.contextManager.Compact(turnCtx, &CompactRequest{SessionKey: ts.sessionKey, Reason: ContextCompressReasonSummarize, Budget: ts.agent.ContextWindow})
 			}

 			ts.setPhase(TurnPhaseCompleted)
@@ -2890,6 +2998,7 @@ turnLoop:
 		finalMsg := providers.Message{Role: "assistant", Content: finalContent}
 		ts.agent.Sessions.AddMessage(ts.sessionKey, finalMsg.Role, finalMsg.Content)
 		ts.recordPersistedMessage(finalMsg)
+		ts.ingestMessage(turnCtx, al, finalMsg)
 		if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
 			turnStatus = TurnEndStatusError
 			al.emitEvent(
@@ -2905,7 +3014,14 @@ turnLoop:
 	}

 	if ts.opts.EnableSummary {
-		al.maybeSummarize(ts.agent, ts.sessionKey, ts.scope)
+		al.contextManager.Compact(
+			turnCtx,
+			&CompactRequest{
+				SessionKey: ts.sessionKey,
+				Reason:     ContextCompressReasonSummarize,
+				Budget:     ts.agent.ContextWindow,
+			},
+		)
 	}

 	ts.setPhase(TurnPhaseCompleted)
@@ -2984,103 +3100,28 @@ func (al *AgentLoop) selectCandidates(
 	return agent.LightCandidates, resolvedCandidateModel(agent.LightCandidates, agent.Router.LightModel()), true
 }

-// maybeSummarize triggers summarization if the session history exceeds thresholds.
-func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey string, turnScope turnEventScope) {
-	newHistory := agent.Sessions.GetHistory(sessionKey)
-	tokenEstimate := al.estimateTokens(newHistory)
-	threshold := agent.ContextWindow * agent.SummarizeTokenPercent / 100
-
-	if len(newHistory) > agent.SummarizeMessageThreshold || tokenEstimate > threshold {
-		summarizeKey := agent.ID + ":" + sessionKey
-		if _, loading := al.summarizing.LoadOrStore(summarizeKey, true); !loading {
-			go func() {
-				defer al.summarizing.Delete(summarizeKey)
-				logger.Debug("Memory threshold reached. Optimizing conversation history...")
-				al.summarizeSession(agent, sessionKey, turnScope)
-			}()
-		}
+// resolveContextManager selects the ContextManager implementation based on config.
+func (al *AgentLoop) resolveContextManager() ContextManager {
+	name := al.cfg.Agents.Defaults.ContextManager
+	if name == "" || name == "legacy" {
+		return &legacyContextManager{al: al}
 	}
-}
-
-type compressionResult struct {
-	DroppedMessages   int
-	RemainingMessages int
-}
-
-// forceCompression aggressively reduces context when the limit is hit.
-// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
-// cycle, as defined in #1316), so tool-call sequences are never split.
-//
-// If the history is a single Turn with no safe split point, the function
-// falls back to keeping only the most recent user message. This breaks
-// Turn atomicity as a last resort to avoid a context-exceeded loop.
-//
-// Session history contains only user/assistant/tool messages — the system
-// prompt is built dynamically by BuildMessages and is NOT stored here.
-// The compression note is recorded in the session summary so that
-// BuildMessages can include it in the next system prompt.
-func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) (compressionResult, bool) {
-	history := agent.Sessions.GetHistory(sessionKey)
-	if len(history) <= 2 {
-		return compressionResult{}, false
+	factory, ok := lookupContextManager(name)
+	if !ok {
+		logger.WarnCF("agent", "Unknown context manager, falling back to legacy", map[string]any{
+			"name": name,
+		})
+		return &legacyContextManager{al: al}
 	}
-
-	// Split at a Turn boundary so no tool-call sequence is torn apart.
-	// parseTurnBoundaries gives us the start of each Turn; we drop the
-	// oldest half of Turns and keep the most recent ones.
-	turns := parseTurnBoundaries(history)
-	var mid int
-	if len(turns) >= 2 {
-		mid = turns[len(turns)/2]
-	} else {
-		// Fewer than 2 Turns — fall back to message-level midpoint
-		// aligned to the nearest Turn boundary.
-		mid = findSafeBoundary(history, len(history)/2)
+	cm, err := factory(al.cfg.Agents.Defaults.ContextManagerConfig, al)
+	if err != nil {
+		logger.WarnCF("agent", "Failed to create context manager, falling back to legacy", map[string]any{
+			"name":  name,
+			"error": err.Error(),
+		})
+		return &legacyContextManager{al: al}
 	}
-	var keptHistory []providers.Message
-	if mid <= 0 {
-		// No safe Turn boundary — the entire history is a single Turn
-		// (e.g. one user message followed by a massive tool response).
-		// Keeping everything would leave the agent stuck in a context-
-		// exceeded loop, so fall back to keeping only the most recent
-		// user message. This breaks Turn atomicity as a last resort.
-		for i := len(history) - 1; i >= 0; i-- {
-			if history[i].Role == "user" {
-				keptHistory = []providers.Message{history[i]}
-				break
-			}
-		}
-	} else {
-		keptHistory = history[mid:]
-	}
-
-	droppedCount := len(history) - len(keptHistory)
-
-	// Record compression in the session summary so BuildMessages includes it
-	// in the system prompt. We do not modify history messages themselves.
-	existingSummary := agent.Sessions.GetSummary(sessionKey)
-	compressionNote := fmt.Sprintf(
-		"[Emergency compression dropped %d oldest messages due to context limit]",
-		droppedCount,
-	)
-	if existingSummary != "" {
-		compressionNote = existingSummary + "\n\n" + compressionNote
-	}
-	agent.Sessions.SetSummary(sessionKey, compressionNote)
-
-	agent.Sessions.SetHistory(sessionKey, keptHistory)
-	agent.Sessions.Save(sessionKey)
-
-	logger.WarnCF("agent", "Forced compression executed", map[string]any{
-		"session_key":  sessionKey,
-		"dropped_msgs": droppedCount,
-		"new_count":    len(keptHistory),
-	})
-
-	return compressionResult{
-		DroppedMessages:   droppedCount,
-		RemainingMessages: len(keptHistory),
-	}, true
+	return cm
 }

 // GetStartupInfo returns information about loaded tools and skills for logging.
@@ -3172,247 +3213,13 @@ func formatToolsForLog(toolDefs []providers.ToolDefinition) string {
 }

 // summarizeSession summarizes the conversation history for a session.
-func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string, turnScope turnEventScope) {
-	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
-	defer cancel()
-
-	history := agent.Sessions.GetHistory(sessionKey)
-	summary := agent.Sessions.GetSummary(sessionKey)
-
-	// Keep the most recent Turns for continuity, aligned to a Turn boundary
-	// so that no tool-call sequence is split.
-	if len(history) <= 4 {
-		return
-	}
-
-	safeCut := findSafeBoundary(history, len(history)-4)
-	if safeCut <= 0 {
-		return
-	}
-	keepCount := len(history) - safeCut
-	toSummarize := history[:safeCut]
-
-	// Oversized Message Guard
-	maxMessageTokens := agent.ContextWindow / 2
-	validMessages := make([]providers.Message, 0)
-	omitted := false
-
-	for _, m := range toSummarize {
-		if m.Role != "user" && m.Role != "assistant" {
-			continue
-		}
-		msgTokens := len(m.Content) / 2
-		if msgTokens > maxMessageTokens {
-			omitted = true
-			continue
-		}
-		validMessages = append(validMessages, m)
-	}
-
-	if len(validMessages) == 0 {
-		return
-	}
-
-	const (
-		maxSummarizationMessages = 10
-		llmMaxRetries            = 3
-		llmTemperature           = 0.3
-		fallbackMaxContentLength = 200
-	)
-
-	// Multi-Part Summarization
-	var finalSummary string
-	if len(validMessages) > maxSummarizationMessages {
-		mid := len(validMessages) / 2
-
-		mid = al.findNearestUserMessage(validMessages, mid)
-
-		part1 := validMessages[:mid]
-		part2 := validMessages[mid:]
-
-		s1, _ := al.summarizeBatch(ctx, agent, part1, "")
-		s2, _ := al.summarizeBatch(ctx, agent, part2, "")
-
-		mergePrompt := fmt.Sprintf(
-			"Merge these two conversation summaries into one cohesive summary:\n\n1: %s\n\n2: %s",
-			s1,
-			s2,
-		)
-
-		resp, err := al.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries)
-		if err == nil && resp.Content != "" {
-			finalSummary = resp.Content
-		} else {
-			finalSummary = s1 + " " + s2
-		}
-	} else {
-		finalSummary, _ = al.summarizeBatch(ctx, agent, validMessages, summary)
-	}
-
-	if omitted && finalSummary != "" {
-		finalSummary += "\n[Note: Some oversized messages were omitted from this summary for efficiency.]"
-	}
-
-	if finalSummary != "" {
-		agent.Sessions.SetSummary(sessionKey, finalSummary)
-		agent.Sessions.TruncateHistory(sessionKey, keepCount)
-		agent.Sessions.Save(sessionKey)
-		al.emitEvent(
-			EventKindSessionSummarize,
-			turnScope.meta(0, "summarizeSession", "turn.session.summarize"),
-			SessionSummarizePayload{
-				SummarizedMessages: len(validMessages),
-				KeptMessages:       keepCount,
-				SummaryLen:         len(finalSummary),
-				OmittedOversized:   omitted,
-			},
-		)
-	}
-}
-
 // findNearestUserMessage finds the nearest user message to the given index.
 // It searches backward first, then forward if no user message is found.
-func (al *AgentLoop) findNearestUserMessage(messages []providers.Message, mid int) int {
-	originalMid := mid
-
-	for mid > 0 && messages[mid].Role != "user" {
-		mid--
-	}
-
-	if messages[mid].Role == "user" {
-		return mid
-	}
-
-	mid = originalMid
-	for mid < len(messages) && messages[mid].Role != "user" {
-		mid++
-	}
-
-	if mid < len(messages) {
-		return mid
-	}
-
-	return originalMid
-}
-
 // retryLLMCall calls the LLM with retry logic.
-func (al *AgentLoop) retryLLMCall(
-	ctx context.Context,
-	agent *AgentInstance,
-	prompt string,
-	maxRetries int,
-) (*providers.LLMResponse, error) {
-	const (
-		llmTemperature = 0.3
-	)
-
-	var resp *providers.LLMResponse
-	var err error
-
-	for attempt := 0; attempt < maxRetries; attempt++ {
-		al.activeRequests.Add(1)
-		resp, err = func() (*providers.LLMResponse, error) {
-			defer al.activeRequests.Done()
-			return agent.Provider.Chat(
-				ctx,
-				[]providers.Message{{Role: "user", Content: prompt}},
-				nil,
-				agent.Model,
-				map[string]any{
-					"max_tokens":       agent.MaxTokens,
-					"temperature":      llmTemperature,
-					"prompt_cache_key": agent.ID,
-				},
-			)
-		}()
-
-		if err == nil && resp != nil && resp.Content != "" {
-			return resp, nil
-		}
-		if attempt < maxRetries-1 {
-			time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond)
-		}
-	}
-
-	return resp, err
-}
-
 // summarizeBatch summarizes a batch of messages.
-func (al *AgentLoop) summarizeBatch(
-	ctx context.Context,
-	agent *AgentInstance,
-	batch []providers.Message,
-	existingSummary string,
-) (string, error) {
-	const (
-		llmMaxRetries             = 3
-		llmTemperature            = 0.3
-		fallbackMinContentLength  = 200
-		fallbackMaxContentPercent = 10
-	)
-
-	var sb strings.Builder
-	sb.WriteString(
-		"Provide a concise summary of this conversation segment, preserving core context and key points.\n",
-	)
-	if existingSummary != "" {
-		sb.WriteString("Existing context: ")
-		sb.WriteString(existingSummary)
-		sb.WriteString("\n")
-	}
-	sb.WriteString("\nCONVERSATION:\n")
-	for _, m := range batch {
-		fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Content)
-	}
-	prompt := sb.String()
-
-	response, err := al.retryLLMCall(ctx, agent, prompt, llmMaxRetries)
-	if err == nil && response.Content != "" {
-		return strings.TrimSpace(response.Content), nil
-	}
-
-	var fallback strings.Builder
-	fallback.WriteString("Conversation summary: ")
-	for i, m := range batch {
-		if i > 0 {
-			fallback.WriteString(" | ")
-		}
-		content := strings.TrimSpace(m.Content)
-		runes := []rune(content)
-		if len(runes) == 0 {
-			fallback.WriteString(fmt.Sprintf("%s: ", m.Role))
-			continue
-		}
-
-		keepLength := len(runes) * fallbackMaxContentPercent / 100
-		if keepLength < fallbackMinContentLength {
-			keepLength = fallbackMinContentLength
-		}
-
-		if keepLength > len(runes) {
-			keepLength = len(runes)
-		}
-
-		content = string(runes[:keepLength])
-		if keepLength < len(runes) {
-			content += "..."
-		}
-		fallback.WriteString(fmt.Sprintf("%s: %s", m.Role, content))
-	}
-	return fallback.String(), nil
-}
-
 // estimateTokens estimates the number of tokens in a message list.
 // Counts Content, ToolCalls arguments, and ToolCallID metadata so that
 // tool-heavy conversations are not systematically undercounted.
-func (al *AgentLoop) estimateTokens(messages []providers.Message) int {
-	total := 0
-	for _, m := range messages {
-		total += estimateMessageTokens(m)
-	}
-	return total
-}
-
 func (al *AgentLoop) handleCommand(
 	ctx context.Context,
 	msg bus.InboundMessage,
@@ -3609,7 +3416,7 @@ func (al *AgentLoop) buildCommandsRuntime(agent *AgentInstance, opts *processOpt
 				return "", fmt.Errorf("failed to initialize model %q: %w", value, err)
 			}

-			nextCandidates := resolveModelCandidates(cfg, cfg.Agents.Defaults.Provider, modelCfg.Model, agent.Fallbacks)
+			nextCandidates := resolveModelCandidates(cfg, cfg.Agents.Defaults.Provider, value, agent.Fallbacks)
 			if len(nextCandidates) == 0 {
 				return "", fmt.Errorf("model %q did not resolve to any provider candidates", value)
 			}