Merge branch 'main' into refactor-inbound-context-routing-session

# Conflicts:
#	pkg/agent/eventbus_test.go
#	pkg/agent/loop.go
#	pkg/bus/bus.go
#	pkg/bus/types.go
#	pkg/channels/pico/pico.go
#	pkg/channels/telegram/telegram.go
#	pkg/config/config.go
#	web/backend/api/session.go
#	web/backend/api/session_test.go
This commit is contained in:
Hoshina
2026-04-07 21:41:02 +08:00
282 changed files with 33064 additions and 3251 deletions
+181 -374
View File
@@ -18,6 +18,8 @@ import (
"sync/atomic"
"time"
"github.com/sipeed/picoclaw/pkg/audio/asr"
"github.com/sipeed/picoclaw/pkg/audio/tts"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/channels"
"github.com/sipeed/picoclaw/pkg/commands"
@@ -32,7 +34,6 @@ import (
"github.com/sipeed/picoclaw/pkg/state"
"github.com/sipeed/picoclaw/pkg/tools"
"github.com/sipeed/picoclaw/pkg/utils"
"github.com/sipeed/picoclaw/pkg/voice"
)
type AgentLoop struct {
@@ -48,11 +49,11 @@ type AgentLoop struct {
// Runtime state
running atomic.Bool
summarizing sync.Map
contextManager ContextManager
fallback *providers.FallbackChain
channelManager *channels.Manager
mediaStore media.MediaStore
transcriber voice.Transcriber
transcriber asr.Transcriber
cmdRegistry *commands.Registry
mcp mcpRuntime
hookRuntime hookRuntime
@@ -116,9 +117,18 @@ func NewAgentLoop(
) *AgentLoop {
registry := NewAgentRegistry(cfg, provider)
// Set up shared fallback chain
// Set up shared fallback chain with rate limiting.
cooldown := providers.NewCooldownTracker()
fallbackChain := providers.NewFallbackChain(cooldown)
rl := providers.NewRateLimiterRegistry()
// Register rate limiters for all agents' candidates so that RPM limits
// configured in ModelConfig are enforced before each LLM call.
for _, agentID := range registry.ListAgentIDs() {
if agent, ok := registry.GetAgent(agentID); ok {
rl.RegisterCandidates(agent.Candidates)
rl.RegisterCandidates(agent.LightCandidates)
}
}
fallbackChain := providers.NewFallbackChain(cooldown, rl)
// Create state manager using default agent's workspace for channel recording
defaultAgent := registry.GetDefaultAgent()
@@ -134,13 +144,13 @@ func NewAgentLoop(
registry: registry,
state: stateManager,
eventBus: eventBus,
summarizing: sync.Map{},
fallback: fallbackChain,
cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()),
steering: newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)),
}
al.hooks = NewHookManager(eventBus)
configureHookManagerFromConfig(al.hooks, cfg)
al.contextManager = al.resolveContextManager()
// Register shared tools to all agents (now that al is created)
registerSharedTools(al, cfg, msgBus, registry, provider)
@@ -157,6 +167,13 @@ func registerSharedTools(
provider providers.LLMProvider,
) {
allowReadPaths := buildAllowReadPatterns(cfg)
var ttsProvider tts.TTSProvider
if cfg.Tools.IsToolEnabled("send_tts") {
ttsProvider = tts.DetectTTS(cfg)
if ttsProvider == nil {
logger.WarnCF("voice-tts", "send_tts enabled but no TTS provider configured", nil)
}
}
for _, agentID := range registry.ListAgentIDs() {
agent, ok := registry.GetAgent(agentID)
@@ -267,6 +284,21 @@ func registerSharedTools(
agent.Tools.Register(sendFileTool)
}
if ttsProvider != nil {
agent.Tools.Register(tools.NewSendTTSTool(ttsProvider, nil))
}
if cfg.Tools.IsToolEnabled("load_image") {
loadImageTool := tools.NewLoadImageTool(
agent.Workspace,
cfg.Agents.Defaults.RestrictToWorkspace,
cfg.Agents.Defaults.GetMaxMediaSize(),
nil,
allowReadPaths,
)
agent.Tools.Register(loadImageTool)
}
// Skill discovery and installation tools
skills_enabled := cfg.Tools.IsToolEnabled("skills")
find_skills_enable := cfg.Tools.IsToolEnabled("find_skills")
@@ -309,6 +341,14 @@ func registerSharedTools(
subagentManager := tools.NewSubagentManager(provider, agent.Model, agent.Workspace)
subagentManager.SetLLMOptions(agent.MaxTokens, agent.Temperature)
// Inject a media resolver so the legacy RunToolLoop fallback path can
// resolve media:// refs in the same way the main AgentLoop does.
// This keeps subagent vision support working even when the optimized
// sub-turn spawner path is unavailable.
subagentManager.SetMediaResolver(func(msgs []providers.Message) []providers.Message {
return resolveMediaRefs(msgs, al.mediaStore, cfg.Agents.Defaults.GetMaxMediaSize())
})
// Set the spawner that links into AgentLoop's turnState
subagentManager.SetSpawner(func(
ctx context.Context,
@@ -1075,6 +1115,7 @@ func (al *AgentLoop) ReloadProviderAndConfig(
go func() {
defer func() {
if r := recover(); r != nil {
logger.RecoverPanicNoExit(r)
panicErr = fmt.Errorf("panic during registry creation: %v", r)
logger.ErrorCF("agent", "Panic during registry creation",
map[string]any{"panic": r})
@@ -1115,8 +1156,15 @@ func (al *AgentLoop) ReloadProviderAndConfig(
al.cfg = cfg
al.registry = registry
// Also update fallback chain with new config
al.fallback = providers.NewFallbackChain(providers.NewCooldownTracker())
// Also update fallback chain with new config; rebuild rate limiter registry.
newRL := providers.NewRateLimiterRegistry()
for _, agentID := range registry.ListAgentIDs() {
if agent, ok := registry.GetAgent(agentID); ok {
newRL.RegisterCandidates(agent.Candidates)
newRL.RegisterCandidates(agent.LightCandidates)
}
}
al.fallback = providers.NewFallbackChain(providers.NewCooldownTracker(), newRL)
al.mu.Unlock()
@@ -1174,10 +1222,15 @@ func (al *AgentLoop) SetMediaStore(s media.MediaStore) {
agent.Tools.SetMediaStore(s)
}
}
registry.ForEachTool("send_tts", func(t tools.Tool) {
if st, ok := t.(*tools.SendTTSTool); ok {
st.SetMediaStore(s)
}
})
}
// SetTranscriber injects a voice transcriber for agent-level audio transcription.
func (al *AgentLoop) SetTranscriber(t voice.Transcriber) {
func (al *AgentLoop) SetTranscriber(t asr.Transcriber) {
al.transcriber = t
}
@@ -1198,19 +1251,23 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou
// Transcribe each audio media ref in order.
var transcriptions []string
var keptMedia []string
for _, ref := range msg.Media {
path, meta, err := al.mediaStore.ResolveWithMeta(ref)
if err != nil {
logger.WarnCF("voice", "Failed to resolve media ref", map[string]any{"ref": ref, "error": err})
keptMedia = append(keptMedia, ref)
continue
}
if !utils.IsAudioFile(meta.Filename, meta.ContentType) {
keptMedia = append(keptMedia, ref)
continue
}
result, err := al.transcriber.Transcribe(ctx, path)
if err != nil {
logger.WarnCF("voice", "Transcription failed", map[string]any{"ref": ref, "error": err})
transcriptions = append(transcriptions, "")
keptMedia = append(keptMedia, ref)
continue
}
transcriptions = append(transcriptions, result.Text)
@@ -1230,15 +1287,21 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou
}
text := transcriptions[idx]
idx++
if text == "" {
return match
}
return "[voice: " + text + "]"
})
// Append any remaining transcriptions not matched by an annotation.
for ; idx < len(transcriptions); idx++ {
newContent += "\n[voice: " + transcriptions[idx] + "]"
if transcriptions[idx] != "" {
newContent += "\n[voice: " + transcriptions[idx] + "]"
}
}
msg.Content = newContent
msg.Media = keptMedia
return msg, true
}
@@ -1825,8 +1888,15 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
var history []providers.Message
var summary string
if !ts.opts.NoHistory {
history = ts.agent.Sessions.GetHistory(ts.sessionKey)
summary = ts.agent.Sessions.GetSummary(ts.sessionKey)
// ContextManager assembles budget-aware history and summary.
if resp, err := al.contextManager.Assemble(turnCtx, &AssembleRequest{
SessionKey: ts.sessionKey,
Budget: ts.agent.ContextWindow,
MaxTokens: ts.agent.MaxTokens,
}); err == nil && resp != nil {
history = resp.History
summary = resp.Summary
}
}
ts.captureRestorePoint(history, summary)
@@ -1851,22 +1921,28 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
if isOverContextBudget(ts.agent.ContextWindow, messages, toolDefs, ts.agent.MaxTokens) {
logger.WarnCF("agent", "Proactive compression: context budget exceeded before LLM call",
map[string]any{"session_key": ts.sessionKey})
if compression, ok := al.forceCompression(ts.agent, ts.sessionKey); ok {
al.emitEvent(
EventKindContextCompress,
ts.eventMeta("runTurn", "turn.context.compress"),
ContextCompressPayload{
Reason: ContextCompressReasonProactive,
DroppedMessages: compression.DroppedMessages,
RemainingMessages: compression.RemainingMessages,
},
)
ts.refreshRestorePointFromSession(ts.agent)
if err := al.contextManager.Compact(turnCtx, &CompactRequest{
SessionKey: ts.sessionKey,
Reason: ContextCompressReasonProactive,
Budget: ts.agent.ContextWindow,
}); err != nil {
logger.WarnCF("agent", "Proactive compact failed", map[string]any{
"session_key": ts.sessionKey,
"error": err.Error(),
})
}
ts.refreshRestorePointFromSession(ts.agent)
// Re-assemble from CM after compact.
if resp, err := al.contextManager.Assemble(turnCtx, &AssembleRequest{
SessionKey: ts.sessionKey,
Budget: ts.agent.ContextWindow,
MaxTokens: ts.agent.MaxTokens,
}); err == nil && resp != nil {
history = resp.History
summary = resp.Summary
}
newHistory := ts.agent.Sessions.GetHistory(ts.sessionKey)
newSummary := ts.agent.Sessions.GetSummary(ts.sessionKey)
messages = ts.agent.ContextBuilder.BuildMessages(
newHistory, newSummary, ts.userMessage,
history, summary, ts.userMessage,
ts.media, ts.channel, ts.chatID,
ts.opts.SenderID, ts.opts.SenderDisplayName,
activeSkillNames(ts.agent, ts.opts)...,
@@ -1888,6 +1964,7 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState) (turnResult, er
ts.agent.Sessions.AddMessage(ts.sessionKey, rootMsg.Role, rootMsg.Content)
}
ts.recordPersistedMessage(rootMsg)
ts.ingestMessage(turnCtx, al, rootMsg)
}
activeCandidates, activeModel, usedLight := al.selectCandidates(ts.agent, ts.userMessage, messages)
@@ -1963,6 +2040,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddFullMessage(ts.sessionKey, pm)
ts.recordPersistedMessage(pm)
ts.ingestMessage(turnCtx, al, pm)
}
logger.InfoCF("agent", "Injected steering message into context",
map[string]any{
@@ -2016,6 +2094,14 @@ turnLoop:
providerToolDefs = filtered
}
// Resolve media:// refs produced by tool results (e.g. load_image).
// Skipped on iteration 1 because inbound user media is already resolved
// before entering the loop; only subsequent iterations can contain new
// tool-generated media refs that need base64 encoding.
if iteration > 1 {
messages = resolveMediaRefs(messages, al.mediaStore, maxMediaSize)
}
callMessages := messages
if gracefulTerminal {
callMessages = append(append([]providers.Message(nil), messages...), ts.interruptHintMessage())
@@ -2115,7 +2201,11 @@ turnLoop:
providerCtx,
activeCandidates,
func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
return activeProvider.Chat(ctx, messagesForCall, toolDefsForCall, model, llmOpts)
candidateProvider := activeProvider
if cp, ok := ts.agent.CandidateProviders[providers.ModelKey(provider, model)]; ok {
candidateProvider = cp
}
return candidateProvider.Chat(ctx, messagesForCall, toolDefsForCall, model, llmOpts)
},
)
if fbErr != nil {
@@ -2221,23 +2311,28 @@ turnLoop:
))
}
if compression, ok := al.forceCompression(ts.agent, ts.sessionKey); ok {
al.emitEvent(
EventKindContextCompress,
ts.eventMeta("runTurn", "turn.context.compress"),
ContextCompressPayload{
Reason: ContextCompressReasonRetry,
DroppedMessages: compression.DroppedMessages,
RemainingMessages: compression.RemainingMessages,
},
)
ts.refreshRestorePointFromSession(ts.agent)
if compactErr := al.contextManager.Compact(turnCtx, &CompactRequest{
SessionKey: ts.sessionKey,
Reason: ContextCompressReasonRetry,
Budget: ts.agent.ContextWindow,
}); compactErr != nil {
logger.WarnCF("agent", "Context overflow compact failed", map[string]any{
"session_key": ts.sessionKey,
"error": compactErr.Error(),
})
}
ts.refreshRestorePointFromSession(ts.agent)
// Re-assemble from CM after compact.
if asmResp, asmErr := al.contextManager.Assemble(turnCtx, &AssembleRequest{
SessionKey: ts.sessionKey,
Budget: ts.agent.ContextWindow,
MaxTokens: ts.agent.MaxTokens,
}); asmErr == nil && asmResp != nil {
history = asmResp.History
summary = asmResp.Summary
}
newHistory := ts.agent.Sessions.GetHistory(ts.sessionKey)
newSummary := ts.agent.Sessions.GetSummary(ts.sessionKey)
messages = ts.agent.ContextBuilder.BuildMessages(
newHistory, newSummary, "",
history, summary, "",
nil, ts.channel, ts.chatID, ts.opts.SenderID, ts.opts.SenderDisplayName,
activeSkillNames(ts.agent, ts.opts)...,
)
@@ -2409,6 +2504,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddFullMessage(ts.sessionKey, assistantMsg)
ts.recordPersistedMessage(assistantMsg)
ts.ingestMessage(turnCtx, al, assistantMsg)
}
ts.setPhase(TurnPhaseTools)
@@ -2633,6 +2729,7 @@ turnLoop:
if toolResult == nil {
toolResult = tools.ErrorResult("hook returned nil tool result")
}
if len(toolResult.Media) > 0 && toolResult.ResponseHandled {
parts := make([]bus.MediaPart, 0, len(toolResult.Media))
for _, ref := range toolResult.Media {
@@ -2675,6 +2772,13 @@ turnLoop:
}
if len(toolResult.Media) > 0 && !toolResult.ResponseHandled {
// For tools like load_image that produce media refs without sending them
// to the user channel (ResponseHandled == false), both Media and ArtifactTags
// coexist on the result:
// - Media: carries media:// refs that resolveMediaRefs will base64-encode
// into image_url parts in the next LLM iteration (enabling vision).
// - ArtifactTags: exposes the local file path as a structured [file:…] tag
// in the tool result text, so the LLM knows an artifact was produced.
toolResult.ArtifactTags = buildArtifactTags(al.mediaStore, toolResult.Media)
}
@@ -2693,7 +2797,6 @@ turnLoop:
"content_len": len(toolResult.ForUser),
})
}
contentForLLM := toolResult.ContentForLLM()
// Filter sensitive data (API keys, tokens, secrets) before sending to LLM
@@ -2706,6 +2809,9 @@ turnLoop:
Content: contentForLLM,
ToolCallID: toolCallID,
}
if len(toolResult.Media) > 0 && !toolResult.ResponseHandled {
toolResultMsg.Media = append(toolResultMsg.Media, toolResult.Media...)
}
al.emitEvent(
EventKindToolExecEnd,
ts.eventMeta("runTurn", "turn.tool.end"),
@@ -2722,6 +2828,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddFullMessage(ts.sessionKey, toolResultMsg)
ts.recordPersistedMessage(toolResultMsg)
ts.ingestMessage(turnCtx, al, toolResultMsg)
}
if steerMsgs := al.dequeueSteeringMessagesForScope(ts.sessionKey); len(steerMsgs) > 0 {
@@ -2821,6 +2928,7 @@ turnLoop:
if !ts.opts.NoHistory {
ts.agent.Sessions.AddMessage(ts.sessionKey, summaryMsg.Role, summaryMsg.Content)
ts.recordPersistedMessage(summaryMsg)
ts.ingestMessage(turnCtx, al, summaryMsg)
if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
turnStatus = TurnEndStatusError
al.emitEvent(
@@ -2835,7 +2943,7 @@ turnLoop:
}
}
if ts.opts.EnableSummary {
al.maybeSummarize(ts.agent, ts.sessionKey, ts.scope)
al.contextManager.Compact(turnCtx, &CompactRequest{SessionKey: ts.sessionKey, Reason: ContextCompressReasonSummarize, Budget: ts.agent.ContextWindow})
}
ts.setPhase(TurnPhaseCompleted)
@@ -2890,6 +2998,7 @@ turnLoop:
finalMsg := providers.Message{Role: "assistant", Content: finalContent}
ts.agent.Sessions.AddMessage(ts.sessionKey, finalMsg.Role, finalMsg.Content)
ts.recordPersistedMessage(finalMsg)
ts.ingestMessage(turnCtx, al, finalMsg)
if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
turnStatus = TurnEndStatusError
al.emitEvent(
@@ -2905,7 +3014,14 @@ turnLoop:
}
if ts.opts.EnableSummary {
al.maybeSummarize(ts.agent, ts.sessionKey, ts.scope)
al.contextManager.Compact(
turnCtx,
&CompactRequest{
SessionKey: ts.sessionKey,
Reason: ContextCompressReasonSummarize,
Budget: ts.agent.ContextWindow,
},
)
}
ts.setPhase(TurnPhaseCompleted)
@@ -2984,103 +3100,28 @@ func (al *AgentLoop) selectCandidates(
return agent.LightCandidates, resolvedCandidateModel(agent.LightCandidates, agent.Router.LightModel()), true
}
// maybeSummarize triggers summarization if the session history exceeds thresholds.
func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey string, turnScope turnEventScope) {
newHistory := agent.Sessions.GetHistory(sessionKey)
tokenEstimate := al.estimateTokens(newHistory)
threshold := agent.ContextWindow * agent.SummarizeTokenPercent / 100
if len(newHistory) > agent.SummarizeMessageThreshold || tokenEstimate > threshold {
summarizeKey := agent.ID + ":" + sessionKey
if _, loading := al.summarizing.LoadOrStore(summarizeKey, true); !loading {
go func() {
defer al.summarizing.Delete(summarizeKey)
logger.Debug("Memory threshold reached. Optimizing conversation history...")
al.summarizeSession(agent, sessionKey, turnScope)
}()
}
// resolveContextManager selects the ContextManager implementation based on config.
func (al *AgentLoop) resolveContextManager() ContextManager {
name := al.cfg.Agents.Defaults.ContextManager
if name == "" || name == "legacy" {
return &legacyContextManager{al: al}
}
}
type compressionResult struct {
DroppedMessages int
RemainingMessages int
}
// forceCompression aggressively reduces context when the limit is hit.
// It drops the oldest ~50% of Turns (a Turn is a complete user→LLM→response
// cycle, as defined in #1316), so tool-call sequences are never split.
//
// If the history is a single Turn with no safe split point, the function
// falls back to keeping only the most recent user message. This breaks
// Turn atomicity as a last resort to avoid a context-exceeded loop.
//
// Session history contains only user/assistant/tool messages — the system
// prompt is built dynamically by BuildMessages and is NOT stored here.
// The compression note is recorded in the session summary so that
// BuildMessages can include it in the next system prompt.
func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) (compressionResult, bool) {
history := agent.Sessions.GetHistory(sessionKey)
if len(history) <= 2 {
return compressionResult{}, false
factory, ok := lookupContextManager(name)
if !ok {
logger.WarnCF("agent", "Unknown context manager, falling back to legacy", map[string]any{
"name": name,
})
return &legacyContextManager{al: al}
}
// Split at a Turn boundary so no tool-call sequence is torn apart.
// parseTurnBoundaries gives us the start of each Turn; we drop the
// oldest half of Turns and keep the most recent ones.
turns := parseTurnBoundaries(history)
var mid int
if len(turns) >= 2 {
mid = turns[len(turns)/2]
} else {
// Fewer than 2 Turns — fall back to message-level midpoint
// aligned to the nearest Turn boundary.
mid = findSafeBoundary(history, len(history)/2)
cm, err := factory(al.cfg.Agents.Defaults.ContextManagerConfig, al)
if err != nil {
logger.WarnCF("agent", "Failed to create context manager, falling back to legacy", map[string]any{
"name": name,
"error": err.Error(),
})
return &legacyContextManager{al: al}
}
var keptHistory []providers.Message
if mid <= 0 {
// No safe Turn boundary — the entire history is a single Turn
// (e.g. one user message followed by a massive tool response).
// Keeping everything would leave the agent stuck in a context-
// exceeded loop, so fall back to keeping only the most recent
// user message. This breaks Turn atomicity as a last resort.
for i := len(history) - 1; i >= 0; i-- {
if history[i].Role == "user" {
keptHistory = []providers.Message{history[i]}
break
}
}
} else {
keptHistory = history[mid:]
}
droppedCount := len(history) - len(keptHistory)
// Record compression in the session summary so BuildMessages includes it
// in the system prompt. We do not modify history messages themselves.
existingSummary := agent.Sessions.GetSummary(sessionKey)
compressionNote := fmt.Sprintf(
"[Emergency compression dropped %d oldest messages due to context limit]",
droppedCount,
)
if existingSummary != "" {
compressionNote = existingSummary + "\n\n" + compressionNote
}
agent.Sessions.SetSummary(sessionKey, compressionNote)
agent.Sessions.SetHistory(sessionKey, keptHistory)
agent.Sessions.Save(sessionKey)
logger.WarnCF("agent", "Forced compression executed", map[string]any{
"session_key": sessionKey,
"dropped_msgs": droppedCount,
"new_count": len(keptHistory),
})
return compressionResult{
DroppedMessages: droppedCount,
RemainingMessages: len(keptHistory),
}, true
return cm
}
// GetStartupInfo returns information about loaded tools and skills for logging.
@@ -3172,247 +3213,13 @@ func formatToolsForLog(toolDefs []providers.ToolDefinition) string {
}
// summarizeSession summarizes the conversation history for a session.
func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string, turnScope turnEventScope) {
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
history := agent.Sessions.GetHistory(sessionKey)
summary := agent.Sessions.GetSummary(sessionKey)
// Keep the most recent Turns for continuity, aligned to a Turn boundary
// so that no tool-call sequence is split.
if len(history) <= 4 {
return
}
safeCut := findSafeBoundary(history, len(history)-4)
if safeCut <= 0 {
return
}
keepCount := len(history) - safeCut
toSummarize := history[:safeCut]
// Oversized Message Guard
maxMessageTokens := agent.ContextWindow / 2
validMessages := make([]providers.Message, 0)
omitted := false
for _, m := range toSummarize {
if m.Role != "user" && m.Role != "assistant" {
continue
}
msgTokens := len(m.Content) / 2
if msgTokens > maxMessageTokens {
omitted = true
continue
}
validMessages = append(validMessages, m)
}
if len(validMessages) == 0 {
return
}
const (
maxSummarizationMessages = 10
llmMaxRetries = 3
llmTemperature = 0.3
fallbackMaxContentLength = 200
)
// Multi-Part Summarization
var finalSummary string
if len(validMessages) > maxSummarizationMessages {
mid := len(validMessages) / 2
mid = al.findNearestUserMessage(validMessages, mid)
part1 := validMessages[:mid]
part2 := validMessages[mid:]
s1, _ := al.summarizeBatch(ctx, agent, part1, "")
s2, _ := al.summarizeBatch(ctx, agent, part2, "")
mergePrompt := fmt.Sprintf(
"Merge these two conversation summaries into one cohesive summary:\n\n1: %s\n\n2: %s",
s1,
s2,
)
resp, err := al.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries)
if err == nil && resp.Content != "" {
finalSummary = resp.Content
} else {
finalSummary = s1 + " " + s2
}
} else {
finalSummary, _ = al.summarizeBatch(ctx, agent, validMessages, summary)
}
if omitted && finalSummary != "" {
finalSummary += "\n[Note: Some oversized messages were omitted from this summary for efficiency.]"
}
if finalSummary != "" {
agent.Sessions.SetSummary(sessionKey, finalSummary)
agent.Sessions.TruncateHistory(sessionKey, keepCount)
agent.Sessions.Save(sessionKey)
al.emitEvent(
EventKindSessionSummarize,
turnScope.meta(0, "summarizeSession", "turn.session.summarize"),
SessionSummarizePayload{
SummarizedMessages: len(validMessages),
KeptMessages: keepCount,
SummaryLen: len(finalSummary),
OmittedOversized: omitted,
},
)
}
}
// findNearestUserMessage finds the nearest user message to the given index.
// It searches backward first, then forward if no user message is found.
func (al *AgentLoop) findNearestUserMessage(messages []providers.Message, mid int) int {
originalMid := mid
for mid > 0 && messages[mid].Role != "user" {
mid--
}
if messages[mid].Role == "user" {
return mid
}
mid = originalMid
for mid < len(messages) && messages[mid].Role != "user" {
mid++
}
if mid < len(messages) {
return mid
}
return originalMid
}
// retryLLMCall calls the LLM with retry logic.
func (al *AgentLoop) retryLLMCall(
ctx context.Context,
agent *AgentInstance,
prompt string,
maxRetries int,
) (*providers.LLMResponse, error) {
const (
llmTemperature = 0.3
)
var resp *providers.LLMResponse
var err error
for attempt := 0; attempt < maxRetries; attempt++ {
al.activeRequests.Add(1)
resp, err = func() (*providers.LLMResponse, error) {
defer al.activeRequests.Done()
return agent.Provider.Chat(
ctx,
[]providers.Message{{Role: "user", Content: prompt}},
nil,
agent.Model,
map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": llmTemperature,
"prompt_cache_key": agent.ID,
},
)
}()
if err == nil && resp != nil && resp.Content != "" {
return resp, nil
}
if attempt < maxRetries-1 {
time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond)
}
}
return resp, err
}
// summarizeBatch summarizes a batch of messages.
func (al *AgentLoop) summarizeBatch(
ctx context.Context,
agent *AgentInstance,
batch []providers.Message,
existingSummary string,
) (string, error) {
const (
llmMaxRetries = 3
llmTemperature = 0.3
fallbackMinContentLength = 200
fallbackMaxContentPercent = 10
)
var sb strings.Builder
sb.WriteString(
"Provide a concise summary of this conversation segment, preserving core context and key points.\n",
)
if existingSummary != "" {
sb.WriteString("Existing context: ")
sb.WriteString(existingSummary)
sb.WriteString("\n")
}
sb.WriteString("\nCONVERSATION:\n")
for _, m := range batch {
fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Content)
}
prompt := sb.String()
response, err := al.retryLLMCall(ctx, agent, prompt, llmMaxRetries)
if err == nil && response.Content != "" {
return strings.TrimSpace(response.Content), nil
}
var fallback strings.Builder
fallback.WriteString("Conversation summary: ")
for i, m := range batch {
if i > 0 {
fallback.WriteString(" | ")
}
content := strings.TrimSpace(m.Content)
runes := []rune(content)
if len(runes) == 0 {
fallback.WriteString(fmt.Sprintf("%s: ", m.Role))
continue
}
keepLength := len(runes) * fallbackMaxContentPercent / 100
if keepLength < fallbackMinContentLength {
keepLength = fallbackMinContentLength
}
if keepLength > len(runes) {
keepLength = len(runes)
}
content = string(runes[:keepLength])
if keepLength < len(runes) {
content += "..."
}
fallback.WriteString(fmt.Sprintf("%s: %s", m.Role, content))
}
return fallback.String(), nil
}
// estimateTokens estimates the number of tokens in a message list.
// Counts Content, ToolCalls arguments, and ToolCallID metadata so that
// tool-heavy conversations are not systematically undercounted.
func (al *AgentLoop) estimateTokens(messages []providers.Message) int {
total := 0
for _, m := range messages {
total += estimateMessageTokens(m)
}
return total
}
func (al *AgentLoop) handleCommand(
ctx context.Context,
msg bus.InboundMessage,
@@ -3609,7 +3416,7 @@ func (al *AgentLoop) buildCommandsRuntime(agent *AgentInstance, opts *processOpt
return "", fmt.Errorf("failed to initialize model %q: %w", value, err)
}
nextCandidates := resolveModelCandidates(cfg, cfg.Agents.Defaults.Provider, modelCfg.Model, agent.Fallbacks)
nextCandidates := resolveModelCandidates(cfg, cfg.Agents.Defaults.Provider, value, agent.Fallbacks)
if len(nextCandidates) == 0 {
return "", fmt.Errorf("model %q did not resolve to any provider candidates", value)
}