diff --git a/pkg/agent/agent_command.go b/pkg/agent/agent_command.go index 944c58f3d..efd5326cf 100644 --- a/pkg/agent/agent_command.go +++ b/pkg/agent/agent_command.go @@ -350,11 +350,12 @@ func (al *AgentLoop) buildCommandsRuntime( } history := agent.Sessions.GetHistory(opts.SessionKey) return &commands.ContextStats{ - UsedTokens: usage.UsedTokens, - TotalTokens: usage.TotalTokens, - CompressAtTokens: usage.CompressAtTokens, - UsedPercent: usage.UsedPercent, - MessageCount: len(history), + UsedTokens: usage.UsedTokens, + TotalTokens: usage.TotalTokens, + CompressAtTokens: usage.CompressAtTokens, + SummarizeAtTokens: usage.SummarizeAtTokens, + UsedPercent: usage.UsedPercent, + MessageCount: len(history), } } } diff --git a/pkg/agent/context_usage.go b/pkg/agent/context_usage.go index 39d4f3dee..9b9222401 100644 --- a/pkg/agent/context_usage.go +++ b/pkg/agent/context_usage.go @@ -61,6 +61,13 @@ func computeContextUsage(agent *AgentInstance, sessionKey string) *bus.ContextUs // proactive trigger (msgTokens + toolTokens + maxTokens > contextWindow). compressAt := effectiveWindow + // summarizeAt = soft summarization trigger: matches maybeSummarize's + // threshold (contextWindow * SummarizeTokenPercent / 100). + summarizeAt := contextWindow * agent.SummarizeTokenPercent / 100 + if summarizeAt <= 0 { + summarizeAt = compressAt + } + usedPercent := 0 if compressAt > 0 { usedPercent = usedTokens * 100 / compressAt @@ -70,9 +77,10 @@ func computeContextUsage(agent *AgentInstance, sessionKey string) *bus.ContextUs } return &bus.ContextUsage{ - UsedTokens: usedTokens, - TotalTokens: contextWindow, - CompressAtTokens: compressAt, - UsedPercent: usedPercent, + UsedTokens: usedTokens, + TotalTokens: contextWindow, + CompressAtTokens: compressAt, + SummarizeAtTokens: summarizeAt, + UsedPercent: usedPercent, } } diff --git a/pkg/bus/types.go b/pkg/bus/types.go index 953e69d9c..6863a49ec 100644 --- a/pkg/bus/types.go +++ b/pkg/bus/types.go @@ -64,10 +64,11 @@ type OutboundScope struct { // ContextUsage describes how much of the model's context window the current // session consumes, and how far it is from triggering compression. type ContextUsage struct { - UsedTokens int `json:"used_tokens"` - TotalTokens int `json:"total_tokens"` // model context window - CompressAtTokens int `json:"compress_at_tokens"` // threshold that triggers compression - UsedPercent int `json:"used_percent"` // 0-100 + UsedTokens int `json:"used_tokens"` + TotalTokens int `json:"total_tokens"` // model context window + CompressAtTokens int `json:"compress_at_tokens"` // hard budget compression threshold (contextWindow - maxTokens) + SummarizeAtTokens int `json:"summarize_at_tokens"` // soft summarization trigger (contextWindow * summarizeTokenPercent / 100) + UsedPercent int `json:"used_percent"` // 0-100, relative to compressAt } type OutboundMessage struct { diff --git a/pkg/channels/pico/pico.go b/pkg/channels/pico/pico.go index 3c3938989..adceecad7 100644 --- a/pkg/channels/pico/pico.go +++ b/pkg/channels/pico/pico.go @@ -1394,10 +1394,11 @@ func setContextUsagePayload(payload map[string]any, u *bus.ContextUsage) { return } payload["context_usage"] = map[string]any{ - "used_tokens": u.UsedTokens, - "total_tokens": u.TotalTokens, - "compress_at_tokens": u.CompressAtTokens, - "used_percent": u.UsedPercent, + "used_tokens": u.UsedTokens, + "total_tokens": u.TotalTokens, + "compress_at_tokens": u.CompressAtTokens, + "summarize_at_tokens": u.SummarizeAtTokens, + "used_percent": u.UsedPercent, } } diff --git a/pkg/commands/cmd_context.go b/pkg/commands/cmd_context.go index 55481662c..4b4eddfa6 100644 --- a/pkg/commands/cmd_context.go +++ b/pkg/commands/cmd_context.go @@ -29,14 +29,16 @@ func formatContextStats(s *ContextStats) string { remaining = 0 } usedWindowPercent := s.UsedTokens * 100 / max(s.TotalTokens, 1) - return fmt.Sprintf( - "Context usage \nMessages: %d \nUsed: ~%d / %d tokens (%d%%) \nCompress at: %d tokens \nCompression progress: %d%% \nRemaining: ~%d tokens", + msg := fmt.Sprintf( + "Context usage \nMessages: %d \nUsed: ~%d / %d tokens (%d%%) \nCompress at: %d tokens \nSummarize at: %d tokens \nCompression progress: %d%% \nRemaining: ~%d tokens", s.MessageCount, s.UsedTokens, s.TotalTokens, usedWindowPercent, s.CompressAtTokens, + s.SummarizeAtTokens, s.UsedPercent, remaining, ) + return msg } diff --git a/pkg/commands/runtime.go b/pkg/commands/runtime.go index b0327c863..ba7d003c4 100644 --- a/pkg/commands/runtime.go +++ b/pkg/commands/runtime.go @@ -29,11 +29,12 @@ type MCPToolInfo struct { // ContextStats describes current session context window usage. type ContextStats struct { - UsedTokens int - TotalTokens int // model context window - CompressAtTokens int // compression threshold - UsedPercent int // 0-100 - MessageCount int + UsedTokens int + TotalTokens int // model context window + CompressAtTokens int // hard budget compression threshold + SummarizeAtTokens int // soft summarization trigger + UsedPercent int // 0-100 + MessageCount int } // StopResult describes the outcome of a stop request for the current session. diff --git a/web/frontend/src/components/chat/context-usage-ring.tsx b/web/frontend/src/components/chat/context-usage-ring.tsx index 037a20cef..7a33dc2fe 100644 --- a/web/frontend/src/components/chat/context-usage-ring.tsx +++ b/web/frontend/src/components/chat/context-usage-ring.tsx @@ -145,6 +145,23 @@ export function ContextUsageRing({ /> +
+
+ Compress at + + {formatTokens(usage.compress_at_tokens)} + +
+ {usage.summarize_at_tokens != null && usage.summarize_at_tokens > 0 && ( +
+ Summarize at + + {formatTokens(usage.summarize_at_tokens)} + +
+ )} +
+