diff --git a/pkg/agent/agent_command.go b/pkg/agent/agent_command.go index 944c58f3d..277d1bda8 100644 --- a/pkg/agent/agent_command.go +++ b/pkg/agent/agent_command.go @@ -350,11 +350,13 @@ func (al *AgentLoop) buildCommandsRuntime( } history := agent.Sessions.GetHistory(opts.SessionKey) return &commands.ContextStats{ - UsedTokens: usage.UsedTokens, - TotalTokens: usage.TotalTokens, - CompressAtTokens: usage.CompressAtTokens, - UsedPercent: usage.UsedPercent, - MessageCount: len(history), + UsedTokens: usage.UsedTokens, + TotalTokens: usage.TotalTokens, + HistoryTokens: usage.HistoryTokens, + CompressAtTokens: usage.CompressAtTokens, + SummarizeAtTokens: usage.SummarizeAtTokens, + UsedPercent: usage.UsedPercent, + MessageCount: len(history), } } } diff --git a/pkg/agent/context_usage.go b/pkg/agent/context_usage.go index 39d4f3dee..1463dde95 100644 --- a/pkg/agent/context_usage.go +++ b/pkg/agent/context_usage.go @@ -61,6 +61,17 @@ func computeContextUsage(agent *AgentInstance, sessionKey string) *bus.ContextUs // proactive trigger (msgTokens + toolTokens + maxTokens > contextWindow). compressAt := effectiveWindow + // summarizeAt = soft summarization trigger: matches maybeSummarize's + // threshold (contextWindow * SummarizeTokenPercent / 100). + // + // The engine compares this against history-message tokens ONLY (not + // UsedTokens). HistoryTokens is exposed alongside UsedTokens so the + // UI can show both values and avoid user confusion. + summarizeAt := contextWindow * agent.SummarizeTokenPercent / 100 + if summarizeAt <= 0 { + summarizeAt = compressAt + } + usedPercent := 0 if compressAt > 0 { usedPercent = usedTokens * 100 / compressAt @@ -70,9 +81,11 @@ func computeContextUsage(agent *AgentInstance, sessionKey string) *bus.ContextUs } return &bus.ContextUsage{ - UsedTokens: usedTokens, - TotalTokens: contextWindow, - CompressAtTokens: compressAt, - UsedPercent: usedPercent, + UsedTokens: usedTokens, + TotalTokens: contextWindow, + HistoryTokens: historyTokens, + CompressAtTokens: compressAt, + SummarizeAtTokens: summarizeAt, + UsedPercent: usedPercent, } } diff --git a/pkg/bus/types.go b/pkg/bus/types.go index 953e69d9c..0e23ea0f1 100644 --- a/pkg/bus/types.go +++ b/pkg/bus/types.go @@ -64,10 +64,12 @@ type OutboundScope struct { // ContextUsage describes how much of the model's context window the current // session consumes, and how far it is from triggering compression. type ContextUsage struct { - UsedTokens int `json:"used_tokens"` - TotalTokens int `json:"total_tokens"` // model context window - CompressAtTokens int `json:"compress_at_tokens"` // threshold that triggers compression - UsedPercent int `json:"used_percent"` // 0-100 + UsedTokens int `json:"used_tokens"` + TotalTokens int `json:"total_tokens"` // model context window + HistoryTokens int `json:"history_tokens"` // history-message tokens only (what maybeSummarize checks) + CompressAtTokens int `json:"compress_at_tokens"` // hard budget compression threshold (contextWindow - maxTokens) + SummarizeAtTokens int `json:"summarize_at_tokens"` // soft summarization trigger (vs history tokens) + UsedPercent int `json:"used_percent"` // 0-100, relative to compressAt } type OutboundMessage struct { diff --git a/pkg/channels/pico/pico.go b/pkg/channels/pico/pico.go index 3c3938989..587f889ab 100644 --- a/pkg/channels/pico/pico.go +++ b/pkg/channels/pico/pico.go @@ -1394,10 +1394,12 @@ func setContextUsagePayload(payload map[string]any, u *bus.ContextUsage) { return } payload["context_usage"] = map[string]any{ - "used_tokens": u.UsedTokens, - "total_tokens": u.TotalTokens, - "compress_at_tokens": u.CompressAtTokens, - "used_percent": u.UsedPercent, + "used_tokens": u.UsedTokens, + "total_tokens": u.TotalTokens, + "history_tokens": u.HistoryTokens, + "compress_at_tokens": u.CompressAtTokens, + "summarize_at_tokens": u.SummarizeAtTokens, + "used_percent": u.UsedPercent, } } diff --git a/pkg/channels/pico/pico_test.go b/pkg/channels/pico/pico_test.go index 9cdf79044..b776b4149 100644 --- a/pkg/channels/pico/pico_test.go +++ b/pkg/channels/pico/pico_test.go @@ -602,10 +602,12 @@ func TestBeginStream_FinalizeIncludesContextUsage(t *testing.T) { t.Fatal("streamer should support FinalizeWithContext") } if err := contextStreamer.FinalizeWithContext(context.Background(), "final", &bus.ContextUsage{ - UsedTokens: 10, - TotalTokens: 100, - CompressAtTokens: 80, - UsedPercent: 10, + UsedTokens: 10, + TotalTokens: 100, + HistoryTokens: 5, + CompressAtTokens: 80, + SummarizeAtTokens: 60, + UsedPercent: 10, }); err != nil { t.Fatalf("FinalizeWithContext() error = %v", err) } @@ -627,6 +629,12 @@ func TestBeginStream_FinalizeIncludesContextUsage(t *testing.T) { if got := rawUsage["used_tokens"]; got != float64(10) { t.Fatalf("used_tokens = %#v, want 10", got) } + if got := rawUsage["history_tokens"]; got != float64(5) { + t.Fatalf("history_tokens = %#v, want 5", got) + } + if got := rawUsage["summarize_at_tokens"]; got != float64(60) { + t.Fatalf("summarize_at_tokens = %#v, want 60", got) + } } func TestCreateAndAddConnection_RespectsMaxConnectionsConcurrently(t *testing.T) { diff --git a/pkg/commands/cmd_context.go b/pkg/commands/cmd_context.go index 55481662c..22a441d42 100644 --- a/pkg/commands/cmd_context.go +++ b/pkg/commands/cmd_context.go @@ -29,14 +29,17 @@ func formatContextStats(s *ContextStats) string { remaining = 0 } usedWindowPercent := s.UsedTokens * 100 / max(s.TotalTokens, 1) - return fmt.Sprintf( - "Context usage \nMessages: %d \nUsed: ~%d / %d tokens (%d%%) \nCompress at: %d tokens \nCompression progress: %d%% \nRemaining: ~%d tokens", + msg := fmt.Sprintf( + "Context usage \nMessages: %d \nUsed: ~%d / %d tokens (%d%%) \nHistory: ~%d tokens \nCompress at: %d tokens \nSummarize at: %d tokens \nCompression progress: %d%% \nRemaining: ~%d tokens", s.MessageCount, s.UsedTokens, s.TotalTokens, usedWindowPercent, + s.HistoryTokens, s.CompressAtTokens, + s.SummarizeAtTokens, s.UsedPercent, remaining, ) + return msg } diff --git a/pkg/commands/runtime.go b/pkg/commands/runtime.go index b0327c863..2c773eb0f 100644 --- a/pkg/commands/runtime.go +++ b/pkg/commands/runtime.go @@ -29,11 +29,13 @@ type MCPToolInfo struct { // ContextStats describes current session context window usage. type ContextStats struct { - UsedTokens int - TotalTokens int // model context window - CompressAtTokens int // compression threshold - UsedPercent int // 0-100 - MessageCount int + UsedTokens int + TotalTokens int // model context window + HistoryTokens int // history-only tokens (what maybeSummarize checks) + CompressAtTokens int // hard budget compression threshold + SummarizeAtTokens int // soft summarization trigger + UsedPercent int // 0-100 + MessageCount int } // StopResult describes the outcome of a stop request for the current session. diff --git a/web/frontend/src/components/chat/context-usage-ring.tsx b/web/frontend/src/components/chat/context-usage-ring.tsx index 037a20cef..000bcb4dd 100644 --- a/web/frontend/src/components/chat/context-usage-ring.tsx +++ b/web/frontend/src/components/chat/context-usage-ring.tsx @@ -145,6 +145,31 @@ export function ContextUsageRing({ /> +