feat(agent): add context usage ring indicator and /context command (#2537)

Add a context window usage indicator to the web chat UI and a /context slash command that works across all channels.

Backend:
- Add computeContextUsage() estimating history + system + tool tokens
- Attach ContextUsage to outbound messages via the pico WebSocket protocol
- Add /context command showing context stats as formatted text
- Add EstimateSystemTokens() on ContextBuilder for system prompt estimation

Frontend:
- Add ContextUsageRing component (SVG ring + hover/tap popover)
- Show usage percentage, token counts, and compression threshold
- Hover on desktop (150ms leave delay), tap on mobile
- "View Details" sends /context with 1s cooldown
- i18n support (en/zh) for popover labels

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-12 18:08:54 +00:00 · 2026-04-21 16:30:02 +08:00
parent 9c3dc0ee3a
commit 6ca7311273
19 changed files with 462 additions and 35 deletions
@@ -527,10 +527,11 @@ func (al *AgentLoop) runAgentLoop(
 				opts.Dispatch.ChatID(),
 				opts.Dispatch.ReplyToMessageID(),
 			),
-			AgentID:    agentID,
-			SessionKey: sessionKey,
-			Scope:      scope,
-			Content:    result.finalContent,
+			AgentID:      agentID,
+			SessionKey:   sessionKey,
+			Scope:        scope,
+			Content:      result.finalContent,
+			ContextUsage: computeContextUsage(agent, opts.Dispatch.SessionKey),
 		})
 	}

@@ -214,6 +214,24 @@ func (al *AgentLoop) buildCommandsRuntime(
 		rt.AskSideQuestion = func(ctx context.Context, question string) (string, error) {
 			return al.askSideQuestion(ctx, agent, opts, question)
 		}
+
+		rt.GetContextStats = func() *commands.ContextStats {
+			if opts == nil || agent.Sessions == nil {
+				return nil
+			}
+			usage := computeContextUsage(agent, opts.SessionKey)
+			if usage == nil {
+				return nil
+			}
+			history := agent.Sessions.GetHistory(opts.SessionKey)
+			return &commands.ContextStats{
+				UsedTokens:       usage.UsedTokens,
+				TotalTokens:      usage.TotalTokens,
+				CompressAtTokens: usage.CompressAtTokens,
+				UsedPercent:      usage.UsedPercent,
+				MessageCount:     len(history),
+			}
+		}
 	}
 	return rt
 }
@@ -60,10 +60,14 @@ func (al *AgentLoop) PublishResponseIfNeeded(ctx context.Context, channel, chatI
 		return
 	}

-	al.bus.PublishOutbound(ctx, bus.OutboundMessage{
+	msg := bus.OutboundMessage{
 		Context: bus.NewOutboundContext(channel, chatID, ""),
 		Content: response,
-	})
+	}
+	if sessionKey != "" {
+		msg.ContextUsage = computeContextUsage(al.agentForSession(sessionKey), sessionKey)
+	}
+	al.bus.PublishOutbound(ctx, msg)
 	logger.InfoCF("agent", "Published outbound response",
 		map[string]any{
 			"channel":     channel,
@@ -11,6 +11,7 @@ import (
 	"strings"
 	"sync"
 	"time"
+	"unicode/utf8"

 	"github.com/sipeed/picoclaw/pkg/config"
 	"github.com/sipeed/picoclaw/pkg/logger"
@@ -210,6 +211,36 @@ func (cb *ContextBuilder) BuildSystemPromptWithCache() string {
 	return prompt
 }

+// EstimateSystemTokens returns an approximate token count for the system
+// message, without needing the per-request inputs (media, channel, chatID,
+// sender, ...) that BuildMessages takes. It is intended to mirror how
+// BuildMessages composes that message.
+//
+// The estimate sums, in runes:
+//   - the cached static system prompt;
+//   - a fixed allowance standing in for the dynamic context;
+//   - the active-skills section plus one separator, when that section is non-empty;
+//   - the summary, its CONTEXT_SUMMARY prefix and one separator, when summary is non-empty.
+//
+// The rune total is then converted to tokens at 2 tokens per 5 runes.
+func (cb *ContextBuilder) EstimateSystemTokens(summary string, activeSkills []string) int {
+	const (
+		// Representative size of the time/runtime/session text emitted by
+		// buildDynamicContext, which is roughly 200-400 characters per request.
+		dynamicContextChars = 300
+		// Length of the "\n\n---\n\n" separator placed between sections.
+		separatorChars = 7
+		// Wrapper text expected in front of the summary; only its length is used here.
+		summaryPrefix = "CONTEXT_SUMMARY: The following is an approximate summary of prior conversation " +
+			"for reference only. It may be incomplete or outdated — always defer to explicit instructions.\n\n"
+	)
+
+	chars := dynamicContextChars + utf8.RuneCountInString(cb.BuildSystemPromptWithCache())
+
+	skills := cb.buildActiveSkillsContext(activeSkills)
+	if skills != "" {
+		chars += separatorChars + utf8.RuneCountInString(skills)
+	}
+
+	if summary != "" {
+		chars += utf8.RuneCountInString(summaryPrefix) + utf8.RuneCountInString(summary) + separatorChars
+	}
+
+	// Chars-to-tokens ratio; meant to match the heuristic used by
+	// tokenizer.EstimateMessageTokens.
+	return chars * 2 / 5
+}
+
 // InvalidateCache clears the cached system prompt.
 // Normally not needed because the cache auto-invalidates via mtime checks,
 // but this is useful for tests or explicit reload commands.
@@ -0,0 +1,78 @@
+package agent
+
+import (
+	"github.com/sipeed/picoclaw/pkg/bus"
+)
+
+// computeContextUsage estimates how much of the context window the given
+// session currently consumes for the given agent.
+//
+// "Used" is the sum of three estimates: the session history, the system
+// message (static prompt, dynamic context and summary, via
+// ContextBuilder.EstimateSystemTokens) and the tool definitions. The output
+// reserve (agent.MaxTokens) is not counted as used; instead it shrinks the
+// budget, which is intended to line up with isOverContextBudget's trigger:
+//
+//	compress when: history + system + tools + maxTokens > contextWindow
+//	equivalent to: history + system + tools > contextWindow - maxTokens
+//
+// When the reserve leaves no budget at all (MaxTokens >= ContextWindow) the
+// full context window is used as the threshold, so the reported percentage
+// stays meaningful instead of collapsing to zero.
+//
+// Returns nil when agent is nil, has no session store, or has a non-positive
+// context window. In the returned value UsedPercent is clamped to 100.
+func computeContextUsage(agent *AgentInstance, sessionKey string) *bus.ContextUsage {
+	if agent == nil || agent.Sessions == nil {
+		return nil
+	}
+	contextWindow := agent.ContextWindow
+	if contextWindow <= 0 {
+		return nil
+	}
+
+	// History tokens.
+	historyTokens := 0
+	for _, m := range agent.Sessions.GetHistory(sessionKey) {
+		historyTokens += EstimateMessageTokens(m)
+	}
+
+	// System message tokens, including the session summary.
+	systemTokens := 0
+	if agent.ContextBuilder != nil {
+		summary := agent.Sessions.GetSummary(sessionKey)
+		// Active skills are deliberately passed as nil: they are only injected
+		// when the user activates them via /use, so nil models the common case
+		// and avoids over-counting every installed skill.
+		systemTokens = agent.ContextBuilder.EstimateSystemTokens(summary, nil)
+	}
+
+	// Tool definition tokens.
+	toolTokens := 0
+	if agent.Tools != nil {
+		toolTokens = EstimateToolDefsTokens(agent.Tools.ToProviderDefs())
+	}
+
+	usedTokens := historyTokens + systemTokens + toolTokens
+
+	// Budget left for input once the output reserve is set aside. A reserve
+	// that swallows the whole window (<= 0 remaining) is treated as a
+	// misconfiguration and the full window is used instead; this keeps
+	// compressAt strictly positive.
+	compressAt := contextWindow - agent.MaxTokens
+	if compressAt <= 0 {
+		compressAt = contextWindow
+	}
+
+	// compressAt > 0 is guaranteed above, so the division is safe.
+	usedPercent := usedTokens * 100 / compressAt
+	if usedPercent > 100 {
+		usedPercent = 100
+	}
+
+	return &bus.ContextUsage{
+		UsedTokens:       usedTokens,
+		TotalTokens:      contextWindow,
+		CompressAtTokens: compressAt,
+		UsedPercent:      usedPercent,
+	}
+}
@@ -61,6 +61,15 @@ type OutboundScope struct {
 	Values     map[string]string `json:"values,omitempty"`
 }

+// ContextUsage reports a session's estimated consumption of the model's
+// context window and its distance from the compression threshold.
+type ContextUsage struct {
+	// UsedTokens is the estimated number of tokens currently in use.
+	UsedTokens int `json:"used_tokens"`
+	// TotalTokens is the model context window.
+	TotalTokens int `json:"total_tokens"`
+	// CompressAtTokens is the threshold that triggers compression.
+	CompressAtTokens int `json:"compress_at_tokens"`
+	// UsedPercent is in the range 0-100.
+	UsedPercent int `json:"used_percent"`
+}
+
 type OutboundMessage struct {
 	Channel          string         `json:"channel"`
 	ChatID           string         `json:"chat_id"`
@@ -70,6 +79,7 @@ type OutboundMessage struct {
 	Scope            *OutboundScope `json:"scope,omitempty"`
 	Content          string         `json:"content"`
 	ReplyToMessageID string         `json:"reply_to_message_id,omitempty"`
+	ContextUsage     *ContextUsage  `json:"context_usage,omitempty"`
 }

 // MediaPart describes a single media attachment to send.
@@ -262,10 +262,12 @@ func (c *PicoChannel) Send(ctx context.Context, msg bus.OutboundMessage) ([]stri
 	}
 	isThought := outboundMessageIsThought(msg)

-	outMsg := newMessage(TypeMessageCreate, map[string]any{
+	payload := map[string]any{
 		PayloadKeyContent: msg.Content,
 		PayloadKeyThought: isThought,
-	})
+	}
+	setContextUsagePayload(payload, msg.ContextUsage)
+	outMsg := newMessage(TypeMessageCreate, payload)

 	return nil, c.broadcastToSession(msg.ChatID, outMsg)
 }
@@ -716,3 +718,16 @@ func validateInlineImageDataURL(mediaURL string) error {

 	return nil
 }
+
+// setContextUsagePayload stores the figures from u in payload as a nested map
+// under the "context_usage" key. When u is nil the payload is left untouched,
+// so the key is simply absent from the outgoing message.
+func setContextUsagePayload(payload map[string]any, u *bus.ContextUsage) {
+	if u == nil {
+		return
+	}
+
+	usage := map[string]any{
+		"used_tokens":        u.UsedTokens,
+		"total_tokens":       u.TotalTokens,
+		"compress_at_tokens": u.CompressAtTokens,
+		"used_percent":       u.UsedPercent,
+	}
+	payload["context_usage"] = usage
+}
@@ -15,6 +15,7 @@ func BuiltinDefinitions() []Definition {
 		switchCommand(),
 		checkCommand(),
 		clearCommand(),
+		contextCommand(),
 		subagentsCommand(),
 		reloadCommand(),
 	}
@@ -0,0 +1,42 @@
+package commands
+
+import (
+	"context"
+	"fmt"
+)
+
+// contextCommand defines the /context command, which replies with the current
+// session's context and token usage.
+//
+// The handler replies with unavailableMsg when the runtime or its
+// GetContextStats callback is missing, with a fixed notice when the callback
+// returns nil, and otherwise with the text built by formatContextStats.
+func contextCommand() Definition {
+	handler := func(_ context.Context, req Request, rt *Runtime) error {
+		if rt == nil || rt.GetContextStats == nil {
+			return req.Reply(unavailableMsg)
+		}
+		if stats := rt.GetContextStats(); stats != nil {
+			return req.Reply(formatContextStats(stats))
+		}
+		return req.Reply("No active session context.")
+	}
+
+	return Definition{
+		Name:        "context",
+		Description: "Show current session context and token usage",
+		Usage:       "/context",
+		Handler:     handler,
+	}
+}
+
+func formatContextStats(s *ContextStats) string {
+	remaining := s.CompressAtTokens - s.UsedTokens
+	if remaining < 0 {
+		remaining = 0
+	}
+	usedWindowPercent := s.UsedTokens * 100 / max(s.TotalTokens, 1)
+	return fmt.Sprintf(
+		"Context usage  \nMessages: %d  \nUsed: ~%d / %d tokens (%d%%)  \nCompress at: %d tokens  \nCompression progress: %d%%  \nRemaining: ~%d tokens",
+		s.MessageCount,
+		s.UsedTokens,
+		s.TotalTokens,
+		usedWindowPercent,
+		s.CompressAtTokens,
+		s.UsedPercent,
+		remaining,
+	)
+}
@@ -6,6 +6,15 @@ import (
 	"github.com/sipeed/picoclaw/pkg/config"
 )

+// ContextStats is the snapshot of session context window usage handed to
+// command handlers.
+type ContextStats struct {
+	// UsedTokens is the number of tokens counted as used.
+	UsedTokens int
+	// TotalTokens is the model context window.
+	TotalTokens int
+	// CompressAtTokens is the compression threshold.
+	CompressAtTokens int
+	// UsedPercent is in the range 0-100.
+	UsedPercent int
+	// MessageCount is the number of messages in the session.
+	MessageCount int
+}
+
 // Runtime provides runtime dependencies to command handlers. It is constructed
 // per-request by the agent loop so that per-request state (like session scope)
 // can coexist with long-lived callbacks (like GetModelInfo).
@@ -18,6 +27,7 @@ type Runtime struct {
 	ListSkillNames     func() []string
 	GetEnabledChannels func() []string
 	GetActiveTurn      func() any // Returning any to avoid circular dependency with agent package
+	GetContextStats    func() *ContextStats
 	SwitchModel        func(value string) (oldModel string, err error)
 	SwitchChannel      func(value string) error
 	ClearHistory       func() error