fix(agent): prevent duplicate history during subturn context recoveries

Problem: During subturn context limit or truncation recoveries, the recovery loops repeatedly called `runAgentLoop` with the same or modified `UserMessage`. Because `runAgentLoop` unconditionally adds the `UserMessage` to the session history, this resulted in: 1. Duplicate User Messages polluting the history upon `context_length_exceeded` retries. 2. The possibility of injecting empty User Messages if `opts.UserMessage` was artificially blanked out to work around the duplication. 3. Messy or duplicate entries during `finish_reason="truncated"` recovery injections. Solution: - Introduce `SkipAddUserMessage` boolean to `processOptions` to explicitly control whether the agent loop should write the user prompt to history. - Add an explicit `opts.UserMessage != ""` check in `runAgentLoop` to prevent polluting history with empty message content. - In `subturn.go`'s recovery loop, set `SkipAddUserMessage: contextRetryCount > 0` to skip writing the user message on context
2026-06-12 18:08:54 +00:00 · 2026-03-18 12:18:32 +08:00
parent f8defe3ae1
commit c7ea018a73
6 changed files with 834 additions and 14 deletions
@@ -0,0 +1,173 @@
+// PicoClaw - Ultra-lightweight personal AI agent
+// Inspired by and based on nanobot: https://github.com/HKUDS/nanobot
+// License: MIT
+//
+// Copyright (c) 2026 PicoClaw contributors
+
+package utils
+
+import (
+	"encoding/json"
+	"fmt"
+	"unicode/utf8"
+
+	"github.com/sipeed/picoclaw/pkg/providers"
+)
+
+// CalculateDefaultMaxContextRunes computes a default context limit based on the model's context window.
+// Strategy: Use 75% of the context window and convert to rune estimate.
+//
+// Token-to-rune conversion ratios (conservative estimates):
+//   - English: ~4 chars per token
+//   - Chinese: ~1.5-2 chars per token
+//   - Mixed: ~3 chars per token (used here for safety)
+func CalculateDefaultMaxContextRunes(contextWindow int) int {
+	if contextWindow <= 0 {
+		// Conservative fallback when context window is unknown
+		return 8000 // ~2000 tokens
+	}
+
+	// Use 75% of context window to leave headroom
+	targetTokens := int(float64(contextWindow) * 0.75)
+
+	// Convert tokens to runes using conservative ratio
+	const avgCharsPerToken = 3
+	return targetTokens * avgCharsPerToken
+}
+
+// ResolveMaxContextRunes determines the final MaxContextRunes value to use.
+// Priority: explicit config > auto-calculate > conservative default
+func ResolveMaxContextRunes(configValue, contextWindow int) int {
+	switch {
+	case configValue > 0:
+		// Explicitly configured, use as-is
+		return configValue
+	case configValue == -1:
+		// Explicitly disabled
+		return -1
+	default:
+		// 0 or unset: auto-calculate
+		return CalculateDefaultMaxContextRunes(contextWindow)
+	}
+}
+
+// MeasureContextRunes calculates the total rune count of a message list.
+// Includes content, reasoning content, and estimates for tool calls.
+func MeasureContextRunes(messages []providers.Message) int {
+	totalRunes := 0
+	for _, msg := range messages {
+		totalRunes += utf8.RuneCountInString(msg.Content)
+		totalRunes += utf8.RuneCountInString(msg.ReasoningContent)
+
+		// Tool calls: serialize to JSON and count
+		if len(msg.ToolCalls) > 0 {
+			for _, tc := range msg.ToolCalls {
+				totalRunes += utf8.RuneCountInString(tc.Name)
+				// Arguments: serialize and count
+				if argsJSON, err := json.Marshal(tc.Arguments); err == nil {
+					totalRunes += utf8.RuneCountInString(string(argsJSON))
+				} else {
+					// Fallback estimate if serialization fails
+					totalRunes += 100
+				}
+			}
+		}
+
+		// ToolCallID
+		totalRunes += utf8.RuneCountInString(msg.ToolCallID)
+	}
+	return totalRunes
+}
+
+// TruncateContextSmart intelligently truncates message history to fit within maxRunes.
+//
+// Strategy:
+//  1. Always preserve system messages (they define the agent's behavior)
+//  2. Keep the most recent messages (they contain current context)
+//  3. Drop older middle messages when necessary
+//  4. Insert a truncation notice to inform the LLM
+//
+// Returns the truncated message list.
+func TruncateContextSmart(messages []providers.Message, maxRunes int) []providers.Message {
+	if len(messages) == 0 {
+		return messages
+	}
+
+	// Separate system messages from others
+	var systemMsgs []providers.Message
+	var otherMsgs []providers.Message
+
+	for _, msg := range messages {
+		if msg.Role == "system" {
+			systemMsgs = append(systemMsgs, msg)
+		} else {
+			otherMsgs = append(otherMsgs, msg)
+		}
+	}
+
+	// Calculate system message size
+	systemRunes := 0
+	for _, msg := range systemMsgs {
+		systemRunes += utf8.RuneCountInString(msg.Content)
+		systemRunes += utf8.RuneCountInString(msg.ReasoningContent)
+	}
+
+	// Reserve space for truncation notice (estimate ~80 runes)
+	const truncationNoticeEstimate = 80
+
+	// Allocate remaining space for other messages
+	remainingRunes := maxRunes - systemRunes - truncationNoticeEstimate
+	if remainingRunes <= 0 {
+		// System messages already exceed limit - return only system messages
+		return systemMsgs
+	}
+
+	// Collect recent messages in reverse order until we hit the limit
+	var keptMsgs []providers.Message
+	currentRunes := 0
+
+	for i := len(otherMsgs) - 1; i >= 0; i-- {
+		msg := otherMsgs[i]
+		msgRunes := utf8.RuneCountInString(msg.Content) +
+			utf8.RuneCountInString(msg.ReasoningContent)
+
+		// Estimate tool call size
+		if len(msg.ToolCalls) > 0 {
+			for _, tc := range msg.ToolCalls {
+				msgRunes += utf8.RuneCountInString(tc.Name)
+				if argsJSON, err := json.Marshal(tc.Arguments); err == nil {
+					msgRunes += utf8.RuneCountInString(string(argsJSON))
+				} else {
+					msgRunes += 100
+				}
+			}
+		}
+		msgRunes += utf8.RuneCountInString(msg.ToolCallID)
+
+		if currentRunes+msgRunes > remainingRunes {
+			// Would exceed limit, stop collecting
+			break
+		}
+
+		// Prepend to maintain chronological order
+		keptMsgs = append([]providers.Message{msg}, keptMsgs...)
+		currentRunes += msgRunes
+	}
+
+	// If we dropped messages, add a truncation notice
+	result := systemMsgs
+	if len(keptMsgs) < len(otherMsgs) {
+		droppedCount := len(otherMsgs) - len(keptMsgs)
+		truncationNotice := providers.Message{
+			Role: "system",
+			Content: fmt.Sprintf(
+				"[Context truncated: %d earlier messages omitted to stay within context limits]",
+				droppedCount,
+			),
+		}
+		result = append(result, truncationNotice)
+	}
+
+	result = append(result, keptMsgs...)
+	return result
+}