picoclaw/pkg/agent/context_budget.go

// PicoClaw - Ultra-lightweight personal AI agent
// License: MIT
//
// Copyright (c) 2026 PicoClaw contributors

package agent

import (
	"encoding/json"
	"unicode/utf8"

	"github.com/sipeed/picoclaw/pkg/providers"
)

// isSafeBoundary reports whether index is a valid position to split a message
// history for truncation or compression. Splitting at index means:
//   - history[:index] is dropped or summarized
//   - history[index:] is kept
//
// A boundary is safe when the kept portion begins at a "user" message,
// ensuring no tool-call sequence (assistant+ToolCalls → tool results)
// is torn apart across the split.
func isSafeBoundary(history []providers.Message, index int) bool {
	if index <= 0 || index >= len(history) {
		return true
	}
	return history[index].Role == "user"
}

// findSafeBoundary locates the nearest safe split point to targetIndex.
// It scans backward first (preserving more context), then forward.
// Returns targetIndex unchanged only when no safe boundary exists.
func findSafeBoundary(history []providers.Message, targetIndex int) int {
	if len(history) == 0 {
		return 0
	}
	if targetIndex <= 0 {
		return 0
	}
	if targetIndex >= len(history) {
		return len(history)
	}

	if isSafeBoundary(history, targetIndex) {
		return targetIndex
	}

	// Backward scan: prefer keeping more messages.
	for i := targetIndex - 1; i > 0; i-- {
		if isSafeBoundary(history, i) {
			return i
		}
	}

	// Forward scan: fall back to keeping fewer messages.
	for i := targetIndex + 1; i < len(history); i++ {
		if isSafeBoundary(history, i) {
			return i
		}
	}

	return targetIndex
}

// estimateMessageTokens estimates the token count for a single message,
// including Content, ReasoningContent, ToolCalls arguments, ToolCallID
// metadata, and Media items. Uses a heuristic of 2.5 characters per token.
func estimateMessageTokens(msg providers.Message) int {
	chars := utf8.RuneCountInString(msg.Content)

	// ReasoningContent (extended thinking / chain-of-thought) can be
	// substantial and is stored in session history via AddFullMessage.
	if msg.ReasoningContent != "" {
		chars += utf8.RuneCountInString(msg.ReasoningContent)
	}

	for _, tc := range msg.ToolCalls {
		// Count tool call metadata: ID, type, function name
		chars += len(tc.ID) + len(tc.Type) + len(tc.Name)
		if tc.Function != nil {
			chars += len(tc.Function.Name) + len(tc.Function.Arguments)
		}
	}

	if msg.ToolCallID != "" {
		chars += len(msg.ToolCallID)
	}

	// Media items (images, files) are serialized by provider adapters into
	// multipart or image_url payloads. Use a fixed per-item estimate since
	// actual token cost depends on resolution and provider tokenization.
	const mediaTokensPerItem = 256
	chars += len(msg.Media) * mediaTokensPerItem

	// Per-message overhead for role label, JSON structure, separators.
	const messageOverhead = 12
	chars += messageOverhead

	return chars * 2 / 5
}

// estimateToolDefsTokens estimates the total token cost of tool definitions
// as they appear in the LLM request. Each tool's name, description, and
// JSON schema parameters contribute to the context window budget.
func estimateToolDefsTokens(defs []providers.ToolDefinition) int {
	if len(defs) == 0 {
		return 0
	}

	totalChars := 0
	for _, d := range defs {
		totalChars += len(d.Function.Name) + len(d.Function.Description)

		if d.Function.Parameters != nil {
			if paramJSON, err := json.Marshal(d.Function.Parameters); err == nil {
				totalChars += len(paramJSON)
			}
		}

		// Per-tool overhead: type field, JSON structure, separators.
		totalChars += 20
	}

	return totalChars * 2 / 5
}

// isOverContextBudget checks whether the assembled messages plus tool definitions
// and output reserve would exceed the model's context window. This enables
// proactive compression before calling the LLM, rather than reacting to 400 errors.
func isOverContextBudget(
	contextWindow int,
	messages []providers.Message,
	toolDefs []providers.ToolDefinition,
	maxTokens int,
) bool {
	msgTokens := 0
	for _, m := range messages {
		msgTokens += estimateMessageTokens(m)
	}

	toolTokens := estimateToolDefsTokens(toolDefs)
	total := msgTokens + toolTokens + maxTokens

	return total > contextWindow
}