mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
9c82b0baa2
Separate context_window from max_tokens — they serve different purposes (input capacity vs. output generation limit). The previous conflation caused premature summarization or missed compression triggers.
Changes:
- Add context_window field to AgentDefaults config (default: 4x max_tokens)
- Extract boundary-safe truncation helpers (isSafeBoundary, findSafeBoundary) into context_budget.go — pure functions with no AgentLoop dependency
- forceCompression: align split to safe boundary so tool-call sequences (assistant+ToolCalls → tool results) are never torn apart
- summarizeSession: use findSafeBoundary instead of hardcoded keep-last-4
- estimateTokens: count ToolCalls arguments and ToolCallID metadata, not just Content — fixes systematic undercounting in tool-heavy sessions
- Add proactive context budget check before LLM call in runAgentLoop, preventing 400 context-length errors instead of reacting to them
- Add estimateToolDefsTokens for tool definition token cost
Closes #556, closes #665. Ref #1439.
134 lines
3.6 KiB
Go
134 lines
3.6 KiB
Go
// PicoClaw - Ultra-lightweight personal AI agent
|
|
// License: MIT
|
|
//
|
|
// Copyright (c) 2026 PicoClaw contributors
|
|
|
|
package agent
|
|
|
|
import (
|
|
"encoding/json"
|
|
"unicode/utf8"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/providers"
|
|
)
|
|
|
|
// isSafeBoundary reports whether index is a valid position to split a message
|
|
// history for truncation or compression. Splitting at index means:
|
|
// - history[:index] is dropped or summarized
|
|
// - history[index:] is kept
|
|
//
|
|
// A boundary is safe when the kept portion begins at a "user" message,
|
|
// ensuring no tool-call sequence (assistant+ToolCalls → tool results)
|
|
// is torn apart across the split.
|
|
func isSafeBoundary(history []providers.Message, index int) bool {
|
|
if index <= 0 || index >= len(history) {
|
|
return true
|
|
}
|
|
return history[index].Role == "user"
|
|
}
|
|
|
|
// findSafeBoundary locates the nearest safe split point to targetIndex.
|
|
// It scans backward first (preserving more context), then forward.
|
|
// Returns targetIndex unchanged only when no safe boundary exists.
|
|
func findSafeBoundary(history []providers.Message, targetIndex int) int {
|
|
if len(history) == 0 {
|
|
return 0
|
|
}
|
|
if targetIndex <= 0 {
|
|
return 0
|
|
}
|
|
if targetIndex >= len(history) {
|
|
return len(history)
|
|
}
|
|
|
|
if isSafeBoundary(history, targetIndex) {
|
|
return targetIndex
|
|
}
|
|
|
|
// Backward scan: prefer keeping more messages.
|
|
for i := targetIndex - 1; i > 0; i-- {
|
|
if isSafeBoundary(history, i) {
|
|
return i
|
|
}
|
|
}
|
|
|
|
// Forward scan: fall back to keeping fewer messages.
|
|
for i := targetIndex + 1; i < len(history); i++ {
|
|
if isSafeBoundary(history, i) {
|
|
return i
|
|
}
|
|
}
|
|
|
|
return targetIndex
|
|
}
|
|
|
|
// estimateMessageTokens estimates the token count for a single message,
|
|
// including Content, ToolCalls arguments, and ToolCallID metadata.
|
|
// Uses a heuristic of 2.5 characters per token.
|
|
func estimateMessageTokens(msg providers.Message) int {
|
|
chars := utf8.RuneCountInString(msg.Content)
|
|
|
|
for _, tc := range msg.ToolCalls {
|
|
// Count tool call metadata: ID, type, function name
|
|
chars += len(tc.ID) + len(tc.Type) + len(tc.Name)
|
|
if tc.Function != nil {
|
|
chars += len(tc.Function.Name) + len(tc.Function.Arguments)
|
|
}
|
|
}
|
|
|
|
if msg.ToolCallID != "" {
|
|
chars += len(msg.ToolCallID)
|
|
}
|
|
|
|
// Per-message overhead for role label, JSON structure, separators.
|
|
const messageOverhead = 12
|
|
chars += messageOverhead
|
|
|
|
return chars * 2 / 5
|
|
}
|
|
|
|
// estimateToolDefsTokens estimates the total token cost of tool definitions
|
|
// as they appear in the LLM request. Each tool's name, description, and
|
|
// JSON schema parameters contribute to the context window budget.
|
|
func estimateToolDefsTokens(defs []providers.ToolDefinition) int {
|
|
if len(defs) == 0 {
|
|
return 0
|
|
}
|
|
|
|
totalChars := 0
|
|
for _, d := range defs {
|
|
totalChars += len(d.Function.Name) + len(d.Function.Description)
|
|
|
|
if d.Function.Parameters != nil {
|
|
if paramJSON, err := json.Marshal(d.Function.Parameters); err == nil {
|
|
totalChars += len(paramJSON)
|
|
}
|
|
}
|
|
|
|
// Per-tool overhead: type field, JSON structure, separators.
|
|
totalChars += 20
|
|
}
|
|
|
|
return totalChars * 2 / 5
|
|
}
|
|
|
|
// isOverContextBudget checks whether the assembled messages plus tool definitions
|
|
// and output reserve would exceed the model's context window. This enables
|
|
// proactive compression before calling the LLM, rather than reacting to 400 errors.
|
|
func isOverContextBudget(
|
|
contextWindow int,
|
|
messages []providers.Message,
|
|
toolDefs []providers.ToolDefinition,
|
|
maxTokens int,
|
|
) bool {
|
|
msgTokens := 0
|
|
for _, m := range messages {
|
|
msgTokens += estimateMessageTokens(m)
|
|
}
|
|
|
|
toolTokens := estimateToolDefsTokens(toolDefs)
|
|
total := msgTokens + toolTokens + maxTokens
|
|
|
|
return total > contextWindow
|
|
}
|