mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
e20ff43f8b
This commit addresses several critical concurrency and state management bugs within the SubTurn execution and delivery logic. 1. Fix Goroutine Leak & Deadlock in deliverSubTurnResult: - Replaced non-blocking select with a safe blocking select that listens to `resultChan` and a new `<-parentTS.Finished()` channel. - This ensures results are not arbitrarily dropped when the channel is full (preventing orphaned valid results), while also guaranteeing the child goroutine safely unblocks and exits if the parent finishes execution early. 2. Prevent "Send on Closed Channel" Fatal Panics: - Removed `close(pendingResults)` and `drainPendingResults` from `turnState.Finish()`. - The pendingResults channel is now naturally garbage collected, completely eliminating the race condition panic when a child attempts delivery at the exact moment the parent finishes. - Added a `defer recover()` failsafe inside deliverSubTurnResult to gracefully emit Orphan events in extreme edge cases. 3. Fix Truncation Recovery Prompt Drop: - Fixed the runTurn truncation retry logic by introducing an explicit `promptAlreadyAdded` boolean. - Ensures that the dynamically generated `recoveryPrompt` is correctly injected into the LLM history sequence on subsequent iterations, adhering to API roles without duplicating arrays. 4. Test Suite Stabilization: - Fixed TestDeliverSubTurnResultNoDeadlock to accurately wait for deterministic deliveries instead of racing timeouts. - Replaced defunct closed-channel tests with TestFinishedChannelClosedState matching the new Finished() mechanism. - Fixed the Finish(true) parameter in TestGrandchildAbort_CascadingCancellation to correctly validate Context cascade behavior. - All tests now pass cleanly without hanging or emitting false positives.
623 lines
21 KiB
Go
623 lines
21 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/logger"
|
|
"github.com/sipeed/picoclaw/pkg/providers"
|
|
"github.com/sipeed/picoclaw/pkg/tools"
|
|
"github.com/sipeed/picoclaw/pkg/utils"
|
|
)
|
|
|
|
// ====================== Config & Constants ======================
|
|
const (
|
|
maxSubTurnDepth = 3
|
|
maxConcurrentSubTurns = 5
|
|
// concurrencyTimeout is the maximum time to wait for a concurrency slot.
|
|
// This prevents indefinite blocking when all slots are occupied by slow sub-turns.
|
|
concurrencyTimeout = 30 * time.Second
|
|
// maxEphemeralHistorySize limits the number of messages stored in ephemeral sessions.
|
|
// This prevents memory accumulation in long-running sub-turns.
|
|
maxEphemeralHistorySize = 50
|
|
// defaultSubTurnTimeout is the default maximum duration for a SubTurn.
|
|
// SubTurns that run longer than this will be cancelled.
|
|
defaultSubTurnTimeout = 5 * time.Minute
|
|
)
|
|
|
|
var (
|
|
ErrDepthLimitExceeded = errors.New("sub-turn depth limit exceeded")
|
|
ErrInvalidSubTurnConfig = errors.New("invalid sub-turn config")
|
|
ErrConcurrencyTimeout = errors.New("timeout waiting for concurrency slot")
|
|
)
|
|
|
|
// ====================== SubTurn Config ======================
|
|
|
|
// SubTurnConfig configures the execution of a child sub-turn.
|
|
//
|
|
// Usage Examples:
|
|
//
|
|
// Synchronous sub-turn (Async=false):
|
|
//
|
|
// cfg := SubTurnConfig{
|
|
// Model: "gpt-4o-mini",
|
|
// SystemPrompt: "Analyze this code",
|
|
// Async: false, // Result returned immediately
|
|
// }
|
|
// result, err := SpawnSubTurn(ctx, cfg)
|
|
// // Use result directly here
|
|
// processResult(result)
|
|
//
|
|
// Asynchronous sub-turn (Async=true):
|
|
//
|
|
// cfg := SubTurnConfig{
|
|
// Model: "gpt-4o-mini",
|
|
// SystemPrompt: "Background analysis",
|
|
// Async: true, // Result delivered to channel
|
|
// }
|
|
// result, err := SpawnSubTurn(ctx, cfg)
|
|
// // Result also available in parent's pendingResults channel
|
|
// // Parent turn will poll and process it in a later iteration
|
|
type SubTurnConfig struct {
|
|
Model string
|
|
Tools []tools.Tool
|
|
SystemPrompt string
|
|
MaxTokens int
|
|
|
|
// Async controls the result delivery mechanism:
|
|
//
|
|
// When Async = false (synchronous sub-turn):
|
|
// - The caller blocks until the sub-turn completes
|
|
// - The result is ONLY returned via the function return value
|
|
// - The result is NOT delivered to the parent's pendingResults channel
|
|
// - This prevents double delivery: caller gets result immediately, no need for channel
|
|
// - Use case: When the caller needs the result immediately to continue execution
|
|
// - Example: A tool that needs to process the sub-turn result before returning
|
|
//
|
|
// When Async = true (asynchronous sub-turn):
|
|
// - The sub-turn runs in the background (still blocks the caller, but semantically async)
|
|
// - The result is delivered to the parent's pendingResults channel
|
|
// - The result is ALSO returned via the function return value (for consistency)
|
|
// - The parent turn can poll pendingResults in later iterations to process results
|
|
// - Use case: Fire-and-forget operations, or when results are processed in batches
|
|
// - Example: Spawning multiple sub-turns in parallel and collecting results later
|
|
//
|
|
// IMPORTANT: The Async flag does NOT make the call non-blocking. It only controls
|
|
// whether the result is delivered via the channel. For true non-blocking execution,
|
|
// the caller must spawn the sub-turn in a separate goroutine.
|
|
Async bool
|
|
|
|
// Critical indicates this SubTurn's result is important and should continue
|
|
// running even after the parent turn finishes gracefully.
|
|
//
|
|
// When parent finishes gracefully (Finish(false)):
|
|
// - Critical=true: SubTurn continues running, delivers result as orphan
|
|
// - Critical=false: SubTurn exits gracefully without error
|
|
//
|
|
// When parent finishes with hard abort (Finish(true)):
|
|
// - All SubTurns are cancelled regardless of Critical flag
|
|
Critical bool
|
|
|
|
// Timeout is the maximum duration for this SubTurn.
|
|
// If the SubTurn runs longer than this, it will be cancelled.
|
|
// Default is 5 minutes (defaultSubTurnTimeout) if not specified.
|
|
Timeout time.Duration
|
|
|
|
// MaxContextRunes limits the context size (in runes) passed to the SubTurn.
|
|
// This prevents context window overflow by truncating message history before LLM calls.
|
|
//
|
|
// Values:
|
|
// 0 = Auto-calculate based on model's ContextWindow * 0.75 (default, recommended)
|
|
// -1 = No limit (disable soft truncation, rely only on hard context errors)
|
|
// >0 = Use specified rune limit
|
|
//
|
|
// The soft limit acts as a first line of defense before hitting the provider's
|
|
// hard context window limit. When exceeded, older messages are intelligently
|
|
// truncated while preserving system messages and recent context.
|
|
MaxContextRunes int
|
|
|
|
// Can be extended with temperature, topP, etc.
|
|
}
|
|
|
|
// ====================== Sub-turn Events (Aligned with EventBus) ======================
|
|
type SubTurnSpawnEvent struct {
|
|
ParentID string
|
|
ChildID string
|
|
Config SubTurnConfig
|
|
}
|
|
|
|
type SubTurnEndEvent struct {
|
|
ChildID string
|
|
Result *tools.ToolResult
|
|
Err error
|
|
}
|
|
|
|
type SubTurnResultDeliveredEvent struct {
|
|
ParentID string
|
|
ChildID string
|
|
Result *tools.ToolResult
|
|
}
|
|
|
|
type SubTurnOrphanResultEvent struct {
|
|
ParentID string
|
|
ChildID string
|
|
Result *tools.ToolResult
|
|
}
|
|
|
|
// ====================== Context Keys ======================
|
|
type agentLoopKeyType struct{}
|
|
|
|
var agentLoopKey = agentLoopKeyType{}
|
|
|
|
// WithAgentLoop injects AgentLoop into context for tool access
|
|
func WithAgentLoop(ctx context.Context, al *AgentLoop) context.Context {
|
|
return context.WithValue(ctx, agentLoopKey, al)
|
|
}
|
|
|
|
// AgentLoopFromContext retrieves AgentLoop from context
|
|
func AgentLoopFromContext(ctx context.Context) *AgentLoop {
|
|
al, _ := ctx.Value(agentLoopKey).(*AgentLoop)
|
|
return al
|
|
}
|
|
|
|
// ====================== Helper Functions ======================
|
|
|
|
func (al *AgentLoop) generateSubTurnID() string {
|
|
return fmt.Sprintf("subturn-%d", al.subTurnCounter.Add(1))
|
|
}
|
|
|
|
// ====================== Core Function: spawnSubTurn ======================
|
|
|
|
// AgentLoopSpawner implements tools.SubTurnSpawner interface.
|
|
// This allows tools to spawn sub-turns without circular dependency.
|
|
type AgentLoopSpawner struct {
|
|
al *AgentLoop
|
|
}
|
|
|
|
// SpawnSubTurn implements tools.SubTurnSpawner interface.
|
|
func (s *AgentLoopSpawner) SpawnSubTurn(ctx context.Context, cfg tools.SubTurnConfig) (*tools.ToolResult, error) {
|
|
parentTS := turnStateFromContext(ctx)
|
|
if parentTS == nil {
|
|
return nil, errors.New("parent turnState not found in context - cannot spawn sub-turn outside of a turn")
|
|
}
|
|
|
|
// Convert tools.SubTurnConfig to agent.SubTurnConfig
|
|
agentCfg := SubTurnConfig{
|
|
Model: cfg.Model,
|
|
Tools: cfg.Tools,
|
|
SystemPrompt: cfg.SystemPrompt,
|
|
MaxTokens: cfg.MaxTokens,
|
|
Async: cfg.Async,
|
|
}
|
|
|
|
return spawnSubTurn(ctx, s.al, parentTS, agentCfg)
|
|
}
|
|
|
|
// NewSubTurnSpawner creates a SubTurnSpawner for the given AgentLoop.
|
|
func NewSubTurnSpawner(al *AgentLoop) *AgentLoopSpawner {
|
|
return &AgentLoopSpawner{al: al}
|
|
}
|
|
|
|
// SpawnSubTurn is the exported entry point for tools to spawn sub-turns.
|
|
// It retrieves AgentLoop and parent turnState from context and delegates to spawnSubTurn.
|
|
func SpawnSubTurn(ctx context.Context, cfg SubTurnConfig) (*tools.ToolResult, error) {
|
|
al := AgentLoopFromContext(ctx)
|
|
if al == nil {
|
|
return nil, errors.New("AgentLoop not found in context - ensure context is properly initialized")
|
|
}
|
|
|
|
parentTS := turnStateFromContext(ctx)
|
|
if parentTS == nil {
|
|
return nil, errors.New("parent turnState not found in context - cannot spawn sub-turn outside of a turn")
|
|
}
|
|
|
|
return spawnSubTurn(ctx, al, parentTS, cfg)
|
|
}
|
|
|
|
func spawnSubTurn(ctx context.Context, al *AgentLoop, parentTS *turnState, cfg SubTurnConfig) (result *tools.ToolResult, err error) {
|
|
// 0. Acquire concurrency semaphore FIRST to ensure it's released even if early validation fails.
|
|
// Blocks if parent already has maxConcurrentSubTurns running, with a timeout to prevent indefinite blocking.
|
|
// Also respects context cancellation so we don't block forever if parent is aborted.
|
|
var semAcquired bool
|
|
if parentTS.concurrencySem != nil {
|
|
// Create a timeout context for semaphore acquisition
|
|
timeoutCtx, cancel := context.WithTimeout(ctx, concurrencyTimeout)
|
|
defer cancel()
|
|
|
|
select {
|
|
case parentTS.concurrencySem <- struct{}{}:
|
|
semAcquired = true
|
|
defer func() {
|
|
if semAcquired {
|
|
<-parentTS.concurrencySem
|
|
}
|
|
}()
|
|
case <-timeoutCtx.Done():
|
|
// Check parent context first - if it was cancelled, propagate that error
|
|
if ctx.Err() != nil {
|
|
return nil, ctx.Err()
|
|
}
|
|
// Otherwise it's our timeout
|
|
return nil, fmt.Errorf("%w: all %d slots occupied for %v",
|
|
ErrConcurrencyTimeout, maxConcurrentSubTurns, concurrencyTimeout)
|
|
}
|
|
}
|
|
|
|
// 1. Depth limit check
|
|
if parentTS.depth >= maxSubTurnDepth {
|
|
logger.WarnCF("subturn", "Depth limit exceeded", map[string]any{
|
|
"parent_id": parentTS.turnID,
|
|
"depth": parentTS.depth,
|
|
"max_depth": maxSubTurnDepth,
|
|
})
|
|
return nil, ErrDepthLimitExceeded
|
|
}
|
|
|
|
// 2. Config validation
|
|
if cfg.Model == "" {
|
|
return nil, ErrInvalidSubTurnConfig
|
|
}
|
|
|
|
// 3. Determine timeout for child SubTurn
|
|
timeout := cfg.Timeout
|
|
if timeout <= 0 {
|
|
timeout = defaultSubTurnTimeout
|
|
}
|
|
|
|
// 4. Create INDEPENDENT child context (not derived from parent ctx).
|
|
// This allows the child to continue running after parent finishes gracefully.
|
|
// The child has its own timeout for self-protection.
|
|
childCtx, cancel := context.WithTimeout(context.Background(), timeout)
|
|
defer cancel()
|
|
|
|
childID := al.generateSubTurnID()
|
|
childTS := newTurnState(childCtx, childID, parentTS)
|
|
// Set the cancel function so Finish(true) can trigger hard cancellation
|
|
childTS.cancelFunc = cancel
|
|
|
|
// IMPORTANT: Put childTS into childCtx so that code inside runTurn can retrieve it
|
|
childCtx = withTurnState(childCtx, childTS)
|
|
childCtx = WithAgentLoop(childCtx, al) // Propagate AgentLoop to child turn
|
|
|
|
// 5. Establish parent-child relationship (thread-safe)
|
|
parentTS.mu.Lock()
|
|
parentTS.childTurnIDs = append(parentTS.childTurnIDs, childID)
|
|
parentTS.mu.Unlock()
|
|
|
|
// 6. Emit Spawn event
|
|
MockEventBus.Emit(SubTurnSpawnEvent{
|
|
ParentID: parentTS.turnID,
|
|
ChildID: childID,
|
|
Config: cfg,
|
|
})
|
|
|
|
// 7. Defer cleanup: deliver result (for async), emit End event, and recover from panics
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
err = fmt.Errorf("subturn panicked: %v", r)
|
|
logger.ErrorCF("subturn", "SubTurn panicked", map[string]any{
|
|
"child_id": childID,
|
|
"parent_id": parentTS.turnID,
|
|
"panic": r,
|
|
})
|
|
}
|
|
|
|
// Result Delivery Strategy (Async vs Sync)
|
|
if cfg.Async {
|
|
deliverSubTurnResult(parentTS, childID, result)
|
|
}
|
|
|
|
MockEventBus.Emit(SubTurnEndEvent{
|
|
ChildID: childID,
|
|
Result: result,
|
|
Err: err,
|
|
})
|
|
}()
|
|
|
|
// 8. Execute sub-turn via the real agent loop.
|
|
result, err = runTurn(childCtx, al, childTS, cfg)
|
|
|
|
return result, err
|
|
}
|
|
|
|
// ====================== Result Delivery ======================
|
|
|
|
// deliverSubTurnResult delivers a sub-turn result to the parent turn's pendingResults channel.
|
|
//
|
|
// IMPORTANT: This function is ONLY called for asynchronous sub-turns (Async=true).
|
|
// For synchronous sub-turns (Async=false), results are returned directly via the function
|
|
// return value to avoid double delivery.
|
|
//
|
|
// Delivery behavior:
|
|
// - If parent turn is still running: attempts to deliver to pendingResults channel
|
|
// - If channel is full: emits SubTurnOrphanResultEvent (result is lost from channel but tracked)
|
|
// - If parent turn has finished: emits SubTurnOrphanResultEvent (late arrival)
|
|
//
|
|
// Thread safety:
|
|
// - Reads parent state under lock, then releases lock before channel send
|
|
// - Small race window exists but is acceptable (worst case: result becomes orphan)
|
|
//
|
|
// Event emissions:
|
|
// - SubTurnResultDeliveredEvent: successful delivery to channel
|
|
// - SubTurnOrphanResultEvent: delivery failed (parent finished or channel full)
|
|
func deliverSubTurnResult(parentTS *turnState, childID string, result *tools.ToolResult) {
|
|
// Let GC clean up the pendingResults channel; parent Finish will no longer close it.
|
|
// We use defer/recover to catch any unlikely channel panics if it were ever closed.
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
logger.WarnCF("subturn", "recovered panic sending to pendingResults", map[string]any{
|
|
"parent_id": parentTS.turnID,
|
|
"child_id": childID,
|
|
"recover": r,
|
|
})
|
|
if result != nil {
|
|
MockEventBus.Emit(SubTurnOrphanResultEvent{
|
|
ParentID: parentTS.turnID,
|
|
ChildID: childID,
|
|
Result: result,
|
|
})
|
|
}
|
|
}
|
|
}()
|
|
parentTS.mu.Lock()
|
|
isFinished := parentTS.isFinished
|
|
resultChan := parentTS.pendingResults
|
|
parentTS.mu.Unlock()
|
|
|
|
// If parent turn has already finished, treat this as an orphan result
|
|
if isFinished || resultChan == nil {
|
|
if result != nil {
|
|
MockEventBus.Emit(SubTurnOrphanResultEvent{
|
|
ParentID: parentTS.turnID,
|
|
ChildID: childID,
|
|
Result: result,
|
|
})
|
|
}
|
|
return
|
|
}
|
|
|
|
// Parent Turn is still running → attempt to deliver result
|
|
// We use a select statement with parentTS.Finished() to ensure that if the
|
|
// parent turn finishes while we are waiting to send the result (e.g. channel
|
|
// is full), we don't leak this goroutine by blocking forever.
|
|
select {
|
|
case resultChan <- result:
|
|
// Successfully delivered
|
|
MockEventBus.Emit(SubTurnResultDeliveredEvent{
|
|
ParentID: parentTS.turnID,
|
|
ChildID: childID,
|
|
Result: result,
|
|
})
|
|
case <-parentTS.Finished():
|
|
// Parent finished while we were waiting to deliver.
|
|
// The result cannot be delivered to the LLM, so it becomes an orphan.
|
|
logger.WarnCF("subturn", "parent finished before result could be delivered", map[string]any{
|
|
"parent_id": parentTS.turnID,
|
|
"child_id": childID,
|
|
})
|
|
if result != nil {
|
|
MockEventBus.Emit(SubTurnOrphanResultEvent{
|
|
ParentID: parentTS.turnID,
|
|
ChildID: childID,
|
|
Result: result,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
// runTurn builds a temporary AgentInstance from SubTurnConfig and delegates to
|
|
// the real agent loop. The child's ephemeral session is used for history so it
|
|
// never pollutes the parent session.
|
|
//
|
|
// This function implements multiple layers of context protection and error recovery:
|
|
//
|
|
// 1. Soft Context Limit (MaxContextRunes):
|
|
// - Proactively truncates message history before LLM calls
|
|
// - Default: 75% of model's context window
|
|
// - Preserves system messages and recent context
|
|
// - First line of defense against context overflow
|
|
//
|
|
// 2. Hard Context Error Recovery:
|
|
// - Detects context_length_exceeded errors from provider
|
|
// - Triggers force compression and retries (up to 2 times)
|
|
// - Second line of defense when soft limit is insufficient
|
|
//
|
|
// 3. Truncation Recovery:
|
|
// - Detects when LLM response is truncated (finish_reason="truncated")
|
|
// - Injects recovery prompt asking for shorter response
|
|
// - Retries up to 2 times
|
|
// - Handles cases where max_tokens is hit
|
|
func runTurn(ctx context.Context, al *AgentLoop, ts *turnState, cfg SubTurnConfig) (*tools.ToolResult, error) {
|
|
// Derive candidates from the requested model using the parent loop's provider.
|
|
defaultProvider := al.GetConfig().Agents.Defaults.Provider
|
|
candidates := providers.ResolveCandidates(
|
|
providers.ModelConfig{Primary: cfg.Model},
|
|
defaultProvider,
|
|
)
|
|
|
|
// Build a minimal AgentInstance for this sub-turn.
|
|
// It reuses the parent loop's provider and config, but gets its own
|
|
// ephemeral session store and tool registry.
|
|
parentAgent := al.GetRegistry().GetDefaultAgent()
|
|
|
|
// Determine which tools to use: explicit config or inherit from parent
|
|
toolRegistry := tools.NewToolRegistry()
|
|
toolsToRegister := cfg.Tools
|
|
if len(toolsToRegister) == 0 {
|
|
toolsToRegister = parentAgent.Tools.GetAll()
|
|
}
|
|
for _, t := range toolsToRegister {
|
|
toolRegistry.Register(t)
|
|
}
|
|
|
|
childAgent := &AgentInstance{
|
|
ID: ts.turnID,
|
|
Model: cfg.Model,
|
|
MaxIterations: parentAgent.MaxIterations,
|
|
MaxTokens: cfg.MaxTokens,
|
|
Temperature: parentAgent.Temperature,
|
|
ThinkingLevel: parentAgent.ThinkingLevel,
|
|
ContextWindow: parentAgent.ContextWindow, // Inherit from parent agent
|
|
SummarizeMessageThreshold: parentAgent.SummarizeMessageThreshold,
|
|
SummarizeTokenPercent: parentAgent.SummarizeTokenPercent,
|
|
Provider: parentAgent.Provider,
|
|
Sessions: ts.session,
|
|
ContextBuilder: parentAgent.ContextBuilder,
|
|
Tools: toolRegistry,
|
|
Candidates: candidates,
|
|
}
|
|
if childAgent.MaxTokens == 0 {
|
|
childAgent.MaxTokens = parentAgent.MaxTokens
|
|
}
|
|
|
|
// Resolve MaxContextRunes configuration
|
|
maxContextRunes := utils.ResolveMaxContextRunes(cfg.MaxContextRunes, childAgent.ContextWindow)
|
|
|
|
logger.DebugCF("subturn", "Context limit resolved",
|
|
map[string]any{
|
|
"turn_id": ts.turnID,
|
|
"context_window": childAgent.ContextWindow,
|
|
"max_context_runes": maxContextRunes,
|
|
"configured_value": cfg.MaxContextRunes,
|
|
})
|
|
|
|
// Retry loop for truncation and context errors
|
|
const (
|
|
maxTruncationRetries = 2
|
|
maxContextRetries = 2
|
|
)
|
|
|
|
truncationRetryCount := 0
|
|
contextRetryCount := 0
|
|
currentPrompt := cfg.SystemPrompt
|
|
promptAlreadyAdded := false
|
|
|
|
for {
|
|
// Soft context limit: check and truncate before LLM call
|
|
if maxContextRunes > 0 {
|
|
messages := childAgent.Sessions.GetHistory(ts.turnID)
|
|
currentRunes := utils.MeasureContextRunes(messages)
|
|
|
|
if currentRunes > maxContextRunes {
|
|
logger.WarnCF("subturn", "Context exceeds soft limit, truncating",
|
|
map[string]any{
|
|
"turn_id": ts.turnID,
|
|
"current_runes": currentRunes,
|
|
"max_runes": maxContextRunes,
|
|
"overflow": currentRunes - maxContextRunes,
|
|
})
|
|
|
|
truncatedMessages := utils.TruncateContextSmart(messages, maxContextRunes)
|
|
childAgent.Sessions.SetHistory(ts.turnID, truncatedMessages)
|
|
|
|
// Log truncation result
|
|
newRunes := utils.MeasureContextRunes(truncatedMessages)
|
|
logger.InfoCF("subturn", "Context truncated successfully",
|
|
map[string]any{
|
|
"turn_id": ts.turnID,
|
|
"before_runes": currentRunes,
|
|
"after_runes": newRunes,
|
|
"saved_runes": currentRunes - newRunes,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Call the agent loop
|
|
finalContent, err := al.runAgentLoop(ctx, childAgent, processOptions{
|
|
SessionKey: ts.turnID,
|
|
UserMessage: currentPrompt,
|
|
DefaultResponse: "",
|
|
EnableSummary: false,
|
|
SendResponse: false,
|
|
SkipAddUserMessage: promptAlreadyAdded,
|
|
})
|
|
|
|
// Mark the prompt as added so subsequent truncation retries
|
|
// won't duplicate it in the history.
|
|
promptAlreadyAdded = true
|
|
|
|
// 1. Handle context length errors
|
|
if err != nil && isContextLengthError(err) {
|
|
if contextRetryCount >= maxContextRetries {
|
|
logger.ErrorCF("subturn", "Context limit exceeded after max retries",
|
|
map[string]any{
|
|
"turn_id": ts.turnID,
|
|
"retries": contextRetryCount,
|
|
"max_retries": maxContextRetries,
|
|
})
|
|
return nil, fmt.Errorf("context limit exceeded after %d retries: %w", maxContextRetries, err)
|
|
}
|
|
|
|
logger.WarnCF("subturn", "Context length exceeded, compressing and retrying",
|
|
map[string]any{
|
|
"turn_id": ts.turnID,
|
|
"retry": contextRetryCount + 1,
|
|
})
|
|
|
|
// Trigger force compression
|
|
al.forceCompression(childAgent, ts.turnID)
|
|
|
|
contextRetryCount++
|
|
continue // Retry with compressed history
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, err // Other errors, return immediately
|
|
}
|
|
|
|
// 2. Check for truncation (retrieve finishReason from turnState)
|
|
finishReason := ts.GetLastFinishReason()
|
|
|
|
if finishReason == "truncated" && truncationRetryCount < maxTruncationRetries {
|
|
logger.WarnCF("subturn", "Response truncated, injecting recovery message",
|
|
map[string]any{
|
|
"turn_id": ts.turnID,
|
|
"retry": truncationRetryCount + 1,
|
|
})
|
|
|
|
// IMPORTANT: Do NOT manually add messages to history here.
|
|
// runAgentLoop has already saved both the assistant message (finalContent)
|
|
// and will save the next user message (currentPrompt) on the next iteration.
|
|
// Manually adding them would cause duplicates.
|
|
|
|
// Inject recovery prompt - it will be added by runAgentLoop on next iteration
|
|
recoveryPrompt := "Your previous response was truncated due to length. Please provide a shorter, complete response that finishes your thought."
|
|
currentPrompt = recoveryPrompt
|
|
promptAlreadyAdded = false // We need this new recovery prompt to be added
|
|
|
|
truncationRetryCount++
|
|
continue // Retry with recovery prompt
|
|
}
|
|
|
|
// 3. Success - return result
|
|
return &tools.ToolResult{ForLLM: finalContent}, nil
|
|
}
|
|
}
|
|
|
|
// isContextLengthError checks if the error is due to context length exceeded.
|
|
// It excludes timeout errors to avoid false positives.
|
|
func isContextLengthError(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
errMsg := strings.ToLower(err.Error())
|
|
|
|
// Exclude timeout errors
|
|
if strings.Contains(errMsg, "timeout") || strings.Contains(errMsg, "deadline exceeded") {
|
|
return false
|
|
}
|
|
|
|
// Detect context error patterns
|
|
return strings.Contains(errMsg, "context_length_exceeded") ||
|
|
strings.Contains(errMsg, "maximum context length") ||
|
|
strings.Contains(errMsg, "context window") ||
|
|
strings.Contains(errMsg, "too many tokens") ||
|
|
strings.Contains(errMsg, "token limit") ||
|
|
strings.Contains(errMsg, "prompt is too long")
|
|
}
|
|
|
|
// ====================== Other Types ======================
|