fix: distinguish network timeouts from context window errors

HTTP timeouts (context deadline exceeded, Client.Timeout) were
incorrectly classified as context window errors, triggering useless
history compression. Replace broad substring checks ("context",
"token", "length") with specific patterns for real context limit
errors and explicitly exclude timeout errors from that path.

Additionally, timeout errors were not retried at all — the retry
loop only handled context window errors. Now timeouts are retried
up to 2 times with exponential backoff (5s, 10s).
This commit is contained in:
Nikita Nafranets
2026-02-23 16:13:37 +03:00
parent fd26fa7459
commit a4b6cea103
+29 -3
View File
@@ -9,6 +9,7 @@ package agent
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"sync"
@@ -554,10 +555,35 @@ func (al *AgentLoop) runLLMIteration(
}
errMsg := strings.ToLower(err.Error())
isContextError := strings.Contains(errMsg, "token") ||
strings.Contains(errMsg, "context") ||
// Check if this is a network/HTTP timeout — not a context window error.
isTimeoutError := errors.Is(err, context.DeadlineExceeded) ||
strings.Contains(errMsg, "deadline exceeded") ||
strings.Contains(errMsg, "client.timeout") ||
strings.Contains(errMsg, "timed out") ||
strings.Contains(errMsg, "timeout exceeded")
// Detect real context window / token limit errors, excluding network timeouts.
isContextError := !isTimeoutError && (strings.Contains(errMsg, "context_length_exceeded") ||
strings.Contains(errMsg, "context window") ||
strings.Contains(errMsg, "maximum context length") ||
strings.Contains(errMsg, "token limit") ||
strings.Contains(errMsg, "too many tokens") ||
strings.Contains(errMsg, "max_tokens") ||
strings.Contains(errMsg, "invalidparameter") ||
strings.Contains(errMsg, "length")
strings.Contains(errMsg, "prompt is too long") ||
strings.Contains(errMsg, "request too large"))
if isTimeoutError && retry < maxRetries {
backoff := time.Duration(retry+1) * 5 * time.Second
logger.WarnCF("agent", "Timeout error, retrying after backoff", map[string]any{
"error": err.Error(),
"retry": retry,
"backoff": backoff.String(),
})
time.Sleep(backoff)
continue
}
if isContextError && retry < maxRetries {
logger.WarnCF("agent", "Context window error detected, attempting compression", map[string]any{