mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
feat(agent): add network error retry with configurable max retries and backoff
- Add isNetworkError detection for connection reset, broken pipe, read/write tcp, EOF - Add retry logic with configurable exponential backoff for network errors - Add config options max_llm_retries and llm_retry_backoff_secs in agents.defaults - Network errors now retry with backoff (was previously not retried) - Timeout errors now use configurable backoff instead of hardcoded 5s - Default: 2 retries with 2s backoff (3 total attempts)
This commit is contained in:
@@ -185,7 +185,14 @@ func (p *Pipeline) CallLLM(
|
||||
|
||||
// Retry loop
|
||||
var err error
|
||||
maxRetries := 2
|
||||
maxRetries := p.Cfg.Agents.Defaults.MaxLLMRetries
|
||||
if maxRetries <= 0 {
|
||||
maxRetries = 2
|
||||
}
|
||||
backoffSecs := p.Cfg.Agents.Defaults.LLMRetryBackoffSecs
|
||||
if backoffSecs <= 0 {
|
||||
backoffSecs = 2
|
||||
}
|
||||
for retry := 0; retry <= maxRetries; retry++ {
|
||||
exec.response, err = callLLM(exec.callMessages, exec.providerToolDefs)
|
||||
if err == nil {
|
||||
@@ -233,6 +240,15 @@ func (p *Pipeline) CallLLM(
|
||||
strings.Contains(errMsg, "timed out") ||
|
||||
strings.Contains(errMsg, "timeout exceeded")
|
||||
|
||||
isNetworkError := !isTimeoutError && (strings.Contains(errMsg, "connection reset") ||
|
||||
strings.Contains(errMsg, "connection refused") ||
|
||||
strings.Contains(errMsg, "broken pipe") ||
|
||||
strings.Contains(errMsg, "no such host") ||
|
||||
strings.Contains(errMsg, "network is unreachable") ||
|
||||
strings.Contains(errMsg, "read tcp") ||
|
||||
strings.Contains(errMsg, "write tcp") ||
|
||||
strings.Contains(errMsg, "eof"))
|
||||
|
||||
isContextError := !isTimeoutError && (strings.Contains(errMsg, "context_length_exceeded") ||
|
||||
strings.Contains(errMsg, "context window") ||
|
||||
strings.Contains(errMsg, "context_window") ||
|
||||
@@ -245,7 +261,7 @@ func (p *Pipeline) CallLLM(
|
||||
strings.Contains(errMsg, "request too large"))
|
||||
|
||||
if isTimeoutError && retry < maxRetries {
|
||||
backoff := time.Duration(retry+1) * 5 * time.Second
|
||||
backoff := time.Duration(retry+1) * time.Duration(backoffSecs) * time.Second
|
||||
al.emitEvent(
|
||||
EventKindLLMRetry,
|
||||
ts.eventMeta("runTurn", "turn.llm.retry"),
|
||||
@@ -273,6 +289,35 @@ func (p *Pipeline) CallLLM(
|
||||
continue
|
||||
}
|
||||
|
||||
if isNetworkError && retry < maxRetries {
|
||||
backoff := time.Duration(retry+1) * time.Duration(backoffSecs) * time.Second
|
||||
al.emitEvent(
|
||||
EventKindLLMRetry,
|
||||
ts.eventMeta("runTurn", "turn.llm.retry"),
|
||||
LLMRetryPayload{
|
||||
Attempt: retry + 1,
|
||||
MaxRetries: maxRetries,
|
||||
Reason: "network",
|
||||
Error: err.Error(),
|
||||
Backoff: backoff,
|
||||
},
|
||||
)
|
||||
logger.WarnCF("agent", "Network error, retrying after backoff", map[string]any{
|
||||
"error": err.Error(),
|
||||
"retry": retry,
|
||||
"backoff": backoff.String(),
|
||||
})
|
||||
if sleepErr := sleepWithContext(turnCtx, backoff); sleepErr != nil {
|
||||
if ts.hardAbortRequested() {
|
||||
_ = ts.requestHardAbort()
|
||||
return ControlBreak, nil
|
||||
}
|
||||
err = sleepErr
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if isContextError && retry < maxRetries && !ts.opts.NoHistory {
|
||||
al.emitEvent(
|
||||
EventKindLLMRetry,
|
||||
|
||||
@@ -275,6 +275,8 @@ type AgentDefaults struct {
|
||||
SplitOnMarker bool `json:"split_on_marker" env:"PICOCLAW_AGENTS_DEFAULTS_SPLIT_ON_MARKER"` // split messages on <|[SPLIT]|> marker
|
||||
ContextManager string `json:"context_manager,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_CONTEXT_MANAGER"`
|
||||
ContextManagerConfig json.RawMessage `json:"context_manager_config,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_CONTEXT_MANAGER_CONFIG"`
|
||||
MaxLLMRetries int `json:"max_llm_retries,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_LLM_RETRIES"`
|
||||
LLMRetryBackoffSecs int `json:"llm_retry_backoff_secs,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_LLM_RETRY_BACKOFF_SECS"`
|
||||
}
|
||||
|
||||
const DefaultMaxMediaSize = 20 * 1024 * 1024 // 20 MB
|
||||
|
||||
Reference in New Issue
Block a user