feat: add extended thinking support for Anthropic models (#1076)

* feat: add extended thinking support for Anthropic models Support configurable thinking levels (off/low/medium/high/xhigh/adaptive) via `agents.defaults.thinking_level` config field. - "adaptive": uses Anthropic's adaptive thinking API (Claude 4.6+) - "low/medium/high/xhigh": uses budget_tokens (all thinking-capable models) - "off": disables thinking (default) API constraints handled: - Temperature cleared when thinking is enabled - budget_tokens clamped to max_tokens-1 - Thinking response blocks parsed into Reasoning field Relates to #645, #966 * fix: address PR review feedback for thinking support - Add ThinkingCapable interface for provider capability detection - Warn when thinking_level is set but provider doesn't support it - Warn when temperature is cleared due to thinking enabled - Adjust budget values per Anthropic best practices (medium=16K, xhigh=64K) - Add budget clamp warning and 80% threshold warning - Add parseResponse thinking block tests - Add thinking_level field to config.example.json * refactor: move ThinkingLevel from AgentDefaults to ModelConfig Thinking is a model-level capability, not a global agent property. Per-model config avoids silent ignoring on non-Anthropic providers and eliminates spurious warning logs in multi-provider setups. Addresses PR #1076 review feedback from @yinwm.
2026-06-12 18:08:54 +00:00 · 2026-03-05 09:51:18 +08:00
parent 325af2163b
commit 204038ec60
9 changed files with 401 additions and 17 deletions
@@ -834,23 +834,29 @@ func (al *AgentLoop) runLLMIteration(
 		var response *providers.LLMResponse
 		var err error

+		llmOpts := map[string]any{
+			"max_tokens":       agent.MaxTokens,
+			"temperature":      agent.Temperature,
+			"prompt_cache_key": agent.ID,
+		}
+		// parseThinkingLevel guarantees ThinkingOff for empty/unknown values,
+		// so checking != ThinkingOff is sufficient.
+		if agent.ThinkingLevel != ThinkingOff {
+			if tc, ok := agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() {
+				llmOpts["thinking_level"] = string(agent.ThinkingLevel)
+			} else {
+				logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring",
+					map[string]any{"agent_id": agent.ID, "thinking_level": string(agent.ThinkingLevel)})
+			}
+		}
+
 		callLLM := func() (*providers.LLMResponse, error) {
 			if len(agent.Candidates) > 1 && al.fallback != nil {
 				fbResult, fbErr := al.fallback.Execute(
 					ctx,
 					agent.Candidates,
 					func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
-						return agent.Provider.Chat(
-							ctx,
-							messages,
-							providerToolDefs,
-							model,
-							map[string]any{
-								"max_tokens":       agent.MaxTokens,
-								"temperature":      agent.Temperature,
-								"prompt_cache_key": agent.ID,
-							},
-						)
+						return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
 					},
 				)
 				if fbErr != nil {
@@ -866,11 +872,7 @@ func (al *AgentLoop) runLLMIteration(
 				}
 				return fbResult.Response, nil
 			}
-			return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{
-				"max_tokens":       agent.MaxTokens,
-				"temperature":      agent.Temperature,
-				"prompt_cache_key": agent.ID,
-			})
+			return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts)
 		}

 		// Retry loop for context/token errors