feat: add extended thinking support for Anthropic models (#1076)

* feat: add extended thinking support for Anthropic models Support configurable thinking levels (off/low/medium/high/xhigh/adaptive) via `agents.defaults.thinking_level` config field. - "adaptive": uses Anthropic's adaptive thinking API (Claude 4.6+) - "low/medium/high/xhigh": uses budget_tokens (all thinking-capable models) - "off": disables thinking (default) API constraints handled: - Temperature cleared when thinking is enabled - budget_tokens clamped to max_tokens-1 - Thinking response blocks parsed into Reasoning field Relates to #645, #966 * fix: address PR review feedback for thinking support - Add ThinkingCapable interface for provider capability detection - Warn when thinking_level is set but provider doesn't support it - Warn when temperature is cleared due to thinking enabled - Adjust budget values per Anthropic best practices (medium=16K, xhigh=64K) - Add budget clamp warning and 80% threshold warning - Add parseResponse thinking block tests - Add thinking_level field to config.example.json * refactor: move ThinkingLevel from AgentDefaults to ModelConfig Thinking is a model-level capability, not a global agent property. Per-model config avoids silent ignoring on non-Anthropic providers and eliminates spurious warning logs in multi-provider setups. Addresses PR #1076 review feedback from @yinwm.
2026-06-12 18:08:54 +00:00 · 2026-03-05 09:51:18 +08:00
parent 325af2163b
commit 204038ec60
9 changed files with 401 additions and 17 deletions
@@ -26,6 +26,7 @@ type AgentInstance struct {
 	MaxIterations             int
 	MaxTokens                 int
 	Temperature               float64
+	ThinkingLevel             ThinkingLevel
 	ContextWindow             int
 	SummarizeMessageThreshold int
 	SummarizeTokenPercent     int
@@ -103,6 +104,12 @@ func NewAgentInstance(
 		temperature = *defaults.Temperature
 	}

+	var thinkingLevelStr string
+	if mc, err := cfg.GetModelConfig(model); err == nil {
+		thinkingLevelStr = mc.ThinkingLevel
+	}
+	thinkingLevel := parseThinkingLevel(thinkingLevelStr)
+
 	summarizeMessageThreshold := defaults.SummarizeMessageThreshold
 	if summarizeMessageThreshold == 0 {
 		summarizeMessageThreshold = 20
@@ -169,6 +176,7 @@ func NewAgentInstance(
 		MaxIterations:             maxIter,
 		MaxTokens:                 maxTokens,
 		Temperature:               temperature,
+		ThinkingLevel:             thinkingLevel,
 		ContextWindow:             maxTokens,
 		SummarizeMessageThreshold: summarizeMessageThreshold,
 		SummarizeTokenPercent:     summarizeTokenPercent,
@@ -834,23 +834,29 @@ func (al *AgentLoop) runLLMIteration(
 		var response *providers.LLMResponse
 		var err error

+		llmOpts := map[string]any{
+			"max_tokens":       agent.MaxTokens,
+			"temperature":      agent.Temperature,
+			"prompt_cache_key": agent.ID,
+		}
+		// parseThinkingLevel guarantees ThinkingOff for empty/unknown values,
+		// so checking != ThinkingOff is sufficient.
+		if agent.ThinkingLevel != ThinkingOff {
+			if tc, ok := agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() {
+				llmOpts["thinking_level"] = string(agent.ThinkingLevel)
+			} else {
+				logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring",
+					map[string]any{"agent_id": agent.ID, "thinking_level": string(agent.ThinkingLevel)})
+			}
+		}
+
 		callLLM := func() (*providers.LLMResponse, error) {
 			if len(agent.Candidates) > 1 && al.fallback != nil {
 				fbResult, fbErr := al.fallback.Execute(
 					ctx,
 					agent.Candidates,
 					func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
-						return agent.Provider.Chat(
-							ctx,
-							messages,
-							providerToolDefs,
-							model,
-							map[string]any{
-								"max_tokens":       agent.MaxTokens,
-								"temperature":      agent.Temperature,
-								"prompt_cache_key": agent.ID,
-							},
-						)
+						return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
 					},
 				)
 				if fbErr != nil {
@@ -866,11 +872,7 @@ func (al *AgentLoop) runLLMIteration(
 				}
 				return fbResult.Response, nil
 			}
-			return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{
-				"max_tokens":       agent.MaxTokens,
-				"temperature":      agent.Temperature,
-				"prompt_cache_key": agent.ID,
-			})
+			return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts)
 		}

 		// Retry loop for context/token errors
@@ -0,0 +1,39 @@
+package agent
+
+import "strings"
+
+// ThinkingLevel controls how the provider sends thinking parameters.
+//
+//   - "adaptive": sends {thinking: {type: "adaptive"}} + output_config.effort (Claude 4.6+)
+//   - "low"/"medium"/"high"/"xhigh": sends {thinking: {type: "enabled", budget_tokens: N}} (all models)
+//   - "off": disables thinking
+type ThinkingLevel string
+
+const (
+	ThinkingOff      ThinkingLevel = "off"
+	ThinkingLow      ThinkingLevel = "low"
+	ThinkingMedium   ThinkingLevel = "medium"
+	ThinkingHigh     ThinkingLevel = "high"
+	ThinkingXHigh    ThinkingLevel = "xhigh"
+	ThinkingAdaptive ThinkingLevel = "adaptive"
+)
+
+// parseThinkingLevel normalizes a config string to a ThinkingLevel.
+// Case-insensitive and whitespace-tolerant for user-facing config values.
+// Returns ThinkingOff for unknown or empty values.
+func parseThinkingLevel(level string) ThinkingLevel {
+	switch strings.ToLower(strings.TrimSpace(level)) {
+	case "adaptive":
+		return ThinkingAdaptive
+	case "low":
+		return ThinkingLow
+	case "medium":
+		return ThinkingMedium
+	case "high":
+		return ThinkingHigh
+	case "xhigh":
+		return ThinkingXHigh
+	default:
+		return ThinkingOff
+	}
+}
@@ -0,0 +1,35 @@
+package agent
+
+import "testing"
+
+func TestParseThinkingLevel(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  ThinkingLevel
+	}{
+		{"off", "off", ThinkingOff},
+		{"empty", "", ThinkingOff},
+		{"low", "low", ThinkingLow},
+		{"medium", "medium", ThinkingMedium},
+		{"high", "high", ThinkingHigh},
+		{"xhigh", "xhigh", ThinkingXHigh},
+		{"adaptive", "adaptive", ThinkingAdaptive},
+		{"unknown", "unknown", ThinkingOff},
+		// Case-insensitive and whitespace-tolerant
+		{"upper_Medium", "Medium", ThinkingMedium},
+		{"upper_HIGH", "HIGH", ThinkingHigh},
+		{"mixed_Adaptive", "Adaptive", ThinkingAdaptive},
+		{"leading_space", " high", ThinkingHigh},
+		{"trailing_space", "low ", ThinkingLow},
+		{"both_spaces", " medium ", ThinkingMedium},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := parseThinkingLevel(tt.input); got != tt.want {
+				t.Errorf("parseThinkingLevel(%q) = %q, want %q", tt.input, got, tt.want)
+			}
+		})
+	}
+}