feat: add extended thinking support for Anthropic models (#1076)

* feat: add extended thinking support for Anthropic models

Support configurable thinking levels (off/low/medium/high/xhigh/adaptive)
via `agents.defaults.thinking_level` config field.

- "adaptive": uses Anthropic's adaptive thinking API (Claude 4.6+)
- "low/medium/high/xhigh": uses budget_tokens (all thinking-capable models)
- "off": disables thinking (default)

API constraints handled:
- Temperature cleared when thinking is enabled
- budget_tokens clamped to max_tokens-1
- Thinking response blocks parsed into Reasoning field

Relates to #645, #966

* fix: address PR review feedback for thinking support

- Add ThinkingCapable interface for provider capability detection
- Warn when thinking_level is set but provider doesn't support it
- Warn when temperature is cleared due to thinking enabled
- Adjust budget values per Anthropic best practices (medium=16K, xhigh=64K)
- Add budget clamp warning and 80% threshold warning
- Add parseResponse thinking block tests
- Add thinking_level field to config.example.json

* refactor: move ThinkingLevel from AgentDefaults to ModelConfig

Thinking is a model-level capability, not a global agent property.
Configuring it per model prevents the setting from being silently ignored
on non-Anthropic providers and eliminates spurious warning logs in
multi-provider setups.

Addresses PR #1076 review feedback from @yinwm.
This commit is contained in:
Larry Koo
2026-03-05 09:51:18 +08:00
committed by GitHub
parent 325af2163b
commit 204038ec60
9 changed files with 401 additions and 17 deletions
+2 -1
View File
@@ -22,7 +22,8 @@
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com/v1"
"api_base": "https://api.anthropic.com/v1",
"thinking_level": "high"
},
{
"model_name": "gemini",
+8
View File
@@ -26,6 +26,7 @@ type AgentInstance struct {
MaxIterations int
MaxTokens int
Temperature float64
ThinkingLevel ThinkingLevel
ContextWindow int
SummarizeMessageThreshold int
SummarizeTokenPercent int
@@ -103,6 +104,12 @@ func NewAgentInstance(
temperature = *defaults.Temperature
}
var thinkingLevelStr string
if mc, err := cfg.GetModelConfig(model); err == nil {
thinkingLevelStr = mc.ThinkingLevel
}
thinkingLevel := parseThinkingLevel(thinkingLevelStr)
summarizeMessageThreshold := defaults.SummarizeMessageThreshold
if summarizeMessageThreshold == 0 {
summarizeMessageThreshold = 20
@@ -169,6 +176,7 @@ func NewAgentInstance(
MaxIterations: maxIter,
MaxTokens: maxTokens,
Temperature: temperature,
ThinkingLevel: thinkingLevel,
ContextWindow: maxTokens,
SummarizeMessageThreshold: summarizeMessageThreshold,
SummarizeTokenPercent: summarizeTokenPercent,
+18 -16
View File
@@ -834,23 +834,29 @@ func (al *AgentLoop) runLLMIteration(
var response *providers.LLMResponse
var err error
llmOpts := map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
}
// parseThinkingLevel guarantees ThinkingOff for empty/unknown values,
// so checking != ThinkingOff is sufficient.
if agent.ThinkingLevel != ThinkingOff {
if tc, ok := agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() {
llmOpts["thinking_level"] = string(agent.ThinkingLevel)
} else {
logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring",
map[string]any{"agent_id": agent.ID, "thinking_level": string(agent.ThinkingLevel)})
}
}
callLLM := func() (*providers.LLMResponse, error) {
if len(agent.Candidates) > 1 && al.fallback != nil {
fbResult, fbErr := al.fallback.Execute(
ctx,
agent.Candidates,
func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
return agent.Provider.Chat(
ctx,
messages,
providerToolDefs,
model,
map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
},
)
return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
},
)
if fbErr != nil {
@@ -866,11 +872,7 @@ func (al *AgentLoop) runLLMIteration(
}
return fbResult.Response, nil
}
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
})
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts)
}
// Retry loop for context/token errors
+39
View File
@@ -0,0 +1,39 @@
package agent
import "strings"
// ThinkingLevel controls how the provider sends thinking parameters.
//
// - "adaptive": sends {thinking: {type: "adaptive"}} + output_config.effort (Claude 4.6+)
// - "low"/"medium"/"high"/"xhigh": sends {thinking: {type: "enabled", budget_tokens: N}} (all models)
// - "off": disables thinking
//
// Values are normally produced by parseThinkingLevel, which maps empty or
// unrecognized config strings to ThinkingOff.
type ThinkingLevel string
const (
	// ThinkingOff disables thinking. It is also the fallback that
	// parseThinkingLevel returns for empty or unknown input.
	ThinkingOff ThinkingLevel = "off"
	// ThinkingLow requests budget-based thinking at the smallest tier.
	ThinkingLow ThinkingLevel = "low"
	// ThinkingMedium requests budget-based thinking at the medium tier.
	ThinkingMedium ThinkingLevel = "medium"
	// ThinkingHigh requests budget-based thinking at the high tier.
	ThinkingHigh ThinkingLevel = "high"
	// ThinkingXHigh requests budget-based thinking at the largest tier.
	ThinkingXHigh ThinkingLevel = "xhigh"
	// ThinkingAdaptive requests adaptive thinking instead of a fixed budget.
	ThinkingAdaptive ThinkingLevel = "adaptive"
)
// parseThinkingLevel converts a user-supplied config string into a
// ThinkingLevel. The input is trimmed and lower-cased before matching, so
// " High " and "HIGH" both yield ThinkingHigh. Anything that is not one of
// the enabled levels (including "", "off" and typos) yields ThinkingOff.
func parseThinkingLevel(level string) ThinkingLevel {
	normalized := ThinkingLevel(strings.ToLower(strings.TrimSpace(level)))
	switch normalized {
	case ThinkingAdaptive, ThinkingLow, ThinkingMedium, ThinkingHigh, ThinkingXHigh:
		// The normalized text is exactly the constant's value.
		return normalized
	}
	return ThinkingOff
}
+35
View File
@@ -0,0 +1,35 @@
package agent
import "testing"
// TestParseThinkingLevel checks that parseThinkingLevel maps every supported
// level to its constant, falls back to ThinkingOff for empty or unrecognized
// input, and ignores letter case and surrounding whitespace.
func TestParseThinkingLevel(t *testing.T) {
	type testCase struct {
		name  string
		input string
		want  ThinkingLevel
	}
	cases := []testCase{
		{name: "off", input: "off", want: ThinkingOff},
		{name: "empty", input: "", want: ThinkingOff},
		{name: "low", input: "low", want: ThinkingLow},
		{name: "medium", input: "medium", want: ThinkingMedium},
		{name: "high", input: "high", want: ThinkingHigh},
		{name: "xhigh", input: "xhigh", want: ThinkingXHigh},
		{name: "adaptive", input: "adaptive", want: ThinkingAdaptive},
		{name: "unknown", input: "unknown", want: ThinkingOff},
		// Case-insensitive and whitespace-tolerant
		{name: "upper_Medium", input: "Medium", want: ThinkingMedium},
		{name: "upper_HIGH", input: "HIGH", want: ThinkingHigh},
		{name: "mixed_Adaptive", input: "Adaptive", want: ThinkingAdaptive},
		{name: "leading_space", input: " high", want: ThinkingHigh},
		{name: "trailing_space", input: "low ", want: ThinkingLow},
		{name: "both_spaces", input: " medium ", want: ThinkingMedium},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := parseThinkingLevel(tc.input)
			if got == tc.want {
				return
			}
			t.Errorf("parseThinkingLevel(%q) = %q, want %q", tc.input, got, tc.want)
		})
	}
}
+1
View File
@@ -507,6 +507,7 @@ type ModelConfig struct {
RPM int `json:"rpm,omitempty"` // Requests per minute limit
MaxTokensField string `json:"max_tokens_field,omitempty"` // Field name for max tokens (e.g., "max_completion_tokens")
RequestTimeout int `json:"request_timeout,omitempty"`
ThinkingLevel string `json:"thinking_level,omitempty"` // Extended thinking: off|low|medium|high|xhigh|adaptive
}
// Validate checks if the ModelConfig has all required fields.
+79
View File
@@ -31,6 +31,9 @@ type Provider struct {
baseURL string
}
// SupportsThinking always reports true for this provider, which lets it
// satisfy the providers.ThinkingCapable interface.
func (p *Provider) SupportsThinking() bool {
	return true
}
func NewProvider(token string) *Provider {
return NewProviderWithBaseURL(token, "")
}
@@ -182,9 +185,80 @@ func buildParams(
params.Tools = translateTools(tools)
}
// Extended Thinking / Adaptive Thinking
// The thinking_level value directly determines the API parameter format:
// "adaptive" → {thinking: {type: "adaptive"}} + output_config.effort
// "low/medium/high/xhigh" → {thinking: {type: "enabled", budget_tokens: N}}
if level, ok := options["thinking_level"].(string); ok && level != "" && level != "off" {
applyThinkingConfig(&params, level)
}
return params, nil
}
// applyThinkingConfig enables thinking on params according to level.
//
//   - "adaptive" selects the adaptive thinking API (Claude 4.6+) with
//     output_config.effort set to high.
//   - "low", "medium", "high" and "xhigh" select budget-based thinking using
//     the budget_tokens value returned by levelToBudget.
//   - Any other level is a no-op: params, including temperature, is left
//     exactly as the caller built it.
//
// Anthropic API constraints handled here:
//   - temperature must not be set when thinking is enabled, so it is cleared
//     (and the fact logged) only once thinking is actually going to be sent;
//   - budget_tokens must be strictly less than max_tokens, so the budget is
//     clamped to max_tokens-1 instead of raising the caller's max_tokens;
//   - budget_tokens has a documented minimum of 1,024; when max_tokens is too
//     small to leave that much room, thinking is skipped with a log line
//     rather than sending a request the API would reject.
func applyThinkingConfig(params *anthropic.MessageNewParams, level string) {
	// Smallest budget_tokens value Anthropic documents as acceptable.
	const minBudgetTokens = 1024

	var budget int64
	if level != "adaptive" {
		budget = int64(levelToBudget(level))
		if budget <= 0 {
			// Unknown level: do not touch the request at all.
			return
		}

		// budget_tokens must be < max_tokens; clamp to respect user's max_tokens setting.
		if budget >= params.MaxTokens {
			clamped := params.MaxTokens - 1
			if clamped < minBudgetTokens {
				log.Printf(
					"anthropic: thinking skipped (level=%s): max_tokens (%d) leaves no room for the minimum budget_tokens (%d)",
					level, params.MaxTokens, minBudgetTokens,
				)
				return
			}
			log.Printf("anthropic: budget_tokens (%d) clamped to %d (max_tokens-1)", budget, clamped)
			budget = clamped
		} else if budget > params.MaxTokens*80/100 {
			log.Printf("anthropic: thinking budget (%d) exceeds 80%% of max_tokens (%d), output may be truncated",
				budget, params.MaxTokens)
		}
	}

	// Anthropic API rejects requests with temperature set alongside thinking.
	// Reset to zero value (omitted from JSON serialization).
	if params.Temperature.Valid() {
		log.Printf("anthropic: temperature cleared because thinking is enabled (level=%s)", level)
	}
	params.Temperature = anthropic.MessageNewParams{}.Temperature

	if level == "adaptive" {
		adaptive := anthropic.NewThinkingConfigAdaptiveParam()
		params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
		params.OutputConfig = anthropic.OutputConfigParam{
			Effort: anthropic.OutputConfigEffortHigh,
		}
		return
	}

	params.Thinking = anthropic.ThinkingConfigParamOfEnabled(budget)
}
// levelToBudget returns the budget_tokens value for a thinking level, or 0
// when the level has no fixed budget ("off", "", "adaptive", or anything
// unrecognized). Matching is exact and case-sensitive.
//
// Tiers, based on Anthropic's recommendations and community best practices:
//
//	low    =  4,096 — simple reasoning, quick debugging (Claude Code "think")
//	medium = 16,384 — Anthropic recommended sweet spot for most tasks
//	high   = 32,000 — complex architecture, deep analysis (diminishing returns above this)
//	xhigh  = 64,000 — extreme reasoning, research problems, benchmarks
//
// Note: For Claude 4.6+, prefer adaptive thinking over manual budget_tokens.
func levelToBudget(level string) int {
	var budget int // stays 0 for levels without a fixed budget
	switch level {
	case "low":
		budget = 4 << 10
	case "medium":
		budget = 16 << 10
	case "high":
		budget = 32000
	case "xhigh":
		budget = 64000
	}
	return budget
}
func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {
result := make([]anthropic.ToolUnionParam, 0, len(tools))
for _, t := range tools {
@@ -213,10 +287,14 @@ func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {
func parseResponse(resp *anthropic.Message) *LLMResponse {
var content strings.Builder
var reasoning strings.Builder
var toolCalls []ToolCall
for _, block := range resp.Content {
switch block.Type {
case "thinking":
tb := block.AsThinking()
reasoning.WriteString(tb.Thinking)
case "text":
tb := block.AsText()
content.WriteString(tb.Text)
@@ -247,6 +325,7 @@ func parseResponse(resp *anthropic.Message) *LLMResponse {
return &LLMResponse{
Content: content.String(),
Reasoning: reasoning.String(),
ToolCalls: toolCalls,
FinishReason: finishReason,
Usage: &UsageInfo{
+212
View File
@@ -0,0 +1,212 @@
package anthropicprovider
import (
"encoding/json"
"testing"
"github.com/anthropics/anthropic-sdk-go"
)
// TestApplyThinkingConfig_Adaptive verifies that the "adaptive" level selects
// adaptive thinking with high effort, leaves budget-based thinking unset, and
// clears a previously set temperature.
func TestApplyThinkingConfig_Adaptive(t *testing.T) {
	req := anthropic.MessageNewParams{
		MaxTokens:   16000,
		Temperature: anthropic.Float(0.7),
	}

	applyThinkingConfig(&req, "adaptive")

	if req.Thinking.OfAdaptive == nil {
		t.Fatal("expected adaptive thinking")
	}
	if req.Thinking.OfEnabled != nil {
		t.Error("should not set enabled thinking in adaptive mode")
	}
	if got, want := req.OutputConfig.Effort, anthropic.OutputConfigEffortHigh; got != want {
		t.Errorf("effort = %q, want %q", got, want)
	}
	if req.Temperature.Valid() {
		t.Error("temperature should be cleared when thinking is enabled")
	}
}
// TestApplyThinkingConfig_BudgetLevels verifies that each budget-based level
// enables thinking with its expected budget_tokens, does not set adaptive
// thinking or an effort value, and clears the temperature. max_tokens is set
// high enough that no clamping occurs.
func TestApplyThinkingConfig_BudgetLevels(t *testing.T) {
	cases := []struct {
		level      string
		wantBudget int64
	}{
		{level: "low", wantBudget: 4096},
		{level: "medium", wantBudget: 16384},
		{level: "high", wantBudget: 32000},
		{level: "xhigh", wantBudget: 64000},
	}
	for _, tc := range cases {
		t.Run(tc.level, func(t *testing.T) {
			req := anthropic.MessageNewParams{
				MaxTokens:   200000,
				Temperature: anthropic.Float(0.5),
			}

			applyThinkingConfig(&req, tc.level)

			enabled := req.Thinking.OfEnabled
			if enabled == nil {
				t.Fatal("expected enabled thinking")
			}
			if req.Thinking.OfAdaptive != nil {
				t.Error("should not set adaptive thinking")
			}
			if got := enabled.BudgetTokens; got != tc.wantBudget {
				t.Errorf("budget_tokens = %d, want %d", got, tc.wantBudget)
			}
			if effort := req.OutputConfig.Effort; effort != "" {
				t.Errorf("effort = %q, want empty", effort)
			}
			if req.Temperature.Valid() {
				t.Error("temperature should be cleared when thinking is enabled")
			}
		})
	}
}
// TestApplyThinkingConfig_BudgetClamp verifies that a budget larger than
// max_tokens is reduced to max_tokens-1 and that max_tokens itself is left
// as the caller set it.
func TestApplyThinkingConfig_BudgetClamp(t *testing.T) {
	// budget_tokens must be < max_tokens; clamp budget down to respect user's max_tokens.
	req := anthropic.MessageNewParams{MaxTokens: 4096}

	applyThinkingConfig(&req, "high") // budget=32000 > maxTokens=4096

	enabled := req.Thinking.OfEnabled
	if enabled == nil {
		t.Fatal("expected enabled thinking")
	}
	if got := enabled.BudgetTokens; got != 4095 {
		t.Errorf("budget_tokens = %d, want 4095 (maxTokens-1)", got)
	}
	if got := req.MaxTokens; got != 4096 {
		t.Errorf("max_tokens should not be modified, got %d", got)
	}
}
// TestApplyThinkingConfig_UnknownLevel verifies that an unrecognized level
// enables neither budget-based nor adaptive thinking.
func TestApplyThinkingConfig_UnknownLevel(t *testing.T) {
	req := anthropic.MessageNewParams{MaxTokens: 16000}

	applyThinkingConfig(&req, "unknown")

	thinking := req.Thinking
	if thinking.OfEnabled != nil {
		t.Error("should not set enabled thinking for unknown level")
	}
	if thinking.OfAdaptive != nil {
		t.Error("should not set adaptive thinking for unknown level")
	}
}
// TestLevelToBudget verifies the budget_tokens value for each budget-based
// level and that "off" and the empty string map to 0.
func TestLevelToBudget(t *testing.T) {
	type testCase struct {
		name  string
		level string
		want  int
	}
	cases := []testCase{
		{name: "low", level: "low", want: 4096},
		{name: "medium", level: "medium", want: 16384},
		{name: "high", level: "high", want: 32000},
		{name: "xhigh", level: "xhigh", want: 64000},
		{name: "off", level: "off", want: 0},
		{name: "empty", level: "", want: 0},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := levelToBudget(tc.level)
			if got == tc.want {
				return
			}
			t.Errorf("levelToBudget(%q) = %d, want %d", tc.level, got, tc.want)
		})
	}
}
// TestBuildParams_ThinkingClearsTemperature verifies, through buildParams,
// that supplying thinking_level "medium" together with a temperature yields
// a request with thinking enabled at 16384 budget_tokens and no temperature.
func TestBuildParams_ThinkingClearsTemperature(t *testing.T) {
	conversation := []Message{{Role: "user", Content: "hello"}}
	options := map[string]any{
		"max_tokens":     200000,
		"temperature":    0.8,
		"thinking_level": "medium",
	}

	req, err := buildParams(conversation, nil, "claude-sonnet-4-6", options)
	if err != nil {
		t.Fatal(err)
	}

	if req.Temperature.Valid() {
		t.Error("temperature should be cleared when thinking_level is set")
	}
	enabled := req.Thinking.OfEnabled
	if enabled == nil {
		t.Fatal("expected enabled thinking")
	}
	if got := enabled.BudgetTokens; got != 16384 {
		t.Errorf("budget_tokens = %d, want 16384", got)
	}
}
// unmarshalBlocks decodes jsonStr into a slice of content blocks and fails
// the test immediately if decoding errors. Going through JSON, rather than
// building the structs by hand, populates the internal JSON.raw field that
// AsText/AsThinking require.
func unmarshalBlocks(t *testing.T, jsonStr string) []anthropic.ContentBlockUnion {
	t.Helper()

	var decoded []anthropic.ContentBlockUnion
	err := json.Unmarshal([]byte(jsonStr), &decoded)
	if err != nil {
		t.Fatalf("unmarshalBlocks: %v", err)
	}
	return decoded
}
func TestParseResponse_ThinkingBlock(t *testing.T) {
resp := &anthropic.Message{
Content: unmarshalBlocks(t, `[
{"type":"thinking","thinking":"Let me reason step by step...","signature":"sig"},
{"type":"text","text":"The answer is 42."}
]`),
StopReason: anthropic.StopReasonEndTurn,
}
result := parseResponse(resp)
if result.Reasoning != "Let me reason step by step..." {
t.Errorf("Reasoning = %q, want thinking content", result.Reasoning)
}
if result.Content != "The answer is 42." {
t.Errorf("Content = %q, want text content", result.Content)
}
if result.FinishReason != "stop" {
t.Errorf("FinishReason = %q, want stop", result.FinishReason)
}
}
func TestParseResponse_NoThinkingBlock(t *testing.T) {
resp := &anthropic.Message{
Content: unmarshalBlocks(t, `[
{"type":"text","text":"Just a normal response."}
]`),
StopReason: anthropic.StopReasonEndTurn,
}
result := parseResponse(resp)
if result.Reasoning != "" {
t.Errorf("Reasoning = %q, want empty", result.Reasoning)
}
if result.Content != "Just a normal response." {
t.Errorf("Content = %q, want text content", result.Content)
}
}
// TestBuildParams_NoThinkingKeepsTemperature verifies that, when no
// thinking_level option is supplied, buildParams passes the configured
// temperature through unchanged.
func TestBuildParams_NoThinkingKeepsTemperature(t *testing.T) {
	conversation := []Message{{Role: "user", Content: "hello"}}
	options := map[string]any{
		"temperature": 0.8,
	}

	req, err := buildParams(conversation, nil, "claude-sonnet-4-6", options)
	if err != nil {
		t.Fatal(err)
	}

	temp := req.Temperature
	if !temp.Valid() {
		t.Error("temperature should be preserved when thinking is not set")
	}
	if temp.Value != 0.8 {
		t.Errorf("temperature = %f, want 0.8", temp.Value)
	}
}
+7
View File
@@ -37,6 +37,13 @@ type StatefulProvider interface {
Close()
}
// ThinkingCapable is an optional interface for providers that support
// extended thinking (e.g. Anthropic). The agent loop type-asserts the active
// provider to this interface: the thinking_level option is forwarded only
// when the provider implements it and SupportsThinking returns true;
// otherwise a warning is logged and the configured level is ignored.
type ThinkingCapable interface {
	// SupportsThinking reports whether the provider can act on a
	// thinking_level option.
	SupportsThinking() bool
}
// FailoverReason classifies why an LLM request failed for fallback decisions.
type FailoverReason string