mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
feat: add extended thinking support for Anthropic models (#1076)
* feat: add extended thinking support for Anthropic models Support configurable thinking levels (off/low/medium/high/xhigh/adaptive) via `agents.defaults.thinking_level` config field. - "adaptive": uses Anthropic's adaptive thinking API (Claude 4.6+) - "low/medium/high/xhigh": uses budget_tokens (all thinking-capable models) - "off": disables thinking (default) API constraints handled: - Temperature cleared when thinking is enabled - budget_tokens clamped to max_tokens-1 - Thinking response blocks parsed into Reasoning field Relates to #645, #966 * fix: address PR review feedback for thinking support - Add ThinkingCapable interface for provider capability detection - Warn when thinking_level is set but provider doesn't support it - Warn when temperature is cleared due to thinking enabled - Adjust budget values per Anthropic best practices (medium=16K, xhigh=64K) - Add budget clamp warning and 80% threshold warning - Add parseResponse thinking block tests - Add thinking_level field to config.example.json * refactor: move ThinkingLevel from AgentDefaults to ModelConfig Thinking is a model-level capability, not a global agent property. Per-model config avoids silent ignoring on non-Anthropic providers and eliminates spurious warning logs in multi-provider setups. Addresses PR #1076 review feedback from @yinwm.
This commit is contained in:
@@ -31,6 +31,9 @@ type Provider struct {
|
||||
baseURL string
|
||||
}
|
||||
|
||||
// SupportsThinking implements providers.ThinkingCapable.
|
||||
func (p *Provider) SupportsThinking() bool { return true }
|
||||
|
||||
func NewProvider(token string) *Provider {
|
||||
return NewProviderWithBaseURL(token, "")
|
||||
}
|
||||
@@ -182,9 +185,80 @@ func buildParams(
|
||||
params.Tools = translateTools(tools)
|
||||
}
|
||||
|
||||
// Extended Thinking / Adaptive Thinking
|
||||
// The thinking_level value directly determines the API parameter format:
|
||||
// "adaptive" → {thinking: {type: "adaptive"}} + output_config.effort
|
||||
// "low/medium/high/xhigh" → {thinking: {type: "enabled", budget_tokens: N}}
|
||||
if level, ok := options["thinking_level"].(string); ok && level != "" && level != "off" {
|
||||
applyThinkingConfig(&params, level)
|
||||
}
|
||||
|
||||
return params, nil
|
||||
}
|
||||
|
||||
// applyThinkingConfig sets thinking parameters based on the level value.
|
||||
// "adaptive" uses the adaptive thinking API (Claude 4.6+).
|
||||
// All other levels use budget_tokens which is universally supported.
|
||||
//
|
||||
// Anthropic API constraint: temperature must not be set when thinking is enabled.
|
||||
// budget_tokens must be strictly less than max_tokens.
|
||||
func applyThinkingConfig(params *anthropic.MessageNewParams, level string) {
|
||||
// Anthropic API rejects requests with temperature set alongside thinking.
|
||||
// Reset to zero value (omitted from JSON serialization).
|
||||
if params.Temperature.Valid() {
|
||||
log.Printf("anthropic: temperature cleared because thinking is enabled (level=%s)", level)
|
||||
}
|
||||
params.Temperature = anthropic.MessageNewParams{}.Temperature
|
||||
|
||||
if level == "adaptive" {
|
||||
adaptive := anthropic.NewThinkingConfigAdaptiveParam()
|
||||
params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
|
||||
params.OutputConfig = anthropic.OutputConfigParam{
|
||||
Effort: anthropic.OutputConfigEffortHigh,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
budget := int64(levelToBudget(level))
|
||||
if budget <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// budget_tokens must be < max_tokens; clamp to respect user's max_tokens setting.
|
||||
if budget >= params.MaxTokens {
|
||||
log.Printf("anthropic: budget_tokens (%d) clamped to %d (max_tokens-1)", budget, params.MaxTokens-1)
|
||||
budget = params.MaxTokens - 1
|
||||
} else if budget > params.MaxTokens*80/100 {
|
||||
log.Printf("anthropic: thinking budget (%d) exceeds 80%% of max_tokens (%d), output may be truncated",
|
||||
budget, params.MaxTokens)
|
||||
}
|
||||
params.Thinking = anthropic.ThinkingConfigParamOfEnabled(budget)
|
||||
}
|
||||
|
||||
// levelToBudget translates a thinking level name into a budget_tokens value.
// The figures follow Anthropic's recommendations and community practice:
//
//	low    =  4,096 — simple reasoning, quick debugging (Claude Code "think")
//	medium = 16,384 — recommended sweet spot for most tasks
//	high   = 32,000 — complex architecture, deep analysis
//	xhigh  = 64,000 — extreme reasoning, research problems, benchmarks
//
// Every other input (including "off", "adaptive" and the empty string)
// yields 0. For Claude 4.6+, adaptive thinking is preferred over a manual
// budget.
func levelToBudget(level string) int {
	var tokens int // remains 0 for anything that is not a budgeted level
	switch level {
	case "xhigh":
		tokens = 64000
	case "high":
		tokens = 32000
	case "medium":
		tokens = 16384
	case "low":
		tokens = 4096
	}
	return tokens
}
|
||||
|
||||
func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {
|
||||
result := make([]anthropic.ToolUnionParam, 0, len(tools))
|
||||
for _, t := range tools {
|
||||
@@ -213,10 +287,14 @@ func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {
|
||||
|
||||
func parseResponse(resp *anthropic.Message) *LLMResponse {
|
||||
var content strings.Builder
|
||||
var reasoning strings.Builder
|
||||
var toolCalls []ToolCall
|
||||
|
||||
for _, block := range resp.Content {
|
||||
switch block.Type {
|
||||
case "thinking":
|
||||
tb := block.AsThinking()
|
||||
reasoning.WriteString(tb.Thinking)
|
||||
case "text":
|
||||
tb := block.AsText()
|
||||
content.WriteString(tb.Text)
|
||||
@@ -247,6 +325,7 @@ func parseResponse(resp *anthropic.Message) *LLMResponse {
|
||||
|
||||
return &LLMResponse{
|
||||
Content: content.String(),
|
||||
Reasoning: reasoning.String(),
|
||||
ToolCalls: toolCalls,
|
||||
FinishReason: finishReason,
|
||||
Usage: &UsageInfo{
|
||||
|
||||
@@ -0,0 +1,212 @@
|
||||
package anthropicprovider
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/anthropics/anthropic-sdk-go"
|
||||
)
|
||||
|
||||
func TestApplyThinkingConfig_Adaptive(t *testing.T) {
|
||||
params := anthropic.MessageNewParams{
|
||||
MaxTokens: 16000,
|
||||
Temperature: anthropic.Float(0.7),
|
||||
}
|
||||
applyThinkingConfig(¶ms, "adaptive")
|
||||
|
||||
if params.Thinking.OfAdaptive == nil {
|
||||
t.Fatal("expected adaptive thinking")
|
||||
}
|
||||
if params.Thinking.OfEnabled != nil {
|
||||
t.Error("should not set enabled thinking in adaptive mode")
|
||||
}
|
||||
if params.OutputConfig.Effort != anthropic.OutputConfigEffortHigh {
|
||||
t.Errorf("effort = %q, want %q", params.OutputConfig.Effort, anthropic.OutputConfigEffortHigh)
|
||||
}
|
||||
if params.Temperature.Valid() {
|
||||
t.Error("temperature should be cleared when thinking is enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyThinkingConfig_BudgetLevels(t *testing.T) {
|
||||
tests := []struct {
|
||||
level string
|
||||
wantBudget int64
|
||||
}{
|
||||
{"low", 4096},
|
||||
{"medium", 16384},
|
||||
{"high", 32000},
|
||||
{"xhigh", 64000},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.level, func(t *testing.T) {
|
||||
params := anthropic.MessageNewParams{
|
||||
MaxTokens: 200000,
|
||||
Temperature: anthropic.Float(0.5),
|
||||
}
|
||||
applyThinkingConfig(¶ms, tt.level)
|
||||
|
||||
if params.Thinking.OfEnabled == nil {
|
||||
t.Fatal("expected enabled thinking")
|
||||
}
|
||||
if params.Thinking.OfAdaptive != nil {
|
||||
t.Error("should not set adaptive thinking")
|
||||
}
|
||||
if params.Thinking.OfEnabled.BudgetTokens != tt.wantBudget {
|
||||
t.Errorf("budget_tokens = %d, want %d", params.Thinking.OfEnabled.BudgetTokens, tt.wantBudget)
|
||||
}
|
||||
if params.OutputConfig.Effort != "" {
|
||||
t.Errorf("effort = %q, want empty", params.OutputConfig.Effort)
|
||||
}
|
||||
if params.Temperature.Valid() {
|
||||
t.Error("temperature should be cleared when thinking is enabled")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyThinkingConfig_BudgetClamp(t *testing.T) {
|
||||
// budget_tokens must be < max_tokens; clamp budget down to respect user's max_tokens.
|
||||
params := anthropic.MessageNewParams{MaxTokens: 4096}
|
||||
applyThinkingConfig(¶ms, "high") // budget=32000 > maxTokens=4096
|
||||
|
||||
if params.Thinking.OfEnabled == nil {
|
||||
t.Fatal("expected enabled thinking")
|
||||
}
|
||||
if params.Thinking.OfEnabled.BudgetTokens != 4095 {
|
||||
t.Errorf("budget_tokens = %d, want 4095 (maxTokens-1)", params.Thinking.OfEnabled.BudgetTokens)
|
||||
}
|
||||
if params.MaxTokens != 4096 {
|
||||
t.Errorf("max_tokens should not be modified, got %d", params.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyThinkingConfig_UnknownLevel(t *testing.T) {
|
||||
params := anthropic.MessageNewParams{MaxTokens: 16000}
|
||||
applyThinkingConfig(¶ms, "unknown")
|
||||
|
||||
if params.Thinking.OfEnabled != nil {
|
||||
t.Error("should not set enabled thinking for unknown level")
|
||||
}
|
||||
if params.Thinking.OfAdaptive != nil {
|
||||
t.Error("should not set adaptive thinking for unknown level")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLevelToBudget(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
level string
|
||||
want int
|
||||
}{
|
||||
{"low", "low", 4096},
|
||||
{"medium", "medium", 16384},
|
||||
{"high", "high", 32000},
|
||||
{"xhigh", "xhigh", 64000},
|
||||
{"off", "off", 0},
|
||||
{"empty", "", 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := levelToBudget(tt.level); got != tt.want {
|
||||
t.Errorf("levelToBudget(%q) = %d, want %d", tt.level, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildParams_ThinkingClearsTemperature(t *testing.T) {
|
||||
msgs := []Message{{Role: "user", Content: "hello"}}
|
||||
opts := map[string]any{
|
||||
"max_tokens": 200000,
|
||||
"temperature": 0.8,
|
||||
"thinking_level": "medium",
|
||||
}
|
||||
|
||||
params, err := buildParams(msgs, nil, "claude-sonnet-4-6", opts)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if params.Temperature.Valid() {
|
||||
t.Error("temperature should be cleared when thinking_level is set")
|
||||
}
|
||||
if params.Thinking.OfEnabled == nil {
|
||||
t.Fatal("expected enabled thinking")
|
||||
}
|
||||
if params.Thinking.OfEnabled.BudgetTokens != 16384 {
|
||||
t.Errorf("budget_tokens = %d, want 16384", params.Thinking.OfEnabled.BudgetTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// unmarshalBlocks constructs []ContentBlockUnion via JSON round-trip so that
|
||||
// the internal JSON.raw field is populated (required by AsText/AsThinking).
|
||||
func unmarshalBlocks(t *testing.T, jsonStr string) []anthropic.ContentBlockUnion {
|
||||
t.Helper()
|
||||
var blocks []anthropic.ContentBlockUnion
|
||||
if err := json.Unmarshal([]byte(jsonStr), &blocks); err != nil {
|
||||
t.Fatalf("unmarshalBlocks: %v", err)
|
||||
}
|
||||
return blocks
|
||||
}
|
||||
|
||||
func TestParseResponse_ThinkingBlock(t *testing.T) {
|
||||
resp := &anthropic.Message{
|
||||
Content: unmarshalBlocks(t, `[
|
||||
{"type":"thinking","thinking":"Let me reason step by step...","signature":"sig"},
|
||||
{"type":"text","text":"The answer is 42."}
|
||||
]`),
|
||||
StopReason: anthropic.StopReasonEndTurn,
|
||||
}
|
||||
|
||||
result := parseResponse(resp)
|
||||
|
||||
if result.Reasoning != "Let me reason step by step..." {
|
||||
t.Errorf("Reasoning = %q, want thinking content", result.Reasoning)
|
||||
}
|
||||
if result.Content != "The answer is 42." {
|
||||
t.Errorf("Content = %q, want text content", result.Content)
|
||||
}
|
||||
if result.FinishReason != "stop" {
|
||||
t.Errorf("FinishReason = %q, want stop", result.FinishReason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseResponse_NoThinkingBlock(t *testing.T) {
|
||||
resp := &anthropic.Message{
|
||||
Content: unmarshalBlocks(t, `[
|
||||
{"type":"text","text":"Just a normal response."}
|
||||
]`),
|
||||
StopReason: anthropic.StopReasonEndTurn,
|
||||
}
|
||||
|
||||
result := parseResponse(resp)
|
||||
|
||||
if result.Reasoning != "" {
|
||||
t.Errorf("Reasoning = %q, want empty", result.Reasoning)
|
||||
}
|
||||
if result.Content != "Just a normal response." {
|
||||
t.Errorf("Content = %q, want text content", result.Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildParams_NoThinkingKeepsTemperature(t *testing.T) {
|
||||
msgs := []Message{{Role: "user", Content: "hello"}}
|
||||
opts := map[string]any{
|
||||
"temperature": 0.8,
|
||||
}
|
||||
|
||||
params, err := buildParams(msgs, nil, "claude-sonnet-4-6", opts)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !params.Temperature.Valid() {
|
||||
t.Error("temperature should be preserved when thinking is not set")
|
||||
}
|
||||
if params.Temperature.Value != 0.8 {
|
||||
t.Errorf("temperature = %f, want 0.8", params.Temperature.Value)
|
||||
}
|
||||
}
|
||||
@@ -37,6 +37,13 @@ type StatefulProvider interface {
|
||||
Close()
|
||||
}
|
||||
|
||||
// ThinkingCapable is an optional capability interface for providers that
// support extended thinking (e.g. Anthropic). The agent loop uses it to warn
// when thinking_level is configured but the active provider cannot use it.
type ThinkingCapable interface {
	// SupportsThinking reports whether the provider supports extended thinking.
	SupportsThinking() bool
}
|
||||
|
||||
// FailoverReason classifies why an LLM request failed for fallback decisions.
|
||||
type FailoverReason string
|
||||
|
||||
|
||||
Reference in New Issue
Block a user