fix(agent): honor explicit thinking off (#2898)

* fix(agent): honor explicit thinking off

* fix(agent): address thinking off lint failures

* Clarify unset thinking level display

* fix ci
This commit is contained in:
lxowalle
2026-05-21 11:07:39 +08:00
committed by GitHub
parent 33f9d63862
commit e7e21df354
18 changed files with 1243 additions and 44 deletions
+17 -1
View File
@@ -118,6 +118,22 @@ func (fc *FallbackChain) Execute(
ctx context.Context,
candidates []FallbackCandidate,
run func(ctx context.Context, provider, model string) (*LLMResponse, error),
) (*FallbackResult, error) {
return fc.ExecuteCandidate(
ctx,
candidates,
func(ctx context.Context, candidate FallbackCandidate) (*LLMResponse, error) {
return run(ctx, candidate.Provider, candidate.Model)
},
)
}
// ExecuteCandidate runs the fallback chain and passes the complete candidate
// to the caller so model-list identity metadata remains available.
func (fc *FallbackChain) ExecuteCandidate(
ctx context.Context,
candidates []FallbackCandidate,
run func(ctx context.Context, candidate FallbackCandidate) (*LLMResponse, error),
) (*FallbackResult, error) {
if len(candidates) == 0 {
return nil, fmt.Errorf("fallback: no candidates configured")
@@ -181,7 +197,7 @@ func (fc *FallbackChain) Execute(
// Execute the run function.
start := time.Now()
resp, err := run(ctx, candidate.Provider, candidate.Model)
resp, err := run(ctx, candidate)
elapsed := time.Since(start)
if err == nil {
+77
View File
@@ -193,6 +193,8 @@ func (p *Provider) buildRequestBody(
}
}
p.applyThinkingControl(requestBody, model, options)
// Merge extra body fields configured per-provider/model.
// These are injected last so they take precedence over defaults.
maps.Copy(requestBody, p.extraBody)
@@ -200,6 +202,81 @@ func (p *Provider) buildRequestBody(
return requestBody
}
// applyThinkingControl writes the provider-specific "thinking disabled" field
// into requestBody when options carries a thinking_level of "off".
//
// Any other level, or a missing or unrecognized one, leaves requestBody
// untouched. The body is also left untouched when thinkingControlKind reports
// no known control for the provider/model pair.
func (p *Provider) applyThinkingControl(requestBody map[string]any, model string, options map[string]any) {
	if lvl, valid := normalizedThinkingLevel(options); !valid || lvl != "off" {
		return
	}

	kind := p.thinkingControlKind(model)
	if kind == "thinking_type" {
		// Nested object form: {"thinking": {"type": "disabled"}}.
		requestBody["thinking"] = map[string]any{"type": "disabled"}
		return
	}
	if kind == "enable_thinking" {
		// Flat boolean form: {"enable_thinking": false}.
		requestBody["enable_thinking"] = false
	}
}
// normalizedThinkingLevel reads options["thinking_level"] and returns it
// trimmed of surrounding whitespace and lower-cased.
//
// The boolean result is false (and the string empty) when the entry is absent,
// is not a string, or does not name one of the recognized levels.
func normalizedThinkingLevel(options map[string]any) (string, bool) {
	value, isString := options["thinking_level"].(string)
	if !isString {
		return "", false
	}

	// Normalize once, then compare against the accepted set.
	level := strings.ToLower(strings.TrimSpace(value))
	for _, known := range []string{"off", "low", "medium", "high", "xhigh", "adaptive"} {
		if level == known {
			return level, true
		}
	}
	return "", false
}
// thinkingControlKind reports which request-body field is used to switch
// thinking off for the configured provider and the given model.
//
// It returns "thinking_type", "enable_thinking", or "" when no control is
// known. Provider name and model are compared trimmed and lower-cased. For the
// "openai" provider name (or an empty one) the decision falls back to the API
// base host and to substrings of the model name.
func (p *Provider) thinkingControlKind(model string) string {
	name := strings.ToLower(strings.TrimSpace(p.providerName))
	modelID := strings.ToLower(strings.TrimSpace(model))

	switch name {
	case "volcengine", "zhipu", "zai":
		return "thinking_type"
	case "qwen", "qwen-portal", "qwen-intl", "qwen-international",
		"dashscope-intl", "qwen-us", "dashscope-us":
		return "enable_thinking"
	case "modelscope":
		// Only Qwen models are given a control under this provider name.
		if strings.Contains(modelID, "qwen") {
			return "enable_thinking"
		}
		return ""
	case "openai", "":
		// Generic OpenAI-compatible setup: check the Volcengine host/model
		// first, then the DashScope host/model.
		switch {
		case isVolcengineHost(p.apiBase) || strings.Contains(modelID, "doubao"):
			return "thinking_type"
		case isDashScopeHost(p.apiBase) || strings.Contains(modelID, "qwen"):
			return "enable_thinking"
		}
	}
	return ""
}
// isVolcengineHost reports whether the hostname of apiBase is volcengine.com,
// volces.com, or a subdomain of either.
func isVolcengineHost(apiBase string) bool {
	hostname := normalizedHostname(apiBase)
	for _, domain := range []string{"volcengine.com", "volces.com"} {
		if hostname == domain || strings.HasSuffix(hostname, "."+domain) {
			return true
		}
	}
	return false
}
// isDashScopeHost reports whether the hostname of apiBase is
// dashscope.aliyuncs.com or one of its subdomains.
func isDashScopeHost(apiBase string) bool {
	const domain = "dashscope.aliyuncs.com"

	hostname := normalizedHostname(apiBase)
	if hostname == domain {
		return true
	}
	return strings.HasSuffix(hostname, "."+domain)
}
// normalizedHostname parses rawURL (ignoring surrounding whitespace) and
// returns its hostname, without any port, trimmed and lower-cased.
//
// It returns "" when rawURL cannot be parsed, and also when the parsed URL
// has no host component.
func normalizedHostname(rawURL string) string {
	u, err := url.Parse(strings.TrimSpace(rawURL))
	if err != nil {
		return ""
	}

	hostname := strings.TrimSpace(u.Hostname())
	return strings.ToLower(hostname)
}
func (p *Provider) applyCustomHeaders(req *http.Request) {
for k, v := range p.customHeaders {
if strings.TrimSpace(k) == "" {
@@ -62,6 +62,69 @@ func TestProviderChat_UsesMaxCompletionTokensForGLM(t *testing.T) {
}
}
// TestBuildRequestBody_DisablesDoubaoThinkingWhenThinkingLevelOff checks that
// a provider named "openai" pointed at a volces.com endpoint emits
// thinking.type = "disabled" when thinking_level is "off".
func TestBuildRequestBody_DisablesDoubaoThinkingWhenThinkingLevelOff(t *testing.T) {
	provider := NewProvider("key", "https://ark.cn-beijing.volces.com/api/v3", "")
	provider.SetProviderName("openai")

	messages := []Message{{Role: "user", Content: "hi"}}
	options := map[string]any{"thinking_level": "off"}
	requestBody := provider.buildRequestBody(messages, nil, "doubao-seed-1-6-flash-250828", options)

	thinkingField, isMap := requestBody["thinking"].(map[string]any)
	if !isMap {
		t.Fatalf("thinking = %#v, want map", requestBody["thinking"])
	}
	if thinkingType := thinkingField["type"]; thinkingType != "disabled" {
		t.Fatalf("thinking.type = %#v, want %q", thinkingType, "disabled")
	}
}
// TestBuildRequestBody_DisablesModelDependentQwenThinkingWhenThinkingLevelOff
// checks that a "modelscope" provider serving a Qwen model emits
// enable_thinking = false when thinking_level is "off".
func TestBuildRequestBody_DisablesModelDependentQwenThinkingWhenThinkingLevelOff(t *testing.T) {
	provider := NewProvider("key", "https://api-inference.modelscope.cn/v1", "")
	provider.SetProviderName("modelscope")

	messages := []Message{{Role: "user", Content: "hi"}}
	options := map[string]any{"thinking_level": "off"}
	requestBody := provider.buildRequestBody(messages, nil, "qwen3-coder-plus", options)

	enableThinking := requestBody["enable_thinking"]
	if enableThinking != false {
		t.Fatalf("enable_thinking = %#v, want false", enableThinking)
	}
}
// TestBuildRequestBody_PreservesDoubaoRequestWhenThinkingLevelIsNotOff checks
// that neither "thinking" nor "enable_thinking" is added to the request body
// for levels other than "off", including a value that is not a known level.
func TestBuildRequestBody_PreservesDoubaoRequestWhenThinkingLevelIsNotOff(t *testing.T) {
	provider := NewProvider("key", "https://ark.cn-beijing.volces.com/api/v3", "")
	provider.SetProviderName("openai")

	levels := []string{"low", "adaptive", "unexpected"}
	for _, level := range levels {
		t.Run(level, func(t *testing.T) {
			messages := []Message{{Role: "user", Content: "hi"}}
			options := map[string]any{"thinking_level": level}
			requestBody := provider.buildRequestBody(messages, nil, "doubao-seed-1-6-flash-250828", options)

			if value, present := requestBody["thinking"]; present {
				t.Fatalf(
					"thinking should be omitted for %q to preserve existing behavior, got %#v",
					level,
					value,
				)
			}
			if value, present := requestBody["enable_thinking"]; present {
				t.Fatalf("enable_thinking should be omitted for %q, got %#v", level, value)
			}
		})
	}
}
func TestProviderChat_ParsesToolCalls(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
resp := map[string]any{