fix(agent): honor explicit thinking off (#2898)

* fix(agent): honor explicit thinking off
* fix(agent): address thinking off lint failures
* Clarify unset thinking level display
* fix CI
2026-06-12 18:08:54 +00:00 · 2026-05-21 11:07:39 +08:00
parent 33f9d63862
commit e7e21df354
18 changed files with 1243 additions and 44 deletions
@@ -118,6 +118,22 @@ func (fc *FallbackChain) Execute(
 	ctx context.Context,
 	candidates []FallbackCandidate,
 	run func(ctx context.Context, provider, model string) (*LLMResponse, error),
+) (*FallbackResult, error) {
+	return fc.ExecuteCandidate(
+		ctx,
+		candidates,
+		func(ctx context.Context, candidate FallbackCandidate) (*LLMResponse, error) {
+			return run(ctx, candidate.Provider, candidate.Model)
+		},
+	)
+}
+
+// ExecuteCandidate runs the fallback chain and passes the complete candidate
+// to the caller so model-list identity metadata remains available.
+func (fc *FallbackChain) ExecuteCandidate(
+	ctx context.Context,
+	candidates []FallbackCandidate,
+	run func(ctx context.Context, candidate FallbackCandidate) (*LLMResponse, error),
 ) (*FallbackResult, error) {
 	if len(candidates) == 0 {
 		return nil, fmt.Errorf("fallback: no candidates configured")
@@ -181,7 +197,7 @@ func (fc *FallbackChain) Execute(

 		// Execute the run function.
 		start := time.Now()
-		resp, err := run(ctx, candidate.Provider, candidate.Model)
+		resp, err := run(ctx, candidate)
 		elapsed := time.Since(start)

 		if err == nil {
@@ -193,6 +193,8 @@ func (p *Provider) buildRequestBody(
 		}
 	}

+	p.applyThinkingControl(requestBody, model, options)
+
 	// Merge extra body fields configured per-provider/model.
 	// These are injected last so they take precedence over defaults.
 	maps.Copy(requestBody, p.extraBody)
@@ -200,6 +202,81 @@ func (p *Provider) buildRequestBody(
 	return requestBody
 }

+// applyThinkingControl writes the provider-specific "thinking disabled" field
+// into requestBody when options carries thinking_level "off". Any other level,
+// a missing or unrecognized value, or a provider/model with no known control
+// leaves requestBody untouched. requestBody is modified in place.
+func (p *Provider) applyThinkingControl(requestBody map[string]any, model string, options map[string]any) {
+	if level, recognized := normalizedThinkingLevel(options); !(recognized && level == "off") {
+		return
+	}
+
+	kind := p.thinkingControlKind(model)
+	if kind == "thinking_type" {
+		// Nested {"thinking": {"type": "disabled"}} object form.
+		requestBody["thinking"] = map[string]any{"type": "disabled"}
+		return
+	}
+	if kind == "enable_thinking" {
+		// Flat boolean form.
+		requestBody["enable_thinking"] = false
+	}
+}
+
+// normalizedThinkingLevel reads options["thinking_level"] and returns it
+// trimmed and lowercased. The boolean is true only when the value is a string
+// naming one of the recognized levels: off, low, medium, high, xhigh, adaptive.
+// A missing key, a non-string value, or an unrecognized level yields ("", false).
+func normalizedThinkingLevel(options map[string]any) (string, bool) {
+	raw, ok := options["thinking_level"].(string)
+	if !ok {
+		return "", false
+	}
+
+	// Normalize once so the value that is matched is the value that is returned.
+	level := strings.ToLower(strings.TrimSpace(raw))
+	switch level {
+	case "off", "low", "medium", "high", "xhigh", "adaptive":
+		return level, true
+	default:
+		return "", false
+	}
+}
+
+// thinkingControlKind reports which request field is used to switch thinking
+// off for this provider and model: "thinking_type", "enable_thinking", or ""
+// when no control is known. The provider name decides first; for the "openai"
+// or unnamed provider the API base host and the model name are inspected
+// instead.
+func (p *Provider) thinkingControlKind(model string) string {
+	const (
+		kindThinkingType   = "thinking_type"
+		kindEnableThinking = "enable_thinking"
+	)
+
+	name := strings.ToLower(strings.TrimSpace(p.providerName))
+	modelID := strings.ToLower(strings.TrimSpace(model))
+	mentionsQwen := strings.Contains(modelID, "qwen")
+
+	switch name {
+	case "volcengine", "zhipu", "zai":
+		return kindThinkingType
+	case "qwen", "qwen-portal", "qwen-intl", "qwen-international", "dashscope-intl", "qwen-us", "dashscope-us":
+		return kindEnableThinking
+	case "modelscope":
+		// Only Qwen models on this provider get the flag.
+		if mentionsQwen {
+			return kindEnableThinking
+		}
+		return ""
+	case "openai", "":
+		// Generic endpoint: the Volcengine/Doubao check runs before the
+		// DashScope/Qwen check, so it wins when both would match.
+		switch {
+		case isVolcengineHost(p.apiBase) || strings.Contains(modelID, "doubao"):
+			return kindThinkingType
+		case isDashScopeHost(p.apiBase) || mentionsQwen:
+			return kindEnableThinking
+		}
+	}
+
+	return ""
+}
+
+// isVolcengineHost reports whether the hostname of apiBase is volcengine.com
+// or volces.com, or a subdomain of either.
+func isVolcengineHost(apiBase string) bool {
+	host := normalizedHostname(apiBase)
+	for _, domain := range []string{"volcengine.com", "volces.com"} {
+		if host == domain || strings.HasSuffix(host, "."+domain) {
+			return true
+		}
+	}
+	return false
+}
+
+// isDashScopeHost reports whether the hostname of apiBase is
+// dashscope.aliyuncs.com or one of its subdomains.
+func isDashScopeHost(apiBase string) bool {
+	const root = "dashscope.aliyuncs.com"
+
+	host := normalizedHostname(apiBase)
+	if host == root {
+		return true
+	}
+	return strings.HasSuffix(host, "."+root)
+}
+
+// normalizedHostname parses rawURL (after trimming surrounding whitespace) and
+// returns its hostname, without port, lowercased. It returns "" when the URL
+// cannot be parsed or has no host component.
+func normalizedHostname(rawURL string) string {
+	if u, err := url.Parse(strings.TrimSpace(rawURL)); err == nil {
+		return strings.ToLower(strings.TrimSpace(u.Hostname()))
+	}
+	return ""
+}
+
 func (p *Provider) applyCustomHeaders(req *http.Request) {
 	for k, v := range p.customHeaders {
 		if strings.TrimSpace(k) == "" {
@@ -62,6 +62,69 @@ func TestProviderChat_UsesMaxCompletionTokensForGLM(t *testing.T) {
 	}
 }

+// TestBuildRequestBody_DisablesDoubaoThinkingWhenThinkingLevelOff checks that
+// an "openai" provider pointed at a volces.com API base, asked for a doubao
+// model with thinking_level "off", emits thinking.type = "disabled".
+func TestBuildRequestBody_DisablesDoubaoThinkingWhenThinkingLevelOff(t *testing.T) {
+	provider := NewProvider("key", "https://ark.cn-beijing.volces.com/api/v3", "")
+	provider.SetProviderName("openai")
+
+	messages := []Message{{Role: "user", Content: "hi"}}
+	options := map[string]any{"thinking_level": "off"}
+	body := provider.buildRequestBody(messages, nil, "doubao-seed-1-6-flash-250828", options)
+
+	thinking, isMap := body["thinking"].(map[string]any)
+	if !isMap {
+		t.Fatalf("thinking = %#v, want map", body["thinking"])
+	}
+	if got := thinking["type"]; got != "disabled" {
+		t.Fatalf("thinking.type = %#v, want %q", got, "disabled")
+	}
+}
+
+// TestBuildRequestBody_DisablesModelDependentQwenThinkingWhenThinkingLevelOff
+// checks that a "modelscope" provider asked for a qwen model with
+// thinking_level "off" emits enable_thinking = false.
+func TestBuildRequestBody_DisablesModelDependentQwenThinkingWhenThinkingLevelOff(t *testing.T) {
+	provider := NewProvider("key", "https://api-inference.modelscope.cn/v1", "")
+	provider.SetProviderName("modelscope")
+
+	messages := []Message{{Role: "user", Content: "hi"}}
+	options := map[string]any{"thinking_level": "off"}
+	body := provider.buildRequestBody(messages, nil, "qwen3-coder-plus", options)
+
+	if got := body["enable_thinking"]; got != false {
+		t.Fatalf("enable_thinking = %#v, want false", got)
+	}
+}
+
+// TestBuildRequestBody_PreservesDoubaoRequestWhenThinkingLevelIsNotOff checks
+// that any thinking_level other than "off" (including an unrecognized value)
+// adds neither the "thinking" nor the "enable_thinking" field to the request.
+func TestBuildRequestBody_PreservesDoubaoRequestWhenThinkingLevelIsNotOff(t *testing.T) {
+	provider := NewProvider("key", "https://ark.cn-beijing.volces.com/api/v3", "")
+	provider.SetProviderName("openai")
+
+	levels := []string{"low", "adaptive", "unexpected"}
+	for _, level := range levels {
+		t.Run(level, func(t *testing.T) {
+			messages := []Message{{Role: "user", Content: "hi"}}
+			options := map[string]any{"thinking_level": level}
+			body := provider.buildRequestBody(messages, nil, "doubao-seed-1-6-flash-250828", options)
+
+			if value, present := body["thinking"]; present {
+				t.Fatalf(
+					"thinking should be omitted for %q to preserve existing behavior, got %#v",
+					level,
+					value,
+				)
+			}
+			if value, present := body["enable_thinking"]; present {
+				t.Fatalf("enable_thinking should be omitted for %q, got %#v", level, value)
+			}
+		})
+	}
+}
+
 func TestProviderChat_ParsesToolCalls(t *testing.T) {
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		resp := map[string]any{