diff --git a/pkg/agent/agent_command.go b/pkg/agent/agent_command.go index ae0293d71..944c58f3d 100644 --- a/pkg/agent/agent_command.go +++ b/pkg/agent/agent_command.go @@ -319,6 +319,7 @@ func (al *AgentLoop) buildCommandsRuntime( agent.Provider = nextProvider agent.Candidates = nextCandidates agent.ThinkingLevel = parseThinkingLevel(modelCfg.ThinkingLevel) + agent.ThinkingLevelConfigured = isConfiguredThinkingLevel(modelCfg.ThinkingLevel) if oldProvider != nil && oldProvider != nextProvider { if stateful, ok := oldProvider.(providers.StatefulProvider); ok { diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index aaf3d1a88..fe775656e 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "maps" "net/http" "net/http/httptest" "os" @@ -138,6 +139,121 @@ func (r *recordingProvider) GetDefaultModel() string { return "mock-model" } +type thinkingRecordingProvider struct { + lastOptions map[string]any +} + +func (r *thinkingRecordingProvider) Chat( + ctx context.Context, + messages []providers.Message, + tools []providers.ToolDefinition, + model string, + opts map[string]any, +) (*providers.LLMResponse, error) { + r.lastOptions = maps.Clone(opts) + return &providers.LLMResponse{ + Content: "Mock response", + ToolCalls: []providers.ToolCall{}, + }, nil +} + +func (r *thinkingRecordingProvider) GetDefaultModel() string { + return "mock-model" +} + +func (r *thinkingRecordingProvider) SupportsThinking() bool { + return true +} + +type thinkingOptionRecordingProvider struct { + lastOptions map[string]any +} + +func (r *thinkingOptionRecordingProvider) Chat( + ctx context.Context, + messages []providers.Message, + tools []providers.ToolDefinition, + model string, + opts map[string]any, +) (*providers.LLMResponse, error) { + r.lastOptions = maps.Clone(opts) + return &providers.LLMResponse{ + Content: "Mock response", + ToolCalls: []providers.ToolCall{}, + }, nil +} + +func (r *thinkingOptionRecordingProvider) GetDefaultModel() string { + return "mock-model" +} + +type reasoningOptionRecordingProvider struct { + lastOptions map[string]any +} + +func (r *reasoningOptionRecordingProvider) Chat( + ctx context.Context, + messages []providers.Message, + tools []providers.ToolDefinition, + model string, + opts map[string]any, +) (*providers.LLMResponse, error) { + r.lastOptions = maps.Clone(opts) + return &providers.LLMResponse{ + Content: "final answer", + ReasoningContent: "thinking trace", + ToolCalls: []providers.ToolCall{}, + }, nil +} + +func (r *reasoningOptionRecordingProvider) GetDefaultModel() string { + return "mock-model" +} + +type reasoningResponseProvider struct{} + +func (p *reasoningResponseProvider) Chat( + ctx context.Context, + messages []providers.Message, + tools []providers.ToolDefinition, + model string, + opts map[string]any, +) (*providers.LLMResponse, error) { + return &providers.LLMResponse{ + Content: "Mock response", + ReasoningContent: "thinking trace", + ToolCalls: []providers.ToolCall{}, + }, nil +} + +func (p *reasoningResponseProvider) GetDefaultModel() string { + return "mock-model" +} + +type sideQuestionFallbackTestProvider struct { + model string +} + +func (p *sideQuestionFallbackTestProvider) Chat( + ctx context.Context, + messages []providers.Message, + tools []providers.ToolDefinition, + model string, + opts map[string]any, +) (*providers.LLMResponse, error) { + if p.model == "test-model" { + return nil, context.DeadlineExceeded + } + return &providers.LLMResponse{ + ReasoningContent: "thinking trace", + ToolCalls: []providers.ToolCall{}, + }, nil +} + +func (p *sideQuestionFallbackTestProvider) GetDefaultModel() string { + return p.model +} + type modelRewriteHook struct { model string } @@ -386,6 +502,463 @@ func TestProcessMessage_IncludesCurrentSenderInDynamicContext(t *testing.T) { } } +func TestProcessMessage_DoesNotPassImplicitThinkingOffToCapableProvider(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + provider := &thinkingRecordingProvider{} + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + + response, err := al.processMessage(context.Background(), testInboundMessage(bus.InboundMessage{ + Channel: "pico", + ChatID: "chat-1", + Content: "hello", + })) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if _, ok := provider.lastOptions["thinking_level"]; ok { + t.Fatalf("thinking_level option should be omitted when unset, got %#v", provider.lastOptions["thinking_level"]) + } +} + +func TestProcessMessage_PassesExplicitThinkingOffToCapableProvider(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{{ + ModelName: "test-model", + Model: "test-model", + ThinkingLevel: "off", + }}, + } + + provider := &thinkingRecordingProvider{} + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + + response, err := al.processMessage(context.Background(), testInboundMessage(bus.InboundMessage{ + Channel: "pico", + ChatID: "chat-1", + Content: "hello", + })) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if got := provider.lastOptions["thinking_level"]; got != "off" { + t.Fatalf("thinking_level option = %#v, want %q", got, "off") + } +} + +func TestProcessMessage_PassesExplicitThinkingOffToProviderWithoutThinkingCapability(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{{ + ModelName: "test-model", + Model: "test-model", + ThinkingLevel: "off", + }}, + } + + provider := &thinkingOptionRecordingProvider{} + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + + response, err := al.processMessage(context.Background(), testInboundMessage(bus.InboundMessage{ + Channel: "pico", + ChatID: "chat-1", + Content: "hello", + })) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if got := provider.lastOptions["thinking_level"]; got != "off" { + t.Fatalf("thinking_level option = %#v, want %q", got, "off") + } +} + +func TestProcessMessage_SuppressesReasoningWhenThinkingOff(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{{ + ModelName: "test-model", + Model: "test-model", + ThinkingLevel: "off", + }}, + } + + msgBus := bus.NewMessageBus() + al := NewAgentLoop(cfg, msgBus, &reasoningResponseProvider{}) + + response, err := al.runAgentLoop( + context.Background(), + al.GetRegistry().GetDefaultAgent(), + processOptions{ + SessionKey: "agent:main:pico:chat-1", + Channel: "pico", + ChatID: "chat-1", + UserMessage: "hello", + SendResponse: false, + DefaultResponse: defaultResponse, + NoHistory: true, + }, + ) + if err != nil { + t.Fatalf("runAgentLoop() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("response = %q, want %q", response, "Mock response") + } + select { + case outbound := <-msgBus.OutboundChan(): + t.Fatalf("expected no reasoning outbound when thinking is off, got %+v", outbound) + case <-time.After(50 * time.Millisecond): + } +} + +func TestProcessMessage_BeforeLLMModelRewriteReevaluatesThinkingOff(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "plain-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "plain-model", + Model: "openai/plain-model", + }, + { + ModelName: "off-model", + Model: "openai/off-model", + ThinkingLevel: "off", + }, + }, + } + + msgBus := bus.NewMessageBus() + provider := &reasoningOptionRecordingProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + if err := al.MountHook(NamedHook("rewrite-model", modelRewriteHook{model: "off-model"})); err != nil { + t.Fatalf("MountHook failed: %v", err) + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "pico", + SenderID: "user1", + ChatID: "pico:test-session", + Content: "hello", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "final answer" { + t.Fatalf("processMessage() response = %q, want final answer", response) + } + if got := provider.lastOptions["thinking_level"]; got != "off" { + t.Fatalf("thinking_level option = %#v, want off after hook model rewrite", got) + } + select { + case outbound := <-msgBus.OutboundChan(): + t.Fatalf("expected no reasoning outbound after hook rewrote to off model, got %+v", outbound) + case <-time.After(50 * time.Millisecond): + } +} + +func TestProcessMessage_BeforeLLMModelRewriteDoesNotLeakThinkingOff(t *testing.T) { + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: t.TempDir(), + ModelName: "off-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "off-model", + Model: "openai/off-model", + ThinkingLevel: "off", + }, + { + ModelName: "plain-model", + Model: "openai/plain-model", + }, + }, + } + + msgBus := bus.NewMessageBus() + provider := &reasoningOptionRecordingProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + if err := al.MountHook(NamedHook("rewrite-model", modelRewriteHook{model: "plain-model"})); err != nil { + t.Fatalf("MountHook failed: %v", err) + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "pico", + SenderID: "user1", + ChatID: "pico:test-session", + Content: "hello", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "final answer" { + t.Fatalf("processMessage() response = %q, want final answer", response) + } + if _, ok := provider.lastOptions["thinking_level"]; ok { + t.Fatalf( + "thinking_level option should be cleared after hook rewrote away from off model, got %#v", + provider.lastOptions["thinking_level"], + ) + } + select { + case outbound := <-msgBus.OutboundChan(): + if outbound.Content != "thinking trace" { + t.Fatalf("reasoning outbound content = %q, want thinking trace", outbound.Content) + } + case <-time.After(3 * time.Second): + t.Fatal("expected reasoning outbound after hook rewrote away from off model") + } +} + +func TestProcessMessage_BtwCommandSuppressesReasoningWhenThinkingOff(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{{ + ModelName: "test-model", + Model: "openai/test-model", + ThinkingLevel: "off", + }}, + } + + al := NewAgentLoop(cfg, bus.NewMessageBus(), &sideQuestionFallbackTestProvider{model: "test-model"}) + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + model := "" + if mc != nil { + _, model = providers.ExtractProtocol(mc) + } + if model == "" { + model = "test-model" + } + return &sideQuestionFallbackTestProvider{model: model}, model, nil + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain privately", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if strings.Contains(response, "thinking trace") { + t.Fatalf("processMessage() response = %q, should not expose reasoning with thinking off", response) + } +} + +func TestProcessMessage_BtwHookModelRewriteReevaluatesThinkingOff(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "plain-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "plain-model", + Model: "openai/plain-model", + }, + { + ModelName: "off-model", + Model: "openai/off-model", + ThinkingLevel: "off", + }, + }, + } + + al := NewAgentLoop(cfg, bus.NewMessageBus(), &sideQuestionFallbackTestProvider{model: "plain-model"}) + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + model := "" + if mc != nil { + _, model = providers.ExtractProtocol(mc) + } + if model == "" { + model = "plain-model" + } + return &sideQuestionFallbackTestProvider{model: model}, model, nil + } + if err := al.MountHook(NamedHook("rewrite-model", modelRewriteHook{model: "off-model"})); err != nil { + t.Fatalf("MountHook failed: %v", err) + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain privately", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if strings.Contains(response, "thinking trace") { + t.Fatalf( + "processMessage() response = %q, should not expose reasoning after hook rewrote to off model", + response, + ) + } +} + +func TestProcessMessage_BtwHookModelRewriteDoesNotLeakThinkingOff(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "off-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "off-model", + Model: "openai/off-model", + ThinkingLevel: "off", + }, + { + ModelName: "plain-model", + Model: "openai/plain-model", + }, + }, + } + + al := NewAgentLoop(cfg, bus.NewMessageBus(), &sideQuestionFallbackTestProvider{model: "off-model"}) + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + model := "" + if mc != nil { + _, model = providers.ExtractProtocol(mc) + } + if model == "" { + model = "off-model" + } + return &sideQuestionFallbackTestProvider{model: model}, model, nil + } + if err := al.MountHook(NamedHook("rewrite-model", modelRewriteHook{model: "plain-model"})); err != nil { + t.Fatalf("MountHook failed: %v", err) + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain privately", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "thinking trace" { + t.Fatalf("processMessage() response = %q, want reasoning after hook rewrote away from off model", response) + } +} + +func TestProcessMessage_BtwFallbackDoesNotInheritPrimaryThinkingOff(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + ModelFallbacks: []string{"openai/fallback-model"}, + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{{ + ModelName: "test-model", + Model: "openai/test-model", + ThinkingLevel: "off", + }}, + } + + al := NewAgentLoop(cfg, bus.NewMessageBus(), &sideQuestionFallbackTestProvider{model: "test-model"}) + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + model := "" + if mc != nil { + _, model = providers.ExtractProtocol(mc) + } + if model == "" { + model = "test-model" + } + return &sideQuestionFallbackTestProvider{model: model}, model, nil + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain fallback reasoning", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "thinking trace" { + t.Fatalf("processMessage() response = %q, want fallback reasoning when fallback has no off", response) + } +} + func TestProcessMessage_UseCommandLoadsRequestedSkill(t *testing.T) { tmpDir := t.TempDir() skillDir := filepath.Join(tmpDir, "skills", "shell") @@ -3178,6 +3751,300 @@ func TestProcessMessage_FallbackUsesPerCandidateProvider(t *testing.T) { } } +func TestProcessMessage_FallbackReceivesExplicitThinkingOff(t *testing.T) { + workspace := t.TempDir() + + primaryServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTooManyRequests) + _ = json.NewEncoder(w).Encode(map[string]any{ + "error": map[string]any{ + "message": "rate limit exceeded", + "type": "rate_limit_error", + }, + }) + })) + defer primaryServer.Close() + + fallbackCalls := 0 + fallbackServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fallbackCalls++ + if r.URL.Path != "/chat/completions" { + t.Fatalf("fallback server path = %q, want /chat/completions", r.URL.Path) + } + defer r.Body.Close() + + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode fallback request: %v", err) + } + if got := req["model"]; got != "doubao-seed-1-6-flash-250828" { + t.Fatalf("fallback request model = %#v, want doubao-seed-1-6-flash-250828", got) + } + thinking, ok := req["thinking"].(map[string]any) + if !ok { + t.Fatalf("fallback request thinking = %#v, want map", req["thinking"]) + } + if got := thinking["type"]; got != "disabled" { + t.Fatalf("fallback request thinking.type = %#v, want disabled", got) + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string]any{ + "choices": []map[string]any{ + { + "message": map[string]any{"content": "fallback reply"}, + "finish_reason": "stop", + }, + }, + }); err != nil { + t.Fatalf("encode fallback response: %v", err) + } + })) + defer fallbackServer.Close() + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: workspace, + ModelName: "primary-model", + ModelFallbacks: []string{"doubao-fallback"}, + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "primary-model", + Model: "openrouter/primary-model", + APIBase: primaryServer.URL, + APIKeys: config.SimpleSecureStrings("primary-key"), + Workspace: workspace, + }, + { + ModelName: "doubao-fallback", + Model: "openai/doubao-seed-1-6-flash-250828", + APIBase: fallbackServer.URL, + APIKeys: config.SimpleSecureStrings("fallback-key"), + ThinkingLevel: "off", + Workspace: workspace, + }, + }, + } + + provider, _, err := providers.CreateProvider(cfg) + if err != nil { + t.Fatalf("CreateProvider() error = %v", err) + } + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + helper := testHelper{al: al} + + resp := helper.executeAndGetResponse(t, context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "user1", + ChatID: "chat1", + Content: "hi", + }) + + if resp != "fallback reply" { + t.Fatalf("response = %q, want fallback reply", resp) + } + if fallbackCalls != 1 { + t.Fatalf("fallback server calls = %d, want 1", fallbackCalls) + } +} + +func TestProcessMessage_PrimaryThinkingOffDoesNotLeakToFallback(t *testing.T) { + workspace := t.TempDir() + + primaryServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTooManyRequests) + _ = json.NewEncoder(w).Encode(map[string]any{ + "error": map[string]any{ + "message": "rate limit exceeded", + "type": "rate_limit_error", + }, + }) + })) + defer primaryServer.Close() + + fallbackServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/chat/completions" { + t.Fatalf("fallback server path = %q, want /chat/completions", r.URL.Path) + } + defer r.Body.Close() + + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode fallback request: %v", err) + } + if _, ok := req["thinking"]; ok { + t.Fatalf("fallback request should not inherit primary thinking off, got thinking=%#v", req["thinking"]) + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string]any{ + "choices": []map[string]any{ + { + "message": map[string]any{"content": "fallback reply"}, + "finish_reason": "stop", + }, + }, + }); err != nil { + t.Fatalf("encode fallback response: %v", err) + } + })) + defer fallbackServer.Close() + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: workspace, + ModelName: "primary-model", + ModelFallbacks: []string{"doubao-fallback"}, + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "primary-model", + Model: "openrouter/primary-model", + APIBase: primaryServer.URL, + APIKeys: config.SimpleSecureStrings("primary-key"), + ThinkingLevel: "off", + Workspace: workspace, + }, + { + ModelName: "doubao-fallback", + Model: "openai/doubao-seed-1-6-flash-250828", + APIBase: fallbackServer.URL, + APIKeys: config.SimpleSecureStrings("fallback-key"), + Workspace: workspace, + }, + }, + } + + provider, _, err := providers.CreateProvider(cfg) + if err != nil { + t.Fatalf("CreateProvider() error = %v", err) + } + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + helper := testHelper{al: al} + + resp := helper.executeAndGetResponse(t, context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "user1", + ChatID: "chat1", + Content: "hi", + }) + if resp != "fallback reply" { + t.Fatalf("response = %q, want fallback reply", resp) + } +} + +func TestProcessMessage_FallbackThinkingOffUsesCandidateIdentity(t *testing.T) { + workspace := t.TempDir() + + primaryServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTooManyRequests) + _ = json.NewEncoder(w).Encode(map[string]any{ + "error": map[string]any{ + "message": "rate limit exceeded", + "type": "rate_limit_error", + }, + }) + })) + defer primaryServer.Close() + + fallbackServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/chat/completions" { + t.Fatalf("fallback server path = %q, want /chat/completions", r.URL.Path) + } + defer r.Body.Close() + + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode fallback request: %v", err) + } + thinking, ok := req["thinking"].(map[string]any) + if !ok { + t.Fatalf("fallback request thinking = %#v, want map", req["thinking"]) + } + if got := thinking["type"]; got != "disabled" { + t.Fatalf("fallback request thinking.type = %#v, want disabled", got) + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string]any{ + "choices": []map[string]any{ + { + "message": map[string]any{"content": "fallback reply"}, + "finish_reason": "stop", + }, + }, + }); err != nil { + t.Fatalf("encode fallback response: %v", err) + } + })) + defer fallbackServer.Close() + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: workspace, + ModelName: "primary-model", + ModelFallbacks: []string{"doubao-off"}, + MaxTokens: 4096, + MaxToolIterations: 3, + }, + }, + ModelList: []*config.ModelConfig{ + { + ModelName: "primary-model", + Model: "openrouter/primary-model", + APIBase: primaryServer.URL, + APIKeys: config.SimpleSecureStrings("primary-key"), + Workspace: workspace, + }, + { + ModelName: "doubao-default", + Model: "openai/doubao-seed-1-6-flash-250828", + APIBase: fallbackServer.URL, + APIKeys: config.SimpleSecureStrings("fallback-key"), + Workspace: workspace, + }, + { + ModelName: "doubao-off", + Model: "openai/doubao-seed-1-6-flash-250828", + APIBase: fallbackServer.URL, + APIKeys: config.SimpleSecureStrings("fallback-key"), + ThinkingLevel: "off", + Workspace: workspace, + }, + }, + } + + provider, _, err := providers.CreateProvider(cfg) + if err != nil { + t.Fatalf("CreateProvider() error = %v", err) + } + al := NewAgentLoop(cfg, bus.NewMessageBus(), provider) + helper := testHelper{al: al} + + resp := helper.executeAndGetResponse(t, context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "user1", + ChatID: "chat1", + Content: "hi", + }) + if resp != "fallback reply" { + t.Fatalf("response = %q, want fallback reply", resp) + } +} + // TestProcessMessage_FallbackUsesActiveProviderWhenCandidateNotRegistered verifies // that when a candidate has no model_list entry it is absent from CandidateProviders // and the fallback closure falls back to activeProvider instead of panicking. diff --git a/pkg/agent/agent_utils.go b/pkg/agent/agent_utils.go index 432a9f24c..8c638deb8 100644 --- a/pkg/agent/agent_utils.go +++ b/pkg/agent/agent_utils.go @@ -560,6 +560,7 @@ func sideQuestionModelName(agent *AgentInstance, usedLight bool) string { } func modelNameFromIdentityKey(identityKey string) string { + identityKey = strings.TrimSpace(identityKey) if identityKey == "" { return "" } diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go index 4ed713035..68150ce78 100644 --- a/pkg/agent/instance.go +++ b/pkg/agent/instance.go @@ -31,6 +31,7 @@ type AgentInstance struct { MaxTokens int Temperature float64 ThinkingLevel ThinkingLevel + ThinkingLevelConfigured bool ContextWindow int SummarizeMessageThreshold int SummarizeTokenPercent int @@ -184,6 +185,7 @@ func NewAgentInstance( thinkingLevelStr = mc.ThinkingLevel } thinkingLevel := parseThinkingLevel(thinkingLevelStr) + thinkingLevelConfigured := isConfiguredThinkingLevel(thinkingLevelStr) summarizeMessageThreshold := defaults.SummarizeMessageThreshold if summarizeMessageThreshold == 0 { @@ -251,6 +253,7 @@ func NewAgentInstance( MaxTokens: maxTokens, Temperature: temperature, ThinkingLevel: thinkingLevel, + ThinkingLevelConfigured: thinkingLevelConfigured, ContextWindow: contextWindow, SummarizeMessageThreshold: summarizeMessageThreshold, SummarizeTokenPercent: summarizeTokenPercent, diff --git a/pkg/agent/pipeline_llm.go b/pkg/agent/pipeline_llm.go index aaae765ef..6df2e4abd 100644 --- a/pkg/agent/pipeline_llm.go +++ b/pkg/agent/pipeline_llm.go @@ -73,14 +73,7 @@ func (p *Pipeline) CallLLM( if exec.useNativeSearch { exec.llmOpts["native_search"] = true } - if ts.agent.ThinkingLevel != ThinkingOff { - if tc, ok := ts.agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() { - exec.llmOpts["thinking_level"] = string(ts.agent.ThinkingLevel) - } else { - logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring", - map[string]any{"agent_id": ts.agent.ID, "thinking_level": string(ts.agent.ThinkingLevel)}) - } - } + applyTurnThinkingOptions(exec, ts.agent, exec.activeProvider, true) exec.llmModel = exec.activeModel @@ -105,6 +98,7 @@ func (p *Pipeline) CallLLM( exec.llmOpts = llmReq.Options if strings.TrimSpace(exec.llmModel) != "" && exec.llmModel != prevModel { p.applyBeforeLLMModelRewrite(ts, exec) + applyTurnThinkingOptions(exec, ts.agent, exec.activeProvider, true) } } case HookActionAbortTurn: @@ -172,21 +166,33 @@ func (p *Pipeline) CallLLM( } if len(exec.activeCandidates) > 1 && p.Fallback != nil { - fbResult, fbErr := p.Fallback.Execute( + fbResult, fbErr := p.Fallback.ExecuteCandidate( providerCtx, exec.activeCandidates, - func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) { + func(ctx context.Context, candidate providers.FallbackCandidate) (*providers.LLMResponse, error) { candidateProvider, err := providerForFallbackCandidate( ts.agent, exec.activeProvider, exec.activeCandidates, - provider, - model, + candidate.Provider, + candidate.Model, ) if err != nil { return nil, err } - return candidateProvider.Chat(ctx, messagesForCall, toolDefsForCall, model, exec.llmOpts) + callOpts := shallowCloneLLMOptions(exec.llmOpts) + delete(callOpts, "thinking_level") + candidateCfg := resolveActiveModelConfig( + p.Cfg, + ts.agent.Workspace, + []providers.FallbackCandidate{candidate}, + candidate.Model, + p.Cfg.Agents.Defaults.Provider, + ) + candidateThinking := thinkingSettingsFromModelConfig(candidateCfg) + applyThinkingOption(callOpts, candidateProvider, candidateThinking, true, ts.agent.ID) + exec.suppressReasoning = shouldSuppressReasoningFor(candidateThinking) + return candidateProvider.Chat(ctx, messagesForCall, toolDefsForCall, candidate.Model, callOpts) }, ) if fbErr != nil { @@ -469,6 +475,11 @@ func (p *Pipeline) CallLLM( } } + if exec.suppressReasoning { + exec.response.Reasoning = "" + exec.response.ReasoningContent = "" + exec.response.ReasoningDetails = nil + } reasoningContent := responseReasoningContent(exec.response) shouldPublishPicoToolCallInterim := ts.channel == "pico" && len(exec.response.ToolCalls) > 0 if shouldPublishPicoToolCallInterim { diff --git a/pkg/agent/pipeline_streaming.go b/pkg/agent/pipeline_streaming.go index d0ef90524..115cbffcb 100644 --- a/pkg/agent/pipeline_streaming.go +++ b/pkg/agent/pipeline_streaming.go @@ -85,7 +85,7 @@ func (p *Pipeline) tryConfiguredStreamingLLM( exec.llmOpts, func(chunk providers.StreamChunk) { recordChunk() - if strings.TrimSpace(chunk.ReasoningContent) != "" { + if !exec.suppressReasoning && strings.TrimSpace(chunk.ReasoningContent) != "" { publisher.UpdateReasoning(ctx, chunk.ReasoningContent) } if strings.TrimSpace(chunk.Content) != "" { diff --git a/pkg/agent/pipeline_streaming_test.go b/pkg/agent/pipeline_streaming_test.go index a18e4c6f1..9bba1b901 100644 --- a/pkg/agent/pipeline_streaming_test.go +++ b/pkg/agent/pipeline_streaming_test.go @@ -517,6 +517,46 @@ func TestConfiguredStreamingStreamsPicoReasoningBeforeAnswerContent(t *testing.T } } +func TestConfiguredStreamingSuppressesPicoReasoningWhenThinkingOff(t *testing.T) { + cfg := newConfiguredStreamingTestConfig(t, true, true, nil) + cfg.ModelList[0].ThinkingLevel = "off" + streamer := &recordingStreamer{} + msgBus := bus.NewMessageBus() + msgBus.SetStreamDelegate(configuredStreamingDelegate{streamer: streamer}) + provider := &configuredStreamingProvider{ + eventPlan: []configuredStreamingEventCall{{ + chunks: []providers.StreamChunk{ + {ReasoningContent: "thinking"}, + {Content: "answer"}, + }, + response: &providers.LLMResponse{ + Content: "answer", + ReasoningContent: "thinking", + }, + }}, + } + al := NewAgentLoop(cfg, msgBus, provider) + + got := runConfiguredStreamingTurn(t, al, "pico") + if got != "answer" { + t.Fatalf("response = %q, want answer", got) + } + if len(streamer.reasoningUpdates) != 0 { + t.Fatalf("reasoning updates = %v, want none when thinking is off", streamer.reasoningUpdates) + } + if len(streamer.reasoningFinalized) != 0 { + t.Fatalf("reasoning finalized = %v, want none when thinking is off", streamer.reasoningFinalized) + } + if len(streamer.updates) != 1 || streamer.updates[0] != "answer" { + t.Fatalf("content updates = %v, want [answer]", streamer.updates) + } + select { + case outbound := <-msgBus.OutboundChan(): + t.Fatalf("expected no reasoning outbound when thinking is off, got %+v", outbound) + case <-time.After(50 * time.Millisecond): + } +} + func TestConfiguredStreamingFinalFlushFailureAfterVisibleOutputReturnsErrorWithoutFallbackOrCancel(t *testing.T) { cfg := newConfiguredStreamingTestConfig(t, true, true, nil) streamer := &failingFinalizeStreamer{err: errors.New("final failed")} diff --git a/pkg/agent/thinking.go b/pkg/agent/thinking.go index 015b69282..087f5136f 100644 --- a/pkg/agent/thinking.go +++ b/pkg/agent/thinking.go @@ -1,6 +1,12 @@ package agent -import "strings" +import ( + "strings" + + "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/providers" +) // ThinkingLevel controls how the provider sends thinking parameters. // @@ -37,3 +43,87 @@ func parseThinkingLevel(level string) ThinkingLevel { return ThinkingOff } } + +func isConfiguredThinkingLevel(level string) bool { + switch strings.ToLower(strings.TrimSpace(level)) { + case "off", "low", "medium", "high", "xhigh", "adaptive": + return true + default: + return false + } +} + +type thinkingSettings struct { + level ThinkingLevel + configured bool +} + +func thinkingSettingsFromModelConfig(mc *config.ModelConfig) thinkingSettings { + if mc == nil || !isConfiguredThinkingLevel(mc.ThinkingLevel) { + return thinkingSettings{} + } + return thinkingSettings{ + level: parseThinkingLevel(mc.ThinkingLevel), + configured: true, + } +} + +func activeThinkingSettings(agent *AgentInstance, modelCfg *config.ModelConfig) thinkingSettings { + if settings := thinkingSettingsFromModelConfig(modelCfg); settings.configured { + return settings + } + if modelCfg == nil && agent != nil { + return thinkingSettings{ + level: agent.ThinkingLevel, + configured: agent.ThinkingLevelConfigured, + } + } + return thinkingSettings{} +} + +func applyThinkingOption( + opts map[string]any, + provider providers.LLMProvider, + settings thinkingSettings, + warnUnsupported bool, + agentID string, +) { + if opts == nil || !settings.configured { + return + } + if settings.level == ThinkingOff { + opts["thinking_level"] = string(settings.level) + return + } + if tc, ok := provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() { + opts["thinking_level"] = string(settings.level) + return + } + if warnUnsupported { + logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring", + map[string]any{"agent_id": agentID, "thinking_level": string(settings.level)}) + } +} + +func applyTurnThinkingOptions( + exec *turnExecution, + agent *AgentInstance, + provider providers.LLMProvider, + warnUnsupported bool, +) { + if exec == nil || exec.llmOpts == nil { + return + } + delete(exec.llmOpts, "thinking_level") + settings := activeThinkingSettings(agent, exec.activeModelConfig) + agentID := "" + if agent != nil { + agentID = agent.ID + } + applyThinkingOption(exec.llmOpts, provider, settings, warnUnsupported, agentID) + exec.suppressReasoning = shouldSuppressReasoningFor(settings) +} + +func shouldSuppressReasoningFor(settings thinkingSettings) bool { + return settings.configured && settings.level == ThinkingOff +} diff --git a/pkg/agent/turn_coord.go b/pkg/agent/turn_coord.go index 060346339..a8a6ff893 100644 --- a/pkg/agent/turn_coord.go +++ b/pkg/agent/turn_coord.go @@ -423,6 +423,7 @@ func (al *AgentLoop) askSideQuestion( } hookModelChanged := false + sideSuppressReasoning := false callProvider := func( ctx context.Context, candidate providers.FallbackCandidate, @@ -430,7 +431,15 @@ func (al *AgentLoop) askSideQuestion( forceModel bool, callMessages []providers.Message, ) (*providers.LLMResponse, error) { - provider, providerModel, cleanup, err := al.isolatedSideQuestionProvider(agent, selectedModelName, candidate) + baseModelName := selectedModelName + if forceModel && strings.TrimSpace(model) != "" { + baseModelName = model + } + provider, providerModel, modelCfg, cleanup, err := al.isolatedSideQuestionProvider( + agent, + baseModelName, + candidate, + ) if err != nil { return nil, err } @@ -439,10 +448,12 @@ func (al *AgentLoop) askSideQuestion( model = providerModel } callOpts := llmOpts - if _, exists := callOpts["thinking_level"]; !exists && agent.ThinkingLevel != ThinkingOff { - if tc, ok := provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() { + settings := thinkingSettingsFromModelConfig(modelCfg) + sideSuppressReasoning = shouldSuppressReasoningFor(settings) + if _, exists := callOpts["thinking_level"]; !exists { + if settings.configured { callOpts = shallowCloneLLMOptions(llmOpts) - callOpts["thinking_level"] = string(agent.ThinkingLevel) + applyThinkingOption(callOpts, provider, settings, false, agent.ID) } } return provider.Chat(ctx, callMessages, nil, model, callOpts) @@ -500,18 +511,11 @@ func (al *AgentLoop) askSideQuestion( callSideLLM := func(callMessages []providers.Message) (*providers.LLMResponse, error) { if len(activeCandidates) > 1 && al.fallback != nil { - fbResult, err := al.fallback.Execute( + fbResult, err := al.fallback.ExecuteCandidate( ctx, activeCandidates, - func(ctx context.Context, providerName, model string) (*providers.LLMResponse, error) { - candidate := providers.FallbackCandidate{Provider: providerName, Model: model} - for _, activeCandidate := range activeCandidates { - if activeCandidate.Provider == providerName && activeCandidate.Model == model { - candidate = activeCandidate - break - } - } - return callProvider(ctx, candidate, model, false, callMessages) + func(ctx context.Context, candidate providers.FallbackCandidate) (*providers.LLMResponse, error) { + return callProvider(ctx, candidate, candidate.Model, false, callMessages) }, ) if err != nil { @@ -584,6 +588,11 @@ func (al *AgentLoop) askSideQuestion( return "", fmt.Errorf("hook aborted turn during after_llm: %s", reason) } } + if sideSuppressReasoning { + resp.Reasoning = "" + resp.ReasoningContent = "" + resp.ReasoningDetails = nil + } return sideQuestionResponseContent(resp), nil } @@ -592,14 +601,14 @@ func (al *AgentLoop) isolatedSideQuestionProvider( agent *AgentInstance, baseModelName string, candidate providers.FallbackCandidate, -) (providers.LLMProvider, string, func(), error) { +) (providers.LLMProvider, string, *config.ModelConfig, func(), error) { if agent == nil { - return nil, "", func() {}, fmt.Errorf("isolatedSideQuestionProvider: no agent available for /btw") + return nil, "", nil, func() {}, fmt.Errorf("isolatedSideQuestionProvider: no agent available for /btw") } modelCfg, err := al.sideQuestionModelConfig(agent, baseModelName, candidate) if err != nil { - return nil, "", func() {}, fmt.Errorf("isolatedSideQuestionProvider: %w", err) + return nil, "", nil, func() {}, fmt.Errorf("isolatedSideQuestionProvider: %w", err) } factory := al.providerFactory @@ -608,13 +617,13 @@ func (al *AgentLoop) isolatedSideQuestionProvider( } provider, modelID, err := factory(modelCfg) if err != nil { - return nil, "", func() {}, fmt.Errorf("isolatedSideQuestionProvider: %w", err) + return nil, "", nil, func() {}, fmt.Errorf("isolatedSideQuestionProvider: %w", err) } cleanup := func() { closeProviderIfStateful(provider) } - return provider, modelID, cleanup, nil + return provider, modelID, modelCfg, cleanup, nil } func (al *AgentLoop) sideQuestionModelConfig( @@ -626,7 +635,15 @@ func (al *AgentLoop) sideQuestionModelConfig( return nil, fmt.Errorf("sideQuestionModelConfig: no agent available for /btw") } - // If candidate has an identity key, use that + if name := modelAliasFromCandidateIdentityKey(candidate.IdentityKey); name != "" { + modelCfg, err := resolvedModelConfig(al.GetConfig(), name, agent.Workspace) + if err == nil { + return modelCfg, nil + } + // Fallback: create a minimal config if lookup fails + } + + // Older identity keys used provider/model; keep resolving those by model. if name := modelNameFromIdentityKey(candidate.IdentityKey); name != "" { modelCfg, err := resolvedModelConfig(al.GetConfig(), name, agent.Workspace) if err == nil { @@ -635,6 +652,18 @@ func (al *AgentLoop) sideQuestionModelConfig( // Fallback: create a minimal config if lookup fails } + if candidate.Provider != "" && candidate.Model != "" { + candidateRef := providers.NormalizeProvider(candidate.Provider) + "/" + candidate.Model + if modelCfg, err := resolvedModelConfig(al.GetConfig(), candidateRef, agent.Workspace); err == nil { + return modelCfg, nil + } + return &config.ModelConfig{ + ModelName: candidateRef, + Model: candidateRef, + Workspace: agent.Workspace, + }, nil + } + // Otherwise, clean up the base model name and use it baseModelName = strings.TrimSpace(baseModelName) modelCfg, err := resolvedModelConfig(al.GetConfig(), baseModelName, agent.Workspace) @@ -658,8 +687,5 @@ func (al *AgentLoop) sideQuestionModelConfig( // If candidate specifies a different provider/model, override clone := *modelCfg - if candidate.Provider != "" && candidate.Model != "" { - clone.Model = providers.NormalizeProvider(candidate.Provider) + "/" + candidate.Model - } return &clone, nil } diff --git a/pkg/agent/turn_state.go b/pkg/agent/turn_state.go index ddd1eb894..2f40c2d30 100644 --- a/pkg/agent/turn_state.go +++ b/pkg/agent/turn_state.go @@ -138,6 +138,7 @@ type turnExecution struct { allResponsesHandled bool streamingPublisher *streamingChunkPublisher streamingFallback bool + suppressReasoning bool callMessages []providers.Message providerToolDefs []providers.ToolDefinition llmModel string diff --git a/pkg/providers/fallback.go b/pkg/providers/fallback.go index ca0e652d1..6529e2111 100644 --- a/pkg/providers/fallback.go +++ b/pkg/providers/fallback.go @@ -118,6 +118,22 @@ func (fc *FallbackChain) Execute( ctx context.Context, candidates []FallbackCandidate, run func(ctx context.Context, provider, model string) (*LLMResponse, error), +) (*FallbackResult, error) { + return fc.ExecuteCandidate( + ctx, + candidates, + func(ctx context.Context, candidate FallbackCandidate) (*LLMResponse, error) { + return run(ctx, candidate.Provider, candidate.Model) + }, + ) +} + +// ExecuteCandidate runs the fallback chain and passes the complete candidate +// to the caller so model-list identity metadata remains available. +func (fc *FallbackChain) ExecuteCandidate( + ctx context.Context, + candidates []FallbackCandidate, + run func(ctx context.Context, candidate FallbackCandidate) (*LLMResponse, error), ) (*FallbackResult, error) { if len(candidates) == 0 { return nil, fmt.Errorf("fallback: no candidates configured") @@ -181,7 +197,7 @@ func (fc *FallbackChain) Execute( // Execute the run function. start := time.Now() - resp, err := run(ctx, candidate.Provider, candidate.Model) + resp, err := run(ctx, candidate) elapsed := time.Since(start) if err == nil { diff --git a/pkg/providers/openai_compat/provider.go b/pkg/providers/openai_compat/provider.go index 2e85297eb..5990f58e9 100644 --- a/pkg/providers/openai_compat/provider.go +++ b/pkg/providers/openai_compat/provider.go @@ -193,6 +193,8 @@ func (p *Provider) buildRequestBody( } } + p.applyThinkingControl(requestBody, model, options) + // Merge extra body fields configured per-provider/model. // These are injected last so they take precedence over defaults. maps.Copy(requestBody, p.extraBody) @@ -200,6 +202,81 @@ func (p *Provider) buildRequestBody( return requestBody } +func (p *Provider) applyThinkingControl(requestBody map[string]any, model string, options map[string]any) { + level, ok := normalizedThinkingLevel(options) + if !ok || level != "off" { + return + } + + switch p.thinkingControlKind(model) { + case "thinking_type": + requestBody["thinking"] = map[string]any{"type": "disabled"} + case "enable_thinking": + requestBody["enable_thinking"] = false + } +} + +func normalizedThinkingLevel(options map[string]any) (string, bool) { + raw, ok := options["thinking_level"].(string) + if !ok { + return "", false + } + switch strings.ToLower(strings.TrimSpace(raw)) { + case "off", "low", "medium", "high", "xhigh", "adaptive": + return strings.ToLower(strings.TrimSpace(raw)), true + default: + return "", false + } +} + +func (p *Provider) thinkingControlKind(model string) string { + providerName := strings.ToLower(strings.TrimSpace(p.providerName)) + lowerModel := strings.ToLower(strings.TrimSpace(model)) + + switch providerName { + case "volcengine": + return "thinking_type" + case "zhipu", "zai": + return "thinking_type" + case "qwen", "qwen-portal", "qwen-intl", "qwen-international", "dashscope-intl", "qwen-us", "dashscope-us": + return "enable_thinking" + case "modelscope": + if strings.Contains(lowerModel, "qwen") { + return "enable_thinking" + } + } + + if providerName == "openai" || providerName == "" { + if isVolcengineHost(p.apiBase) || strings.Contains(lowerModel, "doubao") { + return "thinking_type" + } + if isDashScopeHost(p.apiBase) || strings.Contains(lowerModel, "qwen") { + return "enable_thinking" + } + } + + return "" +} + +func isVolcengineHost(apiBase string) bool { + host := normalizedHostname(apiBase) + return host == "volcengine.com" || strings.HasSuffix(host, ".volcengine.com") || + host == "volces.com" || strings.HasSuffix(host, ".volces.com") +} + +func isDashScopeHost(apiBase string) bool { + host := normalizedHostname(apiBase) + return host == "dashscope.aliyuncs.com" || strings.HasSuffix(host, ".dashscope.aliyuncs.com") +} + +func normalizedHostname(rawURL string) string { + parsed, err := url.Parse(strings.TrimSpace(rawURL)) + if err != nil { + return "" + } + return strings.ToLower(strings.TrimSpace(parsed.Hostname())) +} + func (p *Provider) applyCustomHeaders(req *http.Request) { for k, v := range p.customHeaders { if strings.TrimSpace(k) == "" { diff --git a/pkg/providers/openai_compat/provider_test.go b/pkg/providers/openai_compat/provider_test.go index 73615b6ed..e8396171f 100644 --- a/pkg/providers/openai_compat/provider_test.go +++ b/pkg/providers/openai_compat/provider_test.go @@ -62,6 +62,69 @@ func TestProviderChat_UsesMaxCompletionTokensForGLM(t *testing.T) { } } +func TestBuildRequestBody_DisablesDoubaoThinkingWhenThinkingLevelOff(t *testing.T) { + p := NewProvider("key", "https://ark.cn-beijing.volces.com/api/v3", "") + p.SetProviderName("openai") + + body := p.buildRequestBody( + []Message{{Role: "user", Content: "hi"}}, + nil, + "doubao-seed-1-6-flash-250828", + map[string]any{"thinking_level": "off"}, + ) + + thinking, ok := body["thinking"].(map[string]any) + if !ok { + t.Fatalf("thinking = %#v, want map", body["thinking"]) + } + if got := thinking["type"]; got != "disabled" { + t.Fatalf("thinking.type = %#v, want %q", got, "disabled") + } +} + +func TestBuildRequestBody_DisablesModelDependentQwenThinkingWhenThinkingLevelOff(t *testing.T) { + p := NewProvider("key", "https://api-inference.modelscope.cn/v1", "") + p.SetProviderName("modelscope") + + body := p.buildRequestBody( + []Message{{Role: "user", Content: "hi"}}, + nil, + "qwen3-coder-plus", + map[string]any{"thinking_level": "off"}, + ) + + if got := body["enable_thinking"]; got != false { + t.Fatalf("enable_thinking = %#v, want false", got) + } +} + +func TestBuildRequestBody_PreservesDoubaoRequestWhenThinkingLevelIsNotOff(t *testing.T) { + p := NewProvider("key", "https://ark.cn-beijing.volces.com/api/v3", "") + p.SetProviderName("openai") + + for _, level := range []string{"low", "adaptive", "unexpected"} { + t.Run(level, func(t *testing.T) { + body := p.buildRequestBody( + []Message{{Role: "user", Content: "hi"}}, + nil, + "doubao-seed-1-6-flash-250828", + map[string]any{"thinking_level": level}, + ) + + if _, ok := body["thinking"]; ok { + t.Fatalf( + "thinking should be omitted for %q to preserve existing behavior, got %#v", + level, + body["thinking"], + ) + } + if _, ok := body["enable_thinking"]; ok { + t.Fatalf("enable_thinking should be omitted for %q, got %#v", level, body["enable_thinking"]) + } + }) + } +} + func TestProviderChat_ParsesToolCalls(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { resp := map[string]any{ diff --git a/web/frontend/src/components/models/add-model-sheet.tsx b/web/frontend/src/components/models/add-model-sheet.tsx index 453bbe255..653380c88 100644 --- a/web/frontend/src/components/models/add-model-sheet.tsx +++ b/web/frontend/src/components/models/add-model-sheet.tsx @@ -720,7 +720,7 @@ export function AddModelSheet({ diff --git a/web/frontend/src/components/models/edit-model-sheet.tsx b/web/frontend/src/components/models/edit-model-sheet.tsx index c01732bed..44cd6b668 100644 --- a/web/frontend/src/components/models/edit-model-sheet.tsx +++ b/web/frontend/src/components/models/edit-model-sheet.tsx @@ -690,7 +690,7 @@ export function EditModelSheet({ diff --git a/web/frontend/src/i18n/locales/en.json b/web/frontend/src/i18n/locales/en.json index 9b66f598e..987990205 100644 --- a/web/frontend/src/i18n/locales/en.json +++ b/web/frontend/src/i18n/locales/en.json @@ -313,7 +313,8 @@ "rpm": "Rate Limit (RPM)", "rpmHint": "Maximum requests per minute. 0 = no limit.", "thinkingLevel": "Thinking Level", - "thinkingLevelHint": "Extended thinking budget: off, low, medium, high, xhigh, adaptive.", + "thinkingLevelHint": "Leave blank to omit thinking_level and use the provider default. Values: off, low, medium, high, xhigh, adaptive.", + "providerDefault": "provider default", "maxTokensField": "Max Tokens Field", "maxTokensFieldHint": "Override the request field name for max tokens, e.g. max_completion_tokens.", "toolSchemaTransform": "Tool Schema Transform", diff --git a/web/frontend/src/i18n/locales/pt-br.json b/web/frontend/src/i18n/locales/pt-br.json index 458fd5724..bb17eea0a 100644 --- a/web/frontend/src/i18n/locales/pt-br.json +++ b/web/frontend/src/i18n/locales/pt-br.json @@ -312,7 +312,8 @@ "rpm": "Limite de Taxa (RPM)", "rpmHint": "Máximo de requisições por minuto. 0 = sem limite.", "thinkingLevel": "Nível de Pensamento", - "thinkingLevelHint": "Orçamento de pensamento estendido: off, low, medium, high, xhigh, adaptive.", + "thinkingLevelHint": "Deixe em branco para omitir thinking_level e usar o padrão do provider. Valores: off, low, medium, high, xhigh, adaptive.", + "providerDefault": "padrão do provider", "maxTokensField": "Campo de Max Tokens", "maxTokensFieldHint": "Sobrescreve o nome do campo de max tokens na requisição, ex: max_completion_tokens.", "toolSchemaTransform": "Transformação de Schema de Ferramentas", diff --git a/web/frontend/src/i18n/locales/zh.json b/web/frontend/src/i18n/locales/zh.json index 21bb6879b..5dde72e1c 100644 --- a/web/frontend/src/i18n/locales/zh.json +++ b/web/frontend/src/i18n/locales/zh.json @@ -313,7 +313,8 @@ "rpm": "速率限制(RPM)", "rpmHint": "每分钟最大请求数,0 表示不限制。", "thinkingLevel": "思考级别", - "thinkingLevelHint": "扩展思考预算:off、low、medium、high、xhigh、adaptive。", + "thinkingLevelHint": "留空则不传 thinking_level,使用 Provider 默认值。可选值:off、low、medium、high、xhigh、adaptive。", + "providerDefault": "Provider 默认", "maxTokensField": "Max Tokens 字段名", "maxTokensFieldHint": "覆盖请求中 max_tokens 的字段名,例如 max_completion_tokens。", "toolSchemaTransform": "工具 Schema 转换",