From 09a0d19119060c48ce5a24a5692daf2536b59b54 Mon Sep 17 00:00:00 2001 From: yinwm Date: Thu, 19 Feb 2026 01:43:24 +0800 Subject: [PATCH] fix: add VLLM default API base and implement MaxTokensField support 1. Add VLLM default API base (http://localhost:8000/v1) - Previously returned empty string, causing provider creation to fail 2. Implement MaxTokensField configuration - Add maxTokensField field to HTTPProvider - Add NewHTTPProviderWithMaxTokensField constructor - Use configured field name for max_tokens parameter - Fallback to model-based detection for backward compatibility 3. Add tests for VLLM, deepseek, ollama default API bases Example config usage: { "model_name": "glm-4", "model": "openai/glm-4", "max_tokens_field": "max_completion_tokens" } Co-Authored-By: Claude Opus 4.6 --- pkg/providers/factory_provider.go | 6 +++-- pkg/providers/factory_provider_test.go | 3 +++ pkg/providers/http_provider.go | 34 +++++++++++++++++--------- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/pkg/providers/factory_provider.go b/pkg/providers/factory_provider.go index 8ed7559c6..7851c7c5d 100644 --- a/pkg/providers/factory_provider.go +++ b/pkg/providers/factory_provider.go @@ -81,7 +81,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err if apiBase == "" { apiBase = getDefaultAPIBase(protocol) } - return NewHTTPProvider(cfg.APIKey, apiBase, cfg.Proxy), modelID, nil + return NewHTTPProviderWithMaxTokensField(cfg.APIKey, apiBase, cfg.Proxy, cfg.MaxTokensField), modelID, nil case "anthropic": if cfg.AuthMethod == "oauth" || cfg.AuthMethod == "token" { @@ -100,7 +100,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err if cfg.APIKey == "" { return nil, "", fmt.Errorf("api_key is required for anthropic protocol (model: %s)", cfg.Model) } - return NewHTTPProvider(cfg.APIKey, apiBase, cfg.Proxy), modelID, nil + return NewHTTPProviderWithMaxTokensField(cfg.APIKey, apiBase, cfg.Proxy, cfg.MaxTokensField), modelID, nil case "antigravity": return NewAntigravityProvider(), modelID, nil @@ -168,6 +168,8 @@ func getDefaultAPIBase(protocol string) string { return "https://ark.cn-beijing.volces.com/api/v3" case "qwen": return "https://dashscope.aliyuncs.com/compatible-mode/v1" + case "vllm": + return "http://localhost:8000/v1" default: return "" } diff --git a/pkg/providers/factory_provider_test.go b/pkg/providers/factory_provider_test.go index f7c1aa58c..4aac982cb 100644 --- a/pkg/providers/factory_provider_test.go +++ b/pkg/providers/factory_provider_test.go @@ -106,6 +106,9 @@ func TestCreateProviderFromConfig_DefaultAPIBase(t *testing.T) { {"openrouter", "openrouter", "https://openrouter.ai/api/v1"}, {"cerebras", "cerebras", "https://api.cerebras.ai/v1"}, {"qwen", "qwen", "https://dashscope.aliyuncs.com/compatible-mode/v1"}, + {"vllm", "vllm", "http://localhost:8000/v1"}, + {"deepseek", "deepseek", "https://api.deepseek.com/v1"}, + {"ollama", "ollama", "http://localhost:11434/v1"}, } for _, tt := range tests { diff --git a/pkg/providers/http_provider.go b/pkg/providers/http_provider.go index 6d2ca1eb7..15b22e3a0 100644 --- a/pkg/providers/http_provider.go +++ b/pkg/providers/http_provider.go @@ -19,12 +19,17 @@ import ( ) type HTTPProvider struct { - apiKey string - apiBase string - httpClient *http.Client + apiKey string + apiBase string + maxTokensField string // Field name for max tokens (e.g., "max_completion_tokens" for o1/glm models) + httpClient *http.Client } func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider { + return NewHTTPProviderWithMaxTokensField(apiKey, apiBase, proxy, "") +} + +func NewHTTPProviderWithMaxTokensField(apiKey, apiBase, proxy, maxTokensField string) *HTTPProvider { client := &http.Client{ Timeout: 120 * time.Second, } @@ -39,9 +44,10 @@ func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider { } return &HTTPProvider{ - apiKey: apiKey, - apiBase: strings.TrimRight(apiBase, "/"), - httpClient: client, + apiKey: apiKey, + apiBase: strings.TrimRight(apiBase, "/"), + maxTokensField: maxTokensField, + httpClient: client, } } @@ -69,12 +75,18 @@ func (p *HTTPProvider) Chat(ctx context.Context, messages []Message, tools []Too } if maxTokens, ok := options["max_tokens"].(int); ok { - lowerModel := strings.ToLower(model) - if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") { - requestBody["max_completion_tokens"] = maxTokens - } else { - requestBody["max_tokens"] = maxTokens + // Use configured max_tokens_field if specified, otherwise fallback to model-based detection + fieldName := p.maxTokensField + if fieldName == "" { + // Fallback: detect from model name for backward compatibility + lowerModel := strings.ToLower(model) + if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") { + fieldName = "max_completion_tokens" + } else { + fieldName = "max_tokens" + } } + requestBody[fieldName] = maxTokens } if temperature, ok := options["temperature"].(float64); ok {