fix: add VLLM default API base and implement MaxTokensField support

1. Add VLLM default API base (http://localhost:8000/v1)
   - Previously an empty string was returned, causing provider creation to fail
2. Implement MaxTokensField configuration
   - Add maxTokensField field to HTTPProvider
   - Add NewHTTPProviderWithMaxTokensField constructor
   - Use the configured field name for the max_tokens parameter
   - Fall back to model-based detection for backward compatibility
3. Add tests for VLLM, deepseek, ollama default API bases

Example config usage:

{ "model_name": "glm-4", "model": "openai/glm-4", "max_tokens_field": "max_completion_tokens" }

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-12 18:08:54 +00:00 · 2026-02-19 01:43:24 +08:00
parent e1583f3b13
commit 09a0d19119
3 changed files with 30 additions and 13 deletions
@@ -81,7 +81,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err
 		if apiBase == "" {
 			apiBase = getDefaultAPIBase(protocol)
 		}
-		return NewHTTPProvider(cfg.APIKey, apiBase, cfg.Proxy), modelID, nil
+		return NewHTTPProviderWithMaxTokensField(cfg.APIKey, apiBase, cfg.Proxy, cfg.MaxTokensField), modelID, nil

 	case "anthropic":
 		if cfg.AuthMethod == "oauth" || cfg.AuthMethod == "token" {
@@ -100,7 +100,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err
 		if cfg.APIKey == "" {
 			return nil, "", fmt.Errorf("api_key is required for anthropic protocol (model: %s)", cfg.Model)
 		}
-		return NewHTTPProvider(cfg.APIKey, apiBase, cfg.Proxy), modelID, nil
+		return NewHTTPProviderWithMaxTokensField(cfg.APIKey, apiBase, cfg.Proxy, cfg.MaxTokensField), modelID, nil

 	case "antigravity":
 		return NewAntigravityProvider(), modelID, nil
@@ -168,6 +168,8 @@ func getDefaultAPIBase(protocol string) string {
 		return "https://ark.cn-beijing.volces.com/api/v3"
 	case "qwen":
 		return "https://dashscope.aliyuncs.com/compatible-mode/v1"
+	case "vllm":
+		return "http://localhost:8000/v1"
 	default:
 		return ""
 	}
@@ -106,6 +106,9 @@ func TestCreateProviderFromConfig_DefaultAPIBase(t *testing.T) {
 		{"openrouter", "openrouter", "https://openrouter.ai/api/v1"},
 		{"cerebras", "cerebras", "https://api.cerebras.ai/v1"},
 		{"qwen", "qwen", "https://dashscope.aliyuncs.com/compatible-mode/v1"},
+		{"vllm", "vllm", "http://localhost:8000/v1"},
+		{"deepseek", "deepseek", "https://api.deepseek.com/v1"},
+		{"ollama", "ollama", "http://localhost:11434/v1"},
 	}

 	for _, tt := range tests {
@@ -19,12 +19,17 @@ import (
 )

 type HTTPProvider struct {
-	apiKey     string
-	apiBase    string
-	httpClient *http.Client
+	apiKey         string
+	apiBase        string
+	maxTokensField string // Field name for max tokens (e.g., "max_completion_tokens" for o1/glm models)
+	httpClient     *http.Client
 }

 func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider {
+	return NewHTTPProviderWithMaxTokensField(apiKey, apiBase, proxy, "")
+}
+
+func NewHTTPProviderWithMaxTokensField(apiKey, apiBase, proxy, maxTokensField string) *HTTPProvider {
 	client := &http.Client{
 		Timeout: 120 * time.Second,
 	}
@@ -39,9 +44,10 @@ func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider {
 	}

 	return &HTTPProvider{
-		apiKey:     apiKey,
-		apiBase:    strings.TrimRight(apiBase, "/"),
-		httpClient: client,
+		apiKey:         apiKey,
+		apiBase:        strings.TrimRight(apiBase, "/"),
+		maxTokensField: maxTokensField,
+		httpClient:     client,
 	}
 }

@@ -69,12 +75,18 @@ func (p *HTTPProvider) Chat(ctx context.Context, messages []Message, tools []Too
 	}

 	if maxTokens, ok := options["max_tokens"].(int); ok {
-		lowerModel := strings.ToLower(model)
-		if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") {
-			requestBody["max_completion_tokens"] = maxTokens
-		} else {
-			requestBody["max_tokens"] = maxTokens
+		// Use configured max_tokens_field if specified, otherwise fallback to model-based detection
+		fieldName := p.maxTokensField
+		if fieldName == "" {
+			// Fallback: detect from model name for backward compatibility
+			lowerModel := strings.ToLower(model)
+			if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") {
+				fieldName = "max_completion_tokens"
+			} else {
+				fieldName = "max_tokens"
+			}
 		}
+		requestBody[fieldName] = maxTokens
 	}

 	if temperature, ok := options["temperature"].(float64); ok {