fix: add VLLM default API base and implement MaxTokensField support

1. Add VLLM default API base (http://localhost:8000/v1)
   - getDefaultAPIBase previously returned an empty string for vllm, causing provider creation to fail

2. Implement MaxTokensField configuration
   - Add maxTokensField field to HTTPProvider
   - Add NewHTTPProviderWithMaxTokensField constructor
   - Use configured field name for max_tokens parameter
   - Fall back to model-based detection for backward compatibility

3. Add tests for the vllm, deepseek, and ollama default API bases

Example config usage:
{
  "model_name": "glm-4",
  "model": "openai/glm-4",
  "max_tokens_field": "max_completion_tokens"
}

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
yinwm
2026-02-19 01:43:24 +08:00
parent e1583f3b13
commit 09a0d19119
3 changed files with 30 additions and 13 deletions
+4 -2
View File
@@ -81,7 +81,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err
if apiBase == "" {
apiBase = getDefaultAPIBase(protocol)
}
return NewHTTPProvider(cfg.APIKey, apiBase, cfg.Proxy), modelID, nil
return NewHTTPProviderWithMaxTokensField(cfg.APIKey, apiBase, cfg.Proxy, cfg.MaxTokensField), modelID, nil
case "anthropic":
if cfg.AuthMethod == "oauth" || cfg.AuthMethod == "token" {
@@ -100,7 +100,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err
if cfg.APIKey == "" {
return nil, "", fmt.Errorf("api_key is required for anthropic protocol (model: %s)", cfg.Model)
}
return NewHTTPProvider(cfg.APIKey, apiBase, cfg.Proxy), modelID, nil
return NewHTTPProviderWithMaxTokensField(cfg.APIKey, apiBase, cfg.Proxy, cfg.MaxTokensField), modelID, nil
case "antigravity":
return NewAntigravityProvider(), modelID, nil
@@ -168,6 +168,8 @@ func getDefaultAPIBase(protocol string) string {
return "https://ark.cn-beijing.volces.com/api/v3"
case "qwen":
return "https://dashscope.aliyuncs.com/compatible-mode/v1"
case "vllm":
return "http://localhost:8000/v1"
default:
return ""
}
+3
View File
@@ -106,6 +106,9 @@ func TestCreateProviderFromConfig_DefaultAPIBase(t *testing.T) {
{"openrouter", "openrouter", "https://openrouter.ai/api/v1"},
{"cerebras", "cerebras", "https://api.cerebras.ai/v1"},
{"qwen", "qwen", "https://dashscope.aliyuncs.com/compatible-mode/v1"},
{"vllm", "vllm", "http://localhost:8000/v1"},
{"deepseek", "deepseek", "https://api.deepseek.com/v1"},
{"ollama", "ollama", "http://localhost:11434/v1"},
}
for _, tt := range tests {
+23 -11
View File
@@ -19,12 +19,17 @@ import (
)
type HTTPProvider struct {
apiKey string
apiBase string
httpClient *http.Client
apiKey string
apiBase string
maxTokensField string // Field name for max tokens (e.g., "max_completion_tokens" for o1/glm models)
httpClient *http.Client
}
func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider {
return NewHTTPProviderWithMaxTokensField(apiKey, apiBase, proxy, "")
}
func NewHTTPProviderWithMaxTokensField(apiKey, apiBase, proxy, maxTokensField string) *HTTPProvider {
client := &http.Client{
Timeout: 120 * time.Second,
}
@@ -39,9 +44,10 @@ func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider {
}
return &HTTPProvider{
apiKey: apiKey,
apiBase: strings.TrimRight(apiBase, "/"),
httpClient: client,
apiKey: apiKey,
apiBase: strings.TrimRight(apiBase, "/"),
maxTokensField: maxTokensField,
httpClient: client,
}
}
@@ -69,12 +75,18 @@ func (p *HTTPProvider) Chat(ctx context.Context, messages []Message, tools []Too
}
if maxTokens, ok := options["max_tokens"].(int); ok {
lowerModel := strings.ToLower(model)
if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") {
requestBody["max_completion_tokens"] = maxTokens
} else {
requestBody["max_tokens"] = maxTokens
// Use configured max_tokens_field if specified, otherwise fallback to model-based detection
fieldName := p.maxTokensField
if fieldName == "" {
// Fallback: detect from model name for backward compatibility
lowerModel := strings.ToLower(model)
if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") {
fieldName = "max_completion_tokens"
} else {
fieldName = "max_tokens"
}
}
requestBody[fieldName] = maxTokens
}
if temperature, ok := options["temperature"].(float64); ok {