diff --git a/pkg/providers/openai_compat/provider.go b/pkg/providers/openai_compat/provider.go index 74e612046..726a34dee 100644 --- a/pkg/providers/openai_compat/provider.go +++ b/pkg/providers/openai_compat/provider.go @@ -116,7 +116,7 @@ func (p *Provider) Chat( requestBody := map[string]any{ "model": model, - "messages": stripSystemParts(messages), + "messages": serializeMessages(messages), } if len(tools) > 0 { @@ -195,6 +195,47 @@ func (p *Provider) Chat( return parseResponse(body) } +func serializeMessages(messages []Message) []map[string]interface{} { + result := make([]map[string]interface{}, 0, len(messages)) + for _, m := range messages { + if len(m.Media) == 0 { + msg := map[string]interface{}{ + "role": m.Role, + "content": m.Content, + } + if m.ToolCallID != "" { + msg["tool_call_id"] = m.ToolCallID + } + if len(m.ToolCalls) > 0 { + msg["tool_calls"] = m.ToolCalls + } + result = append(result, msg) + continue + } + + parts := make([]map[string]interface{}, 0, 1+len(m.Media)) + if m.Content != "" { + parts = append(parts, map[string]interface{}{ + "type": "text", + "text": m.Content, + }) + } + for _, mediaURL := range m.Media { + parts = append(parts, map[string]interface{}{ + "type": "image_url", + "image_url": map[string]interface{}{ + "url": mediaURL, + }, + }) + } + result = append(result, map[string]interface{}{ + "role": m.Role, + "content": parts, + }) + } + return result +} + func parseResponse(body []byte) (*LLMResponse, error) { var apiResponse struct { Choices []struct { diff --git a/pkg/providers/protocoltypes/types.go b/pkg/providers/protocoltypes/types.go index 99f13334e..efac1e10b 100644 --- a/pkg/providers/protocoltypes/types.go +++ b/pkg/providers/protocoltypes/types.go @@ -65,6 +65,7 @@ type ContentBlock struct { type Message struct { Role string `json:"role"` Content string `json:"content"` + Media []string `json:"media,omitempty"` // URLs of images or other media attachments ReasoningContent string `json:"reasoning_content,omitempty"` SystemParts []ContentBlock `json:"system_parts,omitempty"` // structured system blocks for cache-aware adapters ToolCalls []ToolCall `json:"tool_calls,omitempty"`