mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
8ad4b9b497
- Add `AudioModelTranscriber` for model-based audio transcription via LLM providers - Support selecting a transcription model with `voice.model_name` in config - Keep Groq transcription as a fallback and move it into dedicated files with focused tests - Serialize `data:audio/...` media as input_audio for OpenAI-compatible providers - Improve transcription logging by rendering error fields as strings - Add coverage for transcriber detection, audio-model behavior, provider audio serialization, and Groq transcription Fixes #1890.
44 lines
1.1 KiB
Go
44 lines
1.1 KiB
Go
package voice
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/config"
|
|
)
|
|
|
|
type Transcriber interface {
|
|
Name() string
|
|
Transcribe(ctx context.Context, audioFilePath string) (*TranscriptionResponse, error)
|
|
}
|
|
|
|
// TranscriptionResponse is the result returned by a Transcriber. Its JSON
// field names are "text", "language" and "duration"; the latter two are
// omitted from encoded output when they hold their zero value.
type TranscriptionResponse struct {
	// Text is the transcribed text.
	Text string `json:"text"`
	// Language is the language reported for the audio, if any.
	Language string `json:"language,omitempty"`
	// Duration is the reported audio duration, if any.
	// NOTE(review): presumably seconds — confirm against the producers.
	Duration float64 `json:"duration,omitempty"`
}
|
|
|
|
// DetectTranscriber inspects cfg and returns the appropriate Transcriber, or
|
|
// nil if no supported transcription provider is configured.
|
|
func DetectTranscriber(cfg *config.Config) Transcriber {
|
|
if modelName := strings.TrimSpace(cfg.Voice.ModelName); modelName != "" {
|
|
modelCfg, err := cfg.GetModelConfig(modelName)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
return NewAudioModelTranscriber(modelCfg)
|
|
}
|
|
|
|
// Direct Groq provider config takes priority.
|
|
if key := cfg.Providers.Groq.APIKey; key != "" {
|
|
return NewGroqTranscriber(key)
|
|
}
|
|
// Fall back to any model-list entry that uses the groq/ protocol.
|
|
for _, mc := range cfg.ModelList {
|
|
if strings.HasPrefix(mc.Model, "groq/") && mc.APIKey != "" {
|
|
return NewGroqTranscriber(mc.APIKey)
|
|
}
|
|
}
|
|
return nil
|
|
}
|