Files
picoclaw/pkg/voice/transcriber.go
T
RussellLuo 8ad4b9b497 feat(voice): add audio-model transcription support
- Add `AudioModelTranscriber` for model-based audio transcription via LLM providers
- Support selecting a transcription model with `voice.model_name` in config
- Keep Groq transcription as a fallback and move it into dedicated files with focused tests
- Serialize `data:audio/...` media as input_audio for OpenAI-compatible providers
- Improve transcription logging by rendering error fields as strings
- Add coverage for transcriber detection, audio-model behavior, provider audio serialization, and Groq transcription

Fixes #1890.
2026-03-22 20:07:22 +08:00

44 lines
1.1 KiB
Go

package voice
import (
"context"
"strings"
"github.com/sipeed/picoclaw/pkg/config"
)
type Transcriber interface {
Name() string
Transcribe(ctx context.Context, audioFilePath string) (*TranscriptionResponse, error)
}
type TranscriptionResponse struct {
Text string `json:"text"`
Language string `json:"language,omitempty"`
Duration float64 `json:"duration,omitempty"`
}
// DetectTranscriber inspects cfg and returns the appropriate Transcriber, or
// nil if no supported transcription provider is configured.
func DetectTranscriber(cfg *config.Config) Transcriber {
if modelName := strings.TrimSpace(cfg.Voice.ModelName); modelName != "" {
modelCfg, err := cfg.GetModelConfig(modelName)
if err != nil {
return nil
}
return NewAudioModelTranscriber(modelCfg)
}
// Direct Groq provider config takes priority.
if key := cfg.Providers.Groq.APIKey; key != "" {
return NewGroqTranscriber(key)
}
// Fall back to any model-list entry that uses the groq/ protocol.
for _, mc := range cfg.ModelList {
if strings.HasPrefix(mc.Model, "groq/") && mc.APIKey != "" {
return NewGroqTranscriber(mc.APIKey)
}
}
return nil
}