From b74f92ed28bc4f14641cc993311d928034f10a87 Mon Sep 17 00:00:00 2001 From: Dimitrij Denissenko Date: Sun, 1 Mar 2026 21:02:16 +0000 Subject: [PATCH] A more neutral and elegant voice.Transcriber interface --- cmd/picoclaw/internal/gateway/helpers.go | 17 +---- pkg/agent/loop.go | 2 +- pkg/voice/transcriber.go | 26 ++++++-- pkg/voice/transcriber_test.go | 85 +++++++++++++++++++++--- 4 files changed, 99 insertions(+), 31 deletions(-) diff --git a/cmd/picoclaw/internal/gateway/helpers.go b/cmd/picoclaw/internal/gateway/helpers.go index c4a6f59fe..5225340c7 100644 --- a/cmd/picoclaw/internal/gateway/helpers.go +++ b/cmd/picoclaw/internal/gateway/helpers.go @@ -7,7 +7,6 @@ import ( "os" "os/signal" "path/filepath" - "strings" "time" "github.com/sipeed/picoclaw/cmd/picoclaw/internal" @@ -136,20 +135,10 @@ func gatewayCmd(debug bool) error { agentLoop.SetChannelManager(channelManager) agentLoop.SetMediaStore(mediaStore) - // Wire up voice transcription if Groq API key is available - groqAPIKey := cfg.Providers.Groq.APIKey - if groqAPIKey == "" { - for _, mc := range cfg.ModelList { - if strings.HasPrefix(mc.Model, "groq/") && mc.APIKey != "" { - groqAPIKey = mc.APIKey - break - } - } - } - if groqAPIKey != "" { - transcriber := voice.NewGroqTranscriber(groqAPIKey) + // Wire up voice transcription if a supported provider is configured. + if transcriber := voice.DetectTranscriber(cfg); transcriber != nil { agentLoop.SetTranscriber(transcriber) - logger.InfoC("voice", "Groq voice transcription enabled (agent-level)") + logger.InfoCF("voice", "Transcription enabled (agent-level)", map[string]any{"provider": transcriber.Name()}) } enabledChannels := channelManager.GetEnabledChannels() diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 0a2633d90..f37d419b1 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -276,7 +276,7 @@ var audioAnnotationRe = regexp.MustCompile(`\[(voice|audio)(?::[^\]]*)?\]`) // transcribeAudioInMessage resolves audio media refs, transcribes them, and // replaces audio annotations in msg.Content with the transcribed text. func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) bus.InboundMessage { - if al.transcriber == nil || !al.transcriber.IsAvailable() || al.mediaStore == nil || len(msg.Media) == 0 { + if al.transcriber == nil || al.mediaStore == nil || len(msg.Media) == 0 { return msg } diff --git a/pkg/voice/transcriber.go b/pkg/voice/transcriber.go index bf48d0fda..e949d7a22 100644 --- a/pkg/voice/transcriber.go +++ b/pkg/voice/transcriber.go @@ -10,15 +10,17 @@ import ( "net/http" "os" "path/filepath" + "strings" "time" + "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/utils" ) type Transcriber interface { + Name() string Transcribe(ctx context.Context, audioFilePath string) (*TranscriptionResponse, error) - IsAvailable() bool } type GroqTranscriber struct { @@ -157,8 +159,22 @@ func (t *GroqTranscriber) Transcribe(ctx context.Context, audioFilePath string) return &result, nil } -func (t *GroqTranscriber) IsAvailable() bool { - available := t.apiKey != "" - logger.DebugCF("voice", "Checking transcriber availability", map[string]any{"available": available}) - return available +func (t *GroqTranscriber) Name() string { + return "groq" +} + +// DetectTranscriber inspects cfg and returns the appropriate Transcriber, or +// nil if no supported transcription provider is configured. +func DetectTranscriber(cfg *config.Config) Transcriber { + // Direct Groq provider config takes priority. + if key := cfg.Providers.Groq.APIKey; key != "" { + return NewGroqTranscriber(key) + } + // Fall back to any model-list entry that uses the groq/ protocol. + for _, mc := range cfg.ModelList { + if strings.HasPrefix(mc.Model, "groq/") && mc.APIKey != "" { + return NewGroqTranscriber(mc.APIKey) + } + } + return nil } diff --git a/pkg/voice/transcriber_test.go b/pkg/voice/transcriber_test.go index c4755dd54..6a28b3664 100644 --- a/pkg/voice/transcriber_test.go +++ b/pkg/voice/transcriber_test.go @@ -8,25 +8,88 @@ import ( "os" "path/filepath" "testing" + + "github.com/sipeed/picoclaw/pkg/config" ) // Ensure GroqTranscriber satisfies the Transcriber interface at compile time. var _ Transcriber = (*GroqTranscriber)(nil) -func TestIsAvailable(t *testing.T) { - tests := []struct { - name string - apiKey string - want bool - }{ - {"with key", "sk-test-key", true}, - {"empty key", "", false}, +func TestGroqTranscriberName(t *testing.T) { + tr := NewGroqTranscriber("sk-test") + if got := tr.Name(); got != "groq" { + t.Errorf("Name() = %q, want %q", got, "groq") } +} + +func TestDetectTranscriber(t *testing.T) { + tests := []struct { + name string + cfg *config.Config + wantNil bool + wantName string + }{ + { + name: "no config", + cfg: &config.Config{}, + wantNil: true, + }, + { + name: "groq provider key", + cfg: &config.Config{ + Providers: config.ProvidersConfig{ + Groq: config.ProviderConfig{APIKey: "sk-groq-direct"}, + }, + }, + wantName: "groq", + }, + { + name: "groq via model list", + cfg: &config.Config{ + ModelList: []config.ModelConfig{ + {Model: "openai/gpt-4o", APIKey: "sk-openai"}, + {Model: "groq/llama-3.3-70b", APIKey: "sk-groq-model"}, + }, + }, + wantName: "groq", + }, + { + name: "groq model list entry without key is skipped", + cfg: &config.Config{ + ModelList: []config.ModelConfig{ + {Model: "groq/llama-3.3-70b", APIKey: ""}, + }, + }, + wantNil: true, + }, + { + name: "provider key takes priority over model list", + cfg: &config.Config{ + Providers: config.ProvidersConfig{ + Groq: config.ProviderConfig{APIKey: "sk-groq-direct"}, + }, + ModelList: []config.ModelConfig{ + {Model: "groq/llama-3.3-70b", APIKey: "sk-groq-model"}, + }, + }, + wantName: "groq", + }, + } + for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - tr := NewGroqTranscriber(tc.apiKey) - if got := tr.IsAvailable(); got != tc.want { - t.Errorf("IsAvailable() = %v, want %v", got, tc.want) + tr := DetectTranscriber(tc.cfg) + if tc.wantNil { + if tr != nil { + t.Errorf("DetectTranscriber() = %v, want nil", tr) + } + return + } + if tr == nil { + t.Fatal("DetectTranscriber() = nil, want non-nil") + } + if got := tr.Name(); got != tc.wantName { + t.Errorf("Name() = %q, want %q", got, tc.wantName) } }) }