Files
picoclaw/pkg/voice/elevenlabs_transcriber_test.go
T
Orkun Manap dd9adf8a04 feat: add ElevenLabs Scribe STT transcriber and Telegram SendVoice support (#1905)
* feat: add ElevenLabs Scribe STT transcriber and Telegram SendVoice support

Add ElevenLabsTranscriber as an alternative speech-to-text provider using
the ElevenLabs Scribe API (scribe_v1). This enables voice message
transcription for users who already have an ElevenLabs API key, without
requiring a separate Groq account.

Changes:
- Add ElevenLabsTranscriber implementing the Transcriber interface
- Update DetectTranscriber to check providers.elevenlabs.api_key first,
  falling back to Groq for backward compatibility
- Add ElevenLabs to ProvidersConfig
- Add "voice" media type for OGG files with "voice" in filename
- Add SendVoice support in Telegram channel for voice bubble messages
- Add comprehensive tests for ElevenLabs transcriber

Configuration:
  "providers": {
    "elevenlabs": {
      "api_key": "sk_your_key_here"
    }
  }

Closes #1503 (partial)

* fix: move voice-bubble detection into Telegram channel to avoid regression in other channels

Address review feedback: keep inferMediaType returning "audio" for all
OGG files. Voice-bubble detection (SendVoice vs SendAudio) is now done
inside the Telegram channel based on filename, so other channels that
map "audio" explicitly are unaffected.

* fix: align VoiceConfig struct tags to pass golines formatter

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(agent): use ModelName in loop test added by upstream

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 22:11:10 +01:00

84 lines
2.3 KiB
Go

package voice
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
)
// Compile-time assertion that *ElevenLabsTranscriber implements the
// Transcriber interface. Assigning a typed nil pointer to a blank Transcriber
// variable makes the build fail if the method set ever stops matching.
var _ Transcriber = (*ElevenLabsTranscriber)(nil)
// TestElevenLabsTranscriberName verifies that a transcriber created with
// NewElevenLabsTranscriber identifies itself as "elevenlabs" via Name().
func TestElevenLabsTranscriberName(t *testing.T) {
	const want = "elevenlabs"

	transcriber := NewElevenLabsTranscriber("sk_test")
	got := transcriber.Name()
	if got == want {
		return
	}
	t.Errorf("Name() = %q, want %q", got, want)
}
// TestElevenLabsTranscribe exercises ElevenLabsTranscriber.Transcribe in three
// scenarios, each as its own subtest:
//
//   - "success": a stub server answers with a JSON TranscriptionResponse and
//     the returned Text and Language must match it.
//   - "api error": a stub server answers 401 and Transcribe must return an error.
//   - "missing file": the audio path does not exist and Transcribe must return
//     an error.
//
// A small fake audio file is written once into t.TempDir() and shared by the
// subtests that need an existing file.
func TestElevenLabsTranscribe(t *testing.T) {
	tmpDir := t.TempDir()
	audioPath := filepath.Join(tmpDir, "clip.ogg")
	if err := os.WriteFile(audioPath, []byte("fake-audio-data"), 0o644); err != nil {
		t.Fatalf("failed to write fake audio file: %v", err)
	}

	t.Run("success", func(t *testing.T) {
		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// The handler runs on a server goroutine, so failures are
			// reported with t.Errorf only; t.Fatal/FailNow must be called
			// from the goroutine running the test.
			if r.URL.Path != "/v1/speech-to-text" {
				t.Errorf("unexpected path: %s", r.URL.Path)
			}
			if got := r.Header.Get("Xi-Api-Key"); got != "sk_test" {
				t.Errorf("unexpected xi-api-key header: %s", got)
			}

			w.Header().Set("Content-Type", "application/json")
			// Surface encoding failures instead of discarding them, so a
			// broken stub response is reported at its source.
			if err := json.NewEncoder(w).Encode(TranscriptionResponse{
				Text:     "hello from elevenlabs",
				Language: "en",
			}); err != nil {
				t.Errorf("failed to encode stub response: %v", err)
			}
		}))
		defer srv.Close()

		tr := NewElevenLabsTranscriber("sk_test")
		// Point the transcriber at the stub server instead of its default base URL.
		tr.apiBase = srv.URL

		resp, err := tr.Transcribe(context.Background(), audioPath)
		if err != nil {
			t.Fatalf("Transcribe() error: %v", err)
		}
		if resp.Text != "hello from elevenlabs" {
			t.Errorf("Text = %q, want %q", resp.Text, "hello from elevenlabs")
		}
		if resp.Language != "en" {
			t.Errorf("Language = %q, want %q", resp.Language, "en")
		}
	})

	t.Run("api error", func(t *testing.T) {
		// The stub always replies 401 with a JSON-looking error body.
		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			http.Error(w, `{"error":"invalid_api_key"}`, http.StatusUnauthorized)
		}))
		defer srv.Close()

		tr := NewElevenLabsTranscriber("sk_bad")
		tr.apiBase = srv.URL

		_, err := tr.Transcribe(context.Background(), audioPath)
		if err == nil {
			t.Fatal("expected error for non-200 response, got nil")
		}
	})

	t.Run("missing file", func(t *testing.T) {
		// No stub server is configured for this case; the test only requires
		// that Transcribe returns an error for a path that does not exist.
		tr := NewElevenLabsTranscriber("sk_test")

		_, err := tr.Transcribe(context.Background(), filepath.Join(tmpDir, "nonexistent.ogg"))
		if err == nil {
			t.Fatal("expected error for missing file, got nil")
		}
	})
}