mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
refactor(channels): remove channel-side voice transcription (Phase 12)
Remove the SetTranscriber methods and the in-channel transcription logic from four channels (Telegram, Discord, Slack, OneBot), along with the gateway wiring that created the Groq transcriber and attached it to channels. Voice/audio files are still downloaded and stored in MediaStore, but message content now carries only simple text annotations ([voice], [audio: filename], [file: name]). The pkg/voice package is preserved for future Agent-level transcription middleware.
This commit is contained in:
@@ -6,7 +6,6 @@ import (
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
|
||||
@@ -14,14 +13,14 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/bus"
|
||||
"github.com/sipeed/picoclaw/pkg/channels"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/dingtalk"
|
||||
dch "github.com/sipeed/picoclaw/pkg/channels/discord"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/discord"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/feishu"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/line"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/maixcam"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/onebot"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/qq"
|
||||
slackch "github.com/sipeed/picoclaw/pkg/channels/slack"
|
||||
tgramch "github.com/sipeed/picoclaw/pkg/channels/telegram"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/slack"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/telegram"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/wecom"
|
||||
_ "github.com/sipeed/picoclaw/pkg/channels/whatsapp"
|
||||
"github.com/sipeed/picoclaw/pkg/config"
|
||||
@@ -34,7 +33,6 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/providers"
|
||||
"github.com/sipeed/picoclaw/pkg/state"
|
||||
"github.com/sipeed/picoclaw/pkg/tools"
|
||||
"github.com/sipeed/picoclaw/pkg/voice"
|
||||
)
|
||||
|
||||
func gatewayCmd(debug bool) error {
|
||||
@@ -127,42 +125,6 @@ func gatewayCmd(debug bool) error {
|
||||
agentLoop.SetChannelManager(channelManager)
|
||||
agentLoop.SetMediaStore(mediaStore)
|
||||
|
||||
var transcriber *voice.GroqTranscriber
|
||||
groqAPIKey := cfg.Providers.Groq.APIKey
|
||||
if groqAPIKey == "" {
|
||||
for _, mc := range cfg.ModelList {
|
||||
if strings.HasPrefix(mc.Model, "groq/") && mc.APIKey != "" {
|
||||
groqAPIKey = mc.APIKey
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if groqAPIKey != "" {
|
||||
transcriber = voice.NewGroqTranscriber(groqAPIKey)
|
||||
logger.InfoC("voice", "Groq voice transcription enabled")
|
||||
}
|
||||
|
||||
if transcriber != nil {
|
||||
if telegramChannel, ok := channelManager.GetChannel("telegram"); ok {
|
||||
if tc, ok := telegramChannel.(*tgramch.TelegramChannel); ok {
|
||||
tc.SetTranscriber(transcriber)
|
||||
logger.InfoC("voice", "Groq transcription attached to Telegram channel")
|
||||
}
|
||||
}
|
||||
if discordChannel, ok := channelManager.GetChannel("discord"); ok {
|
||||
if dc, ok := discordChannel.(*dch.DiscordChannel); ok {
|
||||
dc.SetTranscriber(transcriber)
|
||||
logger.InfoC("voice", "Groq transcription attached to Discord channel")
|
||||
}
|
||||
}
|
||||
if slackChannel, ok := channelManager.GetChannel("slack"); ok {
|
||||
if sc, ok := slackChannel.(*slackch.SlackChannel); ok {
|
||||
sc.SetTranscriber(transcriber)
|
||||
logger.InfoC("voice", "Groq transcription attached to Slack channel")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enabledChannels := channelManager.GetEnabledChannels()
|
||||
if len(enabledChannels) > 0 {
|
||||
fmt.Printf("✓ Channels enabled: %s\n", enabledChannels)
|
||||
|
||||
@@ -16,24 +16,21 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
"github.com/sipeed/picoclaw/pkg/utils"
|
||||
"github.com/sipeed/picoclaw/pkg/voice"
|
||||
)
|
||||
|
||||
const (
|
||||
transcriptionTimeout = 30 * time.Second
|
||||
sendTimeout = 10 * time.Second
|
||||
sendTimeout = 10 * time.Second
|
||||
)
|
||||
|
||||
type DiscordChannel struct {
|
||||
*channels.BaseChannel
|
||||
session *discordgo.Session
|
||||
config config.DiscordConfig
|
||||
transcriber *voice.GroqTranscriber
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
typingMu sync.Mutex
|
||||
typingStop map[string]chan struct{} // chatID → stop signal
|
||||
botUserID string // stored for mention checking
|
||||
session *discordgo.Session
|
||||
config config.DiscordConfig
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
typingMu sync.Mutex
|
||||
typingStop map[string]chan struct{} // chatID → stop signal
|
||||
botUserID string // stored for mention checking
|
||||
}
|
||||
|
||||
func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordChannel, error) {
|
||||
@@ -48,16 +45,11 @@ func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordC
|
||||
BaseChannel: base,
|
||||
session: session,
|
||||
config: cfg,
|
||||
transcriber: nil,
|
||||
ctx: context.Background(),
|
||||
typingStop: make(map[string]chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *DiscordChannel) SetTranscriber(transcriber *voice.GroqTranscriber) {
|
||||
c.transcriber = transcriber
|
||||
}
|
||||
|
||||
func (c *DiscordChannel) Start(ctx context.Context) error {
|
||||
logger.InfoC("discord", "Starting Discord bot")
|
||||
|
||||
@@ -265,7 +257,7 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag
|
||||
return
|
||||
}
|
||||
|
||||
// Check allowlist first to avoid downloading attachments and transcribing for rejected users
|
||||
// Check allowlist first to avoid downloading attachments for rejected users
|
||||
if !c.IsAllowed(m.Author.ID) {
|
||||
logger.DebugCF("discord", "Message rejected by allowlist", map[string]any{
|
||||
"user_id": m.Author.ID,
|
||||
@@ -323,29 +315,8 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag
|
||||
if isAudio {
|
||||
localPath := c.downloadAttachment(attachment.URL, attachment.Filename)
|
||||
if localPath != "" {
|
||||
transcribedText := ""
|
||||
if c.transcriber != nil && c.transcriber.IsAvailable() {
|
||||
ctx, cancel := context.WithTimeout(c.ctx, transcriptionTimeout)
|
||||
result, err := c.transcriber.Transcribe(ctx, localPath)
|
||||
cancel() // Release context resources immediately to avoid leaks in for loop
|
||||
|
||||
if err != nil {
|
||||
logger.ErrorCF("discord", "Voice transcription failed", map[string]any{
|
||||
"error": err.Error(),
|
||||
})
|
||||
transcribedText = fmt.Sprintf("[audio: %s (transcription failed)]", attachment.Filename)
|
||||
} else {
|
||||
transcribedText = fmt.Sprintf("[audio transcription: %s]", result.Text)
|
||||
logger.DebugCF("discord", "Audio transcribed successfully", map[string]any{
|
||||
"text": result.Text,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
transcribedText = fmt.Sprintf("[audio: %s]", attachment.Filename)
|
||||
}
|
||||
|
||||
mediaPaths = append(mediaPaths, storeMedia(localPath, attachment.Filename))
|
||||
content = appendContent(content, transcribedText)
|
||||
content = appendContent(content, fmt.Sprintf("[audio: %s]", attachment.Filename))
|
||||
} else {
|
||||
logger.WarnCF("discord", "Failed to download audio attachment", map[string]any{
|
||||
"url": attachment.URL,
|
||||
|
||||
@@ -18,7 +18,6 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
"github.com/sipeed/picoclaw/pkg/utils"
|
||||
"github.com/sipeed/picoclaw/pkg/voice"
|
||||
)
|
||||
|
||||
type OneBotChannel struct {
|
||||
@@ -36,7 +35,6 @@ type OneBotChannel struct {
|
||||
selfID int64
|
||||
pending map[string]chan json.RawMessage
|
||||
pendingMu sync.Mutex
|
||||
transcriber *voice.GroqTranscriber
|
||||
lastMessageID sync.Map
|
||||
pendingEmojiMsg sync.Map
|
||||
}
|
||||
@@ -112,10 +110,6 @@ func NewOneBotChannel(cfg config.OneBotConfig, messageBus *bus.MessageBus) (*One
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *OneBotChannel) SetTranscriber(transcriber *voice.GroqTranscriber) {
|
||||
c.transcriber = transcriber
|
||||
}
|
||||
|
||||
func (c *OneBotChannel) setMsgEmojiLike(messageID string, emojiID int, set bool) {
|
||||
go func() {
|
||||
_, err := c.sendAPIRequest("set_msg_emoji_like", map[string]any{
|
||||
@@ -794,25 +788,8 @@ func (c *OneBotChannel) parseMessageSegments(
|
||||
LoggerPrefix: "onebot",
|
||||
})
|
||||
if localPath != "" {
|
||||
if c.transcriber != nil && c.transcriber.IsAvailable() {
|
||||
tctx, tcancel := context.WithTimeout(c.ctx, 30*time.Second)
|
||||
result, err := c.transcriber.Transcribe(tctx, localPath)
|
||||
tcancel()
|
||||
if err != nil {
|
||||
logger.WarnCF("onebot", "Voice transcription failed", map[string]any{
|
||||
"error": err.Error(),
|
||||
})
|
||||
textParts = append(textParts, "[voice (transcription failed)]")
|
||||
mediaRefs = append(mediaRefs, storeFile(localPath, "voice.amr"))
|
||||
} else {
|
||||
textParts = append(textParts, fmt.Sprintf("[voice transcription: %s]", result.Text))
|
||||
// Still store the file so it can be released later
|
||||
storeFile(localPath, "voice.amr")
|
||||
}
|
||||
} else {
|
||||
textParts = append(textParts, "[voice]")
|
||||
mediaRefs = append(mediaRefs, storeFile(localPath, "voice.amr"))
|
||||
}
|
||||
textParts = append(textParts, "[voice]")
|
||||
mediaRefs = append(mediaRefs, storeFile(localPath, "voice.amr"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/slack-go/slack"
|
||||
"github.com/slack-go/slack/slackevents"
|
||||
@@ -17,7 +16,6 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
"github.com/sipeed/picoclaw/pkg/utils"
|
||||
"github.com/sipeed/picoclaw/pkg/voice"
|
||||
)
|
||||
|
||||
type SlackChannel struct {
|
||||
@@ -27,7 +25,6 @@ type SlackChannel struct {
|
||||
socketClient *socketmode.Client
|
||||
botUserID string
|
||||
teamID string
|
||||
transcriber *voice.GroqTranscriber
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
pendingAcks sync.Map
|
||||
@@ -60,10 +57,6 @@ func NewSlackChannel(cfg config.SlackConfig, messageBus *bus.MessageBus) (*Slack
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *SlackChannel) SetTranscriber(transcriber *voice.GroqTranscriber) {
|
||||
c.transcriber = transcriber
|
||||
}
|
||||
|
||||
func (c *SlackChannel) Start(ctx context.Context) error {
|
||||
logger.InfoC("slack", "Starting Slack channel (Socket Mode)")
|
||||
|
||||
@@ -311,21 +304,7 @@ func (c *SlackChannel) handleMessageEvent(ev *slackevents.MessageEvent) {
|
||||
continue
|
||||
}
|
||||
mediaPaths = append(mediaPaths, storeMedia(localPath, file.Name))
|
||||
|
||||
if utils.IsAudioFile(file.Name, file.Mimetype) && c.transcriber != nil && c.transcriber.IsAvailable() {
|
||||
ctx, cancel := context.WithTimeout(c.ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
result, err := c.transcriber.Transcribe(ctx, localPath)
|
||||
|
||||
if err != nil {
|
||||
logger.ErrorCF("slack", "Voice transcription failed", map[string]any{"error": err.Error()})
|
||||
content += fmt.Sprintf("\n[audio: %s (transcription failed)]", file.Name)
|
||||
} else {
|
||||
content += fmt.Sprintf("\n[voice transcription: %s]", result.Text)
|
||||
}
|
||||
} else {
|
||||
content += fmt.Sprintf("\n[file: %s]", file.Name)
|
||||
}
|
||||
content += fmt.Sprintf("\n[file: %s]", file.Name)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
"github.com/sipeed/picoclaw/pkg/utils"
|
||||
"github.com/sipeed/picoclaw/pkg/voice"
|
||||
)
|
||||
|
||||
type TelegramChannel struct {
|
||||
@@ -32,7 +31,6 @@ type TelegramChannel struct {
|
||||
commands TelegramCommander
|
||||
config *config.Config
|
||||
chatIDs map[string]int64
|
||||
transcriber *voice.GroqTranscriber
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
placeholders sync.Map // chatID -> messageID
|
||||
@@ -91,16 +89,11 @@ func NewTelegramChannel(cfg *config.Config, bus *bus.MessageBus) (*TelegramChann
|
||||
bot: bot,
|
||||
config: cfg,
|
||||
chatIDs: make(map[string]int64),
|
||||
transcriber: nil,
|
||||
placeholders: sync.Map{},
|
||||
stopThinking: sync.Map{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *TelegramChannel) SetTranscriber(transcriber *voice.GroqTranscriber) {
|
||||
c.transcriber = transcriber
|
||||
}
|
||||
|
||||
func (c *TelegramChannel) Start(ctx context.Context) error {
|
||||
logger.InfoC("telegram", "Starting Telegram bot (polling mode)...")
|
||||
|
||||
@@ -391,32 +384,10 @@ func (c *TelegramChannel) handleMessage(ctx context.Context, message *telego.Mes
|
||||
if voicePath != "" {
|
||||
mediaPaths = append(mediaPaths, storeMedia(voicePath, "voice.ogg"))
|
||||
|
||||
transcribedText := ""
|
||||
if c.transcriber != nil && c.transcriber.IsAvailable() {
|
||||
transcriberCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
result, err := c.transcriber.Transcribe(transcriberCtx, voicePath)
|
||||
if err != nil {
|
||||
logger.ErrorCF("telegram", "Voice transcription failed", map[string]any{
|
||||
"error": err.Error(),
|
||||
"path": voicePath,
|
||||
})
|
||||
transcribedText = "[voice (transcription failed)]"
|
||||
} else {
|
||||
transcribedText = fmt.Sprintf("[voice transcription: %s]", result.Text)
|
||||
logger.InfoCF("telegram", "Voice transcribed successfully", map[string]any{
|
||||
"text": result.Text,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
transcribedText = "[voice]"
|
||||
}
|
||||
|
||||
if content != "" {
|
||||
content += "\n"
|
||||
}
|
||||
content += transcribedText
|
||||
content += "[voice]"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user