diff --git a/cmd/picoclaw/internal/gateway/helpers.go b/cmd/picoclaw/internal/gateway/helpers.go index 5ebf26d78..758157f53 100644 --- a/cmd/picoclaw/internal/gateway/helpers.go +++ b/cmd/picoclaw/internal/gateway/helpers.go @@ -6,7 +6,6 @@ import ( "os" "os/signal" "path/filepath" - "strings" "time" "github.com/sipeed/picoclaw/cmd/picoclaw/internal" @@ -14,14 +13,14 @@ import ( "github.com/sipeed/picoclaw/pkg/bus" "github.com/sipeed/picoclaw/pkg/channels" _ "github.com/sipeed/picoclaw/pkg/channels/dingtalk" - dch "github.com/sipeed/picoclaw/pkg/channels/discord" + _ "github.com/sipeed/picoclaw/pkg/channels/discord" _ "github.com/sipeed/picoclaw/pkg/channels/feishu" _ "github.com/sipeed/picoclaw/pkg/channels/line" _ "github.com/sipeed/picoclaw/pkg/channels/maixcam" _ "github.com/sipeed/picoclaw/pkg/channels/onebot" _ "github.com/sipeed/picoclaw/pkg/channels/qq" - slackch "github.com/sipeed/picoclaw/pkg/channels/slack" - tgramch "github.com/sipeed/picoclaw/pkg/channels/telegram" + _ "github.com/sipeed/picoclaw/pkg/channels/slack" + _ "github.com/sipeed/picoclaw/pkg/channels/telegram" _ "github.com/sipeed/picoclaw/pkg/channels/wecom" _ "github.com/sipeed/picoclaw/pkg/channels/whatsapp" "github.com/sipeed/picoclaw/pkg/config" @@ -34,7 +33,6 @@ import ( "github.com/sipeed/picoclaw/pkg/providers" "github.com/sipeed/picoclaw/pkg/state" "github.com/sipeed/picoclaw/pkg/tools" - "github.com/sipeed/picoclaw/pkg/voice" ) func gatewayCmd(debug bool) error { @@ -127,42 +125,6 @@ func gatewayCmd(debug bool) error { agentLoop.SetChannelManager(channelManager) agentLoop.SetMediaStore(mediaStore) - var transcriber *voice.GroqTranscriber - groqAPIKey := cfg.Providers.Groq.APIKey - if groqAPIKey == "" { - for _, mc := range cfg.ModelList { - if strings.HasPrefix(mc.Model, "groq/") && mc.APIKey != "" { - groqAPIKey = mc.APIKey - break - } - } - } - if groqAPIKey != "" { - transcriber = voice.NewGroqTranscriber(groqAPIKey) - logger.InfoC("voice", "Groq voice transcription enabled") - } - - if transcriber != nil { - if telegramChannel, ok := channelManager.GetChannel("telegram"); ok { - if tc, ok := telegramChannel.(*tgramch.TelegramChannel); ok { - tc.SetTranscriber(transcriber) - logger.InfoC("voice", "Groq transcription attached to Telegram channel") - } - } - if discordChannel, ok := channelManager.GetChannel("discord"); ok { - if dc, ok := discordChannel.(*dch.DiscordChannel); ok { - dc.SetTranscriber(transcriber) - logger.InfoC("voice", "Groq transcription attached to Discord channel") - } - } - if slackChannel, ok := channelManager.GetChannel("slack"); ok { - if sc, ok := slackChannel.(*slackch.SlackChannel); ok { - sc.SetTranscriber(transcriber) - logger.InfoC("voice", "Groq transcription attached to Slack channel") - } - } - } - enabledChannels := channelManager.GetEnabledChannels() if len(enabledChannels) > 0 { fmt.Printf("✓ Channels enabled: %s\n", enabledChannels) diff --git a/pkg/channels/discord/discord.go b/pkg/channels/discord/discord.go index 7987f45a9..68725b124 100644 --- a/pkg/channels/discord/discord.go +++ b/pkg/channels/discord/discord.go @@ -16,24 +16,21 @@ import ( "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/utils" - "github.com/sipeed/picoclaw/pkg/voice" ) const ( - transcriptionTimeout = 30 * time.Second - sendTimeout = 10 * time.Second + sendTimeout = 10 * time.Second ) type DiscordChannel struct { *channels.BaseChannel - session *discordgo.Session - config config.DiscordConfig - transcriber *voice.GroqTranscriber - ctx context.Context - cancel context.CancelFunc - typingMu sync.Mutex - typingStop map[string]chan struct{} // chatID → stop signal - botUserID string // stored for mention checking + session *discordgo.Session + config config.DiscordConfig + ctx context.Context + cancel context.CancelFunc + typingMu sync.Mutex + typingStop map[string]chan struct{} // chatID → stop signal + botUserID string // stored for mention checking } func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordChannel, error) { @@ -48,16 +45,11 @@ func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordC BaseChannel: base, session: session, config: cfg, - transcriber: nil, ctx: context.Background(), typingStop: make(map[string]chan struct{}), }, nil } -func (c *DiscordChannel) SetTranscriber(transcriber *voice.GroqTranscriber) { - c.transcriber = transcriber -} - func (c *DiscordChannel) Start(ctx context.Context) error { logger.InfoC("discord", "Starting Discord bot") @@ -265,7 +257,7 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag return } - // Check allowlist first to avoid downloading attachments and transcribing for rejected users + // Check allowlist first to avoid downloading attachments for rejected users if !c.IsAllowed(m.Author.ID) { logger.DebugCF("discord", "Message rejected by allowlist", map[string]any{ "user_id": m.Author.ID, @@ -323,29 +315,8 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag if isAudio { localPath := c.downloadAttachment(attachment.URL, attachment.Filename) if localPath != "" { - transcribedText := "" - if c.transcriber != nil && c.transcriber.IsAvailable() { - ctx, cancel := context.WithTimeout(c.ctx, transcriptionTimeout) - result, err := c.transcriber.Transcribe(ctx, localPath) - cancel() // Release context resources immediately to avoid leaks in for loop - - if err != nil { - logger.ErrorCF("discord", "Voice transcription failed", map[string]any{ - "error": err.Error(), - }) - transcribedText = fmt.Sprintf("[audio: %s (transcription failed)]", attachment.Filename) - } else { - transcribedText = fmt.Sprintf("[audio transcription: %s]", result.Text) - logger.DebugCF("discord", "Audio transcribed successfully", map[string]any{ - "text": result.Text, - }) - } - } else { - transcribedText = fmt.Sprintf("[audio: %s]", attachment.Filename) - } - mediaPaths = append(mediaPaths, storeMedia(localPath, attachment.Filename)) - content = appendContent(content, transcribedText) + content = appendContent(content, fmt.Sprintf("[audio: %s]", attachment.Filename)) } else { logger.WarnCF("discord", "Failed to download audio attachment", map[string]any{ "url": attachment.URL, diff --git a/pkg/channels/onebot/onebot.go b/pkg/channels/onebot/onebot.go index fb357cf27..001965238 100644 --- a/pkg/channels/onebot/onebot.go +++ b/pkg/channels/onebot/onebot.go @@ -18,7 +18,6 @@ import ( "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/utils" - "github.com/sipeed/picoclaw/pkg/voice" ) type OneBotChannel struct { @@ -36,7 +35,6 @@ type OneBotChannel struct { selfID int64 pending map[string]chan json.RawMessage pendingMu sync.Mutex - transcriber *voice.GroqTranscriber lastMessageID sync.Map pendingEmojiMsg sync.Map } @@ -112,10 +110,6 @@ func NewOneBotChannel(cfg config.OneBotConfig, messageBus *bus.MessageBus) (*One }, nil } -func (c *OneBotChannel) SetTranscriber(transcriber *voice.GroqTranscriber) { - c.transcriber = transcriber -} - func (c *OneBotChannel) setMsgEmojiLike(messageID string, emojiID int, set bool) { go func() { _, err := c.sendAPIRequest("set_msg_emoji_like", map[string]any{ @@ -794,25 +788,8 @@ func (c *OneBotChannel) parseMessageSegments( LoggerPrefix: "onebot", }) if localPath != "" { - if c.transcriber != nil && c.transcriber.IsAvailable() { - tctx, tcancel := context.WithTimeout(c.ctx, 30*time.Second) - result, err := c.transcriber.Transcribe(tctx, localPath) - tcancel() - if err != nil { - logger.WarnCF("onebot", "Voice transcription failed", map[string]any{ - "error": err.Error(), - }) - textParts = append(textParts, "[voice (transcription failed)]") - mediaRefs = append(mediaRefs, storeFile(localPath, "voice.amr")) - } else { - textParts = append(textParts, fmt.Sprintf("[voice transcription: %s]", result.Text)) - // Still store the file so it can be released later - storeFile(localPath, "voice.amr") - } - } else { - textParts = append(textParts, "[voice]") - mediaRefs = append(mediaRefs, storeFile(localPath, "voice.amr")) - } + textParts = append(textParts, "[voice]") + mediaRefs = append(mediaRefs, storeFile(localPath, "voice.amr")) } } } diff --git a/pkg/channels/slack/slack.go b/pkg/channels/slack/slack.go index f2dda15ac..a8d329d65 100644 --- a/pkg/channels/slack/slack.go +++ b/pkg/channels/slack/slack.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" "sync" - "time" "github.com/slack-go/slack" "github.com/slack-go/slack/slackevents" @@ -17,7 +16,6 @@ import ( "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/utils" - "github.com/sipeed/picoclaw/pkg/voice" ) type SlackChannel struct { @@ -27,7 +25,6 @@ type SlackChannel struct { socketClient *socketmode.Client botUserID string teamID string - transcriber *voice.GroqTranscriber ctx context.Context cancel context.CancelFunc pendingAcks sync.Map @@ -60,10 +57,6 @@ func NewSlackChannel(cfg config.SlackConfig, messageBus *bus.MessageBus) (*Slack }, nil } -func (c *SlackChannel) SetTranscriber(transcriber *voice.GroqTranscriber) { - c.transcriber = transcriber -} - func (c *SlackChannel) Start(ctx context.Context) error { logger.InfoC("slack", "Starting Slack channel (Socket Mode)") @@ -311,21 +304,7 @@ func (c *SlackChannel) handleMessageEvent(ev *slackevents.MessageEvent) { continue } mediaPaths = append(mediaPaths, storeMedia(localPath, file.Name)) - - if utils.IsAudioFile(file.Name, file.Mimetype) && c.transcriber != nil && c.transcriber.IsAvailable() { - ctx, cancel := context.WithTimeout(c.ctx, 30*time.Second) - defer cancel() - result, err := c.transcriber.Transcribe(ctx, localPath) - - if err != nil { - logger.ErrorCF("slack", "Voice transcription failed", map[string]any{"error": err.Error()}) - content += fmt.Sprintf("\n[audio: %s (transcription failed)]", file.Name) - } else { - content += fmt.Sprintf("\n[voice transcription: %s]", result.Text) - } - } else { - content += fmt.Sprintf("\n[file: %s]", file.Name) - } + content += fmt.Sprintf("\n[file: %s]", file.Name) } } diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index f9390b8ed..9544987ec 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -22,7 +22,6 @@ import ( "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/utils" - "github.com/sipeed/picoclaw/pkg/voice" ) type TelegramChannel struct { @@ -32,7 +31,6 @@ type TelegramChannel struct { commands TelegramCommander config *config.Config chatIDs map[string]int64 - transcriber *voice.GroqTranscriber ctx context.Context cancel context.CancelFunc placeholders sync.Map // chatID -> messageID @@ -91,16 +89,11 @@ func NewTelegramChannel(cfg *config.Config, bus *bus.MessageBus) (*TelegramChann bot: bot, config: cfg, chatIDs: make(map[string]int64), - transcriber: nil, placeholders: sync.Map{}, stopThinking: sync.Map{}, }, nil } -func (c *TelegramChannel) SetTranscriber(transcriber *voice.GroqTranscriber) { - c.transcriber = transcriber -} - func (c *TelegramChannel) Start(ctx context.Context) error { logger.InfoC("telegram", "Starting Telegram bot (polling mode)...") @@ -391,32 +384,10 @@ func (c *TelegramChannel) handleMessage(ctx context.Context, message *telego.Mes if voicePath != "" { mediaPaths = append(mediaPaths, storeMedia(voicePath, "voice.ogg")) - transcribedText := "" - if c.transcriber != nil && c.transcriber.IsAvailable() { - transcriberCtx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - - result, err := c.transcriber.Transcribe(transcriberCtx, voicePath) - if err != nil { - logger.ErrorCF("telegram", "Voice transcription failed", map[string]any{ - "error": err.Error(), - "path": voicePath, - }) - transcribedText = "[voice (transcription failed)]" - } else { - transcribedText = fmt.Sprintf("[voice transcription: %s]", result.Text) - logger.InfoCF("telegram", "Voice transcribed successfully", map[string]any{ - "text": result.Text, - }) - } - } else { - transcribedText = "[voice]" - } - if content != "" { content += "\n" } - content += transcribedText + content += "[voice]" } }