Files
picoclaw/pkg/agent/loop_transcribe.go
T
sky5454 12d5421c26 refactor(agent): split loop.go into focused sub-packages
Break up the monolithic 4384-line loop.go into 12 focused files:
- loop.go: core AgentLoop struct and main Run loop
- loop_turn.go: turn execution logic (runTurn, askSideQuestion, etc.)
- loop_utils.go: pure utility functions (formatters, helpers)
- loop_init.go: constructor and tool registration
- loop_message.go: message handling (processMessage, routing)
- loop_command.go: command processing (/use, /btw, etc.)
- loop_mcp.go: MCP runtime management
- loop_event.go: event/hook system helpers
- loop_media.go: media resolution and artifact handling
- loop_outbound.go: response publishing
- loop_transcribe.go: audio transcription
- loop_steering.go: steering queue and continuation
- loop_inject.go: setter injection methods

No functional changes - pure code movement with updated imports.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-17 11:17:12 +08:00

110 lines
2.8 KiB
Go

// PicoClaw - Ultra-lightweight personal AI agent
package agent
import (
"context"
"strings"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/utils"
)
func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) (bus.InboundMessage, bool) {
if al.transcriber == nil || al.mediaStore == nil || len(msg.Media) == 0 {
return msg, false
}
// Transcribe each audio media ref in order.
var transcriptions []string
var keptMedia []string
for _, ref := range msg.Media {
path, meta, err := al.mediaStore.ResolveWithMeta(ref)
if err != nil {
logger.WarnCF("voice", "Failed to resolve media ref", map[string]any{"ref": ref, "error": err})
keptMedia = append(keptMedia, ref)
continue
}
if !utils.IsAudioFile(meta.Filename, meta.ContentType) {
keptMedia = append(keptMedia, ref)
continue
}
result, err := al.transcriber.Transcribe(ctx, path)
if err != nil {
logger.WarnCF("voice", "Transcription failed", map[string]any{"ref": ref, "error": err})
transcriptions = append(transcriptions, "")
keptMedia = append(keptMedia, ref)
continue
}
transcriptions = append(transcriptions, result.Text)
}
if len(transcriptions) == 0 {
return msg, false
}
al.sendTranscriptionFeedback(ctx, msg.Channel, msg.ChatID, msg.MessageID, transcriptions)
// Replace audio annotations sequentially with transcriptions.
idx := 0
newContent := audioAnnotationRe.ReplaceAllStringFunc(msg.Content, func(match string) string {
if idx >= len(transcriptions) {
return match
}
text := transcriptions[idx]
idx++
if text == "" {
return match
}
return "[voice: " + text + "]"
})
// Append any remaining transcriptions not matched by an annotation.
for ; idx < len(transcriptions); idx++ {
if transcriptions[idx] != "" {
newContent += "\n[voice: " + transcriptions[idx] + "]"
}
}
msg.Content = newContent
msg.Media = keptMedia
return msg, true
}
func (al *AgentLoop) sendTranscriptionFeedback(
ctx context.Context,
channel, chatID, messageID string,
validTexts []string,
) {
if !al.cfg.Voice.EchoTranscription {
return
}
if al.channelManager == nil {
return
}
var nonEmpty []string
for _, t := range validTexts {
if t != "" {
nonEmpty = append(nonEmpty, t)
}
}
var feedbackMsg string
if len(nonEmpty) > 0 {
feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n")
} else {
feedbackMsg = "No voice detected in the audio"
}
err := al.channelManager.SendMessage(ctx, bus.OutboundMessage{
Context: bus.NewOutboundContext(channel, chatID, messageID),
Content: feedbackMsg,
ReplyToMessageID: messageID,
})
if err != nil {
logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()})
}
}