mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
12d5421c26
Break up the monolithic 4384-line loop.go into 12 focused files: - loop.go: core AgentLoop struct and main Run loop - loop_turn.go: turn execution logic (runTurn, askSideQuestion, etc.) - loop_utils.go: pure utility functions (formatters, helpers) - loop_init.go: constructor and tool registration - loop_message.go: message handling (processMessage, routing) - loop_command.go: command processing (/use, /btw, etc.) - loop_mcp.go: MCP runtime management - loop_event.go: event/hook system helpers - loop_media.go: media resolution and artifact handling - loop_outbound.go: response publishing - loop_transcribe.go: audio transcription - loop_steering.go: steering queue and continuation - loop_inject.go: setter injection methods No functional changes - pure code movement with updated imports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
110 lines
2.8 KiB
Go
110 lines
2.8 KiB
Go
// PicoClaw - Ultra-lightweight personal AI agent
|
|
|
|
package agent
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/bus"
|
|
"github.com/sipeed/picoclaw/pkg/logger"
|
|
"github.com/sipeed/picoclaw/pkg/utils"
|
|
)
|
|
|
|
func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) (bus.InboundMessage, bool) {
|
|
if al.transcriber == nil || al.mediaStore == nil || len(msg.Media) == 0 {
|
|
return msg, false
|
|
}
|
|
|
|
// Transcribe each audio media ref in order.
|
|
var transcriptions []string
|
|
var keptMedia []string
|
|
for _, ref := range msg.Media {
|
|
path, meta, err := al.mediaStore.ResolveWithMeta(ref)
|
|
if err != nil {
|
|
logger.WarnCF("voice", "Failed to resolve media ref", map[string]any{"ref": ref, "error": err})
|
|
keptMedia = append(keptMedia, ref)
|
|
continue
|
|
}
|
|
if !utils.IsAudioFile(meta.Filename, meta.ContentType) {
|
|
keptMedia = append(keptMedia, ref)
|
|
continue
|
|
}
|
|
result, err := al.transcriber.Transcribe(ctx, path)
|
|
if err != nil {
|
|
logger.WarnCF("voice", "Transcription failed", map[string]any{"ref": ref, "error": err})
|
|
transcriptions = append(transcriptions, "")
|
|
keptMedia = append(keptMedia, ref)
|
|
continue
|
|
}
|
|
transcriptions = append(transcriptions, result.Text)
|
|
}
|
|
|
|
if len(transcriptions) == 0 {
|
|
return msg, false
|
|
}
|
|
|
|
al.sendTranscriptionFeedback(ctx, msg.Channel, msg.ChatID, msg.MessageID, transcriptions)
|
|
|
|
// Replace audio annotations sequentially with transcriptions.
|
|
idx := 0
|
|
newContent := audioAnnotationRe.ReplaceAllStringFunc(msg.Content, func(match string) string {
|
|
if idx >= len(transcriptions) {
|
|
return match
|
|
}
|
|
text := transcriptions[idx]
|
|
idx++
|
|
if text == "" {
|
|
return match
|
|
}
|
|
return "[voice: " + text + "]"
|
|
})
|
|
|
|
// Append any remaining transcriptions not matched by an annotation.
|
|
for ; idx < len(transcriptions); idx++ {
|
|
if transcriptions[idx] != "" {
|
|
newContent += "\n[voice: " + transcriptions[idx] + "]"
|
|
}
|
|
}
|
|
|
|
msg.Content = newContent
|
|
msg.Media = keptMedia
|
|
return msg, true
|
|
}
|
|
|
|
func (al *AgentLoop) sendTranscriptionFeedback(
|
|
ctx context.Context,
|
|
channel, chatID, messageID string,
|
|
validTexts []string,
|
|
) {
|
|
if !al.cfg.Voice.EchoTranscription {
|
|
return
|
|
}
|
|
if al.channelManager == nil {
|
|
return
|
|
}
|
|
|
|
var nonEmpty []string
|
|
for _, t := range validTexts {
|
|
if t != "" {
|
|
nonEmpty = append(nonEmpty, t)
|
|
}
|
|
}
|
|
|
|
var feedbackMsg string
|
|
if len(nonEmpty) > 0 {
|
|
feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n")
|
|
} else {
|
|
feedbackMsg = "No voice detected in the audio"
|
|
}
|
|
|
|
err := al.channelManager.SendMessage(ctx, bus.OutboundMessage{
|
|
Context: bus.NewOutboundContext(channel, chatID, messageID),
|
|
Content: feedbackMsg,
|
|
ReplyToMessageID: messageID,
|
|
})
|
|
if err != nil {
|
|
logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()})
|
|
}
|
|
}
|