mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
329e68e017
* refactor(agent): introduce interfaces for MessageBus and ChannelManager Phase 2 of loop.go refactor — dependency inversion using adapter pattern. - Add interfaces.MessageBus and interfaces.ChannelManager interfaces - Create adapters/messagebus.go wrapping *bus.MessageBus - Create adapters/channelmanager.go wrapping *channels.Manager - Update AgentLoop to use interfaces instead of concrete types - Update registerSharedTools to accept interfaces.MessageBus Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor(agent): restructure pipeline and rename loop files Pipeline refactoring: - Split pipeline.go (1400 lines) into focused files: - pipeline_setup.go (~115 lines): SetupTurn method - pipeline_llm.go (~519 lines): CallLLM method - pipeline_execute.go (~693 lines): ExecuteTools method - pipeline_finalize.go (~78 lines): Finalize method - Pipeline struct and NewPipeline remain in pipeline.go (~39 lines) Agent file renaming: - Rename loop_*.go to agent_*.go for consistent naming: - loop.go -> agent.go, loop_message.go -> agent_message.go, etc. - Merge turn.go + turn_exec.go into turn_state.go - Rename loop_turn.go -> turn_coord.go Documentation: - Update docs/pipeline-restructuring-plan.md - Add docs/agent-rename-plan.md Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix(agent): code format fixed * refactor(agent): code test file added/renamed * docs(agent): update agent refactor docs * fix(agent): fix agent hardAbortX --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
110 lines
2.8 KiB
Go
110 lines
2.8 KiB
Go
// PicoClaw - Ultra-lightweight personal AI agent
|
|
|
|
package agent
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/bus"
|
|
"github.com/sipeed/picoclaw/pkg/logger"
|
|
"github.com/sipeed/picoclaw/pkg/utils"
|
|
)
|
|
|
|
func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) (bus.InboundMessage, bool) {
|
|
if al.transcriber == nil || al.mediaStore == nil || len(msg.Media) == 0 {
|
|
return msg, false
|
|
}
|
|
|
|
// Transcribe each audio media ref in order.
|
|
var transcriptions []string
|
|
var keptMedia []string
|
|
for _, ref := range msg.Media {
|
|
path, meta, err := al.mediaStore.ResolveWithMeta(ref)
|
|
if err != nil {
|
|
logger.WarnCF("voice", "Failed to resolve media ref", map[string]any{"ref": ref, "error": err})
|
|
keptMedia = append(keptMedia, ref)
|
|
continue
|
|
}
|
|
if !utils.IsAudioFile(meta.Filename, meta.ContentType) {
|
|
keptMedia = append(keptMedia, ref)
|
|
continue
|
|
}
|
|
result, err := al.transcriber.Transcribe(ctx, path)
|
|
if err != nil {
|
|
logger.WarnCF("voice", "Transcription failed", map[string]any{"ref": ref, "error": err})
|
|
transcriptions = append(transcriptions, "")
|
|
keptMedia = append(keptMedia, ref)
|
|
continue
|
|
}
|
|
transcriptions = append(transcriptions, result.Text)
|
|
}
|
|
|
|
if len(transcriptions) == 0 {
|
|
return msg, false
|
|
}
|
|
|
|
al.sendTranscriptionFeedback(ctx, msg.Channel, msg.ChatID, msg.MessageID, transcriptions)
|
|
|
|
// Replace audio annotations sequentially with transcriptions.
|
|
idx := 0
|
|
newContent := audioAnnotationRe.ReplaceAllStringFunc(msg.Content, func(match string) string {
|
|
if idx >= len(transcriptions) {
|
|
return match
|
|
}
|
|
text := transcriptions[idx]
|
|
idx++
|
|
if text == "" {
|
|
return match
|
|
}
|
|
return "[voice: " + text + "]"
|
|
})
|
|
|
|
// Append any remaining transcriptions not matched by an annotation.
|
|
for ; idx < len(transcriptions); idx++ {
|
|
if transcriptions[idx] != "" {
|
|
newContent += "\n[voice: " + transcriptions[idx] + "]"
|
|
}
|
|
}
|
|
|
|
msg.Content = newContent
|
|
msg.Media = keptMedia
|
|
return msg, true
|
|
}
|
|
|
|
func (al *AgentLoop) sendTranscriptionFeedback(
|
|
ctx context.Context,
|
|
channel, chatID, messageID string,
|
|
validTexts []string,
|
|
) {
|
|
if !al.cfg.Voice.EchoTranscription {
|
|
return
|
|
}
|
|
if al.channelManager == nil {
|
|
return
|
|
}
|
|
|
|
var nonEmpty []string
|
|
for _, t := range validTexts {
|
|
if t != "" {
|
|
nonEmpty = append(nonEmpty, t)
|
|
}
|
|
}
|
|
|
|
var feedbackMsg string
|
|
if len(nonEmpty) > 0 {
|
|
feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n")
|
|
} else {
|
|
feedbackMsg = "No voice detected in the audio"
|
|
}
|
|
|
|
err := al.channelManager.SendMessage(ctx, bus.OutboundMessage{
|
|
Context: bus.NewOutboundContext(channel, chatID, messageID),
|
|
Content: feedbackMsg,
|
|
ReplyToMessageID: messageID,
|
|
})
|
|
if err != nil {
|
|
logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()})
|
|
}
|
|
}
|