Removed the old heavy logic

This commit is contained in:
afjcjsbx
2026-03-08 18:22:15 +01:00
parent f87ab99833
commit 536e26aff1
5 changed files with 162 additions and 109 deletions
+54 -44
View File
@@ -413,9 +413,10 @@ var audioAnnotationRe = regexp.MustCompile(`\[(voice|audio)(?::[^\]]*)?\]`)
// transcribeAudioInMessage resolves audio media refs, transcribes them, and
// replaces audio annotations in msg.Content with the transcribed text.
func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) bus.InboundMessage {
// Returns the (possibly modified) message and true if audio was transcribed.
func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) (bus.InboundMessage, bool) {
if al.transcriber == nil || al.mediaStore == nil || len(msg.Media) == 0 {
return msg
return msg, false
}
// Transcribe each audio media ref in order.
@@ -439,10 +440,10 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou
}
if len(transcriptions) == 0 {
return msg
return msg, false
}
al.sendTranscriptionFeedback(msg.Channel, msg.ChatID, msg.MessageID, transcriptions)
al.sendTranscriptionFeedback(ctx, msg.Channel, msg.ChatID, msg.MessageID, transcriptions)
// Replace audio annotations sequentially with transcriptions.
idx := 0
@@ -461,45 +462,56 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou
}
msg.Content = newContent
return msg
return msg, true
}
// sendTranscriptionFeedback Asynchronously sends feedback to the user
// with the result of audio transcription if the option is enabled.
func (al *AgentLoop) sendTranscriptionFeedback(channel, chatID string, messageID string, validTexts []string) {
// sendTranscriptionFeedback sends feedback to the user with the result of
// audio transcription if the option is enabled. It sends the message directly
// through the channel (bypassing the bus queue) so that ordering with the
// subsequent placeholder is guaranteed.
func (al *AgentLoop) sendTranscriptionFeedback(
ctx context.Context,
channel, chatID, messageID string,
validTexts []string,
) {
if !al.cfg.Voice.EchoTranscription {
return
}
if al.channelManager == nil {
return
}
go func() {
pubCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var nonEmpty []string
for _, t := range validTexts {
if t != "" {
nonEmpty = append(nonEmpty, t)
}
var nonEmpty []string
for _, t := range validTexts {
if t != "" {
nonEmpty = append(nonEmpty, t)
}
}
var feedbackMsg string
if len(nonEmpty) > 0 {
feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n")
} else {
feedbackMsg = "No voice detected in the audio"
}
var feedbackMsg string
if len(nonEmpty) > 0 {
feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n")
} else {
feedbackMsg = "No voice detected in the audio"
}
err := al.bus.PublishOutbound(pubCtx, bus.OutboundMessage{
Channel: channel,
ChatID: chatID,
Content: feedbackMsg,
ReplyToMessageID: messageID,
SkipPlaceholder: true, // It serves to avoid consuming the message "Thinking..."
})
if err != nil {
logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()})
}
}()
ch, ok := al.channelManager.GetChannel(channel)
if !ok {
return
}
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
err := ch.Send(sendCtx, bus.OutboundMessage{
Channel: channel,
ChatID: chatID,
Content: feedbackMsg,
ReplyToMessageID: messageID,
})
if err != nil {
logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()})
}
}
// inferMediaType determines the media type ("image", "audio", "video", "file")
@@ -613,7 +625,14 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage)
},
)
msg = al.transcribeAudioInMessage(ctx, msg)
var hadAudio bool
msg, hadAudio = al.transcribeAudioInMessage(ctx, msg)
// For audio messages the placeholder was deferred by the channel.
// Now that transcription (and optional feedback) is done, send it.
if hadAudio && al.channelManager != nil {
al.channelManager.SendPlaceholder(ctx, msg.Channel, msg.ChatID)
}
// Route system messages to processSystemMessage
if msg.Channel == "system" {
@@ -803,15 +822,6 @@ func (al *AgentLoop) runAgentLoop(
// 2. Save user message to session
agent.Sessions.AddMessage(opts.SessionKey, "user", opts.UserMessage)
// thinking message only for channels, not for background tasks
if opts.Channel != "" && opts.ChatID != "" && !constants.IsInternalChannel(opts.Channel) && !opts.NoHistory {
al.bus.PublishOutbound(ctx, bus.OutboundMessage{
Channel: opts.Channel,
ChatID: opts.ChatID,
TriggerPlaceholder: true,
})
}
// 3. Run LLM iteration loop
finalContent, iteration, err := al.runLLMIteration(ctx, agent, messages, opts)
if err != nil {
+4 -6
View File
@@ -30,12 +30,10 @@ type InboundMessage struct {
}
type OutboundMessage struct {
Channel string `json:"channel"`
ChatID string `json:"chat_id"`
Content string `json:"content"`
ReplyToMessageID string `json:"reply_to_message_id,omitempty"`
SkipPlaceholder bool `json:"skip_placeholder,omitempty"` // Tells Manager not to use Thinking
TriggerPlaceholder bool `json:"trigger_placeholder,omitempty"`
Channel string `json:"channel"`
ChatID string `json:"chat_id"`
Content string `json:"content"`
ReplyToMessageID string `json:"reply_to_message_id,omitempty"`
}
// MediaPart describes a single media attachment to send.
+15
View File
@@ -5,6 +5,7 @@ import (
"crypto/rand"
"encoding/binary"
"encoding/hex"
"regexp"
"strconv"
"strings"
"sync/atomic"
@@ -32,6 +33,9 @@ func init() {
uniqueIDPrefix = hex.EncodeToString(b[:])
}
// audioAnnotationRe matches audio/voice annotations injected by channels (e.g. [voice], [audio: file.ogg]).
var audioAnnotationRe = regexp.MustCompile(`\[(voice|audio)(?::[^\]]*)?\]`)
// uniqueID generates a process-unique ID using a random prefix and an atomic counter.
// This ID is intended for internal correlation (e.g. media scope keys) and is NOT
// cryptographically secure — it must not be used in contexts where unpredictability matters.
@@ -284,6 +288,17 @@ func (c *BaseChannel) HandleMessage(
c.placeholderRecorder.RecordReactionUndo(c.name, chatID, undo)
}
}
// Placeholder — independent pipeline.
// Skip when the message contains audio: the agent will send the
// placeholder after transcription completes, so the user sees
// "Thinking…" only once the voice has been processed.
if !audioAnnotationRe.MatchString(content) {
if pc, ok := c.owner.(PlaceholderCapable); ok {
if phID, err := pc.SendPlaceholder(ctx, chatID); err == nil && phID != "" {
c.placeholderRecorder.RecordPlaceholder(c.name, chatID, phID)
}
}
}
}
if err := c.bus.PublishInbound(ctx, msg); err != nil {
+27 -17
View File
@@ -100,6 +100,27 @@ func (m *Manager) RecordPlaceholder(channel, chatID, placeholderID string) {
m.placeholders.Store(key, placeholderEntry{id: placeholderID, createdAt: time.Now()})
}
// SendPlaceholder asks the named channel to post a placeholder message
// (e.g. "Thinking…") into chatID and, on success, stores the returned
// placeholder ID via RecordPlaceholder so it can be looked up later.
//
// It reports true only when a placeholder was actually sent and recorded.
// It reports false when the channel is not registered, when the channel does
// not implement PlaceholderCapable, or when the channel returns an error or
// an empty placeholder ID.
func (m *Manager) SendPlaceholder(ctx context.Context, channel, chatID string) bool {
	// Hold the read lock only for the map lookup; the send itself runs unlocked.
	m.mu.RLock()
	target, registered := m.channels[channel]
	m.mu.RUnlock()
	if !registered {
		return false
	}

	sender, capable := target.(PlaceholderCapable)
	if !capable {
		return false
	}

	placeholderID, err := sender.SendPlaceholder(ctx, chatID)
	if err == nil && placeholderID != "" {
		m.RecordPlaceholder(channel, chatID, placeholderID)
		return true
	}
	return false
}
// RecordTypingStop registers a typing stop function for later invocation.
// Implements PlaceholderRecorder.
func (m *Manager) RecordTypingStop(channel, chatID string, stop func()) {
@@ -134,15 +155,13 @@ func (m *Manager) preSend(ctx context.Context, name string, msg bus.OutboundMess
}
// 3. Try editing placeholder
if !msg.SkipPlaceholder {
if v, loaded := m.placeholders.LoadAndDelete(key); loaded {
if entry, ok := v.(placeholderEntry); ok && entry.id != "" {
if editor, ok := ch.(MessageEditor); ok {
if err := editor.EditMessage(ctx, msg.ChatID, entry.id, msg.Content); err == nil {
return true // edited successfully, skip Send
}
// edit failed → fall through to normal Send
if v, loaded := m.placeholders.LoadAndDelete(key); loaded {
if entry, ok := v.(placeholderEntry); ok && entry.id != "" {
if editor, ok := ch.(MessageEditor); ok {
if err := editor.EditMessage(ctx, msg.ChatID, entry.id, msg.Content); err == nil {
return true // edited successfully, skip Send
}
// edit failed → fall through to normal Send
}
}
}
@@ -500,15 +519,6 @@ func (m *Manager) sendWithRetry(ctx context.Context, name string, w *channelWork
return
}
if msg.TriggerPlaceholder {
if pc, ok := w.ch.(PlaceholderCapable); ok {
if phID, err := pc.SendPlaceholder(ctx, msg.ChatID); err == nil && phID != "" {
m.RecordPlaceholder(name, msg.ChatID, phID)
}
}
return
}
// Pre-send: stop typing and try to edit placeholder
if m.preSend(ctx, name, msg, w.ch) {
return // placeholder was edited successfully, skip Send
+62 -42
View File
@@ -877,7 +877,7 @@ func TestBuildMediaScope_WithMessageID(t *testing.T) {
}
}
func TestManager_PlaceholderLogic(t *testing.T) {
func TestManager_PlaceholderConsumedByResponse(t *testing.T) {
mgr := &Manager{
channels: make(map[string]Channel),
workers: make(map[string]*channelWorker),
@@ -894,50 +894,37 @@ func TestManager_PlaceholderLogic(t *testing.T) {
mgr.workers["mock"] = worker
ctx := context.Background()
key := "mock:chat-1"
// Scenario 1: TriggerPlaceholder creates a placeholder but does NOT send text messages
msgTrigger := bus.OutboundMessage{
Channel: "mock",
ChatID: "chat-1",
TriggerPlaceholder: true,
// Simulate a placeholder recorded by base.go HandleMessage
mgr.RecordPlaceholder("mock", "chat-1", "ph-123")
if _, ok := mgr.placeholders.Load(key); !ok {
t.Fatal("expected placeholder to be recorded")
}
mgr.sendWithRetry(ctx, "mock", worker, msgTrigger)
if mockCh.placeholdersSent != 1 {
t.Errorf("expected 1 placeholder sent, got %d", mockCh.placeholdersSent)
// Transcription feedback arrives first — it should consume the placeholder
// and be delivered via EditMessage, not Send.
msgTranscript := bus.OutboundMessage{
Channel: "mock",
ChatID: "chat-1",
Content: "Transcript: hello",
}
mgr.sendWithRetry(ctx, "mock", worker, msgTranscript)
if mockCh.editedMessages != 1 {
t.Errorf("expected 1 edited message (placeholder consumed by transcript), got %d", mockCh.editedMessages)
}
if len(mockCh.sentMessages) != 0 {
t.Errorf("expected 0 normal messages sent, got %d", len(mockCh.sentMessages))
t.Errorf("expected 0 normal messages (transcript used edit), got %d", len(mockCh.sentMessages))
}
// Verify that the placeholder has been registered in the manager
key := "mock:chat-1"
if _, ok := mgr.placeholders.Load(key); !ok {
t.Errorf("expected placeholder to be recorded in manager")
// Placeholder should be gone now
if _, ok := mgr.placeholders.Load(key); ok {
t.Error("expected placeholder to be removed after being consumed")
}
// Scenario 2: SkipPlaceholder (simulates transcription). Must send normally, ignoring Edit.
msgSkip := bus.OutboundMessage{
Channel: "mock",
ChatID: "chat-1",
Content: "Transcript: hello",
SkipPlaceholder: true,
}
mgr.sendWithRetry(ctx, "mock", worker, msgSkip)
if mockCh.editedMessages != 0 {
t.Errorf("expected 0 edited messages due to SkipPlaceholder, got %d", mockCh.editedMessages)
}
if len(mockCh.sentMessages) != 1 {
t.Errorf("expected 1 normal message sent, got %d", len(mockCh.sentMessages))
}
// The placeholder must still exist for the next response
if _, ok := mgr.placeholders.Load(key); !ok {
t.Errorf("expected placeholder to STILL be in manager after SkipPlaceholder")
}
// Scenario 3: Normal Message (simulates the final LLM response). Must consume the placeholder.
// Final LLM response arrives — no placeholder left, so it goes through Send
msgFinal := bus.OutboundMessage{
Channel: "mock",
ChatID: "chat-1",
@@ -945,11 +932,44 @@ func TestManager_PlaceholderLogic(t *testing.T) {
}
mgr.sendWithRetry(ctx, "mock", worker, msgFinal)
if mockCh.editedMessages != 1 {
t.Errorf("expected 1 edited message (consuming placeholder), got %d", mockCh.editedMessages)
}
// The placeholder must have been removed
if _, ok := mgr.placeholders.Load(key); ok {
t.Errorf("expected placeholder to be removed after being consumed")
if len(mockCh.sentMessages) != 1 {
t.Errorf("expected 1 normal message sent, got %d", len(mockCh.sentMessages))
}
}
func TestManager_SendPlaceholder(t *testing.T) {
mgr := &Manager{
channels: make(map[string]Channel),
workers: make(map[string]*channelWorker),
placeholders: sync.Map{},
}
mockCh := &mockChannel{
sendFn: func(ctx context.Context, msg bus.OutboundMessage) error {
return nil
},
}
mgr.channels["mock"] = mockCh
ctx := context.Background()
// SendPlaceholder should send a placeholder and record it
ok := mgr.SendPlaceholder(ctx, "mock", "chat-1")
if !ok {
t.Fatal("expected SendPlaceholder to succeed")
}
if mockCh.placeholdersSent != 1 {
t.Errorf("expected 1 placeholder sent, got %d", mockCh.placeholdersSent)
}
key := "mock:chat-1"
if _, loaded := mgr.placeholders.Load(key); !loaded {
t.Error("expected placeholder to be recorded in manager")
}
// SendPlaceholder on unknown channel should return false
ok = mgr.SendPlaceholder(ctx, "unknown", "chat-1")
if ok {
t.Error("expected SendPlaceholder to fail for unknown channel")
}
}