mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
83 lines
2.0 KiB
Go
83 lines
2.0 KiB
Go
package integrationtools
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/audio/tts"
|
|
"github.com/sipeed/picoclaw/pkg/media"
|
|
)
|
|
|
|
type SendTTSTool struct {
|
|
provider tts.TTSProvider
|
|
mediaStore media.MediaStore
|
|
}
|
|
|
|
func NewSendTTSTool(provider tts.TTSProvider, store media.MediaStore) *SendTTSTool {
|
|
return &SendTTSTool{
|
|
provider: provider,
|
|
mediaStore: store,
|
|
}
|
|
}
|
|
|
|
// Name reports the tool's identifier, "send_tts".
func (t *SendTTSTool) Name() string {
	const toolName = "send_tts"
	return toolName
}
|
|
|
|
func (t *SendTTSTool) Description() string {
|
|
return "Synthesize speech from text and send it as an audio file to the user."
|
|
}
|
|
|
|
func (t *SendTTSTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"text": map[string]any{
|
|
"type": "string",
|
|
"description": "The text to synthesize into speech. NOTE: Reply in a highly concise, conversational, oral style suitable for text-to-speech. Do not use markdown, emojis, asterisks, or code blocks. Speak naturally.",
|
|
},
|
|
"filename": map[string]any{
|
|
"type": "string",
|
|
"description": "Optional filename for the audio file (e.g., response.ogg).",
|
|
},
|
|
},
|
|
"required": []string{"text"},
|
|
}
|
|
}
|
|
|
|
func (t *SendTTSTool) SetMediaStore(store media.MediaStore) {
|
|
t.mediaStore = store
|
|
}
|
|
|
|
func (t *SendTTSTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
|
text, _ := args["text"].(string)
|
|
text = strings.TrimSpace(text)
|
|
if text == "" {
|
|
return ErrorResult("text is required")
|
|
}
|
|
|
|
channel := ToolChannel(ctx)
|
|
chatID := ToolChatID(ctx)
|
|
filename, _ := args["filename"].(string)
|
|
|
|
ref, err := tts.SynthesizeAndStore(
|
|
ctx,
|
|
t.provider,
|
|
t.mediaStore,
|
|
text,
|
|
filename,
|
|
channel,
|
|
chatID,
|
|
)
|
|
if err != nil {
|
|
return ErrorResult(err.Error()).WithError(err)
|
|
}
|
|
|
|
// Return with ForUser set to original text, Media containing the audio ref,
|
|
// and mark as ResponseHandled so the audio is sent immediately without LLM intervention.
|
|
return &ToolResult{
|
|
ForLLM: "TTS audio sent",
|
|
ForUser: text,
|
|
Media: []string{ref},
|
|
ResponseHandled: true,
|
|
}
|
|
}
|