picoclaw/pkg/tools/integration/tts_send.go

package integrationtools

import (
	"context"
	"strings"

	"github.com/sipeed/picoclaw/pkg/audio/tts"
	"github.com/sipeed/picoclaw/pkg/media"
)

// SendTTSTool is a tool that synthesizes speech from text and returns the
// resulting audio as a media attachment (see Execute).
type SendTTSTool struct {
	// provider is handed to tts.SynthesizeAndStore to perform the synthesis.
	provider tts.TTSProvider
	// mediaStore is handed to tts.SynthesizeAndStore alongside the provider;
	// it can be replaced after construction via SetMediaStore.
	mediaStore media.MediaStore
}

// NewSendTTSTool builds a SendTTSTool that uses provider for speech synthesis
// and store as its media store. Neither argument is validated here.
func NewSendTTSTool(provider tts.TTSProvider, store media.MediaStore) *SendTTSTool {
	tool := new(SendTTSTool)
	tool.provider = provider
	tool.mediaStore = store
	return tool
}

// Name returns the tool's name, "send_tts".
func (t *SendTTSTool) Name() string {
	const toolName = "send_tts"
	return toolName
}

// Description returns a one-sentence, human-readable summary of what the
// tool does.
func (t *SendTTSTool) Description() string {
	const summary = "Synthesize speech from text and send it as an audio file to the user."
	return summary
}

// Parameters returns the argument schema for the tool as a nested map: an
// "object" with a required string property "text" and an optional string
// property "filename". A fresh map is built on every call.
func (t *SendTTSTool) Parameters() map[string]any {
	textProp := map[string]any{
		"type":        "string",
		"description": "The text to synthesize into speech. NOTE: Reply in a highly concise, conversational, oral style suitable for text-to-speech. Do not use markdown, emojis, asterisks, or code blocks. Speak naturally.",
	}
	filenameProp := map[string]any{
		"type":        "string",
		"description": "Optional filename for the audio file (e.g., response.ogg).",
	}

	schema := map[string]any{
		"type": "object",
		"properties": map[string]any{
			"text":     textProp,
			"filename": filenameProp,
		},
		// Only "text" is mandatory; "filename" may be omitted.
		"required": []string{"text"},
	}
	return schema
}

// SetMediaStore replaces the media store used by subsequent Execute calls.
// The assignment is not synchronized.
func (t *SendTTSTool) SetMediaStore(store media.MediaStore) {
	t.mediaStore = store
}

// Execute synthesizes the "text" argument into speech and returns the
// resulting audio reference as a media attachment.
//
// Arguments read from args:
//   - "text": required. Surrounding whitespace is trimmed; a missing,
//     non-string, or blank value produces the error result "text is required".
//   - "filename": optional. Forwarded unchanged; empty when absent or not a
//     string.
//
// The channel and chat ID are obtained from ctx via ToolChannel and
// ToolChatID. An error from tts.SynthesizeAndStore is reported as an error
// result whose message is the error text and which also carries the error
// itself.
func (t *SendTTSTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	// A failed type assertion leaves rawText empty, which is then rejected
	// by the same check as a blank string.
	rawText, _ := args["text"].(string)
	speech := strings.TrimSpace(rawText)
	if len(speech) == 0 {
		return ErrorResult("text is required")
	}

	var (
		targetChannel = ToolChannel(ctx)
		targetChatID  = ToolChatID(ctx)
	)
	// Optional argument: stays "" when not supplied.
	audioName, _ := args["filename"].(string)

	audioRef, synthErr := tts.SynthesizeAndStore(
		ctx,
		t.provider, t.mediaStore,
		speech, audioName,
		targetChannel, targetChatID,
	)
	if synthErr != nil {
		failure := ErrorResult(synthErr.Error())
		return failure.WithError(synthErr)
	}

	// ForUser carries the trimmed text and Media the audio reference.
	// ResponseHandled is set so the audio is sent immediately without LLM
	// intervention.
	result := &ToolResult{
		ResponseHandled: true,
		Media:           []string{audioRef},
		ForUser:         speech,
		ForLLM:          "TTS audio sent",
	}
	return result
}