mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-05-25 16:00:35 +00:00
0f395ce110
* refactor: update ASR and TTS implementations * fix lint * Integrating asr/tts models w/ new security config * update documents * add arbitrary whisper transcriptor support * update documents * fix lint * add mimo tts
97 lines
2.2 KiB
Go
97 lines
2.2 KiB
Go
package audio
|
||
|
||
import (
|
||
"strings"
|
||
"unicode"
|
||
)
|
||
|
||
// SplitSentences splits text into sentence-sized chunks suitable for TTS synthesis.
|
||
// It splits on sentence-ending punctuation (.!?\n, as well as CJK 。, !, ?) while avoiding false splits
|
||
// on decimal numbers. Very short fragments are merged with
|
||
// the next sentence to prevent choppy playback.
|
||
func SplitSentences(text string) []string {
|
||
if text == "" {
|
||
return nil
|
||
}
|
||
|
||
var sentences []string
|
||
var current strings.Builder
|
||
runes := []rune(text)
|
||
|
||
for i := 0; i < len(runes); i++ {
|
||
r := runes[i]
|
||
if r == '\n' {
|
||
s := strings.TrimSpace(current.String())
|
||
if s != "" {
|
||
sentences = append(sentences, s)
|
||
}
|
||
current.Reset()
|
||
continue
|
||
}
|
||
|
||
current.WriteRune(r)
|
||
|
||
if r == '.' || r == '!' || r == '?' || r == '。' || r == '!' || r == '?' {
|
||
// Avoid splitting on decimal numbers like "3.14"
|
||
if r == '.' && i > 0 && unicode.IsDigit(runes[i-1]) &&
|
||
i+1 < len(runes) && unicode.IsDigit(runes[i+1]) {
|
||
continue
|
||
}
|
||
|
||
// Consume contiguous punctuation clusters (e.g., "..." or "?!").
|
||
for i+1 < len(runes) && (runes[i+1] == '.' || runes[i+1] == '!' || runes[i+1] == '?' || runes[i+1] == '。' || runes[i+1] == '!' || runes[i+1] == '?') {
|
||
i++
|
||
current.WriteRune(runes[i])
|
||
}
|
||
|
||
s := strings.TrimSpace(current.String())
|
||
if s != "" {
|
||
sentences = append(sentences, s)
|
||
}
|
||
current.Reset()
|
||
}
|
||
}
|
||
|
||
// Flush remaining text
|
||
if s := strings.TrimSpace(current.String()); s != "" {
|
||
sentences = append(sentences, s)
|
||
}
|
||
|
||
// Merge very short fragments with the next sentence
|
||
return mergeShorties(sentences, 15)
|
||
}
|
||
|
||
// mergeShorties merges sentences shorter than minLen characters with the following sentence.
|
||
func mergeShorties(sentences []string, minLen int) []string {
|
||
if len(sentences) <= 1 {
|
||
return sentences
|
||
}
|
||
|
||
var merged []string
|
||
var buf string
|
||
|
||
for _, s := range sentences {
|
||
if buf != "" {
|
||
buf += " " + s
|
||
if len([]rune(buf)) >= minLen {
|
||
merged = append(merged, buf)
|
||
buf = ""
|
||
}
|
||
} else if len([]rune(s)) < minLen {
|
||
buf = s
|
||
} else {
|
||
merged = append(merged, s)
|
||
}
|
||
}
|
||
|
||
if buf != "" {
|
||
if len(merged) > 0 {
|
||
merged[len(merged)-1] += " " + buf
|
||
} else {
|
||
merged = append(merged, buf)
|
||
}
|
||
}
|
||
|
||
return merged
|
||
}
|