mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
Merge upstream/main into fix/1323-telegram-endless-typing
Made-with: Cursor
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
package telegram
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// mdV2SpecialChars is the set of runes that escapeMarkdownV2 prefixes with a
// backslash when they occur in plain text destined for Telegram's MarkdownV2
// parse mode.
var mdV2SpecialChars = func() map[rune]bool {
	// One rune per reserved character; the final escape is a single backslash.
	const reserved = "*_[]()~`><#+-=|{}.!\\"

	set := make(map[rune]bool, len(reserved))
	for _, r := range reserved {
		set[r] = true
	}
	return set
}()
|
||||
|
||||
// entityPattern describes one Telegram MarkdownV2 inline entity type.
type entityPattern struct {
	re    *regexp.Regexp // matches one complete entity span, delimiters included
	open  string         // opening delimiter; also the key looked up in verbatimEntities
	close string         // closing delimiter, "" for entities that have none to re-emit
}

// allEntityPatterns lists every recognized entity in priority order
// (longer / more-specific delimiters first so they win over shorter ones).
//
// processText asks each regex for its leftmost match and keeps the earliest
// one; when two matches start at the same offset the longer span wins, and
// when they are equally long the entry listed first here is kept.
var allEntityPatterns = []entityPattern{
	// fenced code block — content is completely verbatim
	{re: regexp.MustCompile("(?s)```(?:[\\w]*\\n)?[\\s\\S]*?```"), open: "```", close: "```"},
	// inline code — content is completely verbatim.
	// The class must exclude the backslash itself so that an escaped pair
	// (backslash + any char, e.g. an escaped backtick) is consumed as a unit
	// by the second alternative instead of ending the span early.
	{re: regexp.MustCompile("`(?:[^`\\\\\\n]|\\\\.)*`"), open: "`", close: "`"},
	// expandable block-quote opener **>…
	{re: regexp.MustCompile(`(?m)\*\*>(?:[^\n]*)`), open: "**>", close: ""},
	// block-quote line >…
	{re: regexp.MustCompile(`(?m)^>(?:[^\n]*)`), open: ">", close: ""},
	// custom emoji / timestamp ![…](…) — must come before plain link
	{re: regexp.MustCompile(`!\[[^\]]*\]\([^)]*\)`), open: "!", close: ""},
	// inline URL / user mention […](…)
	{re: regexp.MustCompile(`\[[^\]]*\]\([^)]*\)`), open: "[", close: ""},
	// spoiler ||…|| — before single | so it wins
	{re: regexp.MustCompile(`\|\|(?:[^|\\\n]|\\.)*\|\|`), open: "||", close: "||"},
	// underline __…__ — before single _ so it wins
	{re: regexp.MustCompile(`__(?:[^_\\\n]|\\.)*__`), open: "__", close: "__"},
	// bold *…*
	{re: regexp.MustCompile(`\*(?:[^*\\\n]|\\.)*\*`), open: "*", close: "*"},
	// italic _…_
	{re: regexp.MustCompile(`_(?:[^_\\\n]|\\.)*_`), open: "_", close: "_"},
	// strikethrough ~…~
	{re: regexp.MustCompile(`~(?:[^~\\\n]|\\.)*~`), open: "~", close: "~"},
}
|
||||
|
||||
// verbatimEntities holds the opening delimiters of the entity types that
// processText copies to its output unchanged (code blocks, inline code,
// quotes, custom emoji and links); their inner content is never escaped or
// recursed into.
var verbatimEntities = func() map[string]bool {
	openers := [...]string{"```", "`", "**>", ">", "!", "["}

	set := make(map[string]bool, len(openers))
	for _, opener := range openers {
		set[opener] = true
	}
	return set
}()
|
||||
|
||||
// markdownToTelegramMarkdownV2 converts a Markdown string into a string safe
|
||||
// for sending with Telegram's MarkdownV2 parse mode.
|
||||
//
|
||||
// Rules:
|
||||
// - Markdown headings (# … ######) are converted to *bold*.
|
||||
// - **bold** Markdown syntax is converted to *bold*.
|
||||
// - Recognized Telegram MarkdownV2 entity spans are preserved; their inner
|
||||
// content is processed recursively so that nested valid entities are kept
|
||||
// intact while stray special characters are escaped.
|
||||
// - All plain-text segments have their MarkdownV2 special characters escaped.
|
||||
//
|
||||
// Reference: https://core.telegram.org/bots/api#formatting-options
|
||||
func markdownToTelegramMarkdownV2(text string) string {
|
||||
// 1. Convert Markdown headings → *escaped heading text*
|
||||
text = reHeading.ReplaceAllStringFunc(text, func(match string) string {
|
||||
sub := reHeading.FindStringSubmatch(match)
|
||||
if len(sub) < 2 {
|
||||
return match
|
||||
}
|
||||
// The heading content is fresh plain text — escape everything
|
||||
// including * so the resulting *…* bold span stays valid.
|
||||
return "*" + escapeMarkdownV2(sub[1]) + "*"
|
||||
})
|
||||
|
||||
// 2. Convert **bold** → *bold*
|
||||
text = reBoldStar.ReplaceAllString(text, "*$1*")
|
||||
|
||||
// 3. Recursively escape the full string.
|
||||
return processText(text)
|
||||
}
|
||||
|
||||
// processText walks `text`, finds the leftmost / longest matching entity,
|
||||
// escapes the gap before it, processes the entity (recursing into its inner
|
||||
// content when appropriate), then continues with the remainder.
|
||||
func processText(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Find the leftmost match among all entity patterns.
|
||||
bestStart := -1
|
||||
bestEnd := -1
|
||||
var bestPat *entityPattern
|
||||
|
||||
for i := range allEntityPatterns {
|
||||
p := &allEntityPatterns[i]
|
||||
loc := p.re.FindStringIndex(text)
|
||||
if loc == nil {
|
||||
continue
|
||||
}
|
||||
if bestStart == -1 || loc[0] < bestStart ||
|
||||
(loc[0] == bestStart && (loc[1]-loc[0]) > (bestEnd-bestStart)) {
|
||||
bestStart = loc[0]
|
||||
bestEnd = loc[1]
|
||||
bestPat = p
|
||||
}
|
||||
}
|
||||
|
||||
if bestPat == nil {
|
||||
// No entity found — escape everything.
|
||||
return escapeMarkdownV2(text)
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
|
||||
// Plain text before the entity.
|
||||
if bestStart > 0 {
|
||||
b.WriteString(escapeMarkdownV2(text[:bestStart]))
|
||||
}
|
||||
|
||||
// The matched entity span.
|
||||
matched := text[bestStart:bestEnd]
|
||||
|
||||
if verbatimEntities[bestPat.open] {
|
||||
// Code blocks, URLs, quotes: pass through completely untouched.
|
||||
b.WriteString(matched)
|
||||
} else {
|
||||
// Inline formatting (bold, italic, underline, strikethrough, spoiler):
|
||||
// keep the delimiters and recursively process the inner content so that
|
||||
// nested entities survive but stray specials get escaped.
|
||||
openLen := len(bestPat.open)
|
||||
closeLen := len(bestPat.close)
|
||||
inner := matched[openLen : len(matched)-closeLen]
|
||||
|
||||
b.WriteString(bestPat.open)
|
||||
b.WriteString(processText(inner))
|
||||
b.WriteString(bestPat.close)
|
||||
}
|
||||
|
||||
// Continue with the remainder of the string.
|
||||
b.WriteString(processText(text[bestEnd:]))
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// escapeMarkdownV2 escapes every MarkdownV2 special character in a plain-text
|
||||
// segment (i.e. a segment that is not part of any recognized entity).
|
||||
// Already-escaped sequences (backslash + char) are forwarded verbatim to avoid
|
||||
// double-escaping.
|
||||
func escapeMarkdownV2(s string) string {
|
||||
var b strings.Builder
|
||||
b.Grow(len(s) + 8)
|
||||
runes := []rune(s)
|
||||
for i := 0; i < len(runes); i++ {
|
||||
ch := runes[i]
|
||||
// Forward an existing escape sequence verbatim.
|
||||
if ch == '\\' && i+1 < len(runes) {
|
||||
b.WriteRune(ch)
|
||||
b.WriteRune(runes[i+1])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if mdV2SpecialChars[ch] {
|
||||
b.WriteByte('\\')
|
||||
}
|
||||
b.WriteRune(ch)
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
package telegram
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
//go:embed testdata/md2_all_formats.txt
|
||||
var md2AllFormats string
|
||||
|
||||
func Test_markdownToTelegramMarkdownV2(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "heading -> bolding",
|
||||
input: `## HeadingH2 #`,
|
||||
expected: "*HeadingH2 \\#*",
|
||||
},
|
||||
{
|
||||
name: "strikethrough",
|
||||
input: "~strikethroughMD~",
|
||||
expected: "~strikethroughMD~",
|
||||
},
|
||||
{
|
||||
name: "inline URL",
|
||||
input: "[inline URL](http://www.example.com/)",
|
||||
expected: "[inline URL](http://www.example.com/)",
|
||||
},
|
||||
{
|
||||
name: "all telegram formats",
|
||||
input: md2AllFormats,
|
||||
expected: md2AllFormats,
|
||||
},
|
||||
{
|
||||
name: "empty",
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "one letter",
|
||||
input: "o",
|
||||
expected: "o",
|
||||
},
|
||||
{
|
||||
name: "",
|
||||
input: "*Last update: ~10 24h*",
|
||||
expected: "*Last update: \\~10 24h*",
|
||||
},
|
||||
{
|
||||
name: "",
|
||||
input: "<Market Capitalization>",
|
||||
expected: "\\<Market Capitalization\\>",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actual := markdownToTelegramMarkdownV2(tc.input)
|
||||
|
||||
require.EqualValues(t, tc.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
package telegram
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func markdownToTelegramHTML(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
codeBlocks := extractCodeBlocks(text)
|
||||
text = codeBlocks.text
|
||||
|
||||
inlineCodes := extractInlineCodes(text)
|
||||
text = inlineCodes.text
|
||||
|
||||
text = reHeading.ReplaceAllString(text, "$1")
|
||||
|
||||
text = reBlockquote.ReplaceAllString(text, "$1")
|
||||
|
||||
text = escapeHTML(text)
|
||||
|
||||
text = reLink.ReplaceAllString(text, `<a href="$2">$1</a>`)
|
||||
|
||||
text = reBoldStar.ReplaceAllString(text, "<b>$1</b>")
|
||||
|
||||
text = reBoldUnder.ReplaceAllString(text, "<b>$1</b>")
|
||||
|
||||
text = reItalic.ReplaceAllStringFunc(text, func(s string) string {
|
||||
match := reItalic.FindStringSubmatch(s)
|
||||
if len(match) < 2 {
|
||||
return s
|
||||
}
|
||||
return "<i>" + match[1] + "</i>"
|
||||
})
|
||||
|
||||
text = reStrike.ReplaceAllString(text, "<s>$1</s>")
|
||||
|
||||
text = reListItem.ReplaceAllString(text, "• ")
|
||||
|
||||
for i, code := range inlineCodes.codes {
|
||||
escaped := escapeHTML(code)
|
||||
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
|
||||
}
|
||||
|
||||
for i, code := range codeBlocks.codes {
|
||||
escaped := escapeHTML(code)
|
||||
text = strings.ReplaceAll(
|
||||
text,
|
||||
fmt.Sprintf("\x00CB%d\x00", i),
|
||||
fmt.Sprintf("<pre><code>%s</code></pre>", escaped),
|
||||
)
|
||||
}
|
||||
|
||||
return text
|
||||
}
|
||||
|
||||
// codeBlockMatch is the result of extractCodeBlocks.
type codeBlockMatch struct {
	// text is the input with every code block replaced by a placeholder.
	text string
	// codes holds the captured block contents; element i belongs to the
	// placeholder numbered i.
	codes []string
}
|
||||
|
||||
func extractCodeBlocks(text string) codeBlockMatch {
|
||||
matches := reCodeBlock.FindAllStringSubmatch(text, -1)
|
||||
|
||||
codes := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
codes = append(codes, match[1])
|
||||
}
|
||||
|
||||
i := 0
|
||||
text = reCodeBlock.ReplaceAllStringFunc(text, func(m string) string {
|
||||
placeholder := fmt.Sprintf("\x00CB%d\x00", i)
|
||||
i++
|
||||
return placeholder
|
||||
})
|
||||
|
||||
return codeBlockMatch{text: text, codes: codes}
|
||||
}
|
||||
|
||||
// inlineCodeMatch is the result of extractInlineCodes.
type inlineCodeMatch struct {
	// text is the input with every inline code span replaced by a placeholder.
	text string
	// codes holds the captured span contents; element i belongs to the
	// placeholder numbered i.
	codes []string
}
|
||||
|
||||
func extractInlineCodes(text string) inlineCodeMatch {
|
||||
matches := reInlineCode.FindAllStringSubmatch(text, -1)
|
||||
|
||||
codes := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
codes = append(codes, match[1])
|
||||
}
|
||||
|
||||
i := 0
|
||||
text = reInlineCode.ReplaceAllStringFunc(text, func(m string) string {
|
||||
placeholder := fmt.Sprintf("\x00IC%d\x00", i)
|
||||
i++
|
||||
return placeholder
|
||||
})
|
||||
|
||||
return inlineCodeMatch{text: text, codes: codes}
|
||||
}
|
||||
|
||||
// escapeHTML replaces the three characters that are significant in HTML
// markup — '&', '<' and '>' — with their named entities so that text can be
// embedded in an HTML-formatted message without being read as tags.
func escapeHTML(text string) string {
	// '&' must be handled first; otherwise the ampersands introduced by the
	// other two replacements would be escaped a second time.
	text = strings.ReplaceAll(text, "&", "&amp;")
	text = strings.ReplaceAll(text, "<", "&lt;")
	text = strings.ReplaceAll(text, ">", "&gt;")
	return text
}
|
||||
+129
-128
@@ -3,6 +3,7 @@ package telegram
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -26,7 +27,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
reHeading = regexp.MustCompile(`^#{1,6}\s+(.+)$`)
|
||||
reHeading = regexp.MustCompile(`(?m)^#{1,6}\s+([^\n]+)`)
|
||||
reBlockquote = regexp.MustCompile(`^>\s*(.*)$`)
|
||||
reLink = regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
|
||||
reBoldStar = regexp.MustCompile(`\*\*(.+?)\*\*`)
|
||||
@@ -169,6 +170,8 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
|
||||
return channels.ErrNotRunning
|
||||
}
|
||||
|
||||
useMarkdownV2 := c.config.Channels.Telegram.UseMarkdownV2
|
||||
|
||||
chatID, threadID, err := parseTelegramChatID(msg.ChatID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid chat ID %s: %w", msg.ChatID, channels.ErrSendFailed)
|
||||
@@ -187,22 +190,65 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
|
||||
chunk := queue[0]
|
||||
queue = queue[1:]
|
||||
|
||||
htmlContent := markdownToTelegramHTML(chunk)
|
||||
content := parseContent(chunk, useMarkdownV2)
|
||||
|
||||
if len([]rune(htmlContent)) > 4096 {
|
||||
ratio := float64(len([]rune(chunk))) / float64(len([]rune(htmlContent)))
|
||||
if len([]rune(content)) > 4096 {
|
||||
runeChunk := []rune(chunk)
|
||||
ratio := float64(len(runeChunk)) / float64(len([]rune(content)))
|
||||
smallerLen := int(float64(4096) * ratio * 0.95) // 5% safety margin
|
||||
if smallerLen < 100 {
|
||||
smallerLen = 100
|
||||
|
||||
// Guarantee progress: if estimated length is >= chunk length, force it smaller
|
||||
if smallerLen >= len(runeChunk) {
|
||||
smallerLen = len(runeChunk) - 1
|
||||
}
|
||||
// Push sub-chunks back to the front of the queue for
|
||||
// re-validation instead of sending them blindly.
|
||||
|
||||
if smallerLen <= 0 {
|
||||
if err := c.sendChunk(ctx, sendChunkParams{
|
||||
chatID: chatID,
|
||||
threadID: threadID,
|
||||
content: content,
|
||||
replyToID: replyToID,
|
||||
mdFallback: chunk,
|
||||
useMarkdownV2: useMarkdownV2,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
replyToID = ""
|
||||
continue
|
||||
}
|
||||
|
||||
// Use the estimated smaller length as a guide for SplitMessage.
|
||||
// SplitMessage will find natural break points (newlines/spaces) and respect code blocks.
|
||||
subChunks := channels.SplitMessage(chunk, smallerLen)
|
||||
queue = append(subChunks, queue...)
|
||||
|
||||
// Safety fallback: If SplitMessage failed to shorten the chunk, force a manual hard split.
|
||||
if len(subChunks) == 1 && subChunks[0] == chunk {
|
||||
part1 := string(runeChunk[:smallerLen])
|
||||
part2 := string(runeChunk[smallerLen:])
|
||||
subChunks = []string{part1, part2}
|
||||
}
|
||||
|
||||
// Filter out empty chunks to avoid sending empty messages to Telegram.
|
||||
nonEmpty := make([]string, 0, len(subChunks))
|
||||
for _, s := range subChunks {
|
||||
if s != "" {
|
||||
nonEmpty = append(nonEmpty, s)
|
||||
}
|
||||
}
|
||||
|
||||
// Push sub-chunks back to the front of the queue
|
||||
queue = append(nonEmpty, queue...)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := c.sendHTMLChunk(ctx, chatID, threadID, htmlContent, chunk, replyToID); err != nil {
|
||||
if err := c.sendChunk(ctx, sendChunkParams{
|
||||
chatID: chatID,
|
||||
threadID: threadID,
|
||||
content: content,
|
||||
replyToID: replyToID,
|
||||
mdFallback: chunk,
|
||||
useMarkdownV2: useMarkdownV2,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
// Only the first chunk should be a reply; subsequent chunks are normal messages.
|
||||
@@ -212,17 +258,31 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendHTMLChunk sends a single HTML message, falling back to the original
|
||||
// markdown as plain text on parse failure so users never see raw HTML tags.
|
||||
func (c *TelegramChannel) sendHTMLChunk(
|
||||
ctx context.Context, chatID int64, threadID int, htmlContent, mdFallback string, replyToID string,
|
||||
) error {
|
||||
tgMsg := tu.Message(tu.ID(chatID), htmlContent)
|
||||
tgMsg.ParseMode = telego.ModeHTML
|
||||
tgMsg.MessageThreadID = threadID
|
||||
type sendChunkParams struct {
|
||||
chatID int64
|
||||
threadID int
|
||||
content string
|
||||
replyToID string
|
||||
mdFallback string
|
||||
useMarkdownV2 bool
|
||||
}
|
||||
|
||||
if replyToID != "" {
|
||||
if mid, parseErr := strconv.Atoi(replyToID); parseErr == nil {
|
||||
// sendChunk sends a single HTML/MarkdownV2 message, falling back to the original
|
||||
// markdown as plain text on parse failure so users never see raw HTML/MarkdownV2 tags.
|
||||
func (c *TelegramChannel) sendChunk(
|
||||
ctx context.Context,
|
||||
params sendChunkParams,
|
||||
) error {
|
||||
tgMsg := tu.Message(tu.ID(params.chatID), params.content)
|
||||
tgMsg.MessageThreadID = params.threadID
|
||||
if params.useMarkdownV2 {
|
||||
tgMsg.WithParseMode(telego.ModeMarkdownV2)
|
||||
} else {
|
||||
tgMsg.WithParseMode(telego.ModeHTML)
|
||||
}
|
||||
|
||||
if params.replyToID != "" {
|
||||
if mid, parseErr := strconv.Atoi(params.replyToID); parseErr == nil {
|
||||
tgMsg.ReplyParameters = &telego.ReplyParameters{
|
||||
MessageID: mid,
|
||||
}
|
||||
@@ -230,15 +290,15 @@ func (c *TelegramChannel) sendHTMLChunk(
|
||||
}
|
||||
|
||||
if _, err := c.bot.SendMessage(ctx, tgMsg); err != nil {
|
||||
logger.ErrorCF("telegram", "HTML parse failed, falling back to plain text", map[string]any{
|
||||
"error": err.Error(),
|
||||
})
|
||||
tgMsg.Text = mdFallback
|
||||
logParseFailed(err, params.useMarkdownV2)
|
||||
|
||||
tgMsg.Text = params.mdFallback
|
||||
tgMsg.ParseMode = ""
|
||||
if _, err = c.bot.SendMessage(ctx, tgMsg); err != nil {
|
||||
return fmt.Errorf("telegram send: %w", channels.ErrTemporary)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -289,6 +349,7 @@ func (c *TelegramChannel) StartTyping(ctx context.Context, chatID string) (func(
|
||||
|
||||
// EditMessage implements channels.MessageEditor.
|
||||
func (c *TelegramChannel) EditMessage(ctx context.Context, chatID string, messageID string, content string) error {
|
||||
useMarkdownV2 := c.config.Channels.Telegram.UseMarkdownV2
|
||||
cid, _, err := parseTelegramChatID(chatID)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -297,10 +358,19 @@ func (c *TelegramChannel) EditMessage(ctx context.Context, chatID string, messag
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
htmlContent := markdownToTelegramHTML(content)
|
||||
editMsg := tu.EditMessageText(tu.ID(cid), mid, htmlContent)
|
||||
editMsg.ParseMode = telego.ModeHTML
|
||||
parsedContent := parseContent(content, useMarkdownV2)
|
||||
editMsg := tu.EditMessageText(tu.ID(cid), mid, parsedContent)
|
||||
if useMarkdownV2 {
|
||||
editMsg.WithParseMode(telego.ModeMarkdownV2)
|
||||
} else {
|
||||
editMsg.WithParseMode(telego.ModeHTML)
|
||||
}
|
||||
_, err = c.bot.EditMessageText(ctx, editMsg)
|
||||
if err != nil {
|
||||
logParseFailed(err, useMarkdownV2)
|
||||
_, err = c.bot.EditMessageText(ctx, tu.EditMessageText(tu.ID(cid), mid, content))
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -377,6 +447,20 @@ func (c *TelegramChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMe
|
||||
Caption: part.Caption,
|
||||
}
|
||||
_, err = c.bot.SendPhoto(ctx, params)
|
||||
if err != nil && strings.Contains(err.Error(), "PHOTO_INVALID_DIMENSIONS") {
|
||||
if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil {
|
||||
file.Close()
|
||||
return fmt.Errorf("telegram rewind media after photo failure: %w", channels.ErrTemporary)
|
||||
}
|
||||
|
||||
docParams := &telego.SendDocumentParams{
|
||||
ChatID: tu.ID(chatID),
|
||||
MessageThreadID: threadID,
|
||||
Document: telego.InputFile{File: file},
|
||||
Caption: part.Caption,
|
||||
}
|
||||
_, err = c.bot.SendDocument(ctx, docParams)
|
||||
}
|
||||
case "audio":
|
||||
params := &telego.SendAudioParams{
|
||||
ChatID: tu.ID(chatID),
|
||||
@@ -634,6 +718,14 @@ func (c *TelegramChannel) downloadFile(ctx context.Context, fileID, ext string)
|
||||
return c.downloadFileWithInfo(file, ext)
|
||||
}
|
||||
|
||||
func parseContent(text string, useMarkdownV2 bool) string {
|
||||
if useMarkdownV2 {
|
||||
return markdownToTelegramMarkdownV2(text)
|
||||
}
|
||||
|
||||
return markdownToTelegramHTML(text)
|
||||
}
|
||||
|
||||
// parseTelegramChatID splits "chatID/threadID" into its components.
|
||||
// Returns threadID=0 when no "/" is present (non-forum messages).
|
||||
func parseTelegramChatID(chatID string) (int64, int, error) {
|
||||
@@ -653,109 +745,18 @@ func parseTelegramChatID(chatID string) (int64, int, error) {
|
||||
return cid, tid, nil
|
||||
}
|
||||
|
||||
func markdownToTelegramHTML(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
func logParseFailed(err error, useMarkdownV2 bool) {
|
||||
parsingName := "HTML"
|
||||
if useMarkdownV2 {
|
||||
parsingName = "MarkdownV2"
|
||||
}
|
||||
|
||||
codeBlocks := extractCodeBlocks(text)
|
||||
text = codeBlocks.text
|
||||
|
||||
inlineCodes := extractInlineCodes(text)
|
||||
text = inlineCodes.text
|
||||
|
||||
text = reHeading.ReplaceAllString(text, "$1")
|
||||
|
||||
text = reBlockquote.ReplaceAllString(text, "$1")
|
||||
|
||||
text = escapeHTML(text)
|
||||
|
||||
text = reLink.ReplaceAllString(text, `<a href="$2">$1</a>`)
|
||||
|
||||
text = reBoldStar.ReplaceAllString(text, "<b>$1</b>")
|
||||
|
||||
text = reBoldUnder.ReplaceAllString(text, "<b>$1</b>")
|
||||
|
||||
text = reItalic.ReplaceAllStringFunc(text, func(s string) string {
|
||||
match := reItalic.FindStringSubmatch(s)
|
||||
if len(match) < 2 {
|
||||
return s
|
||||
}
|
||||
return "<i>" + match[1] + "</i>"
|
||||
})
|
||||
|
||||
text = reStrike.ReplaceAllString(text, "<s>$1</s>")
|
||||
|
||||
text = reListItem.ReplaceAllString(text, "• ")
|
||||
|
||||
for i, code := range inlineCodes.codes {
|
||||
escaped := escapeHTML(code)
|
||||
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
|
||||
}
|
||||
|
||||
for i, code := range codeBlocks.codes {
|
||||
escaped := escapeHTML(code)
|
||||
text = strings.ReplaceAll(
|
||||
text,
|
||||
fmt.Sprintf("\x00CB%d\x00", i),
|
||||
fmt.Sprintf("<pre><code>%s</code></pre>", escaped),
|
||||
)
|
||||
}
|
||||
|
||||
return text
|
||||
}
|
||||
|
||||
type codeBlockMatch struct {
|
||||
text string
|
||||
codes []string
|
||||
}
|
||||
|
||||
func extractCodeBlocks(text string) codeBlockMatch {
|
||||
matches := reCodeBlock.FindAllStringSubmatch(text, -1)
|
||||
|
||||
codes := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
codes = append(codes, match[1])
|
||||
}
|
||||
|
||||
i := 0
|
||||
text = reCodeBlock.ReplaceAllStringFunc(text, func(m string) string {
|
||||
placeholder := fmt.Sprintf("\x00CB%d\x00", i)
|
||||
i++
|
||||
return placeholder
|
||||
})
|
||||
|
||||
return codeBlockMatch{text: text, codes: codes}
|
||||
}
|
||||
|
||||
type inlineCodeMatch struct {
|
||||
text string
|
||||
codes []string
|
||||
}
|
||||
|
||||
func extractInlineCodes(text string) inlineCodeMatch {
|
||||
matches := reInlineCode.FindAllStringSubmatch(text, -1)
|
||||
|
||||
codes := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
codes = append(codes, match[1])
|
||||
}
|
||||
|
||||
i := 0
|
||||
text = reInlineCode.ReplaceAllStringFunc(text, func(m string) string {
|
||||
placeholder := fmt.Sprintf("\x00IC%d\x00", i)
|
||||
i++
|
||||
return placeholder
|
||||
})
|
||||
|
||||
return inlineCodeMatch{text: text, codes: codes}
|
||||
}
|
||||
|
||||
func escapeHTML(text string) string {
|
||||
text = strings.ReplaceAll(text, "&", "&")
|
||||
text = strings.ReplaceAll(text, "<", "<")
|
||||
text = strings.ReplaceAll(text, ">", ">")
|
||||
return text
|
||||
logger.ErrorCF("telegram",
|
||||
fmt.Sprintf("%s parse failed, falling back to plain text", parsingName),
|
||||
map[string]any{
|
||||
"error": err.Error(),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// isBotMentioned checks if the bot is mentioned in the message via entities.
|
||||
|
||||
@@ -3,7 +3,6 @@ package telegram
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mymmrac/telego"
|
||||
|
||||
@@ -36,10 +35,7 @@ func TestHandleMessage_DoesNotConsumeGenericCommandsLocally(t *testing.T) {
|
||||
t.Fatalf("handleMessage error: %v", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
|
||||
inbound, ok := messageBus.ConsumeInbound(ctx)
|
||||
inbound, ok := <-messageBus.InboundChan()
|
||||
if !ok {
|
||||
t.Fatal("expected inbound message to be forwarded")
|
||||
}
|
||||
|
||||
@@ -108,22 +108,24 @@ func TestHandleMessage_GroupMentionOnly_BotCommandEntity(t *testing.T) {
|
||||
t.Fatalf("handleMessage error: %v", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 150*time.Millisecond)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Microsecond)
|
||||
defer cancel()
|
||||
|
||||
inbound, ok := messageBus.ConsumeInbound(ctx)
|
||||
if tc.wantForwarded {
|
||||
if !ok {
|
||||
t.Fatal("expected inbound message to be forwarded")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
if tc.wantForwarded {
|
||||
t.Fatal("timeout waiting for message to be forwarded")
|
||||
return
|
||||
}
|
||||
if inbound.Content != tc.wantContent {
|
||||
t.Fatalf("content=%q want=%q", inbound.Content, tc.wantContent)
|
||||
case inbound, ok := <-messageBus.InboundChan():
|
||||
if tc.wantForwarded {
|
||||
if !ok {
|
||||
t.Fatal("expected inbound message to be forwarded")
|
||||
}
|
||||
if inbound.Content != tc.wantContent {
|
||||
t.Fatalf("content=%q want=%q", inbound.Content, tc.wantContent)
|
||||
}
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if ok {
|
||||
t.Fatalf("expected message to be filtered, got content=%q", inbound.Content)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4,9 +4,11 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mymmrac/telego"
|
||||
ta "github.com/mymmrac/telego/telegoapi"
|
||||
@@ -15,6 +17,8 @@ import (
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/bus"
|
||||
"github.com/sipeed/picoclaw/pkg/channels"
|
||||
"github.com/sipeed/picoclaw/pkg/config"
|
||||
"github.com/sipeed/picoclaw/pkg/media"
|
||||
)
|
||||
|
||||
const testToken = "1234567890:aaaabbbbaaaabbbbaaaabbbbaaaabbbbccc"
|
||||
@@ -38,8 +42,20 @@ func (s *stubCaller) Call(ctx context.Context, url string, data *ta.RequestData)
|
||||
// stubConstructor implements ta.RequestConstructor for testing.
|
||||
type stubConstructor struct{}
|
||||
|
||||
// multipartCall is a snapshot of one MultipartRequest invocation taken by
// multipartRecordingConstructor.
type multipartCall struct {
	// Parameters is a copy of the form parameters passed to the call.
	Parameters map[string]string
	// FileSizes maps each file field name to the number of bytes read from it.
	FileSizes map[string]int
}
|
||||
|
||||
func (s *stubConstructor) JSONRequest(parameters any) (*ta.RequestData, error) {
|
||||
return &ta.RequestData{}, nil
|
||||
b, err := json.Marshal(parameters)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &ta.RequestData{
|
||||
ContentType: "application/json",
|
||||
BodyRaw: b,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *stubConstructor) MultipartRequest(
|
||||
@@ -49,6 +65,36 @@ func (s *stubConstructor) MultipartRequest(
|
||||
return &ta.RequestData{}, nil
|
||||
}
|
||||
|
||||
type multipartRecordingConstructor struct {
|
||||
stubConstructor
|
||||
calls []multipartCall
|
||||
}
|
||||
|
||||
func (s *multipartRecordingConstructor) MultipartRequest(
|
||||
parameters map[string]string,
|
||||
files map[string]ta.NamedReader,
|
||||
) (*ta.RequestData, error) {
|
||||
call := multipartCall{
|
||||
Parameters: make(map[string]string, len(parameters)),
|
||||
FileSizes: make(map[string]int, len(files)),
|
||||
}
|
||||
for k, v := range parameters {
|
||||
call.Parameters[k] = v
|
||||
}
|
||||
for field, file := range files {
|
||||
if file == nil {
|
||||
continue
|
||||
}
|
||||
data, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
call.FileSizes[field] = len(data)
|
||||
}
|
||||
s.calls = append(s.calls, call)
|
||||
return &ta.RequestData{}, nil
|
||||
}
|
||||
|
||||
// successResponse returns a ta.Response that telego will treat as a successful SendMessage.
|
||||
func successResponse(t *testing.T) *ta.Response {
|
||||
t.Helper()
|
||||
@@ -60,11 +106,19 @@ func successResponse(t *testing.T) *ta.Response {
|
||||
|
||||
// newTestChannel creates a TelegramChannel with a mocked bot for unit testing.
|
||||
func newTestChannel(t *testing.T, caller *stubCaller) *TelegramChannel {
|
||||
return newTestChannelWithConstructor(t, caller, &stubConstructor{})
|
||||
}
|
||||
|
||||
func newTestChannelWithConstructor(
|
||||
t *testing.T,
|
||||
caller *stubCaller,
|
||||
constructor ta.RequestConstructor,
|
||||
) *TelegramChannel {
|
||||
t.Helper()
|
||||
|
||||
bot, err := telego.NewBot(testToken,
|
||||
telego.WithAPICaller(caller),
|
||||
telego.WithRequestConstructor(&stubConstructor{}),
|
||||
telego.WithRequestConstructor(constructor),
|
||||
telego.WithDiscardLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
@@ -78,9 +132,96 @@ func newTestChannel(t *testing.T, caller *stubCaller) *TelegramChannel {
|
||||
BaseChannel: base,
|
||||
bot: bot,
|
||||
chatIDs: make(map[string]int64),
|
||||
config: config.DefaultConfig(),
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendMedia_ImageFallbacksToDocumentOnInvalidDimensions(t *testing.T) {
|
||||
constructor := &multipartRecordingConstructor{}
|
||||
caller := &stubCaller{
|
||||
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
|
||||
switch {
|
||||
case strings.Contains(url, "sendPhoto"):
|
||||
return nil, errors.New(`api: 400 "Bad Request: PHOTO_INVALID_DIMENSIONS"`)
|
||||
case strings.Contains(url, "sendDocument"):
|
||||
return successResponse(t), nil
|
||||
default:
|
||||
t.Fatalf("unexpected API call: %s", url)
|
||||
return nil, nil
|
||||
}
|
||||
},
|
||||
}
|
||||
ch := newTestChannelWithConstructor(t, caller, constructor)
|
||||
|
||||
store := media.NewFileMediaStore()
|
||||
ch.SetMediaStore(store)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
localPath := filepath.Join(tmpDir, "woodstock-en-10s.png")
|
||||
content := []byte("fake-png-content")
|
||||
require.NoError(t, os.WriteFile(localPath, content, 0o644))
|
||||
|
||||
ref, err := store.Store(
|
||||
localPath,
|
||||
media.MediaMeta{Filename: "woodstock-en-10s.png", ContentType: "image/png"},
|
||||
"scope-1",
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
|
||||
ChatID: "12345",
|
||||
Parts: []bus.MediaPart{{
|
||||
Type: "image",
|
||||
Ref: ref,
|
||||
Caption: "caption",
|
||||
}},
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, caller.calls, 2)
|
||||
assert.Contains(t, caller.calls[0].URL, "sendPhoto")
|
||||
assert.Contains(t, caller.calls[1].URL, "sendDocument")
|
||||
require.Len(t, constructor.calls, 2)
|
||||
assert.Equal(t, len(content), constructor.calls[0].FileSizes["photo"])
|
||||
assert.Equal(t, len(content), constructor.calls[1].FileSizes["document"])
|
||||
assert.Equal(t, "caption", constructor.calls[1].Parameters["caption"])
|
||||
}
|
||||
|
||||
func TestSendMedia_ImageNonDimensionErrorDoesNotFallback(t *testing.T) {
|
||||
constructor := &multipartRecordingConstructor{}
|
||||
caller := &stubCaller{
|
||||
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
|
||||
return nil, errors.New("api: 500 \"server exploded\"")
|
||||
},
|
||||
}
|
||||
ch := newTestChannelWithConstructor(t, caller, constructor)
|
||||
|
||||
store := media.NewFileMediaStore()
|
||||
ch.SetMediaStore(store)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
localPath := filepath.Join(tmpDir, "image.png")
|
||||
require.NoError(t, os.WriteFile(localPath, []byte("fake-png-content"), 0o644))
|
||||
|
||||
ref, err := store.Store(localPath, media.MediaMeta{Filename: "image.png", ContentType: "image/png"}, "scope-1")
|
||||
require.NoError(t, err)
|
||||
|
||||
err = ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
|
||||
ChatID: "12345",
|
||||
Parts: []bus.MediaPart{{
|
||||
Type: "image",
|
||||
Ref: ref,
|
||||
}},
|
||||
})
|
||||
|
||||
require.Error(t, err)
|
||||
assert.ErrorIs(t, err, channels.ErrTemporary)
|
||||
require.Len(t, caller.calls, 1)
|
||||
assert.Contains(t, caller.calls[0].URL, "sendPhoto")
|
||||
require.Len(t, constructor.calls, 1)
|
||||
assert.NotContains(t, caller.calls[0].URL, "sendDocument")
|
||||
}
|
||||
|
||||
func TestSend_EmptyContent(t *testing.T) {
|
||||
caller := &stubCaller{
|
||||
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
|
||||
@@ -235,6 +376,55 @@ func TestSend_MarkdownShortButHTMLLong_MultipleCalls(t *testing.T) {
|
||||
)
|
||||
}
|
||||
|
||||
func TestSend_HTMLOverflow_WordBoundary(t *testing.T) {
|
||||
caller := &stubCaller{
|
||||
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
|
||||
return successResponse(t), nil
|
||||
},
|
||||
}
|
||||
ch := newTestChannel(t, caller)
|
||||
|
||||
// We want to force a split near index ~2600 while keeping markdown length <= 4000.
|
||||
// Prefix of 430 bold units (6 chars each) = 2580 chars.
|
||||
// Expansion per unit is +3 chars when converted to HTML, so 2580 + 430*3 = 3870.
|
||||
prefix := strings.Repeat("**a** ", 430)
|
||||
targetWord := "TARGETWORDTHATSTAYSTOGETHER"
|
||||
// Suffix of 230 bold units (6 chars each) = 1380 chars.
|
||||
// Total markdown length: 2580 (prefix) + 27 (target word) + 1380 (suffix) = 3987 <= 4000.
|
||||
// HTML expansion adds ~3 chars per bold unit: (430 + 230)*3 = 1980 extra chars,
|
||||
// so total HTML length comfortably exceeds 4096.
|
||||
suffix := strings.Repeat(" **b**", 230)
|
||||
content := prefix + targetWord + suffix
|
||||
|
||||
// Ensure the test content matches the intended boundary conditions.
|
||||
assert.LessOrEqual(t, len([]rune(content)), 4000, "markdown content must not exceed chunk size for this test")
|
||||
|
||||
err := ch.Send(context.Background(), bus.OutboundMessage{
|
||||
ChatID: "123456",
|
||||
Content: content,
|
||||
})
|
||||
|
||||
assert.NoError(t, err)
|
||||
|
||||
foundFullWord := false
|
||||
for i, call := range caller.calls {
|
||||
var params map[string]any
|
||||
err := json.Unmarshal(call.Data.BodyRaw, ¶ms)
|
||||
require.NoError(t, err)
|
||||
text, _ := params["text"].(string)
|
||||
|
||||
hasWord := strings.Contains(text, targetWord)
|
||||
t.Logf("Chunk %d length: %d, contains target word: %v", i, len(text), hasWord)
|
||||
|
||||
if hasWord {
|
||||
foundFullWord = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
assert.True(t, foundFullWord, "The target word should not be split between chunks")
|
||||
}
|
||||
|
||||
func TestSend_NotRunning(t *testing.T) {
|
||||
caller := &stubCaller{
|
||||
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
|
||||
@@ -355,10 +545,7 @@ func TestHandleMessage_ForumTopic_SetsMetadata(t *testing.T) {
|
||||
err := ch.handleMessage(context.Background(), msg)
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
|
||||
inbound, ok := messageBus.ConsumeInbound(ctx)
|
||||
inbound, ok := <-messageBus.InboundChan()
|
||||
require.True(t, ok, "expected inbound message")
|
||||
|
||||
// Composite chatID should include thread ID
|
||||
@@ -397,10 +584,7 @@ func TestHandleMessage_NoForum_NoThreadMetadata(t *testing.T) {
|
||||
err := ch.handleMessage(context.Background(), msg)
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
|
||||
inbound, ok := messageBus.ConsumeInbound(ctx)
|
||||
inbound, ok := <-messageBus.InboundChan()
|
||||
require.True(t, ok)
|
||||
|
||||
// Plain chatID without thread suffix
|
||||
@@ -443,10 +627,7 @@ func TestHandleMessage_ReplyThread_NonForum_NoIsolation(t *testing.T) {
|
||||
err := ch.handleMessage(context.Background(), msg)
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
|
||||
inbound, ok := messageBus.ConsumeInbound(ctx)
|
||||
inbound, ok := <-messageBus.InboundChan()
|
||||
require.True(t, ok)
|
||||
|
||||
// chatID should NOT include thread suffix for non-forum groups
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
*bold \*text*
|
||||
_italic \*text_
|
||||
__underline__
|
||||
~strikethrough~
|
||||
||spoiler||
|
||||
*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold*
|
||||
[inline URL](http://www.example.com/)
|
||||
[inline mention of a user](tg://user?id=123456789)
|
||||

|
||||

|
||||

|
||||

|
||||

|
||||
`inline fixed-width code`
|
||||
```
|
||||
pre-formatted fixed-width code block
|
||||
```
|
||||
```python
|
||||
pre-formatted fixed-width code block written in the Python programming language
|
||||
```
|
||||
>Block quotation started
|
||||
>Block quotation continued
|
||||
>Block quotation continued
|
||||
>Block quotation continued
|
||||
>The last line of the block quotation
|
||||
**>The expandable block quotation started right after the previous block quotation
|
||||
>It is separated from the previous block quotation by an empty bold entity
|
||||
>Expandable block quotation continued
|
||||
>Hidden by default part of the expandable block quotation started
|
||||
>Expandable block quotation continued
|
||||
>The last line of the expandable block quotation with the expandability mark||
|
||||
Reference in New Issue
Block a user