mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
c36b06a901
reItalic (_text_) ran after reLink had converted [text](url) to <a href>, injecting <i> tags into URLs containing underscores (e.g. Google Flights URL-safe base64 in the tfs param). Telegram silently dropped such malformed <a> tags, so only 1 of 3 links appeared in messages. Fix: extract markdown links into placeholders before any formatting runs, and restore them as <a href> only after all formatting passes have finished — the same placeholder pattern used for code blocks.
142 lines
3.1 KiB
Go
142 lines
3.1 KiB
Go
package telegram
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
func markdownToTelegramHTML(text string) string {
|
|
if text == "" {
|
|
return ""
|
|
}
|
|
|
|
codeBlocks := extractCodeBlocks(text)
|
|
text = codeBlocks.text
|
|
|
|
inlineCodes := extractInlineCodes(text)
|
|
text = inlineCodes.text
|
|
|
|
links := extractLinks(text)
|
|
text = links.text
|
|
|
|
text = reHeading.ReplaceAllString(text, "$1")
|
|
|
|
text = reBlockquote.ReplaceAllString(text, "$1")
|
|
|
|
text = escapeHTML(text)
|
|
|
|
text = reBoldStar.ReplaceAllString(text, "<b>$1</b>")
|
|
|
|
text = reBoldUnder.ReplaceAllString(text, "<b>$1</b>")
|
|
|
|
text = reItalic.ReplaceAllStringFunc(text, func(s string) string {
|
|
match := reItalic.FindStringSubmatch(s)
|
|
if len(match) < 2 {
|
|
return s
|
|
}
|
|
return "<i>" + match[1] + "</i>"
|
|
})
|
|
|
|
text = reStrike.ReplaceAllString(text, "<s>$1</s>")
|
|
|
|
text = reListItem.ReplaceAllString(text, "• ")
|
|
|
|
for i, lnk := range links.links {
|
|
label := escapeHTML(lnk[0])
|
|
url := lnk[1]
|
|
text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`<a href="%s">%s</a>`, url, label))
|
|
}
|
|
|
|
for i, code := range inlineCodes.codes {
|
|
escaped := escapeHTML(code)
|
|
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
|
|
}
|
|
|
|
for i, code := range codeBlocks.codes {
|
|
escaped := escapeHTML(code)
|
|
text = strings.ReplaceAll(
|
|
text,
|
|
fmt.Sprintf("\x00CB%d\x00", i),
|
|
fmt.Sprintf("<pre><code>%s</code></pre>", escaped),
|
|
)
|
|
}
|
|
|
|
return text
|
|
}
|
|
|
|
// linkMatch is the result of extractLinks.
type linkMatch struct {
	// text is the input with every matched link replaced by a
	// "\x00LK<n>\x00" placeholder.
	text string

	// links holds one {label, url} pair per placeholder; the pair at index n
	// belongs to placeholder n.
	links [][2]string
}
|
|
|
|
func extractLinks(text string) linkMatch {
|
|
matches := reLink.FindAllStringSubmatch(text, -1)
|
|
|
|
extracted := make([][2]string, 0, len(matches))
|
|
for _, match := range matches {
|
|
extracted = append(extracted, [2]string{match[1], match[2]})
|
|
}
|
|
|
|
i := 0
|
|
text = reLink.ReplaceAllStringFunc(text, func(m string) string {
|
|
placeholder := fmt.Sprintf("\x00LK%d\x00", i)
|
|
i++
|
|
return placeholder
|
|
})
|
|
|
|
return linkMatch{text: text, links: extracted}
|
|
}
|
|
|
|
// codeBlockMatch is the result of extractCodeBlocks.
type codeBlockMatch struct {
	// text is the input with every matched code block replaced by a
	// "\x00CB<n>\x00" placeholder.
	text string

	// codes holds the captured content of each block; codes[n] belongs to
	// placeholder n.
	codes []string
}
|
|
|
|
func extractCodeBlocks(text string) codeBlockMatch {
|
|
matches := reCodeBlock.FindAllStringSubmatch(text, -1)
|
|
|
|
codes := make([]string, 0, len(matches))
|
|
for _, match := range matches {
|
|
codes = append(codes, match[1])
|
|
}
|
|
|
|
i := 0
|
|
text = reCodeBlock.ReplaceAllStringFunc(text, func(m string) string {
|
|
placeholder := fmt.Sprintf("\x00CB%d\x00", i)
|
|
i++
|
|
return placeholder
|
|
})
|
|
|
|
return codeBlockMatch{text: text, codes: codes}
|
|
}
|
|
|
|
// inlineCodeMatch is the result of extractInlineCodes.
type inlineCodeMatch struct {
	// text is the input with every matched inline code span replaced by a
	// "\x00IC<n>\x00" placeholder.
	text string

	// codes holds the captured content of each span; codes[n] belongs to
	// placeholder n.
	codes []string
}
|
|
|
|
func extractInlineCodes(text string) inlineCodeMatch {
|
|
matches := reInlineCode.FindAllStringSubmatch(text, -1)
|
|
|
|
codes := make([]string, 0, len(matches))
|
|
for _, match := range matches {
|
|
codes = append(codes, match[1])
|
|
}
|
|
|
|
i := 0
|
|
text = reInlineCode.ReplaceAllStringFunc(text, func(m string) string {
|
|
placeholder := fmt.Sprintf("\x00IC%d\x00", i)
|
|
i++
|
|
return placeholder
|
|
})
|
|
|
|
return inlineCodeMatch{text: text, codes: codes}
|
|
}
|
|
|
|
// escapeHTML returns text with the characters &, < and > replaced by the
// entities &amp;, &lt; and &gt;, so the result can be embedded as HTML text
// content. Quotes are left untouched.
func escapeHTML(text string) string {
	// "&" must be handled first; otherwise the ampersands introduced by the
	// "&lt;" and "&gt;" replacements would themselves be escaped again.
	text = strings.ReplaceAll(text, "&", "&amp;")
	text = strings.ReplaceAll(text, "<", "&lt;")
	text = strings.ReplaceAll(text, ">", "&gt;")
	return text
}
|