mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
Fix Telegram HTML links broken by italic regex matching inside href URLs (#2164)
reItalic (_text_) ran after reLink converted [text](url) to <a href>, injecting <i> tags into URLs containing underscores (e.g. Google Flights URL-safe base64 in the tfs param). Telegram silently dropped such malformed <a> tags, causing only 1 of 3 links to appear in messages. Fix: extract markdown links into placeholders before any formatting runs, restore them as <a href> last — same pattern used for code blocks.
This commit is contained in:
@@ -16,14 +16,15 @@ func markdownToTelegramHTML(text string) string {
|
||||
inlineCodes := extractInlineCodes(text)
|
||||
text = inlineCodes.text
|
||||
|
||||
links := extractLinks(text)
|
||||
text = links.text
|
||||
|
||||
text = reHeading.ReplaceAllString(text, "$1")
|
||||
|
||||
text = reBlockquote.ReplaceAllString(text, "$1")
|
||||
|
||||
text = escapeHTML(text)
|
||||
|
||||
text = reLink.ReplaceAllString(text, `<a href="$2">$1</a>`)
|
||||
|
||||
text = reBoldStar.ReplaceAllString(text, "<b>$1</b>")
|
||||
|
||||
text = reBoldUnder.ReplaceAllString(text, "<b>$1</b>")
|
||||
@@ -40,6 +41,12 @@ func markdownToTelegramHTML(text string) string {
|
||||
|
||||
text = reListItem.ReplaceAllString(text, "• ")
|
||||
|
||||
for i, lnk := range links.links {
|
||||
label := escapeHTML(lnk[0])
|
||||
url := lnk[1]
|
||||
text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`<a href="%s">%s</a>`, url, label))
|
||||
}
|
||||
|
||||
for i, code := range inlineCodes.codes {
|
||||
escaped := escapeHTML(code)
|
||||
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
|
||||
@@ -57,6 +64,29 @@ func markdownToTelegramHTML(text string) string {
|
||||
return text
|
||||
}
|
||||
|
||||
// linkMatch is the result of extractLinks: the input text with every markdown
// link swapped for a "\x00LK<n>\x00" placeholder, together with the links that
// were removed, in placeholder order.
type linkMatch struct {
	// text is the input with each link replaced by its placeholder.
	text string
	// links holds one [label, url] pair per placeholder; the pair at index n
	// belongs to the placeholder "\x00LK<n>\x00".
	links [][2]string
}
|
||||
|
||||
func extractLinks(text string) linkMatch {
|
||||
matches := reLink.FindAllStringSubmatch(text, -1)
|
||||
|
||||
extracted := make([][2]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
extracted = append(extracted, [2]string{match[1], match[2]})
|
||||
}
|
||||
|
||||
i := 0
|
||||
text = reLink.ReplaceAllStringFunc(text, func(m string) string {
|
||||
placeholder := fmt.Sprintf("\x00LK%d\x00", i)
|
||||
i++
|
||||
return placeholder
|
||||
})
|
||||
|
||||
return linkMatch{text: text, links: extracted}
|
||||
}
|
||||
|
||||
type codeBlockMatch struct {
|
||||
text string
|
||||
codes []string
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
package telegram
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func Test_markdownToTelegramHTML(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "plain text",
|
||||
input: "hello world",
|
||||
expected: "hello world",
|
||||
},
|
||||
{
|
||||
name: "bold",
|
||||
input: "**bold text**",
|
||||
expected: "<b>bold text</b>",
|
||||
},
|
||||
{
|
||||
name: "italic",
|
||||
input: "_italic text_",
|
||||
expected: "<i>italic text</i>",
|
||||
},
|
||||
{
|
||||
name: "link without underscores in URL",
|
||||
input: "[click here](https://example.com/path)",
|
||||
expected: `<a href="https://example.com/path">click here</a>`,
|
||||
},
|
||||
{
|
||||
name: "link with underscores in URL is not corrupted by italic regex",
|
||||
// Google Flights URLs use URL-safe base64 with underscores in the tfs param.
|
||||
// Previously reItalic ran after reLink, matching _text_ inside href and injecting
|
||||
// <i> tags into the URL, which broke the link in Telegram.
|
||||
input: "[3 → 10 сентября — от $202](https://www.google.com/travel/flights/search?tfs=CBwQAho_EgoyURL_safe_base64)",
|
||||
expected: `<a href="https://www.google.com/travel/flights/search?tfs=CBwQAho_EgoyURL_safe_base64">3 → 10 сентября — от $202</a>`,
|
||||
},
|
||||
{
|
||||
name: "multiple links all survive",
|
||||
input: "[first](https://a.com/path_one) and [second](https://b.com/path_two_x)",
|
||||
expected: `<a href="https://a.com/path_one">first</a> and <a href="https://b.com/path_two_x">second</a>`,
|
||||
},
|
||||
{
|
||||
name: "link label with HTML special chars is escaped",
|
||||
input: "[a & b](https://example.com)",
|
||||
expected: `<a href="https://example.com">a & b</a>`,
|
||||
},
|
||||
{
|
||||
name: "HTML special chars in plain text are escaped",
|
||||
input: "a & b < c > d",
|
||||
expected: "a & b < c > d",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actual := markdownToTelegramHTML(tc.input)
|
||||
require.Equal(t, tc.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user