Fix Telegram HTML links broken by italic regex matching inside href URLs (#2164)

reItalic (_text_) ran after reLink converted [text](url) to <a href>,
injecting <i> tags into URLs containing underscores (e.g. Google Flights
URL-safe base64 in the tfs param). Telegram silently dropped such malformed
<a> tags, causing only 1 of 3 links to appear in messages.

Fix: extract markdown links into placeholders before any formatting runs,
then restore them as <a href> tags once all formatting passes have finished —
the same placeholder pattern already used for code blocks.
This commit is contained in:
DimonB
2026-03-31 06:46:06 +03:00
committed by GitHub
parent 6c0798ca3f
commit c36b06a901
2 changed files with 98 additions and 2 deletions
@@ -16,14 +16,15 @@ func markdownToTelegramHTML(text string) string {
inlineCodes := extractInlineCodes(text)
text = inlineCodes.text
links := extractLinks(text)
text = links.text
text = reHeading.ReplaceAllString(text, "$1")
text = reBlockquote.ReplaceAllString(text, "$1")
text = escapeHTML(text)
text = reLink.ReplaceAllString(text, `<a href="$2">$1</a>`)
text = reBoldStar.ReplaceAllString(text, "<b>$1</b>")
text = reBoldUnder.ReplaceAllString(text, "<b>$1</b>")
@@ -40,6 +41,12 @@ func markdownToTelegramHTML(text string) string {
text = reListItem.ReplaceAllString(text, "• ")
for i, lnk := range links.links {
label := escapeHTML(lnk[0])
url := lnk[1]
text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`<a href="%s">%s</a>`, url, label))
}
for i, code := range inlineCodes.codes {
escaped := escapeHTML(code)
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
@@ -57,6 +64,29 @@ func markdownToTelegramHTML(text string) string {
return text
}
// linkMatch is the value returned by extractLinks: the input text with every
// markdown link swapped for a placeholder, plus the links that were taken out.
type linkMatch struct {
// text is the input with each link replaced by a "\x00LK<i>\x00" placeholder.
text string
// links holds one pair per extracted link, in the order the links were matched;
// element i corresponds to placeholder number i.
links [][2]string // [label, url]
}
// extractLinks removes every markdown link matched by reLink from text so that
// later formatting passes cannot alter the label or the URL.
//
// Each link is replaced by a "\x00LK<n>\x00" placeholder, numbered from zero in
// match order. The returned linkMatch carries the rewritten text together with
// the captured [label, url] pairs, where pair n belongs to placeholder n.
func extractLinks(text string) linkMatch {
	found := reLink.FindAllStringSubmatch(text, -1)

	// Capture group 1 is the link label, capture group 2 is the URL.
	pairs := make([][2]string, len(found))
	for n, groups := range found {
		pairs[n] = [2]string{groups[1], groups[2]}
	}

	// The matched text itself is irrelevant here; only the running match
	// count matters, so the placeholder index lines up with pairs.
	next := 0
	replaced := reLink.ReplaceAllStringFunc(text, func(string) string {
		token := fmt.Sprintf("\x00LK%d\x00", next)
		next++
		return token
	})

	return linkMatch{text: replaced, links: pairs}
}
type codeBlockMatch struct {
text string
codes []string
@@ -0,0 +1,66 @@
package telegram
import (
"testing"
"github.com/stretchr/testify/require"
)
func Test_markdownToTelegramHTML(t *testing.T) {
cases := []struct {
name string
input string
expected string
}{
{
name: "plain text",
input: "hello world",
expected: "hello world",
},
{
name: "bold",
input: "**bold text**",
expected: "<b>bold text</b>",
},
{
name: "italic",
input: "_italic text_",
expected: "<i>italic text</i>",
},
{
name: "link without underscores in URL",
input: "[click here](https://example.com/path)",
expected: `<a href="https://example.com/path">click here</a>`,
},
{
name: "link with underscores in URL is not corrupted by italic regex",
// Google Flights URLs use URL-safe base64 with underscores in the tfs param.
// Previously reItalic ran after reLink, matching _text_ inside href and injecting
// <i> tags into the URL, which broke the link in Telegram.
input: "[3 → 10 сентября — от $202](https://www.google.com/travel/flights/search?tfs=CBwQAho_EgoyURL_safe_base64)",
expected: `<a href="https://www.google.com/travel/flights/search?tfs=CBwQAho_EgoyURL_safe_base64">3 → 10 сентября — от $202</a>`,
},
{
name: "multiple links all survive",
input: "[first](https://a.com/path_one) and [second](https://b.com/path_two_x)",
expected: `<a href="https://a.com/path_one">first</a> and <a href="https://b.com/path_two_x">second</a>`,
},
{
name: "link label with HTML special chars is escaped",
input: "[a & b](https://example.com)",
expected: `<a href="https://example.com">a &amp; b</a>`,
},
{
name: "HTML special chars in plain text are escaped",
input: "a & b < c > d",
expected: "a &amp; b &lt; c &gt; d",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
actual := markdownToTelegramHTML(tc.input)
require.Equal(t, tc.expected, actual)
})
}
}