From c36b06a901c3a4aa1870c0f9b8ca7c908779f23e Mon Sep 17 00:00:00 2001 From: DimonB Date: Tue, 31 Mar 2026 06:46:06 +0300 Subject: [PATCH] Fix Telegram HTML links broken by italic regex matching inside href URLs (#2164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reItalic (_text_) ran after reLink converted [text](url) to <a href="url">text</a>, injecting <i> tags into URLs containing underscores (e.g. Google Flights URL-safe base64 in the tfs param). Telegram silently dropped such malformed tags, causing only 1 of 3 links to appear in messages. Fix: extract markdown links into placeholders before any formatting runs, restore them last — same pattern used for code blocks. --- .../telegram/parser_markdown_to_html.go | 34 +++++++++- .../telegram/parser_markdown_to_html_test.go | 66 +++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 pkg/channels/telegram/parser_markdown_to_html_test.go diff --git a/pkg/channels/telegram/parser_markdown_to_html.go b/pkg/channels/telegram/parser_markdown_to_html.go index bdaa51807..95dc3e9d6 100644 --- a/pkg/channels/telegram/parser_markdown_to_html.go +++ b/pkg/channels/telegram/parser_markdown_to_html.go @@ -16,14 +16,15 @@ func markdownToTelegramHTML(text string) string { inlineCodes := extractInlineCodes(text) text = inlineCodes.text + links := extractLinks(text) + text = links.text + text = reHeading.ReplaceAllString(text, "$1") text = reBlockquote.ReplaceAllString(text, "$1") text = escapeHTML(text) - text = reLink.ReplaceAllString(text, `$1`) - text = reBoldStar.ReplaceAllString(text, "$1") text = reBoldUnder.ReplaceAllString(text, "$1") @@ -40,6 +41,12 @@ func markdownToTelegramHTML(text string) string { text = reListItem.ReplaceAllString(text, "• ") + for i, lnk := range links.links { + label := escapeHTML(lnk[0]) + url := lnk[1] + text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`%s`, url, label)) + } + for i, code := range 
inlineCodes.codes { escaped := escapeHTML(code) text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("%s", escaped)) @@ -57,6 +64,29 @@ func markdownToTelegramHTML(text string) string { return text } +type linkMatch struct { + text string + links [][2]string // [label, url] +} + +func extractLinks(text string) linkMatch { + matches := reLink.FindAllStringSubmatch(text, -1) + + extracted := make([][2]string, 0, len(matches)) + for _, match := range matches { + extracted = append(extracted, [2]string{match[1], match[2]}) + } + + i := 0 + text = reLink.ReplaceAllStringFunc(text, func(m string) string { + placeholder := fmt.Sprintf("\x00LK%d\x00", i) + i++ + return placeholder + }) + + return linkMatch{text: text, links: extracted} +} + type codeBlockMatch struct { text string codes []string diff --git a/pkg/channels/telegram/parser_markdown_to_html_test.go b/pkg/channels/telegram/parser_markdown_to_html_test.go new file mode 100644 index 000000000..7754ee076 --- /dev/null +++ b/pkg/channels/telegram/parser_markdown_to_html_test.go @@ -0,0 +1,66 @@ +package telegram + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_markdownToTelegramHTML(t *testing.T) { + cases := []struct { + name string + input string + expected string + }{ + { + name: "plain text", + input: "hello world", + expected: "hello world", + }, + { + name: "bold", + input: "**bold text**", + expected: "bold text", + }, + { + name: "italic", + input: "_italic text_", + expected: "italic text", + }, + { + name: "link without underscores in URL", + input: "[click here](https://example.com/path)", + expected: `click here`, + }, + { + name: "link with underscores in URL is not corrupted by italic regex", + // Google Flights URLs use URL-safe base64 with underscores in the tfs param. + // Previously reItalic ran after reLink, matching _text_ inside href and injecting + // <i> tags into the URL, which broke the link in Telegram. 
+ input: "[3 → 10 сентября — от $202](https://www.google.com/travel/flights/search?tfs=CBwQAho_EgoyURL_safe_base64)", + expected: `3 → 10 сентября — от $202`, + }, + { + name: "multiple links all survive", + input: "[first](https://a.com/path_one) and [second](https://b.com/path_two_x)", + expected: `first and second`, + }, + { + name: "link label with HTML special chars is escaped", + input: "[a & b](https://example.com)", + expected: `a & b`, + }, + { + name: "HTML special chars in plain text are escaped", + input: "a & b < c > d", + expected: "a & b < c > d", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + actual := markdownToTelegramHTML(tc.input) + require.Equal(t, tc.expected, actual) + }) + } +}