From 34b9d5d6fa2cbeb805cc9be506c5e4300f88d161 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sun, 12 Apr 2026 10:44:09 +0200 Subject: [PATCH] fix(telegram): preserve raw OAuth links in HTML rendering --- .../telegram/parser_markdown_to_html.go | 45 ++++++++++++++++++- .../telegram/parser_markdown_to_html_test.go | 10 +++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/pkg/channels/telegram/parser_markdown_to_html.go b/pkg/channels/telegram/parser_markdown_to_html.go index 95dc3e9d6..0614b6e32 100644 --- a/pkg/channels/telegram/parser_markdown_to_html.go +++ b/pkg/channels/telegram/parser_markdown_to_html.go @@ -2,9 +2,13 @@ package telegram import ( "fmt" + "html" + "regexp" "strings" ) +var reRawURL = regexp.MustCompile(`https?://[^\s<]+`) + func markdownToTelegramHTML(text string) string { if text == "" { return "" @@ -19,6 +23,9 @@ func markdownToTelegramHTML(text string) string { links := extractLinks(text) text = links.text + rawURLs := extractRawURLs(text) + text = rawURLs.text + text = reHeading.ReplaceAllString(text, "$1") text = reBlockquote.ReplaceAllString(text, "$1") @@ -43,10 +50,19 @@ func markdownToTelegramHTML(text string) string { for i, lnk := range links.links { label := escapeHTML(lnk[0]) - url := lnk[1] + url := escapeHTMLAttr(lnk[1]) text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`%s`, url, label)) } + for i, rawURL := range rawURLs.urls { + escaped := escapeHTML(rawURL) + text = strings.ReplaceAll( + text, + fmt.Sprintf("\x00RU%d\x00", i), + fmt.Sprintf(`%s`, escapeHTMLAttr(rawURL), escaped), + ) + } + for i, code := range inlineCodes.codes { escaped := escapeHTML(code) text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("%s", escaped)) @@ -92,6 +108,11 @@ type codeBlockMatch struct { codes []string } +type rawURLMatch struct { + text string + urls []string +} + func extractCodeBlocks(text string) codeBlockMatch { matches := reCodeBlock.FindAllStringSubmatch(text, -1) @@ -110,6 +131,24 @@ func extractCodeBlocks(text string) codeBlockMatch { return codeBlockMatch{text: text, codes: codes} } +func extractRawURLs(text string) rawURLMatch { + matches := reRawURL.FindAllString(text, -1) + + urls := make([]string, 0, len(matches)) + for _, match := range matches { + urls = append(urls, match) + } + + i := 0 + text = reRawURL.ReplaceAllStringFunc(text, func(string) string { + placeholder := fmt.Sprintf("\x00RU%d\x00", i) + i++ + return placeholder + }) + + return rawURLMatch{text: text, urls: urls} +} + type inlineCodeMatch struct { text string codes []string @@ -139,3 +178,7 @@ func escapeHTML(text string) string { text = strings.ReplaceAll(text, ">", ">") return text } + +func escapeHTMLAttr(text string) string { + return html.EscapeString(text) +} diff --git a/pkg/channels/telegram/parser_markdown_to_html_test.go b/pkg/channels/telegram/parser_markdown_to_html_test.go index 7754ee076..a05b39877 100644 --- a/pkg/channels/telegram/parser_markdown_to_html_test.go +++ b/pkg/channels/telegram/parser_markdown_to_html_test.go @@ -32,6 +32,11 @@ func Test_markdownToTelegramHTML(t *testing.T) { input: "[click here](https://example.com/path)", expected: `click here`, }, + { + name: "raw oauth url with underscores survives", + input: "Apri https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256", + expected: `Apri https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256`, + }, { name: "link with underscores in URL is not corrupted by italic regex", // Google Flights URLs use URL-safe base64 with underscores in the tfs param. @@ -45,6 +50,11 @@ func Test_markdownToTelegramHTML(t *testing.T) { input: "[first](https://a.com/path_one) and [second](https://b.com/path_two_x)", expected: `first and second`, }, + { + name: "markdown link query params are escaped in href", + input: "[oauth](https://example.com/cb?response_type=code&client_id=test-client)", + expected: `oauth`, + }, { name: "link label with HTML special chars is escaped", input: "[a & b](https://example.com)",