diff --git a/pkg/channels/telegram/parser_markdown_to_html.go b/pkg/channels/telegram/parser_markdown_to_html.go
index 95dc3e9d6..0614b6e32 100644
--- a/pkg/channels/telegram/parser_markdown_to_html.go
+++ b/pkg/channels/telegram/parser_markdown_to_html.go
@@ -2,9 +2,13 @@ package telegram
import (
"fmt"
+ "html"
+ "regexp"
"strings"
)
+// reRawURL matches a bare http:// or https:// URL: the scheme followed by a
+// run of characters that stops at whitespace, '<', or a NUL byte. NUL is
+// excluded because the placeholders used in this file (for example
+// "\x00LK<i>\x00" and "\x00IC<i>\x00") are delimited by NUL; without the
+// exclusion, a URL immediately followed by such a placeholder would swallow it.
+var reRawURL = regexp.MustCompile(`https?://[^\s<\x00]+`)
+
func markdownToTelegramHTML(text string) string {
if text == "" {
return ""
@@ -19,6 +23,9 @@ func markdownToTelegramHTML(text string) string {
links := extractLinks(text)
text = links.text
+ rawURLs := extractRawURLs(text)
+ text = rawURLs.text
+
text = reHeading.ReplaceAllString(text, "$1")
text = reBlockquote.ReplaceAllString(text, "$1")
@@ -43,10 +50,19 @@ func markdownToTelegramHTML(text string) string {
for i, lnk := range links.links {
label := escapeHTML(lnk[0])
- url := lnk[1]
+ url := escapeHTMLAttr(lnk[1])
text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`%s`, url, label))
}
+ for i, rawURL := range rawURLs.urls {
+ escaped := escapeHTML(rawURL)
+ text = strings.ReplaceAll(
+ text,
+ fmt.Sprintf("\x00RU%d\x00", i),
+ fmt.Sprintf(`%s`, escapeHTMLAttr(rawURL), escaped),
+ )
+ }
+
for i, code := range inlineCodes.codes {
escaped := escapeHTML(code)
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("%s", escaped))
@@ -92,6 +108,11 @@ type codeBlockMatch struct {
codes []string
}
+// rawURLMatch is the result returned by extractRawURLs.
+type rawURLMatch struct {
+ // text is the input with every raw URL replaced by a "\x00RU<i>\x00"
+ // placeholder.
+ text string
+ // urls holds the extracted URLs; urls[i] is the URL behind placeholder i.
+ urls []string
+}
+
func extractCodeBlocks(text string) codeBlockMatch {
matches := reCodeBlock.FindAllStringSubmatch(text, -1)
@@ -110,6 +131,24 @@ func extractCodeBlocks(text string) codeBlockMatch {
return codeBlockMatch{text: text, codes: codes}
}
+// extractRawURLs replaces every match of reRawURL in text with a numbered
+// placeholder of the form "\x00RU<i>\x00" and returns the rewritten text
+// together with the matched URLs, where urls[i] is the URL that placeholder i
+// stands for.
+func extractRawURLs(text string) rawURLMatch {
+	// Start from an empty, non-nil slice so the urls field is never nil.
+	urls := []string{}
+
+	// A single pass both records each URL and swaps in its placeholder, so
+	// the placeholder number is always the URL's index in urls.
+	text = reRawURL.ReplaceAllStringFunc(text, func(match string) string {
+		placeholder := fmt.Sprintf("\x00RU%d\x00", len(urls))
+		urls = append(urls, match)
+		return placeholder
+	})
+
+	return rawURLMatch{text: text, urls: urls}
+}
+
type inlineCodeMatch struct {
text string
codes []string
@@ -139,3 +178,7 @@ func escapeHTML(text string) string {
text = strings.ReplaceAll(text, ">", ">")
return text
}
+
+// escapeHTMLAttr returns text escaped with html.EscapeString. It is applied
+// to link URLs before they are substituted back into the generated markup.
+func escapeHTMLAttr(text string) string {
+	escaped := html.EscapeString(text)
+	return escaped
+}
diff --git a/pkg/channels/telegram/parser_markdown_to_html_test.go b/pkg/channels/telegram/parser_markdown_to_html_test.go
index 7754ee076..a05b39877 100644
--- a/pkg/channels/telegram/parser_markdown_to_html_test.go
+++ b/pkg/channels/telegram/parser_markdown_to_html_test.go
@@ -32,6 +32,11 @@ func Test_markdownToTelegramHTML(t *testing.T) {
input: "[click here](https://example.com/path)",
expected: `click here`,
},
+ {
+ name: "raw oauth url with underscores survives",
+ input: "Apri https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256",
+ expected: `Apri https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256`,
+ },
{
name: "link with underscores in URL is not corrupted by italic regex",
// Google Flights URLs use URL-safe base64 with underscores in the tfs param.
@@ -45,6 +50,11 @@ func Test_markdownToTelegramHTML(t *testing.T) {
input: "[first](https://a.com/path_one) and [second](https://b.com/path_two_x)",
expected: `first and second`,
},
+ {
+ name: "markdown link query params are escaped in href",
+ input: "[oauth](https://example.com/cb?response_type=code&client_id=test-client)",
+ expected: `oauth`,
+ },
{
name: "link label with HTML special chars is escaped",
input: "[a & b](https://example.com)",