mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
fix(telegram): preserve raw OAuth links in HTML rendering
This commit is contained in:
@@ -2,9 +2,13 @@ package telegram
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// reRawURL matches a bare http:// or https:// URL: the scheme followed by one
// or more characters that are neither whitespace nor '<'. The match is greedy,
// so any punctuation that directly follows the URL in running text (for
// example a sentence-ending '.') is part of the match as well.
var reRawURL = regexp.MustCompile(`https?://[^\s<]+`)
|
||||
|
||||
func markdownToTelegramHTML(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
@@ -19,6 +23,9 @@ func markdownToTelegramHTML(text string) string {
|
||||
links := extractLinks(text)
|
||||
text = links.text
|
||||
|
||||
rawURLs := extractRawURLs(text)
|
||||
text = rawURLs.text
|
||||
|
||||
text = reHeading.ReplaceAllString(text, "$1")
|
||||
|
||||
text = reBlockquote.ReplaceAllString(text, "$1")
|
||||
@@ -43,10 +50,19 @@ func markdownToTelegramHTML(text string) string {
|
||||
|
||||
for i, lnk := range links.links {
|
||||
label := escapeHTML(lnk[0])
|
||||
url := lnk[1]
|
||||
url := escapeHTMLAttr(lnk[1])
|
||||
text = strings.ReplaceAll(text, fmt.Sprintf("\x00LK%d\x00", i), fmt.Sprintf(`<a href="%s">%s</a>`, url, label))
|
||||
}
|
||||
|
||||
for i, rawURL := range rawURLs.urls {
|
||||
escaped := escapeHTML(rawURL)
|
||||
text = strings.ReplaceAll(
|
||||
text,
|
||||
fmt.Sprintf("\x00RU%d\x00", i),
|
||||
fmt.Sprintf(`<a href="%s">%s</a>`, escapeHTMLAttr(rawURL), escaped),
|
||||
)
|
||||
}
|
||||
|
||||
for i, code := range inlineCodes.codes {
|
||||
escaped := escapeHTML(code)
|
||||
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
|
||||
@@ -92,6 +108,11 @@ type codeBlockMatch struct {
|
||||
codes []string
|
||||
}
|
||||
|
||||
// rawURLMatch is the result of pulling bare URLs out of a message.
type rawURLMatch struct {
	// text is the input with each extracted URL swapped for a placeholder.
	text string
	// urls holds the extracted URLs; the slice index is the number embedded
	// in the corresponding placeholder.
	urls []string
}
|
||||
|
||||
func extractCodeBlocks(text string) codeBlockMatch {
|
||||
matches := reCodeBlock.FindAllStringSubmatch(text, -1)
|
||||
|
||||
@@ -110,6 +131,24 @@ func extractCodeBlocks(text string) codeBlockMatch {
|
||||
return codeBlockMatch{text: text, codes: codes}
|
||||
}
|
||||
|
||||
func extractRawURLs(text string) rawURLMatch {
|
||||
matches := reRawURL.FindAllString(text, -1)
|
||||
|
||||
urls := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
urls = append(urls, match)
|
||||
}
|
||||
|
||||
i := 0
|
||||
text = reRawURL.ReplaceAllStringFunc(text, func(string) string {
|
||||
placeholder := fmt.Sprintf("\x00RU%d\x00", i)
|
||||
i++
|
||||
return placeholder
|
||||
})
|
||||
|
||||
return rawURLMatch{text: text, urls: urls}
|
||||
}
|
||||
|
||||
type inlineCodeMatch struct {
|
||||
text string
|
||||
codes []string
|
||||
@@ -139,3 +178,7 @@ func escapeHTML(text string) string {
|
||||
text = strings.ReplaceAll(text, ">", ">")
|
||||
return text
|
||||
}
|
||||
|
||||
// escapeHTMLAttr returns text passed through html.EscapeString, which makes it
// safe to interpolate into a double-quoted HTML attribute value such as href.
func escapeHTMLAttr(text string) string {
	escaped := html.EscapeString(text)
	return escaped
}
|
||||
|
||||
@@ -32,6 +32,11 @@ func Test_markdownToTelegramHTML(t *testing.T) {
|
||||
input: "[click here](https://example.com/path)",
|
||||
expected: `<a href="https://example.com/path">click here</a>`,
|
||||
},
|
||||
{
|
||||
name: "raw oauth url with underscores survives",
|
||||
input: "Apri https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256",
|
||||
expected: `Apri <a href="https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256">https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=test-client&redirect_uri=http%3A%2F%2Flocalhost%3A8001%2Foauth2callback&code_challenge=abc_def&code_challenge_method=S256</a>`,
|
||||
},
|
||||
{
|
||||
name: "link with underscores in URL is not corrupted by italic regex",
|
||||
// Google Flights URLs use URL-safe base64 with underscores in the tfs param.
|
||||
@@ -45,6 +50,11 @@ func Test_markdownToTelegramHTML(t *testing.T) {
|
||||
input: "[first](https://a.com/path_one) and [second](https://b.com/path_two_x)",
|
||||
expected: `<a href="https://a.com/path_one">first</a> and <a href="https://b.com/path_two_x">second</a>`,
|
||||
},
|
||||
{
|
||||
name: "markdown link query params are escaped in href",
|
||||
input: "[oauth](https://example.com/cb?response_type=code&client_id=test-client)",
|
||||
expected: `<a href="https://example.com/cb?response_type=code&client_id=test-client">oauth</a>`,
|
||||
},
|
||||
{
|
||||
name: "link label with HTML special chars is escaped",
|
||||
input: "[a & b](https://example.com)",
|
||||
|
||||
Reference in New Issue
Block a user