Merge upstream/main into feat/searxng

Resolve merge conflicts to keep both SearXNG and GLM Search
providers. Updated search priority order to:
Perplexity > Brave > SearXNG > Tavily > DuckDuckGo > GLM Search

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Truong Vinh Tran
2026-03-04 21:42:03 +01:00
240 changed files with 32624 additions and 5595 deletions
+214 -78
View File
@@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
@@ -15,6 +16,27 @@ import (
const (
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
// HTTP client timeouts for web tool providers.
searchTimeout = 10 * time.Second // Brave, Tavily, DuckDuckGo
perplexityTimeout = 30 * time.Second // Perplexity (LLM-based, slower)
fetchTimeout = 60 * time.Second // WebFetchTool
defaultMaxChars = 50000
maxRedirects = 5
)
// Pre-compiled regexes for HTML text extraction
var (
reScript = regexp.MustCompile(`<script[\s\S]*?</script>`)
reStyle = regexp.MustCompile(`<style[\s\S]*?</style>`)
reTags = regexp.MustCompile(`<[^>]+>`)
reWhitespace = regexp.MustCompile(`[^\S\n]+`)
reBlankLines = regexp.MustCompile(`\n{3,}`)
// DuckDuckGo result extraction
reDDGLink = regexp.MustCompile(`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`)
reDDGSnippet = regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
)
// createHTTPClient creates an HTTP client with optional proxy support
@@ -61,6 +83,7 @@ type SearchProvider interface {
type BraveSearchProvider struct {
apiKey string
proxy string
client *http.Client
}
func (p *BraveSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
@@ -75,11 +98,7 @@ func (p *BraveSearchProvider) Search(ctx context.Context, query string, count in
req.Header.Set("Accept", "application/json")
req.Header.Set("X-Subscription-Token", p.apiKey)
client, err := createHTTPClient(p.proxy, 10*time.Second)
if err != nil {
return "", fmt.Errorf("failed to create HTTP client: %w", err)
}
resp, err := client.Do(req)
resp, err := p.client.Do(req)
if err != nil {
return "", fmt.Errorf("request failed: %w", err)
}
@@ -90,6 +109,10 @@ func (p *BraveSearchProvider) Search(ctx context.Context, query string, count in
return "", fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("brave api error (status %d): %s", resp.StatusCode, string(body))
}
var searchResp struct {
Web struct {
Results []struct {
@@ -130,6 +153,7 @@ type TavilySearchProvider struct {
apiKey string
baseURL string
proxy string
client *http.Client
}
func (p *TavilySearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
@@ -161,11 +185,7 @@ func (p *TavilySearchProvider) Search(ctx context.Context, query string, count i
req.Header.Set("Content-Type", "application/json")
req.Header.Set("User-Agent", userAgent)
client, err := createHTTPClient(p.proxy, 10*time.Second)
if err != nil {
return "", fmt.Errorf("failed to create HTTP client: %w", err)
}
resp, err := client.Do(req)
resp, err := p.client.Do(req)
if err != nil {
return "", fmt.Errorf("request failed: %w", err)
}
@@ -213,7 +233,8 @@ func (p *TavilySearchProvider) Search(ctx context.Context, query string, count i
}
type DuckDuckGoSearchProvider struct {
proxy string
proxy string
client *http.Client
}
func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
@@ -226,11 +247,7 @@ func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, cou
req.Header.Set("User-Agent", userAgent)
client, err := createHTTPClient(p.proxy, 10*time.Second)
if err != nil {
return "", fmt.Errorf("failed to create HTTP client: %w", err)
}
resp, err := client.Do(req)
resp, err := p.client.Do(req)
if err != nil {
return "", fmt.Errorf("request failed: %w", err)
}
@@ -251,8 +268,7 @@ func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query
// Try finding the result links directly first, as they are the most critical
// Pattern: <a class="result__a" href="...">Title</a>
// The previous regex was a bit strict. Let's make it more flexible for attributes order/content
reLink := regexp.MustCompile(`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`)
matches := reLink.FindAllStringSubmatch(html, count+5)
matches := reDDGLink.FindAllStringSubmatch(html, count+5)
if len(matches) == 0 {
return fmt.Sprintf("No results found or extraction failed. Query: %s", query), nil
@@ -269,12 +285,11 @@ func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query
// A better regex approach: iterate through text and find matches in order
// But for now, let's grab all snippets too
reSnippet := regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
snippetMatches := reSnippet.FindAllStringSubmatch(html, count+5)
snippetMatches := reDDGSnippet.FindAllStringSubmatch(html, count+5)
maxItems := min(len(matches), count)
for i := 0; i < maxItems; i++ {
for i := range maxItems {
urlStr := matches[i][1]
title := stripTags(matches[i][2])
title = strings.TrimSpace(title)
@@ -282,9 +297,9 @@ func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query
// URL decoding if needed
if strings.Contains(urlStr, "uddg=") {
if u, err := url.QueryUnescape(urlStr); err == nil {
idx := strings.Index(u, "uddg=")
if idx != -1 {
urlStr = u[idx+5:]
_, after, ok := strings.Cut(u, "uddg=")
if ok {
urlStr = after
}
}
}
@@ -305,13 +320,13 @@ func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query
}
func stripTags(content string) string {
re := regexp.MustCompile(`<[^>]+>`)
return re.ReplaceAllString(content, "")
return reTags.ReplaceAllString(content, "")
}
type PerplexitySearchProvider struct {
apiKey string
proxy string
client *http.Client
}
func (p *PerplexitySearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
@@ -346,11 +361,7 @@ func (p *PerplexitySearchProvider) Search(ctx context.Context, query string, cou
req.Header.Set("Authorization", "Bearer "+p.apiKey)
req.Header.Set("User-Agent", userAgent)
client, err := createHTTPClient(p.proxy, 30*time.Second)
if err != nil {
return "", fmt.Errorf("failed to create HTTP client: %w", err)
}
resp, err := client.Do(req)
resp, err := p.client.Do(req)
if err != nil {
return "", fmt.Errorf("request failed: %w", err)
}
@@ -446,6 +457,88 @@ func (p *SearXNGSearchProvider) Search(ctx context.Context, query string, count
return b.String(), nil
}
type GLMSearchProvider struct {
apiKey string
baseURL string
searchEngine string
proxy string
client *http.Client
}
func (p *GLMSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
searchURL := p.baseURL
if searchURL == "" {
searchURL = "https://open.bigmodel.cn/api/paas/v4/web_search"
}
payload := map[string]any{
"search_query": query,
"search_engine": p.searchEngine,
"search_intent": false,
"count": count,
"content_size": "medium",
}
bodyBytes, err := json.Marshal(payload)
if err != nil {
return "", fmt.Errorf("failed to marshal payload: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", searchURL, bytes.NewReader(bodyBytes))
if err != nil {
return "", fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+p.apiKey)
resp, err := p.client.Do(req)
if err != nil {
return "", fmt.Errorf("request failed: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
if err != nil {
return "", fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("GLM Search API error (status %d): %s", resp.StatusCode, string(body))
}
var searchResp struct {
SearchResult []struct {
Title string `json:"title"`
Content string `json:"content"`
Link string `json:"link"`
} `json:"search_result"`
}
if err := json.Unmarshal(body, &searchResp); err != nil {
return "", fmt.Errorf("failed to parse response: %w", err)
}
results := searchResp.SearchResult
if len(results) == 0 {
return fmt.Sprintf("No results for: %s", query), nil
}
var lines []string
lines = append(lines, fmt.Sprintf("Results for: %s (via GLM Search)", query))
for i, item := range results {
if i >= count {
break
}
lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.Link))
if item.Content != "" {
lines = append(lines, fmt.Sprintf(" %s", item.Content))
}
}
return strings.Join(lines, "\n"), nil
}
type WebSearchTool struct {
provider SearchProvider
maxResults int
@@ -467,21 +560,34 @@ type WebSearchToolOptions struct {
SearXNGBaseURL string
SearXNGMaxResults int
SearXNGEnabled bool
GLMSearchAPIKey string
GLMSearchBaseURL string
GLMSearchEngine string
GLMSearchMaxResults int
GLMSearchEnabled bool
Proxy string
}
func NewWebSearchTool(opts WebSearchToolOptions) *WebSearchTool {
func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) {
var provider SearchProvider
maxResults := 5
// Priority: Perplexity > Brave > SearXNG > Tavily > DuckDuckGo
// Priority: Perplexity > Brave > SearXNG > Tavily > DuckDuckGo > GLM Search
if opts.PerplexityEnabled && opts.PerplexityAPIKey != "" {
provider = &PerplexitySearchProvider{apiKey: opts.PerplexityAPIKey, proxy: opts.Proxy}
client, err := createHTTPClient(opts.Proxy, perplexityTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err)
}
provider = &PerplexitySearchProvider{apiKey: opts.PerplexityAPIKey, proxy: opts.Proxy, client: client}
if opts.PerplexityMaxResults > 0 {
maxResults = opts.PerplexityMaxResults
}
} else if opts.BraveEnabled && opts.BraveAPIKey != "" {
provider = &BraveSearchProvider{apiKey: opts.BraveAPIKey, proxy: opts.Proxy}
client, err := createHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Brave: %w", err)
}
provider = &BraveSearchProvider{apiKey: opts.BraveAPIKey, proxy: opts.Proxy, client: client}
if opts.BraveMaxResults > 0 {
maxResults = opts.BraveMaxResults
}
@@ -491,27 +597,55 @@ func NewWebSearchTool(opts WebSearchToolOptions) *WebSearchTool {
maxResults = opts.SearXNGMaxResults
}
} else if opts.TavilyEnabled && opts.TavilyAPIKey != "" {
client, err := createHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Tavily: %w", err)
}
provider = &TavilySearchProvider{
apiKey: opts.TavilyAPIKey,
baseURL: opts.TavilyBaseURL,
proxy: opts.Proxy,
client: client,
}
if opts.TavilyMaxResults > 0 {
maxResults = opts.TavilyMaxResults
}
} else if opts.DuckDuckGoEnabled {
provider = &DuckDuckGoSearchProvider{proxy: opts.Proxy}
client, err := createHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err)
}
provider = &DuckDuckGoSearchProvider{proxy: opts.Proxy, client: client}
if opts.DuckDuckGoMaxResults > 0 {
maxResults = opts.DuckDuckGoMaxResults
}
} else if opts.GLMSearchEnabled && opts.GLMSearchAPIKey != "" {
client, err := createHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for GLM Search: %w", err)
}
searchEngine := opts.GLMSearchEngine
if searchEngine == "" {
searchEngine = "search_std"
}
provider = &GLMSearchProvider{
apiKey: opts.GLMSearchAPIKey,
baseURL: opts.GLMSearchBaseURL,
searchEngine: searchEngine,
proxy: opts.Proxy,
client: client,
}
if opts.GLMSearchMaxResults > 0 {
maxResults = opts.GLMSearchMaxResults
}
} else {
return nil
return nil, nil
}
return &WebSearchTool{
provider: provider,
maxResults: maxResults,
}
}, nil
}
func (t *WebSearchTool) Name() string {
@@ -566,27 +700,40 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolR
}
type WebFetchTool struct {
maxChars int
proxy string
maxChars int
proxy string
client *http.Client
fetchLimitBytes int64
}
func NewWebFetchTool(maxChars int) *WebFetchTool {
if maxChars <= 0 {
maxChars = 50000
}
return &WebFetchTool{
maxChars: maxChars,
}
func NewWebFetchTool(maxChars int, fetchLimitBytes int64) (*WebFetchTool, error) {
// createHTTPClient cannot fail with an empty proxy string.
return NewWebFetchToolWithProxy(maxChars, "", fetchLimitBytes)
}
func NewWebFetchToolWithProxy(maxChars int, proxy string) *WebFetchTool {
func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64) (*WebFetchTool, error) {
if maxChars <= 0 {
maxChars = 50000
maxChars = defaultMaxChars
}
client, err := createHTTPClient(proxy, fetchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for web fetch: %w", err)
}
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) >= maxRedirects {
return fmt.Errorf("stopped after %d redirects", maxRedirects)
}
return nil
}
if fetchLimitBytes <= 0 {
fetchLimitBytes = 10 * 1024 * 1024 // Security Fallback
}
return &WebFetchTool{
maxChars: maxChars,
proxy: proxy,
}
maxChars: maxChars,
proxy: proxy,
client: client,
fetchLimitBytes: fetchLimitBytes,
}, nil
}
func (t *WebFetchTool) Name() string {
@@ -648,27 +795,21 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
req.Header.Set("User-Agent", userAgent)
client, err := createHTTPClient(t.proxy, 60*time.Second)
if err != nil {
return ErrorResult(fmt.Sprintf("failed to create HTTP client: %v", err))
}
// Configure redirect handling
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) >= 5 {
return fmt.Errorf("stopped after 5 redirects")
}
return nil
}
resp, err := client.Do(req)
resp, err := t.client.Do(req)
if err != nil {
return ErrorResult(fmt.Sprintf("request failed: %v", err))
}
resp.Body = http.MaxBytesReader(nil, resp.Body, t.fetchLimitBytes)
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
var maxBytesErr *http.MaxBytesError
if errors.As(err, &maxBytesErr) {
return ErrorResult(fmt.Sprintf("failed to read response: size exceeded %d bytes limit", t.fetchLimitBytes))
}
return ErrorResult(fmt.Sprintf("failed to read response: %v", err))
}
@@ -712,31 +853,26 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
resultJSON, _ := json.MarshalIndent(result, "", " ")
return &ToolResult{
ForLLM: fmt.Sprintf(
ForLLM: string(resultJSON),
ForUser: fmt.Sprintf(
"Fetched %d bytes from %s (extractor: %s, truncated: %v)",
len(text),
urlStr,
extractor,
truncated,
),
ForUser: string(resultJSON),
}
}
func (t *WebFetchTool) extractText(htmlContent string) string {
re := regexp.MustCompile(`<script[\s\S]*?</script>`)
result := re.ReplaceAllLiteralString(htmlContent, "")
re = regexp.MustCompile(`<style[\s\S]*?</style>`)
result = re.ReplaceAllLiteralString(result, "")
re = regexp.MustCompile(`<[^>]+>`)
result = re.ReplaceAllLiteralString(result, "")
result := reScript.ReplaceAllLiteralString(htmlContent, "")
result = reStyle.ReplaceAllLiteralString(result, "")
result = reTags.ReplaceAllLiteralString(result, "")
result = strings.TrimSpace(result)
re = regexp.MustCompile(`[^\S\n]+`)
result = re.ReplaceAllString(result, " ")
re = regexp.MustCompile(`\n{3,}`)
result = re.ReplaceAllString(result, "\n\n")
result = reWhitespace.ReplaceAllString(result, " ")
result = reBlankLines.ReplaceAllString(result, "\n\n")
lines := strings.Split(result, "\n")
var cleanLines []string