package tools import ( "bytes" "context" "encoding/json" "errors" "fmt" "io" "net/http" "net/url" "regexp" "strings" "time" ) const ( userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" // HTTP client timeouts for web tool providers. searchTimeout = 10 * time.Second // Brave, Tavily, DuckDuckGo perplexityTimeout = 30 * time.Second // Perplexity (LLM-based, slower) fetchTimeout = 60 * time.Second // WebFetchTool defaultMaxChars = 50000 maxRedirects = 5 ) // Pre-compiled regexes for HTML text extraction var ( reScript = regexp.MustCompile(``) reStyle = regexp.MustCompile(``) reTags = regexp.MustCompile(`<[^>]+>`) reWhitespace = regexp.MustCompile(`[^\S\n]+`) reBlankLines = regexp.MustCompile(`\n{3,}`) // DuckDuckGo result extraction reDDGLink = regexp.MustCompile(`]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)`) reDDGSnippet = regexp.MustCompile(`([\s\S]*?)`) ) // createHTTPClient creates an HTTP client with optional proxy support func createHTTPClient(proxyURL string, timeout time.Duration) (*http.Client, error) { client := &http.Client{ Timeout: timeout, Transport: &http.Transport{ MaxIdleConns: 10, IdleConnTimeout: 30 * time.Second, DisableCompression: false, TLSHandshakeTimeout: 15 * time.Second, }, } if proxyURL != "" { proxy, err := url.Parse(proxyURL) if err != nil { return nil, fmt.Errorf("invalid proxy URL: %w", err) } scheme := strings.ToLower(proxy.Scheme) switch scheme { case "http", "https", "socks5", "socks5h": default: return nil, fmt.Errorf( "unsupported proxy scheme %q (supported: http, https, socks5, socks5h)", proxy.Scheme, ) } if proxy.Host == "" { return nil, fmt.Errorf("invalid proxy URL: missing host") } client.Transport.(*http.Transport).Proxy = http.ProxyURL(proxy) } else { client.Transport.(*http.Transport).Proxy = http.ProxyFromEnvironment } return client, nil } type SearchProvider interface { Search(ctx context.Context, query string, count int) (string, error) } type BraveSearchProvider struct { apiKey string proxy string client *http.Client } func (p *BraveSearchProvider) Search(ctx context.Context, query string, count int) (string, error) { searchURL := fmt.Sprintf("https://api.search.brave.com/res/v1/web/search?q=%s&count=%d", url.QueryEscape(query), count) req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) } req.Header.Set("Accept", "application/json") req.Header.Set("X-Subscription-Token", p.apiKey) resp, err := p.client.Do(req) if err != nil { return "", fmt.Errorf("request failed: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("failed to read response: %w", err) } var searchResp struct { Web struct { Results []struct { Title string `json:"title"` URL string `json:"url"` Description string `json:"description"` } `json:"results"` } `json:"web"` } if err := json.Unmarshal(body, &searchResp); err != nil { // Log error body for debugging fmt.Printf("Brave API Error Body: %s\n", string(body)) return "", fmt.Errorf("failed to parse response: %w", err) } results := searchResp.Web.Results if len(results) == 0 { return fmt.Sprintf("No results for: %s", query), nil } var lines []string lines = append(lines, fmt.Sprintf("Results for: %s", query)) for i, item := range results { if i >= count { break } lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL)) if item.Description != "" { lines = append(lines, fmt.Sprintf(" %s", item.Description)) } } return strings.Join(lines, "\n"), nil } type TavilySearchProvider struct { apiKey string baseURL string proxy string client *http.Client } func (p *TavilySearchProvider) Search(ctx context.Context, query string, count int) (string, error) { searchURL := p.baseURL if searchURL == "" { searchURL = "https://api.tavily.com/search" } payload := map[string]any{ "api_key": p.apiKey, "query": query, "search_depth": "advanced", "include_answer": false, "include_images": false, "include_raw_content": false, "max_results": count, } bodyBytes, err := json.Marshal(payload) if err != nil { return "", fmt.Errorf("failed to marshal payload: %w", err) } req, err := http.NewRequestWithContext(ctx, "POST", searchURL, bytes.NewBuffer(bodyBytes)) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) } req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", userAgent) resp, err := p.client.Do(req) if err != nil { return "", fmt.Errorf("request failed: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("failed to read response: %w", err) } if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("tavily api error (status %d): %s", resp.StatusCode, string(body)) } var searchResp struct { Results []struct { Title string `json:"title"` URL string `json:"url"` Content string `json:"content"` } `json:"results"` } if err := json.Unmarshal(body, &searchResp); err != nil { return "", fmt.Errorf("failed to parse response: %w", err) } results := searchResp.Results if len(results) == 0 { return fmt.Sprintf("No results for: %s", query), nil } var lines []string lines = append(lines, fmt.Sprintf("Results for: %s (via Tavily)", query)) for i, item := range results { if i >= count { break } lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL)) if item.Content != "" { lines = append(lines, fmt.Sprintf(" %s", item.Content)) } } return strings.Join(lines, "\n"), nil } type DuckDuckGoSearchProvider struct { proxy string client *http.Client } func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, count int) (string, error) { searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query)) req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) } req.Header.Set("User-Agent", userAgent) resp, err := p.client.Do(req) if err != nil { return "", fmt.Errorf("request failed: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("failed to read response: %w", err) } return p.extractResults(string(body), count, query) } func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query string) (string, error) { // Simple regex based extraction for DDG HTML // Strategy: Find all result containers or key anchors directly // Try finding the result links directly first, as they are the most critical // Pattern: Title // The previous regex was a bit strict. Let's make it more flexible for attributes order/content matches := reDDGLink.FindAllStringSubmatch(html, count+5) if len(matches) == 0 { return fmt.Sprintf("No results found or extraction failed. Query: %s", query), nil } var lines []string lines = append(lines, fmt.Sprintf("Results for: %s (via DuckDuckGo)", query)) // Pre-compile snippet regex to run inside the loop // We'll search for snippets relative to the link position or just globally if needed // But simple global search for snippets might mismatch order. // Since we only have the raw HTML string, let's just extract snippets globally and assume order matches (risky but simple for regex) // Or better: Let's assume the snippet follows the link in the HTML // A better regex approach: iterate through text and find matches in order // But for now, let's grab all snippets too snippetMatches := reDDGSnippet.FindAllStringSubmatch(html, count+5) maxItems := min(len(matches), count) for i := range maxItems { urlStr := matches[i][1] title := stripTags(matches[i][2]) title = strings.TrimSpace(title) // URL decoding if needed if strings.Contains(urlStr, "uddg=") { if u, err := url.QueryUnescape(urlStr); err == nil { _, after, ok := strings.Cut(u, "uddg=") if ok { urlStr = after } } } lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, title, urlStr)) // Attempt to attach snippet if available and index aligns if i < len(snippetMatches) { snippet := stripTags(snippetMatches[i][1]) snippet = strings.TrimSpace(snippet) if snippet != "" { lines = append(lines, fmt.Sprintf(" %s", snippet)) } } } return strings.Join(lines, "\n"), nil } func stripTags(content string) string { return reTags.ReplaceAllString(content, "") } type PerplexitySearchProvider struct { apiKey string proxy string client *http.Client } func (p *PerplexitySearchProvider) Search(ctx context.Context, query string, count int) (string, error) { searchURL := "https://api.perplexity.ai/chat/completions" payload := map[string]any{ "model": "sonar", "messages": []map[string]string{ { "role": "system", "content": "You are a search assistant. Provide concise search results with titles, URLs, and brief descriptions in the following format:\n1. Title\n URL\n Description\n\nDo not add extra commentary.", }, { "role": "user", "content": fmt.Sprintf("Search for: %s. Provide up to %d relevant results.", query, count), }, }, "max_tokens": 1000, } payloadBytes, err := json.Marshal(payload) if err != nil { return "", fmt.Errorf("failed to marshal request: %w", err) } req, err := http.NewRequestWithContext(ctx, "POST", searchURL, strings.NewReader(string(payloadBytes))) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) } req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+p.apiKey) req.Header.Set("User-Agent", userAgent) resp, err := p.client.Do(req) if err != nil { return "", fmt.Errorf("request failed: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("failed to read response: %w", err) } if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("Perplexity API error: %s", string(body)) } var searchResp struct { Choices []struct { Message struct { Content string `json:"content"` } `json:"message"` } `json:"choices"` } if err := json.Unmarshal(body, &searchResp); err != nil { return "", fmt.Errorf("failed to parse response: %w", err) } if len(searchResp.Choices) == 0 { return fmt.Sprintf("No results for: %s", query), nil } return fmt.Sprintf("Results for: %s (via Perplexity)\n%s", query, searchResp.Choices[0].Message.Content), nil } type WebSearchTool struct { provider SearchProvider maxResults int } type WebSearchToolOptions struct { BraveAPIKey string BraveMaxResults int BraveEnabled bool TavilyAPIKey string TavilyBaseURL string TavilyMaxResults int TavilyEnabled bool DuckDuckGoMaxResults int DuckDuckGoEnabled bool PerplexityAPIKey string PerplexityMaxResults int PerplexityEnabled bool Proxy string } func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) { var provider SearchProvider maxResults := 5 // Priority: Perplexity > Brave > Tavily > DuckDuckGo if opts.PerplexityEnabled && opts.PerplexityAPIKey != "" { client, err := createHTTPClient(opts.Proxy, perplexityTimeout) if err != nil { return nil, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err) } provider = &PerplexitySearchProvider{apiKey: opts.PerplexityAPIKey, proxy: opts.Proxy, client: client} if opts.PerplexityMaxResults > 0 { maxResults = opts.PerplexityMaxResults } } else if opts.BraveEnabled && opts.BraveAPIKey != "" { client, err := createHTTPClient(opts.Proxy, searchTimeout) if err != nil { return nil, fmt.Errorf("failed to create HTTP client for Brave: %w", err) } provider = &BraveSearchProvider{apiKey: opts.BraveAPIKey, proxy: opts.Proxy, client: client} if opts.BraveMaxResults > 0 { maxResults = opts.BraveMaxResults } } else if opts.TavilyEnabled && opts.TavilyAPIKey != "" { client, err := createHTTPClient(opts.Proxy, searchTimeout) if err != nil { return nil, fmt.Errorf("failed to create HTTP client for Tavily: %w", err) } provider = &TavilySearchProvider{ apiKey: opts.TavilyAPIKey, baseURL: opts.TavilyBaseURL, proxy: opts.Proxy, client: client, } if opts.TavilyMaxResults > 0 { maxResults = opts.TavilyMaxResults } } else if opts.DuckDuckGoEnabled { client, err := createHTTPClient(opts.Proxy, searchTimeout) if err != nil { return nil, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err) } provider = &DuckDuckGoSearchProvider{proxy: opts.Proxy, client: client} if opts.DuckDuckGoMaxResults > 0 { maxResults = opts.DuckDuckGoMaxResults } } else { return nil, nil } return &WebSearchTool{ provider: provider, maxResults: maxResults, }, nil } func (t *WebSearchTool) Name() string { return "web_search" } func (t *WebSearchTool) Description() string { return "Search the web for current information. Returns titles, URLs, and snippets from search results." } func (t *WebSearchTool) Parameters() map[string]any { return map[string]any{ "type": "object", "properties": map[string]any{ "query": map[string]any{ "type": "string", "description": "Search query", }, "count": map[string]any{ "type": "integer", "description": "Number of results (1-10)", "minimum": 1.0, "maximum": 10.0, }, }, "required": []string{"query"}, } } func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolResult { query, ok := args["query"].(string) if !ok { return ErrorResult("query is required") } count := t.maxResults if c, ok := args["count"].(float64); ok { if int(c) > 0 && int(c) <= 10 { count = int(c) } } result, err := t.provider.Search(ctx, query, count) if err != nil { return ErrorResult(fmt.Sprintf("search failed: %v", err)) } return &ToolResult{ ForLLM: result, ForUser: result, } } type WebFetchTool struct { maxChars int proxy string client *http.Client fetchLimitBytes int64 } func NewWebFetchTool(maxChars int, fetchLimitBytes int64) (*WebFetchTool, error) { // createHTTPClient cannot fail with an empty proxy string. return NewWebFetchToolWithProxy(maxChars, "", fetchLimitBytes) } func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64) (*WebFetchTool, error) { if maxChars <= 0 { maxChars = defaultMaxChars } client, err := createHTTPClient(proxy, fetchTimeout) if err != nil { return nil, fmt.Errorf("failed to create HTTP client for web fetch: %w", err) } client.CheckRedirect = func(req *http.Request, via []*http.Request) error { if len(via) >= maxRedirects { return fmt.Errorf("stopped after %d redirects", maxRedirects) } return nil } if fetchLimitBytes <= 0 { fetchLimitBytes = 10 * 1024 * 1024 // Security Fallback } return &WebFetchTool{ maxChars: maxChars, proxy: proxy, client: client, fetchLimitBytes: fetchLimitBytes, }, nil } func (t *WebFetchTool) Name() string { return "web_fetch" } func (t *WebFetchTool) Description() string { return "Fetch a URL and extract readable content (HTML to text). Use this to get weather info, news, articles, or any web content." } func (t *WebFetchTool) Parameters() map[string]any { return map[string]any{ "type": "object", "properties": map[string]any{ "url": map[string]any{ "type": "string", "description": "URL to fetch", }, "maxChars": map[string]any{ "type": "integer", "description": "Maximum characters to extract", "minimum": 100.0, }, }, "required": []string{"url"}, } } func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolResult { urlStr, ok := args["url"].(string) if !ok { return ErrorResult("url is required") } parsedURL, err := url.Parse(urlStr) if err != nil { return ErrorResult(fmt.Sprintf("invalid URL: %v", err)) } if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { return ErrorResult("only http/https URLs are allowed") } if parsedURL.Host == "" { return ErrorResult("missing domain in URL") } maxChars := t.maxChars if mc, ok := args["maxChars"].(float64); ok { if int(mc) > 100 { maxChars = int(mc) } } req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil) if err != nil { return ErrorResult(fmt.Sprintf("failed to create request: %v", err)) } req.Header.Set("User-Agent", userAgent) resp, err := t.client.Do(req) if err != nil { return ErrorResult(fmt.Sprintf("request failed: %v", err)) } resp.Body = http.MaxBytesReader(nil, resp.Body, t.fetchLimitBytes) defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { var maxBytesErr *http.MaxBytesError if errors.As(err, &maxBytesErr) { return ErrorResult(fmt.Sprintf("failed to read response: size exceeded %d bytes limit", t.fetchLimitBytes)) } return ErrorResult(fmt.Sprintf("failed to read response: %v", err)) } contentType := resp.Header.Get("Content-Type") var text, extractor string if strings.Contains(contentType, "application/json") { var jsonData any if err := json.Unmarshal(body, &jsonData); err == nil { formatted, _ := json.MarshalIndent(jsonData, "", " ") text = string(formatted) extractor = "json" } else { text = string(body) extractor = "raw" } } else if strings.Contains(contentType, "text/html") || len(body) > 0 && (strings.HasPrefix(string(body), " maxChars if truncated { text = text[:maxChars] } result := map[string]any{ "url": urlStr, "status": resp.StatusCode, "extractor": extractor, "truncated": truncated, "length": len(text), "text": text, } resultJSON, _ := json.MarshalIndent(result, "", " ") return &ToolResult{ ForLLM: string(resultJSON), ForUser: fmt.Sprintf( "Fetched %d bytes from %s (extractor: %s, truncated: %v)", len(text), urlStr, extractor, truncated, ), } } func (t *WebFetchTool) extractText(htmlContent string) string { result := reScript.ReplaceAllLiteralString(htmlContent, "") result = reStyle.ReplaceAllLiteralString(result, "") result = reTags.ReplaceAllLiteralString(result, "") result = strings.TrimSpace(result) result = reWhitespace.ReplaceAllString(result, " ") result = reBlankLines.ReplaceAllString(result, "\n\n") lines := strings.Split(result, "\n") var cleanLines []string for _, line := range lines { line = strings.TrimSpace(line) if line != "" { cleanLines = append(cleanLines, line) } } return strings.Join(cleanLines, "\n") }