mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
4b7e8d9cb9
Add Exa (https://exa.ai) as a new web search provider option, slotting into the priority chain between Perplexity and Brave. Configurable via config.json or PICOCLAW_TOOLS_WEB_EXA_* environment variables. Results are capped to the requested count for consistency with other search providers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
790 lines
21 KiB
Go
790 lines
21 KiB
Go
package tools
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// userAgent is the browser-like User-Agent header value sent by the web tools
// that set one explicitly.
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

// HTTP client timeouts for web tool providers.
const (
	// searchTimeout is used for the Brave, Tavily, DuckDuckGo and Exa clients.
	searchTimeout = 10 * time.Second
	// perplexityTimeout is longer because Perplexity is LLM-based and slower.
	perplexityTimeout = 30 * time.Second
	// fetchTimeout is used by WebFetchTool.
	fetchTimeout = 60 * time.Second
)

// Limits applied by WebFetchTool.
const (
	// defaultMaxChars is the extraction limit used when the caller gives none.
	defaultMaxChars = 50000
	// maxRedirects is the number of redirects followed before giving up.
	maxRedirects = 5
)
|
|
|
|
// Pre-compiled regexes for HTML text extraction.
var (
	// reScript and reStyle remove whole <script>/<style> elements. HTML tag
	// names are case-insensitive, so the patterns carry the (?i) flag; the
	// closing tag may contain whitespace before '>'.
	reScript = regexp.MustCompile(`(?i)<script\b[\s\S]*?</script\s*>`)
	reStyle  = regexp.MustCompile(`(?i)<style\b[\s\S]*?</style\s*>`)
	// reTags matches any remaining tag.
	reTags = regexp.MustCompile(`<[^>]+>`)
	// reWhitespace matches runs of whitespace other than newlines.
	reWhitespace = regexp.MustCompile(`[^\S\n]+`)
	// reBlankLines matches three or more consecutive newlines.
	reBlankLines = regexp.MustCompile(`\n{3,}`)

	// DuckDuckGo result extraction.
	// reDDGLink captures the href (group 1) and inner HTML (group 2) of
	// anchors whose class contains "result__a".
	reDDGLink = regexp.MustCompile(`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`)
	// reDDGSnippet captures the inner HTML of "result__snippet" anchors.
	reDDGSnippet = regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
)
|
|
|
|
// createHTTPClient builds an *http.Client with the given overall timeout.
//
// When proxyURL is empty the transport takes its proxy from the environment
// (http.ProxyFromEnvironment). Otherwise proxyURL must parse, use one of the
// http, https, socks5 or socks5h schemes (compared case-insensitively) and
// carry a host; any violation is returned as an error and no client is built.
func createHTTPClient(proxyURL string, timeout time.Duration) (*http.Client, error) {
	transport := &http.Transport{
		MaxIdleConns:        10,
		IdleConnTimeout:     30 * time.Second,
		DisableCompression:  false,
		TLSHandshakeTimeout: 15 * time.Second,
		Proxy:               http.ProxyFromEnvironment,
	}

	if proxyURL != "" {
		parsed, err := url.Parse(proxyURL)
		if err != nil {
			return nil, fmt.Errorf("invalid proxy URL: %w", err)
		}

		switch strings.ToLower(parsed.Scheme) {
		case "http", "https", "socks5", "socks5h":
			// supported
		default:
			return nil, fmt.Errorf(
				"unsupported proxy scheme %q (supported: http, https, socks5, socks5h)",
				parsed.Scheme,
			)
		}

		if parsed.Host == "" {
			return nil, fmt.Errorf("invalid proxy URL: missing host")
		}

		// An explicit proxy replaces the environment lookup.
		transport.Proxy = http.ProxyURL(parsed)
	}

	return &http.Client{Timeout: timeout, Transport: transport}, nil
}
|
|
|
|
// SearchProvider is implemented by every web search backend in this file.
type SearchProvider interface {
	// Search runs query against the backend and returns a formatted,
	// human-readable result listing. count is the number of results the
	// caller asked for.
	Search(ctx context.Context, query string, count int) (string, error)
}
|
|
|
|
// BraveSearchProvider queries the Brave web search API.
type BraveSearchProvider struct {
	apiKey string       // sent as the X-Subscription-Token header
	proxy  string       // proxy URL the client was built with (informational)
	client *http.Client // HTTP client used for requests
}

// Search asks Brave for up to count results for query and formats them as a
// numbered list of title, URL and (when present) description.
//
// A non-200 response is returned as an error that carries the status code and
// response body, so API failures (bad key, rate limit, ...) are not mistaken
// for "no results". An empty result set yields a "No results for: ..." message
// with a nil error.
func (p *BraveSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
	searchURL := fmt.Sprintf("https://api.search.brave.com/res/v1/web/search?q=%s&count=%d",
		url.QueryEscape(query), count)

	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}

	req.Header.Set("Accept", "application/json")
	req.Header.Set("X-Subscription-Token", p.apiKey)

	resp, err := p.client.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}

	// Surface API errors explicitly instead of trying to parse an error
	// payload as a result list.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("brave api error (status %d): %s", resp.StatusCode, string(body))
	}

	var searchResp struct {
		Web struct {
			Results []struct {
				Title       string `json:"title"`
				URL         string `json:"url"`
				Description string `json:"description"`
			} `json:"results"`
		} `json:"web"`
	}

	if err := json.Unmarshal(body, &searchResp); err != nil {
		return "", fmt.Errorf("failed to parse response: %w", err)
	}

	results := searchResp.Web.Results
	if len(results) == 0 {
		return fmt.Sprintf("No results for: %s", query), nil
	}

	var lines []string
	lines = append(lines, fmt.Sprintf("Results for: %s", query))
	for i, item := range results {
		// The API may return more than requested; cap to count.
		if i >= count {
			break
		}
		lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL))
		if item.Description != "" {
			lines = append(lines, fmt.Sprintf(" %s", item.Description))
		}
	}

	return strings.Join(lines, "\n"), nil
}
|
|
|
|
type TavilySearchProvider struct {
|
|
apiKey string
|
|
baseURL string
|
|
proxy string
|
|
client *http.Client
|
|
}
|
|
|
|
func (p *TavilySearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
|
|
searchURL := p.baseURL
|
|
if searchURL == "" {
|
|
searchURL = "https://api.tavily.com/search"
|
|
}
|
|
|
|
payload := map[string]any{
|
|
"api_key": p.apiKey,
|
|
"query": query,
|
|
"search_depth": "advanced",
|
|
"include_answer": false,
|
|
"include_images": false,
|
|
"include_raw_content": false,
|
|
"max_results": count,
|
|
}
|
|
|
|
bodyBytes, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to marshal payload: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", searchURL, bytes.NewBuffer(bodyBytes))
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to create request: %w", err)
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("User-Agent", userAgent)
|
|
|
|
resp, err := p.client.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return "", fmt.Errorf("tavily api error (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var searchResp struct {
|
|
Results []struct {
|
|
Title string `json:"title"`
|
|
URL string `json:"url"`
|
|
Content string `json:"content"`
|
|
} `json:"results"`
|
|
}
|
|
|
|
if err := json.Unmarshal(body, &searchResp); err != nil {
|
|
return "", fmt.Errorf("failed to parse response: %w", err)
|
|
}
|
|
|
|
results := searchResp.Results
|
|
if len(results) == 0 {
|
|
return fmt.Sprintf("No results for: %s", query), nil
|
|
}
|
|
|
|
var lines []string
|
|
lines = append(lines, fmt.Sprintf("Results for: %s (via Tavily)", query))
|
|
for i, item := range results {
|
|
if i >= count {
|
|
break
|
|
}
|
|
lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL))
|
|
if item.Content != "" {
|
|
lines = append(lines, fmt.Sprintf(" %s", item.Content))
|
|
}
|
|
}
|
|
|
|
return strings.Join(lines, "\n"), nil
|
|
}
|
|
|
|
type DuckDuckGoSearchProvider struct {
|
|
proxy string
|
|
client *http.Client
|
|
}
|
|
|
|
func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
|
|
searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query))
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to create request: %w", err)
|
|
}
|
|
|
|
req.Header.Set("User-Agent", userAgent)
|
|
|
|
resp, err := p.client.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to read response: %w", err)
|
|
}
|
|
|
|
return p.extractResults(string(body), count, query)
|
|
}
|
|
|
|
func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query string) (string, error) {
|
|
// Simple regex based extraction for DDG HTML
|
|
// Strategy: Find all result containers or key anchors directly
|
|
|
|
// Try finding the result links directly first, as they are the most critical
|
|
// Pattern: <a class="result__a" href="...">Title</a>
|
|
// The previous regex was a bit strict. Let's make it more flexible for attributes order/content
|
|
matches := reDDGLink.FindAllStringSubmatch(html, count+5)
|
|
|
|
if len(matches) == 0 {
|
|
return fmt.Sprintf("No results found or extraction failed. Query: %s", query), nil
|
|
}
|
|
|
|
var lines []string
|
|
lines = append(lines, fmt.Sprintf("Results for: %s (via DuckDuckGo)", query))
|
|
|
|
// Pre-compile snippet regex to run inside the loop
|
|
// We'll search for snippets relative to the link position or just globally if needed
|
|
// But simple global search for snippets might mismatch order.
|
|
// Since we only have the raw HTML string, let's just extract snippets globally and assume order matches (risky but simple for regex)
|
|
// Or better: Let's assume the snippet follows the link in the HTML
|
|
|
|
// A better regex approach: iterate through text and find matches in order
|
|
// But for now, let's grab all snippets too
|
|
snippetMatches := reDDGSnippet.FindAllStringSubmatch(html, count+5)
|
|
|
|
maxItems := min(len(matches), count)
|
|
|
|
for i := range maxItems {
|
|
urlStr := matches[i][1]
|
|
title := stripTags(matches[i][2])
|
|
title = strings.TrimSpace(title)
|
|
|
|
// URL decoding if needed
|
|
if strings.Contains(urlStr, "uddg=") {
|
|
if u, err := url.QueryUnescape(urlStr); err == nil {
|
|
_, after, ok := strings.Cut(u, "uddg=")
|
|
if ok {
|
|
urlStr = after
|
|
}
|
|
}
|
|
}
|
|
|
|
lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, title, urlStr))
|
|
|
|
// Attempt to attach snippet if available and index aligns
|
|
if i < len(snippetMatches) {
|
|
snippet := stripTags(snippetMatches[i][1])
|
|
snippet = strings.TrimSpace(snippet)
|
|
if snippet != "" {
|
|
lines = append(lines, fmt.Sprintf(" %s", snippet))
|
|
}
|
|
}
|
|
}
|
|
|
|
return strings.Join(lines, "\n"), nil
|
|
}
|
|
|
|
func stripTags(content string) string {
|
|
return reTags.ReplaceAllString(content, "")
|
|
}
|
|
|
|
type PerplexitySearchProvider struct {
|
|
apiKey string
|
|
proxy string
|
|
client *http.Client
|
|
}
|
|
|
|
func (p *PerplexitySearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
|
|
searchURL := "https://api.perplexity.ai/chat/completions"
|
|
|
|
payload := map[string]any{
|
|
"model": "sonar",
|
|
"messages": []map[string]string{
|
|
{
|
|
"role": "system",
|
|
"content": "You are a search assistant. Provide concise search results with titles, URLs, and brief descriptions in the following format:\n1. Title\n URL\n Description\n\nDo not add extra commentary.",
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": fmt.Sprintf("Search for: %s. Provide up to %d relevant results.", query, count),
|
|
},
|
|
},
|
|
"max_tokens": 1000,
|
|
}
|
|
|
|
payloadBytes, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to marshal request: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", searchURL, strings.NewReader(string(payloadBytes)))
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to create request: %w", err)
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Authorization", "Bearer "+p.apiKey)
|
|
req.Header.Set("User-Agent", userAgent)
|
|
|
|
resp, err := p.client.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return "", fmt.Errorf("Perplexity API error: %s", string(body))
|
|
}
|
|
|
|
var searchResp struct {
|
|
Choices []struct {
|
|
Message struct {
|
|
Content string `json:"content"`
|
|
} `json:"message"`
|
|
} `json:"choices"`
|
|
}
|
|
|
|
if err := json.Unmarshal(body, &searchResp); err != nil {
|
|
return "", fmt.Errorf("failed to parse response: %w", err)
|
|
}
|
|
|
|
if len(searchResp.Choices) == 0 {
|
|
return fmt.Sprintf("No results for: %s", query), nil
|
|
}
|
|
|
|
return fmt.Sprintf("Results for: %s (via Perplexity)\n%s", query, searchResp.Choices[0].Message.Content), nil
|
|
}
|
|
|
|
type WebSearchTool struct {
|
|
provider SearchProvider
|
|
maxResults int
|
|
}
|
|
|
|
// WebSearchToolOptions carries the per-provider settings consumed by
// NewWebSearchTool. For each provider, Enabled turns it on, APIKey (where
// present) must be non-empty for it to be selected, and a positive MaxResults
// overrides the default result count.
type WebSearchToolOptions struct {
	// Brave
	BraveAPIKey     string
	BraveMaxResults int
	BraveEnabled    bool

	// Tavily; TavilyBaseURL optionally overrides the API endpoint.
	TavilyAPIKey     string
	TavilyBaseURL    string
	TavilyMaxResults int
	TavilyEnabled    bool

	// DuckDuckGo (no API key)
	DuckDuckGoMaxResults int
	DuckDuckGoEnabled    bool

	// Perplexity
	PerplexityAPIKey     string
	PerplexityMaxResults int
	PerplexityEnabled    bool

	// Exa
	ExaAPIKey     string
	ExaMaxResults int
	ExaEnabled    bool

	// Proxy is the proxy URL handed to the selected provider's HTTP client;
	// empty means the environment's proxy settings are used.
	Proxy string
}
|
|
|
|
func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) {
|
|
var provider SearchProvider
|
|
maxResults := 5
|
|
|
|
// Priority: Perplexity > Exa > Brave > Tavily > DuckDuckGo
|
|
if opts.PerplexityEnabled && opts.PerplexityAPIKey != "" {
|
|
client, err := createHTTPClient(opts.Proxy, perplexityTimeout)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err)
|
|
}
|
|
provider = &PerplexitySearchProvider{apiKey: opts.PerplexityAPIKey, proxy: opts.Proxy, client: client}
|
|
if opts.PerplexityMaxResults > 0 {
|
|
maxResults = opts.PerplexityMaxResults
|
|
}
|
|
} else if opts.ExaEnabled && opts.ExaAPIKey != "" {
|
|
client, err := createHTTPClient(opts.Proxy, searchTimeout)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create HTTP client for Exa: %w", err)
|
|
}
|
|
provider = &ExaSearchProvider{apiKey: opts.ExaAPIKey, proxy: opts.Proxy, client: client}
|
|
if opts.ExaMaxResults > 0 {
|
|
maxResults = opts.ExaMaxResults
|
|
}
|
|
} else if opts.BraveEnabled && opts.BraveAPIKey != "" {
|
|
client, err := createHTTPClient(opts.Proxy, searchTimeout)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create HTTP client for Brave: %w", err)
|
|
}
|
|
provider = &BraveSearchProvider{apiKey: opts.BraveAPIKey, proxy: opts.Proxy, client: client}
|
|
if opts.BraveMaxResults > 0 {
|
|
maxResults = opts.BraveMaxResults
|
|
}
|
|
} else if opts.TavilyEnabled && opts.TavilyAPIKey != "" {
|
|
client, err := createHTTPClient(opts.Proxy, searchTimeout)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create HTTP client for Tavily: %w", err)
|
|
}
|
|
provider = &TavilySearchProvider{
|
|
apiKey: opts.TavilyAPIKey,
|
|
baseURL: opts.TavilyBaseURL,
|
|
proxy: opts.Proxy,
|
|
client: client,
|
|
}
|
|
if opts.TavilyMaxResults > 0 {
|
|
maxResults = opts.TavilyMaxResults
|
|
}
|
|
} else if opts.DuckDuckGoEnabled {
|
|
client, err := createHTTPClient(opts.Proxy, searchTimeout)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err)
|
|
}
|
|
provider = &DuckDuckGoSearchProvider{proxy: opts.Proxy, client: client}
|
|
if opts.DuckDuckGoMaxResults > 0 {
|
|
maxResults = opts.DuckDuckGoMaxResults
|
|
}
|
|
} else {
|
|
return nil, nil
|
|
}
|
|
|
|
return &WebSearchTool{
|
|
provider: provider,
|
|
maxResults: maxResults,
|
|
}, nil
|
|
}
|
|
|
|
func (t *WebSearchTool) Name() string {
|
|
return "web_search"
|
|
}
|
|
|
|
func (t *WebSearchTool) Description() string {
|
|
return "Search the web for current information. Returns titles, URLs, and snippets from search results."
|
|
}
|
|
|
|
func (t *WebSearchTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"query": map[string]any{
|
|
"type": "string",
|
|
"description": "Search query",
|
|
},
|
|
"count": map[string]any{
|
|
"type": "integer",
|
|
"description": "Number of results (1-10)",
|
|
"minimum": 1.0,
|
|
"maximum": 10.0,
|
|
},
|
|
},
|
|
"required": []string{"query"},
|
|
}
|
|
}
|
|
|
|
func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
|
query, ok := args["query"].(string)
|
|
if !ok {
|
|
return ErrorResult("query is required")
|
|
}
|
|
|
|
count := t.maxResults
|
|
if c, ok := args["count"].(float64); ok {
|
|
if int(c) > 0 && int(c) <= 10 {
|
|
count = int(c)
|
|
}
|
|
}
|
|
|
|
result, err := t.provider.Search(ctx, query, count)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("search failed: %v", err))
|
|
}
|
|
|
|
return &ToolResult{
|
|
ForLLM: result,
|
|
ForUser: result,
|
|
}
|
|
}
|
|
|
|
// WebFetchTool is the "web_fetch" tool: it downloads a URL and returns its
// content as text.
type WebFetchTool struct {
	// maxChars is the default cap on the length of the returned text.
	maxChars int
	// proxy is the proxy URL the client was built with (informational).
	proxy string
	// client performs the HTTP requests.
	client *http.Client
	// fetchLimitBytes caps how many response-body bytes are read.
	fetchLimitBytes int64
}
|
|
|
|
func NewWebFetchTool(maxChars int, fetchLimitBytes int64) (*WebFetchTool, error) {
|
|
// createHTTPClient cannot fail with an empty proxy string.
|
|
return NewWebFetchToolWithProxy(maxChars, "", fetchLimitBytes)
|
|
}
|
|
|
|
func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64) (*WebFetchTool, error) {
|
|
if maxChars <= 0 {
|
|
maxChars = defaultMaxChars
|
|
}
|
|
client, err := createHTTPClient(proxy, fetchTimeout)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create HTTP client for web fetch: %w", err)
|
|
}
|
|
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= maxRedirects {
|
|
return fmt.Errorf("stopped after %d redirects", maxRedirects)
|
|
}
|
|
return nil
|
|
}
|
|
if fetchLimitBytes <= 0 {
|
|
fetchLimitBytes = 10 * 1024 * 1024 // Security Fallback
|
|
}
|
|
return &WebFetchTool{
|
|
maxChars: maxChars,
|
|
proxy: proxy,
|
|
client: client,
|
|
fetchLimitBytes: fetchLimitBytes,
|
|
}, nil
|
|
}
|
|
|
|
func (t *WebFetchTool) Name() string {
|
|
return "web_fetch"
|
|
}
|
|
|
|
func (t *WebFetchTool) Description() string {
|
|
return "Fetch a URL and extract readable content (HTML to text). Use this to get weather info, news, articles, or any web content."
|
|
}
|
|
|
|
func (t *WebFetchTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"url": map[string]any{
|
|
"type": "string",
|
|
"description": "URL to fetch",
|
|
},
|
|
"maxChars": map[string]any{
|
|
"type": "integer",
|
|
"description": "Maximum characters to extract",
|
|
"minimum": 100.0,
|
|
},
|
|
},
|
|
"required": []string{"url"},
|
|
}
|
|
}
|
|
|
|
func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
|
|
urlStr, ok := args["url"].(string)
|
|
if !ok {
|
|
return ErrorResult("url is required")
|
|
}
|
|
|
|
parsedURL, err := url.Parse(urlStr)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("invalid URL: %v", err))
|
|
}
|
|
|
|
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
|
|
return ErrorResult("only http/https URLs are allowed")
|
|
}
|
|
|
|
if parsedURL.Host == "" {
|
|
return ErrorResult("missing domain in URL")
|
|
}
|
|
|
|
maxChars := t.maxChars
|
|
if mc, ok := args["maxChars"].(float64); ok {
|
|
if int(mc) > 100 {
|
|
maxChars = int(mc)
|
|
}
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("failed to create request: %v", err))
|
|
}
|
|
|
|
req.Header.Set("User-Agent", userAgent)
|
|
|
|
resp, err := t.client.Do(req)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("request failed: %v", err))
|
|
}
|
|
|
|
resp.Body = http.MaxBytesReader(nil, resp.Body, t.fetchLimitBytes)
|
|
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
var maxBytesErr *http.MaxBytesError
|
|
if errors.As(err, &maxBytesErr) {
|
|
return ErrorResult(fmt.Sprintf("failed to read response: size exceeded %d bytes limit", t.fetchLimitBytes))
|
|
}
|
|
return ErrorResult(fmt.Sprintf("failed to read response: %v", err))
|
|
}
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
|
|
var text, extractor string
|
|
|
|
if strings.Contains(contentType, "application/json") {
|
|
var jsonData any
|
|
if err := json.Unmarshal(body, &jsonData); err == nil {
|
|
formatted, _ := json.MarshalIndent(jsonData, "", " ")
|
|
text = string(formatted)
|
|
extractor = "json"
|
|
} else {
|
|
text = string(body)
|
|
extractor = "raw"
|
|
}
|
|
} else if strings.Contains(contentType, "text/html") || len(body) > 0 &&
|
|
(strings.HasPrefix(string(body), "<!DOCTYPE") || strings.HasPrefix(strings.ToLower(string(body)), "<html")) {
|
|
text = t.extractText(string(body))
|
|
extractor = "text"
|
|
} else {
|
|
text = string(body)
|
|
extractor = "raw"
|
|
}
|
|
|
|
truncated := len(text) > maxChars
|
|
if truncated {
|
|
text = text[:maxChars]
|
|
}
|
|
|
|
result := map[string]any{
|
|
"url": urlStr,
|
|
"status": resp.StatusCode,
|
|
"extractor": extractor,
|
|
"truncated": truncated,
|
|
"length": len(text),
|
|
"text": text,
|
|
}
|
|
|
|
resultJSON, _ := json.MarshalIndent(result, "", " ")
|
|
|
|
return &ToolResult{
|
|
ForLLM: string(resultJSON),
|
|
ForUser: fmt.Sprintf(
|
|
"Fetched %d bytes from %s (extractor: %s, truncated: %v)",
|
|
len(text),
|
|
urlStr,
|
|
extractor,
|
|
truncated,
|
|
),
|
|
}
|
|
}
|
|
|
|
func (t *WebFetchTool) extractText(htmlContent string) string {
|
|
result := reScript.ReplaceAllLiteralString(htmlContent, "")
|
|
result = reStyle.ReplaceAllLiteralString(result, "")
|
|
result = reTags.ReplaceAllLiteralString(result, "")
|
|
|
|
result = strings.TrimSpace(result)
|
|
|
|
result = reWhitespace.ReplaceAllString(result, " ")
|
|
result = reBlankLines.ReplaceAllString(result, "\n\n")
|
|
|
|
lines := strings.Split(result, "\n")
|
|
var cleanLines []string
|
|
for _, line := range lines {
|
|
line = strings.TrimSpace(line)
|
|
if line != "" {
|
|
cleanLines = append(cleanLines, line)
|
|
}
|
|
}
|
|
|
|
return strings.Join(cleanLines, "\n")
|
|
}
|
|
|
|
// ExaSearchProvider uses the Exa AI search API (https://exa.ai).
type ExaSearchProvider struct {
	apiKey string       // sent as the x-api-key header
	proxy  string       // proxy URL the client was built with (informational)
	client *http.Client // HTTP client used for requests
}

// Search posts a neural search for query to Exa and formats at most count
// results as a numbered list of title, URL and, when the response carries
// text, a snippet of at most 200 bytes cut on a UTF-8 rune boundary.
//
// Non-200 responses are returned as errors carrying the status and body. An
// empty result set yields a "No results for: ..." message with a nil error,
// matching the other providers.
func (p *ExaSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
	// The Exa REST API names the result-count field "numResults".
	// NOTE(review): result text is presumably only returned when the request
	// also asks for contents; confirm against the Exa API reference before
	// enabling it, since it may affect billing.
	reqBody := map[string]any{
		"query":      query,
		"numResults": count,
		"type":       "neural",
	}
	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return "", fmt.Errorf("exa: marshal error: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, "POST", "https://api.exa.ai/search", bytes.NewReader(jsonData))
	if err != nil {
		return "", fmt.Errorf("exa: request error: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("x-api-key", p.apiKey)

	resp, err := p.client.Do(req)
	if err != nil {
		return "", fmt.Errorf("exa: search failed: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("exa: read error: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("exa: API error %d: %s", resp.StatusCode, string(body))
	}

	var result struct {
		Results []struct {
			Title string `json:"title"`
			URL   string `json:"url"`
			Text  string `json:"text"`
		} `json:"results"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return "", fmt.Errorf("exa: parse error: %w", err)
	}

	if len(result.Results) == 0 {
		return fmt.Sprintf("No results for: %s", query), nil
	}

	// Cap to the requested count for consistency with the other providers.
	limit := len(result.Results)
	if count < limit {
		limit = count
	}

	var sb strings.Builder
	for i := 0; i < limit; i++ {
		r := result.Results[i]
		fmt.Fprintf(&sb, "%d. %s\n URL: %s\n", i+1, r.Title, r.URL)
		if r.Text != "" {
			snippet := r.Text
			if len(snippet) > 200 {
				cut := 200
				// Back off over UTF-8 continuation bytes (10xxxxxx) so the
				// snippet never ends in a partial character.
				for cut > 0 && snippet[cut]&0xC0 == 0x80 {
					cut--
				}
				snippet = snippet[:cut] + "..."
			}
			fmt.Fprintf(&sb, " %s\n", snippet)
		}
		sb.WriteString("\n")
	}

	return sb.String(), nil
}
|