mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
Merge pull request #2524 from SiYue-ZO/feature/sogou-web-search-default
Add configurable Sogou-backed web search
This commit is contained in:
@@ -194,6 +194,7 @@ func registerSharedTools(
|
||||
|
||||
if cfg.Tools.IsToolEnabled("web") {
|
||||
searchTool, err := tools.NewWebSearchTool(tools.WebSearchToolOptions{
|
||||
Provider: cfg.Tools.Web.Provider,
|
||||
BraveAPIKeys: cfg.Tools.Web.Brave.APIKeys.Values(),
|
||||
BraveMaxResults: cfg.Tools.Web.Brave.MaxResults,
|
||||
BraveEnabled: cfg.Tools.Web.Brave.Enabled,
|
||||
@@ -201,6 +202,8 @@ func registerSharedTools(
|
||||
TavilyBaseURL: cfg.Tools.Web.Tavily.BaseURL,
|
||||
TavilyMaxResults: cfg.Tools.Web.Tavily.MaxResults,
|
||||
TavilyEnabled: cfg.Tools.Web.Tavily.Enabled,
|
||||
SogouMaxResults: cfg.Tools.Web.Sogou.MaxResults,
|
||||
SogouEnabled: cfg.Tools.Web.Sogou.Enabled,
|
||||
DuckDuckGoMaxResults: cfg.Tools.Web.DuckDuckGo.MaxResults,
|
||||
DuckDuckGoEnabled: cfg.Tools.Web.DuckDuckGo.Enabled,
|
||||
PerplexityAPIKeys: cfg.Tools.Web.Perplexity.APIKeys.Values(),
|
||||
|
||||
@@ -664,6 +664,11 @@ type DuckDuckGoConfig struct {
|
||||
MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_DUCKDUCKGO_MAX_RESULTS"`
|
||||
}
|
||||
|
||||
// SogouConfig holds the settings of the Sogou web search provider.
type SogouConfig struct {
	// Enabled switches the provider on or off; the env tag names the
	// PICOCLAW_TOOLS_WEB_SOGOU_ENABLED variable for it.
	Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_SOGOU_ENABLED"`
	// MaxResults is the configured number of results per search; the env tag
	// names the PICOCLAW_TOOLS_WEB_SOGOU_MAX_RESULTS variable for it.
	MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_SOGOU_MAX_RESULTS"`
}
|
||||
|
||||
type PerplexityConfig struct {
|
||||
Enabled bool `json:"enabled" yaml:"-" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_ENABLED"`
|
||||
APIKeys SecureStrings `json:"api_keys,omitzero" yaml:"api_keys,omitempty" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_API_KEYS"`
|
||||
@@ -710,11 +715,13 @@ type WebToolsConfig struct {
|
||||
ToolConfig ` yaml:"-" envPrefix:"PICOCLAW_TOOLS_WEB_"`
|
||||
Brave BraveConfig `yaml:"brave,omitempty" json:"brave"`
|
||||
Tavily TavilyConfig `yaml:"tavily,omitempty" json:"tavily"`
|
||||
Sogou SogouConfig `yaml:"-" json:"sogou"`
|
||||
DuckDuckGo DuckDuckGoConfig `yaml:"-" json:"duckduckgo"`
|
||||
Perplexity PerplexityConfig `yaml:"perplexity,omitempty" json:"perplexity"`
|
||||
SearXNG SearXNGConfig `yaml:"-" json:"searxng"`
|
||||
GLMSearch GLMSearchConfig `yaml:"glm_search,omitempty" json:"glm_search"`
|
||||
BaiduSearch BaiduSearchConfig `yaml:"baidu_search,omitempty" json:"baidu_search"`
|
||||
Provider string `yaml:"-" json:"provider,omitempty" env:"PICOCLAW_TOOLS_WEB_PROVIDER"`
|
||||
// PreferNative controls whether to use provider-native web search when
|
||||
// the active LLM supports it (e.g. OpenAI web_search_preview). When true,
|
||||
// the client-side web_search tool is hidden to avoid duplicate search surfaces,
|
||||
|
||||
@@ -760,6 +760,28 @@ func TestDefaultConfig_WebPreferNativeEnabled(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_WebProviderIsAuto(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
if cfg.Tools.Web.Provider != "auto" {
|
||||
t.Fatalf("DefaultConfig().Tools.Web.Provider = %q, want auto", cfg.Tools.Web.Provider)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigExample_WebProviderIsAuto(t *testing.T) {
|
||||
data, err := os.ReadFile(filepath.Join("..", "..", "config", "config.example.json"))
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile(config.example.json) error: %v", err)
|
||||
}
|
||||
|
||||
var cfg Config
|
||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||
t.Fatalf("Unmarshal(config.example.json) error: %v", err)
|
||||
}
|
||||
if cfg.Tools.Web.Provider != "auto" {
|
||||
t.Fatalf("config.example.json tools.web.provider = %q, want auto", cfg.Tools.Web.Provider)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_ToolFeedbackDisabled(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
if cfg.Agents.Defaults.ToolFeedback.Enabled {
|
||||
|
||||
@@ -278,6 +278,7 @@ func DefaultConfig() *Config {
|
||||
ToolConfig: ToolConfig{
|
||||
Enabled: true,
|
||||
},
|
||||
Provider: "auto",
|
||||
PreferNative: true,
|
||||
Proxy: "",
|
||||
FetchLimitBytes: 10 * 1024 * 1024, // 10MB by default
|
||||
@@ -290,10 +291,14 @@ func DefaultConfig() *Config {
|
||||
Enabled: false,
|
||||
MaxResults: 5,
|
||||
},
|
||||
DuckDuckGo: DuckDuckGoConfig{
|
||||
Sogou: SogouConfig{
|
||||
Enabled: true,
|
||||
MaxResults: 5,
|
||||
},
|
||||
DuckDuckGo: DuckDuckGoConfig{
|
||||
Enabled: false,
|
||||
MaxResults: 5,
|
||||
},
|
||||
Perplexity: PerplexityConfig{
|
||||
Enabled: false,
|
||||
MaxResults: 5,
|
||||
|
||||
+378
-46
@@ -15,6 +15,7 @@ import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/config"
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
@@ -23,6 +24,7 @@ import (
|
||||
|
||||
const (
|
||||
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
sogouUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1"
|
||||
userAgentHonest = "picoclaw/%s (+https://github.com/sipeed/picoclaw; AI assistant bot)"
|
||||
|
||||
// HTTP client timeouts for web tool providers.
|
||||
@@ -46,9 +48,18 @@ var (
|
||||
reDDGLink = regexp.MustCompile(
|
||||
`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`,
|
||||
)
|
||||
reDDGSnippet = regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
|
||||
reDDGSnippet = regexp.MustCompile(
|
||||
`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`,
|
||||
)
|
||||
reSogouTitle = regexp.MustCompile(
|
||||
`<a\s+class=resultLink\s+href="([^"]+)"[^>]*id="sogou_vr_\d+_\d+"[^>]*>\s*(.*?)\s*</a>`,
|
||||
)
|
||||
reSogouSnippet = regexp.MustCompile(`<div class="clamp\d*">\s*(.*?)\s*</div>`)
|
||||
reSogouRealURL = regexp.MustCompile(`url=([^&]+)`)
|
||||
)
|
||||
|
||||
var preferredWebSearchLanguage atomic.Value
|
||||
|
||||
type APIKeyPool struct {
|
||||
keys []string
|
||||
current uint32
|
||||
@@ -91,6 +102,39 @@ type SearchProvider interface {
|
||||
Search(ctx context.Context, query string, count int, rangeCode string) (string, error)
|
||||
}
|
||||
|
||||
// SearchResultItem is one parsed search hit.
type SearchResultItem struct {
	// Title is the result's display title.
	Title string
	// URL is the address the result points to.
	URL string
	// Snippet is the optional summary text; empty when none was found.
	Snippet string
}
|
||||
|
||||
func extractSogouURL(href string) string {
|
||||
match := reSogouRealURL.FindStringSubmatch(href)
|
||||
if len(match) < 2 {
|
||||
return ""
|
||||
}
|
||||
decoded, err := url.QueryUnescape(match[1])
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return decoded
|
||||
}
|
||||
|
||||
// applySogouRangeHint appends a Chinese recency phrase to query for the range
// codes "d", "w", "m" and "y" (last day, week, month, year). Any other code,
// including the empty string, leaves query unchanged.
func applySogouRangeHint(query string, rangeCode string) string {
	suffixes := map[string]string{
		"d": " 最近一天",
		"w": " 最近一周",
		"m": " 最近一个月",
		"y": " 最近一年",
	}
	if suffix, known := suffixes[rangeCode]; known {
		return query + suffix
	}
	return query
}
|
||||
|
||||
func normalizeSearchRange(raw string) (string, error) {
|
||||
rangeCode := strings.ToLower(strings.TrimSpace(raw))
|
||||
switch rangeCode {
|
||||
@@ -206,6 +250,27 @@ func mapBaiduRecencyFilter(rangeCode string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// normalizePreferredWebSearchLanguage reduces a language name or tag to "zh"
// or "en". Input is trimmed and lower-cased first; values starting with "zh"
// or equal to "chinese" map to "zh", values starting with "en" or equal to
// "english" map to "en", and everything else maps to "".
func normalizePreferredWebSearchLanguage(lang string) string {
	normalized := strings.ToLower(strings.TrimSpace(lang))
	if normalized == "chinese" || strings.HasPrefix(normalized, "zh") {
		return "zh"
	}
	if normalized == "english" || strings.HasPrefix(normalized, "en") {
		return "en"
	}
	return ""
}
|
||||
|
||||
func SetPreferredWebSearchLanguage(lang string) {
|
||||
preferredWebSearchLanguage.Store(normalizePreferredWebSearchLanguage(lang))
|
||||
}
|
||||
|
||||
func GetPreferredWebSearchLanguage() string {
|
||||
lang, _ := preferredWebSearchLanguage.Load().(string)
|
||||
return lang
|
||||
}
|
||||
|
||||
type BraveSearchProvider struct {
|
||||
keyPool *APIKeyPool
|
||||
proxy string
|
||||
@@ -425,6 +490,104 @@ func (p *TavilySearchProvider) Search(
|
||||
return "", fmt.Errorf("all api keys failed, last error: %w", lastErr)
|
||||
}
|
||||
|
||||
// SogouSearchProvider is the SearchProvider backed by Sogou's mobile result
// pages.
type SogouSearchProvider struct {
	// proxy is the proxy setting the provider was configured with.
	proxy string
	// client performs the HTTP requests issued by Search.
	client *http.Client
}
|
||||
|
||||
func (p *SogouSearchProvider) Search(
|
||||
ctx context.Context,
|
||||
query string,
|
||||
count int,
|
||||
rangeCode string,
|
||||
) (string, error) {
|
||||
const sogouWAPURL = "https://wap.sogou.com/web/searchList.jsp"
|
||||
|
||||
results := make([]SearchResultItem, 0, count)
|
||||
seenURLs := make(map[string]bool)
|
||||
maxPages := min(3, (count+1)/2+1)
|
||||
|
||||
for page := 1; page <= maxPages && len(results) < count; page++ {
|
||||
params := url.Values{}
|
||||
params.Set("keyword", applySogouRangeHint(query, rangeCode))
|
||||
params.Set("v", "5")
|
||||
params.Set("p", fmt.Sprintf("%d", page))
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, sogouWAPURL+"?"+params.Encode(), nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", sogouUserAgent)
|
||||
|
||||
resp, err := p.client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("Sogou returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
html := string(body)
|
||||
if len(html) < 200 {
|
||||
break
|
||||
}
|
||||
|
||||
matches := reSogouTitle.FindAllStringSubmatch(html, -1)
|
||||
for _, match := range matches {
|
||||
if len(match) < 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
title := stripTags(match[2])
|
||||
link := extractSogouURL(match[1])
|
||||
if title == "" || link == "" || seenURLs[link] {
|
||||
continue
|
||||
}
|
||||
seenURLs[link] = true
|
||||
|
||||
start := strings.Index(html, match[0])
|
||||
snippet := ""
|
||||
if start >= 0 {
|
||||
after := html[start+len(match[0]):]
|
||||
if len(after) > 2000 {
|
||||
after = after[:2000]
|
||||
}
|
||||
if snippetMatch := reSogouSnippet.FindStringSubmatch(after); len(snippetMatch) > 1 {
|
||||
snippet = stripTags(snippetMatch[1])
|
||||
}
|
||||
}
|
||||
|
||||
results = append(results, SearchResultItem{
|
||||
Title: title,
|
||||
URL: link,
|
||||
Snippet: snippet,
|
||||
})
|
||||
if len(results) >= count {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(results) == 0 {
|
||||
return fmt.Sprintf("No results for: %s", query), nil
|
||||
}
|
||||
|
||||
lines := []string{fmt.Sprintf("Results for: %s (via Sogou)", query)}
|
||||
for i, item := range results {
|
||||
lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL))
|
||||
if item.Snippet != "" {
|
||||
lines = append(lines, fmt.Sprintf(" %s", item.Snippet))
|
||||
}
|
||||
}
|
||||
return strings.Join(lines, "\n"), nil
|
||||
}
|
||||
|
||||
type DuckDuckGoSearchProvider struct {
|
||||
proxy string
|
||||
client *http.Client
|
||||
@@ -909,11 +1072,13 @@ func (p *BaiduSearchProvider) Search(
|
||||
}
|
||||
|
||||
type WebSearchTool struct {
|
||||
provider SearchProvider
|
||||
maxResults int
|
||||
provider SearchProvider
|
||||
maxResults int
|
||||
providerResolver func(query string) (SearchProvider, int)
|
||||
}
|
||||
|
||||
type WebSearchToolOptions struct {
|
||||
Provider string
|
||||
BraveAPIKeys []string
|
||||
BraveMaxResults int
|
||||
BraveEnabled bool
|
||||
@@ -921,6 +1086,8 @@ type WebSearchToolOptions struct {
|
||||
TavilyBaseURL string
|
||||
TavilyMaxResults int
|
||||
TavilyEnabled bool
|
||||
SogouMaxResults int
|
||||
SogouEnabled bool
|
||||
DuckDuckGoMaxResults int
|
||||
DuckDuckGoEnabled bool
|
||||
PerplexityAPIKeys []string
|
||||
@@ -941,100 +1108,256 @@ type WebSearchToolOptions struct {
|
||||
Proxy string
|
||||
}
|
||||
|
||||
func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) {
|
||||
var provider SearchProvider
|
||||
maxResults := 10
|
||||
// Priority: Perplexity > Brave > SearXNG > Tavily > DuckDuckGo > Baidu Search > GLM Search
|
||||
if opts.PerplexityEnabled {
|
||||
func (opts WebSearchToolOptions) providerByName(name string) (SearchProvider, int, error) {
|
||||
switch strings.ToLower(strings.TrimSpace(name)) {
|
||||
case "", "auto":
|
||||
return nil, 0, nil
|
||||
case "sogou":
|
||||
if !opts.SogouEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for Sogou: %w", err)
|
||||
}
|
||||
maxResults := 10
|
||||
if opts.SogouMaxResults > 0 {
|
||||
maxResults = min(opts.SogouMaxResults, 10)
|
||||
}
|
||||
return &SogouSearchProvider{
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
case "perplexity":
|
||||
if !opts.PerplexityEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, perplexityTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err)
|
||||
}
|
||||
provider = &PerplexitySearchProvider{
|
||||
keyPool: NewAPIKeyPool(opts.PerplexityAPIKeys),
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err)
|
||||
}
|
||||
maxResults := 10
|
||||
if opts.PerplexityMaxResults > 0 {
|
||||
maxResults = min(opts.PerplexityMaxResults, 10)
|
||||
}
|
||||
} else if opts.BraveEnabled {
|
||||
return &PerplexitySearchProvider{
|
||||
keyPool: NewAPIKeyPool(opts.PerplexityAPIKeys),
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
case "brave":
|
||||
if !opts.BraveEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP client for Brave: %w", err)
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for Brave: %w", err)
|
||||
}
|
||||
provider = &BraveSearchProvider{keyPool: NewAPIKeyPool(opts.BraveAPIKeys), proxy: opts.Proxy, client: client}
|
||||
maxResults := 10
|
||||
if opts.BraveMaxResults > 0 {
|
||||
maxResults = min(opts.BraveMaxResults, 10)
|
||||
}
|
||||
} else if opts.SearXNGEnabled {
|
||||
provider = &SearXNGSearchProvider{baseURL: opts.SearXNGBaseURL}
|
||||
return &BraveSearchProvider{
|
||||
keyPool: NewAPIKeyPool(opts.BraveAPIKeys),
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
case "searxng":
|
||||
if !opts.SearXNGEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
maxResults := 10
|
||||
if opts.SearXNGMaxResults > 0 {
|
||||
maxResults = min(opts.SearXNGMaxResults, 10)
|
||||
}
|
||||
} else if opts.TavilyEnabled {
|
||||
return &SearXNGSearchProvider{
|
||||
baseURL: opts.SearXNGBaseURL,
|
||||
}, maxResults, nil
|
||||
case "tavily":
|
||||
if !opts.TavilyEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP client for Tavily: %w", err)
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for Tavily: %w", err)
|
||||
}
|
||||
provider = &TavilySearchProvider{
|
||||
maxResults := 10
|
||||
if opts.TavilyMaxResults > 0 {
|
||||
maxResults = min(opts.TavilyMaxResults, 10)
|
||||
}
|
||||
return &TavilySearchProvider{
|
||||
keyPool: NewAPIKeyPool(opts.TavilyAPIKeys),
|
||||
baseURL: opts.TavilyBaseURL,
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
case "duckduckgo":
|
||||
if !opts.DuckDuckGoEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
if opts.TavilyMaxResults > 0 {
|
||||
maxResults = min(opts.TavilyMaxResults, 10)
|
||||
}
|
||||
} else if opts.DuckDuckGoEnabled {
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err)
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err)
|
||||
}
|
||||
provider = &DuckDuckGoSearchProvider{proxy: opts.Proxy, client: client}
|
||||
maxResults := 10
|
||||
if opts.DuckDuckGoMaxResults > 0 {
|
||||
maxResults = min(opts.DuckDuckGoMaxResults, 10)
|
||||
}
|
||||
} else if opts.BaiduSearchEnabled {
|
||||
return &DuckDuckGoSearchProvider{
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
case "baidu_search":
|
||||
if !opts.BaiduSearchEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, perplexityTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP client for Baidu Search: %w", err)
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for Baidu Search: %w", err)
|
||||
}
|
||||
provider = &BaiduSearchProvider{
|
||||
maxResults := 10
|
||||
if opts.BaiduSearchMaxResults > 0 {
|
||||
maxResults = min(opts.BaiduSearchMaxResults, 10)
|
||||
}
|
||||
return &BaiduSearchProvider{
|
||||
apiKey: opts.BaiduSearchAPIKey,
|
||||
baseURL: opts.BaiduSearchBaseURL,
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
case "glm_search":
|
||||
if !opts.GLMSearchEnabled {
|
||||
return nil, 0, nil
|
||||
}
|
||||
if opts.BaiduSearchMaxResults > 0 {
|
||||
maxResults = min(opts.BaiduSearchMaxResults, 10)
|
||||
}
|
||||
} else if opts.GLMSearchEnabled {
|
||||
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP client for GLM Search: %w", err)
|
||||
return nil, 0, fmt.Errorf("failed to create HTTP client for GLM Search: %w", err)
|
||||
}
|
||||
searchEngine := opts.GLMSearchEngine
|
||||
if searchEngine == "" {
|
||||
searchEngine = "search_std"
|
||||
}
|
||||
provider = &GLMSearchProvider{
|
||||
maxResults := 10
|
||||
if opts.GLMSearchMaxResults > 0 {
|
||||
maxResults = min(opts.GLMSearchMaxResults, 10)
|
||||
}
|
||||
return &GLMSearchProvider{
|
||||
apiKey: opts.GLMSearchAPIKey,
|
||||
baseURL: opts.GLMSearchBaseURL,
|
||||
searchEngine: searchEngine,
|
||||
proxy: opts.Proxy,
|
||||
client: client,
|
||||
}, maxResults, nil
|
||||
default:
|
||||
return nil, 0, fmt.Errorf("unknown web search provider %q", name)
|
||||
}
|
||||
}
|
||||
|
||||
// containsHan reports whether text contains at least one rune of the Han
// script.
func containsHan(text string) bool {
	return strings.ContainsFunc(text, func(r rune) bool {
		return unicode.Is(unicode.Han, r)
	})
}
|
||||
|
||||
// containsLatinLetter reports whether text contains at least one rune that is
// both a letter and part of the Latin script.
func containsLatinLetter(text string) bool {
	return strings.ContainsFunc(text, func(r rune) bool {
		return unicode.IsLetter(r) && unicode.In(r, unicode.Latin)
	})
}
|
||||
|
||||
func prefersDuckDuckGoQuery(text string) bool {
|
||||
trimmed := strings.TrimSpace(text)
|
||||
if trimmed == "" {
|
||||
return GetPreferredWebSearchLanguage() == "en"
|
||||
}
|
||||
if containsHan(trimmed) {
|
||||
return false
|
||||
}
|
||||
if containsLatinLetter(trimmed) {
|
||||
return true
|
||||
}
|
||||
return GetPreferredWebSearchLanguage() == "en"
|
||||
}
|
||||
|
||||
func (opts WebSearchToolOptions) buildProviderResolver() (func(query string) (SearchProvider, int), error) {
|
||||
providerName := strings.ToLower(strings.TrimSpace(opts.Provider))
|
||||
if providerName != "" && providerName != "auto" {
|
||||
provider, maxResults, err := opts.providerByName(providerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if provider == nil {
|
||||
return func(string) (SearchProvider, int) { return nil, 0 }, nil
|
||||
}
|
||||
return func(string) (SearchProvider, int) { return provider, maxResults }, nil
|
||||
}
|
||||
|
||||
for _, name := range []string{"perplexity", "brave", "searxng", "tavily"} {
|
||||
provider, maxResults, err := opts.providerByName(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if provider != nil {
|
||||
return func(string) (SearchProvider, int) { return provider, maxResults }, nil
|
||||
}
|
||||
}
|
||||
|
||||
sogouProvider, sogouMaxResults, err := opts.providerByName("sogou")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
duckProvider, duckMaxResults, err := opts.providerByName("duckduckgo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if sogouProvider != nil && duckProvider != nil {
|
||||
return func(query string) (SearchProvider, int) {
|
||||
if prefersDuckDuckGoQuery(query) {
|
||||
return duckProvider, duckMaxResults
|
||||
}
|
||||
return sogouProvider, sogouMaxResults
|
||||
}, nil
|
||||
}
|
||||
if sogouProvider != nil {
|
||||
return func(string) (SearchProvider, int) { return sogouProvider, sogouMaxResults }, nil
|
||||
}
|
||||
if duckProvider != nil {
|
||||
return func(string) (SearchProvider, int) { return duckProvider, duckMaxResults }, nil
|
||||
}
|
||||
|
||||
for _, name := range []string{"baidu_search", "glm_search"} {
|
||||
provider, maxResults, err := opts.providerByName(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if provider != nil {
|
||||
return func(string) (SearchProvider, int) { return provider, maxResults }, nil
|
||||
}
|
||||
}
|
||||
|
||||
return func(string) (SearchProvider, int) { return nil, 0 }, nil
|
||||
}
|
||||
|
||||
func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) {
|
||||
resolver, err := opts.buildProviderResolver()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
provider, maxResults := resolver("")
|
||||
if provider == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return &WebSearchTool{
|
||||
provider: provider,
|
||||
maxResults: maxResults,
|
||||
provider: provider,
|
||||
maxResults: maxResults,
|
||||
providerResolver: resolver,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -1077,13 +1400,22 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolR
|
||||
}
|
||||
query = strings.TrimSpace(query)
|
||||
|
||||
count64, err := getInt64Arg(args, "count", int64(t.maxResults))
|
||||
provider := t.provider
|
||||
maxResults := t.maxResults
|
||||
if t.providerResolver != nil {
|
||||
provider, maxResults = t.providerResolver(query)
|
||||
}
|
||||
if provider == nil {
|
||||
return ErrorResult("search provider is not configured")
|
||||
}
|
||||
|
||||
count64, err := getInt64Arg(args, "count", int64(maxResults))
|
||||
if err != nil {
|
||||
return ErrorResult(err.Error())
|
||||
}
|
||||
count := t.maxResults
|
||||
count := maxResults
|
||||
if count64 > 0 && count64 <= 10 {
|
||||
count = int(count64)
|
||||
count = min(int(count64), maxResults)
|
||||
}
|
||||
|
||||
rangeCode, err := normalizeSearchRange("")
|
||||
@@ -1101,7 +1433,7 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolR
|
||||
}
|
||||
}
|
||||
|
||||
result, err := t.provider.Search(ctx, query, count, rangeCode)
|
||||
result, err := provider.Search(ctx, query, count, rangeCode)
|
||||
if err != nil {
|
||||
return ErrorResult(fmt.Sprintf("search failed: %v", err))
|
||||
}
|
||||
|
||||
+204
-5
@@ -385,19 +385,24 @@ func TestWebFetchTool_PayloadTooLarge(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestWebTool_WebSearch_NoApiKey verifies that no tool is created when API key is missing
|
||||
// TestWebTool_WebSearch_NoApiKey verifies missing credentials are surfaced at execution time.
|
||||
func TestWebTool_WebSearch_NoApiKey(t *testing.T) {
|
||||
tool, err := NewWebSearchTool(WebSearchToolOptions{BraveEnabled: true, BraveAPIKeys: nil})
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error: %v", err)
|
||||
}
|
||||
if tool == nil {
|
||||
t.Fatalf("Expected tool to be created")
|
||||
t.Fatalf("Expected tool when Brave is enabled, even without API keys")
|
||||
}
|
||||
ctx := context.Background()
|
||||
result := tool.Execute(ctx, map[string]any{"query": "test"})
|
||||
|
||||
result := tool.Execute(context.Background(), map[string]any{
|
||||
"query": "test query",
|
||||
})
|
||||
if !result.IsError {
|
||||
t.Errorf("Expected error when API key is missing")
|
||||
t.Fatalf("Expected missing Brave API key to return error")
|
||||
}
|
||||
if !strings.Contains(result.ForLLM, "no API key provided") {
|
||||
t.Fatalf("Unexpected error message: %s", result.ForLLM)
|
||||
}
|
||||
|
||||
// Also nil when nothing is enabled
|
||||
@@ -1672,3 +1677,197 @@ func TestWebTool_GLMSearch_Priority(t *testing.T) {
|
||||
t.Errorf("Expected GLMSearchProvider when only GLM enabled, got %T", tool2.provider)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebTool_SogouSearch_Success(t *testing.T) {
|
||||
provider := &SogouSearchProvider{
|
||||
client: &http.Client{
|
||||
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
rec := httptest.NewRecorder()
|
||||
fmt.Fprint(rec, `<html><body>
|
||||
<a class=resultLink href="/link?url=https%3A%2F%2Fexample.com%2Fa" id="sogou_vr_0_0">Result A</a>
|
||||
<div class="clamp3">Snippet A</div>
|
||||
<a class=resultLink href="/link?url=https%3A%2F%2Fexample.com%2Fb" id="sogou_vr_0_1">Result B</a>
|
||||
<div class="clamp3">Snippet B</div>
|
||||
</body></html>`)
|
||||
return rec.Result(), nil
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
out, err := provider.Search(context.Background(), "test query", 2, "")
|
||||
if err != nil {
|
||||
t.Fatalf("Search() error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, "via Sogou") || !strings.Contains(out, "https://example.com/a") {
|
||||
t.Fatalf("unexpected output: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplySogouRangeHint(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
rangeCode string
|
||||
want string
|
||||
}{
|
||||
{name: "empty range", query: "golang", rangeCode: "", want: "golang"},
|
||||
{name: "day", query: "golang", rangeCode: "d", want: "golang 最近一天"},
|
||||
{name: "week", query: "golang", rangeCode: "w", want: "golang 最近一周"},
|
||||
{name: "month", query: "golang", rangeCode: "m", want: "golang 最近一个月"},
|
||||
{name: "year", query: "golang", rangeCode: "y", want: "golang 最近一年"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := applySogouRangeHint(tt.query, tt.rangeCode); got != tt.want {
|
||||
t.Fatalf("applySogouRangeHint(%q, %q) = %q, want %q", tt.query, tt.rangeCode, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrefersDuckDuckGoQuery(t *testing.T) {
|
||||
SetPreferredWebSearchLanguage("")
|
||||
t.Cleanup(func() {
|
||||
SetPreferredWebSearchLanguage("")
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
want bool
|
||||
}{
|
||||
{name: "english words", query: "golang web search", want: true},
|
||||
{name: "english with numbers", query: "OpenAI o3 price 2026", want: true},
|
||||
{name: "chinese", query: "今天上海天气", want: false},
|
||||
{name: "mixed with han", query: "golang 中文 教程", want: false},
|
||||
{name: "numbers only", query: "2026 04 15", want: false},
|
||||
{name: "blank", query: " ", want: false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := prefersDuckDuckGoQuery(tt.query); got != tt.want {
|
||||
t.Fatalf("prefersDuckDuckGoQuery(%q) = %v, want %v", tt.query, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrefersDuckDuckGoQuery_FallsBackToPreferredLanguage(t *testing.T) {
|
||||
SetPreferredWebSearchLanguage("en")
|
||||
t.Cleanup(func() {
|
||||
SetPreferredWebSearchLanguage("")
|
||||
})
|
||||
|
||||
if !prefersDuckDuckGoQuery("2026 04 15") {
|
||||
t.Fatal("numeric query should prefer DuckDuckGo when preferred language is English")
|
||||
}
|
||||
|
||||
SetPreferredWebSearchLanguage("zh")
|
||||
if prefersDuckDuckGoQuery("2026 04 15") {
|
||||
t.Fatal("numeric query should prefer Sogou when preferred language is Chinese")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebTool_SogouPriorityAndExplicitProvider(t *testing.T) {
|
||||
tool, err := NewWebSearchTool(WebSearchToolOptions{
|
||||
SogouEnabled: true,
|
||||
SogouMaxResults: 5,
|
||||
DuckDuckGoEnabled: true,
|
||||
DuckDuckGoMaxResults: 5,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewWebSearchTool() error: %v", err)
|
||||
}
|
||||
if _, ok := tool.provider.(*SogouSearchProvider); !ok {
|
||||
t.Fatalf("expected SogouSearchProvider, got %T", tool.provider)
|
||||
}
|
||||
|
||||
tool, err = NewWebSearchTool(WebSearchToolOptions{
|
||||
Provider: "duckduckgo",
|
||||
SogouEnabled: true,
|
||||
SogouMaxResults: 5,
|
||||
DuckDuckGoEnabled: true,
|
||||
DuckDuckGoMaxResults: 5,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewWebSearchTool() error: %v", err)
|
||||
}
|
||||
if _, ok := tool.provider.(*DuckDuckGoSearchProvider); !ok {
|
||||
t.Fatalf("expected DuckDuckGoSearchProvider, got %T", tool.provider)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebTool_AutoProviderPrefersConfiguredProvidersBeforeSogou(t *testing.T) {
|
||||
tool, err := NewWebSearchTool(WebSearchToolOptions{
|
||||
SogouEnabled: true,
|
||||
SogouMaxResults: 5,
|
||||
BraveEnabled: true,
|
||||
BraveAPIKeys: []string{"brave-key"},
|
||||
BraveMaxResults: 5,
|
||||
DuckDuckGoEnabled: true,
|
||||
DuckDuckGoMaxResults: 5,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewWebSearchTool() error: %v", err)
|
||||
}
|
||||
if _, ok := tool.provider.(*BraveSearchProvider); !ok {
|
||||
t.Fatalf("expected BraveSearchProvider, got %T", tool.provider)
|
||||
}
|
||||
}
|
||||
|
||||
// stubSearchProvider is a test double for SearchProvider. It answers every
// search with a fixed result and records the queries it receives.
type stubSearchProvider struct {
	result string   // returned by every Search call
	calls  []string // queries received, in call order
}

// Search records query and returns the canned result with a nil error. The
// context, count and range code are ignored.
func (p *stubSearchProvider) Search(
	_ context.Context,
	query string,
	_ int,
	_ string,
) (string, error) {
	recorded := append(p.calls, query)
	p.calls = recorded
	return p.result, nil
}
|
||||
|
||||
func TestWebTool_AutoProviderRoutesQueryLanguageBetweenSogouAndDuckDuckGo(t *testing.T) {
|
||||
sogouProvider := &stubSearchProvider{result: "via sogou"}
|
||||
duckProvider := &stubSearchProvider{result: "via duckduckgo"}
|
||||
tool := &WebSearchTool{
|
||||
provider: sogouProvider,
|
||||
maxResults: 5,
|
||||
providerResolver: func(query string) (SearchProvider, int) {
|
||||
if prefersDuckDuckGoQuery(query) {
|
||||
return duckProvider, 3
|
||||
}
|
||||
return sogouProvider, 5
|
||||
},
|
||||
}
|
||||
|
||||
enResult := tool.Execute(context.Background(), map[string]any{"query": "golang concurrency", "count": 10})
|
||||
if enResult.IsError {
|
||||
t.Fatalf("english Execute() returned error: %s", enResult.ForLLM)
|
||||
}
|
||||
if len(duckProvider.calls) != 1 || duckProvider.calls[0] != "golang concurrency" {
|
||||
t.Fatalf("english query should use DuckDuckGo provider, calls=%v", duckProvider.calls)
|
||||
}
|
||||
if len(sogouProvider.calls) != 0 {
|
||||
t.Fatalf("english query should not call Sogou provider, calls=%v", sogouProvider.calls)
|
||||
}
|
||||
|
||||
zhResult := tool.Execute(context.Background(), map[string]any{"query": "今天上海天气"})
|
||||
if zhResult.IsError {
|
||||
t.Fatalf("chinese Execute() returned error: %s", zhResult.ForLLM)
|
||||
}
|
||||
if len(sogouProvider.calls) != 1 || sogouProvider.calls[0] != "今天上海天气" {
|
||||
t.Fatalf("chinese query should use Sogou provider, calls=%v", sogouProvider.calls)
|
||||
}
|
||||
}
|
||||
|
||||
type roundTripFunc func(*http.Request) (*http.Response, error)
|
||||
|
||||
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
return fn(req)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user