Merge pull request #2524 from SiYue-ZO/feature/sogou-web-search-default

Add configurable Sogou-backed web search
This commit is contained in:
美電球
2026-04-15 20:50:53 +08:00
committed by GitHub
18 changed files with 1656 additions and 55 deletions
+3
View File
@@ -194,6 +194,7 @@ func registerSharedTools(
if cfg.Tools.IsToolEnabled("web") {
searchTool, err := tools.NewWebSearchTool(tools.WebSearchToolOptions{
Provider: cfg.Tools.Web.Provider,
BraveAPIKeys: cfg.Tools.Web.Brave.APIKeys.Values(),
BraveMaxResults: cfg.Tools.Web.Brave.MaxResults,
BraveEnabled: cfg.Tools.Web.Brave.Enabled,
@@ -201,6 +202,8 @@ func registerSharedTools(
TavilyBaseURL: cfg.Tools.Web.Tavily.BaseURL,
TavilyMaxResults: cfg.Tools.Web.Tavily.MaxResults,
TavilyEnabled: cfg.Tools.Web.Tavily.Enabled,
SogouMaxResults: cfg.Tools.Web.Sogou.MaxResults,
SogouEnabled: cfg.Tools.Web.Sogou.Enabled,
DuckDuckGoMaxResults: cfg.Tools.Web.DuckDuckGo.MaxResults,
DuckDuckGoEnabled: cfg.Tools.Web.DuckDuckGo.Enabled,
PerplexityAPIKeys: cfg.Tools.Web.Perplexity.APIKeys.Values(),
+7
View File
@@ -664,6 +664,11 @@ type DuckDuckGoConfig struct {
MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_DUCKDUCKGO_MAX_RESULTS"`
}
// SogouConfig holds the settings for the Sogou web search provider.
type SogouConfig struct {
// Enabled switches the Sogou provider on or off. The env tag names
// PICOCLAW_TOOLS_WEB_SOGOU_ENABLED as the matching environment variable.
Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_SOGOU_ENABLED"`
// MaxResults is the configured result count for Sogou searches. When the
// search tool is built, values <= 0 fall back to 10 and larger values are
// capped at 10.
MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_SOGOU_MAX_RESULTS"`
}
type PerplexityConfig struct {
Enabled bool `json:"enabled" yaml:"-" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_ENABLED"`
APIKeys SecureStrings `json:"api_keys,omitzero" yaml:"api_keys,omitempty" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_API_KEYS"`
@@ -710,11 +715,13 @@ type WebToolsConfig struct {
ToolConfig ` yaml:"-" envPrefix:"PICOCLAW_TOOLS_WEB_"`
Brave BraveConfig `yaml:"brave,omitempty" json:"brave"`
Tavily TavilyConfig `yaml:"tavily,omitempty" json:"tavily"`
Sogou SogouConfig `yaml:"-" json:"sogou"`
DuckDuckGo DuckDuckGoConfig `yaml:"-" json:"duckduckgo"`
Perplexity PerplexityConfig `yaml:"perplexity,omitempty" json:"perplexity"`
SearXNG SearXNGConfig `yaml:"-" json:"searxng"`
GLMSearch GLMSearchConfig `yaml:"glm_search,omitempty" json:"glm_search"`
BaiduSearch BaiduSearchConfig `yaml:"baidu_search,omitempty" json:"baidu_search"`
Provider string `yaml:"-" json:"provider,omitempty" env:"PICOCLAW_TOOLS_WEB_PROVIDER"`
// PreferNative controls whether to use provider-native web search when
// the active LLM supports it (e.g. OpenAI web_search_preview). When true,
// the client-side web_search tool is hidden to avoid duplicate search surfaces,
+22
View File
@@ -760,6 +760,28 @@ func TestDefaultConfig_WebPreferNativeEnabled(t *testing.T) {
}
}
// TestDefaultConfig_WebProviderIsAuto verifies that the built-in default
// configuration selects the "auto" web search provider.
func TestDefaultConfig_WebProviderIsAuto(t *testing.T) {
	const want = "auto"

	got := DefaultConfig().Tools.Web.Provider
	if got == want {
		return
	}
	t.Fatalf("DefaultConfig().Tools.Web.Provider = %q, want auto", got)
}
// TestConfigExample_WebProviderIsAuto verifies that the shipped example
// configuration file parses and sets tools.web.provider to "auto".
func TestConfigExample_WebProviderIsAuto(t *testing.T) {
	examplePath := filepath.Join("..", "..", "config", "config.example.json")

	raw, readErr := os.ReadFile(examplePath)
	if readErr != nil {
		t.Fatalf("ReadFile(config.example.json) error: %v", readErr)
	}

	var parsed Config
	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
		t.Fatalf("Unmarshal(config.example.json) error: %v", decodeErr)
	}

	if got := parsed.Tools.Web.Provider; got != "auto" {
		t.Fatalf("config.example.json tools.web.provider = %q, want auto", got)
	}
}
func TestDefaultConfig_ToolFeedbackDisabled(t *testing.T) {
cfg := DefaultConfig()
if cfg.Agents.Defaults.ToolFeedback.Enabled {
+6 -1
View File
@@ -278,6 +278,7 @@ func DefaultConfig() *Config {
ToolConfig: ToolConfig{
Enabled: true,
},
Provider: "auto",
PreferNative: true,
Proxy: "",
FetchLimitBytes: 10 * 1024 * 1024, // 10MB by default
@@ -290,10 +291,14 @@ func DefaultConfig() *Config {
Enabled: false,
MaxResults: 5,
},
DuckDuckGo: DuckDuckGoConfig{
Sogou: SogouConfig{
Enabled: true,
MaxResults: 5,
},
DuckDuckGo: DuckDuckGoConfig{
Enabled: false,
MaxResults: 5,
},
Perplexity: PerplexityConfig{
Enabled: false,
MaxResults: 5,
+378 -46
View File
@@ -15,6 +15,7 @@ import (
"strings"
"sync/atomic"
"time"
"unicode"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/logger"
@@ -23,6 +24,7 @@ import (
const (
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
sogouUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1"
userAgentHonest = "picoclaw/%s (+https://github.com/sipeed/picoclaw; AI assistant bot)"
// HTTP client timeouts for web tool providers.
@@ -46,9 +48,18 @@ var (
reDDGLink = regexp.MustCompile(
`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`,
)
reDDGSnippet = regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
reDDGSnippet = regexp.MustCompile(
`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`,
)
reSogouTitle = regexp.MustCompile(
`<a\s+class=resultLink\s+href="([^"]+)"[^>]*id="sogou_vr_\d+_\d+"[^>]*>\s*(.*?)\s*</a>`,
)
reSogouSnippet = regexp.MustCompile(`<div class="clamp\d*">\s*(.*?)\s*</div>`)
reSogouRealURL = regexp.MustCompile(`url=([^&]+)`)
)
// preferredWebSearchLanguage holds the normalized language code ("zh", "en"
// or "") written by SetPreferredWebSearchLanguage and read by
// GetPreferredWebSearchLanguage. Before the first Store it holds no value,
// which GetPreferredWebSearchLanguage reports as "".
var preferredWebSearchLanguage atomic.Value
type APIKeyPool struct {
keys []string
current uint32
@@ -91,6 +102,39 @@ type SearchProvider interface {
Search(ctx context.Context, query string, count int, rangeCode string) (string, error)
}
// SearchResultItem is one parsed search hit.
type SearchResultItem struct {
// Title is the hit's display title.
Title string
// URL is the hit's destination link.
URL string
// Snippet is the hit's summary text; it is empty when none was found.
Snippet string
}
// extractSogouURL returns the destination URL carried in the "url" query
// parameter of a Sogou redirect link such as "/link?url=https%3A%2F%2F...".
//
// href is expected to be the raw attribute value taken from the HTML, so
// "&amp;" separators are accepted as well as plain "&". Only a parameter
// named exactly "url" is used; look-alike keys such as "refurl" or "imgurl"
// are ignored. The empty string is returned when no such parameter exists,
// when it is empty, or when its value is not valid percent-encoding.
func extractSogouURL(href string) string {
	// Attribute values copied out of markup still carry HTML-escaped
	// ampersands between query parameters.
	href = strings.ReplaceAll(href, "&amp;", "&")

	// Use everything after the first "?" as the query. When there is no "?"
	// the whole value is treated as the query so bare "url=..." inputs keep
	// working.
	rawQuery := href
	if _, after, found := strings.Cut(href, "?"); found {
		rawQuery = after
	}
	rawQuery, _, _ = strings.Cut(rawQuery, "#")

	// ParseQuery still returns every well-formed pair when it reports an
	// error, so a malformed sibling parameter must not discard a good "url".
	// A malformed "url" value is simply absent from the result.
	values, _ := url.ParseQuery(rawQuery)
	return values.Get("url")
}
// applySogouRangeHint appends a Chinese recency phrase to query for the range
// codes "d" (day), "w" (week), "m" (month) and "y" (year). Any other code,
// including the empty string, returns query unchanged.
func applySogouRangeHint(query string, rangeCode string) string {
	var hint string
	switch rangeCode {
	case "d":
		hint = " 最近一天"
	case "w":
		hint = " 最近一周"
	case "m":
		hint = " 最近一个月"
	case "y":
		hint = " 最近一年"
	}
	if hint == "" {
		return query
	}
	return query + hint
}
func normalizeSearchRange(raw string) (string, error) {
rangeCode := strings.ToLower(strings.TrimSpace(raw))
switch rangeCode {
@@ -206,6 +250,27 @@ func mapBaiduRecencyFilter(rangeCode string) string {
}
}
// normalizePreferredWebSearchLanguage reduces a free-form language label to
// "zh", "en" or "". Matching ignores case and surrounding whitespace: values
// starting with "zh" or equal to "chinese" yield "zh", values starting with
// "en" or equal to "english" yield "en", and anything else yields "".
func normalizePreferredWebSearchLanguage(lang string) string {
	label := strings.ToLower(strings.TrimSpace(lang))

	if label == "chinese" || strings.HasPrefix(label, "zh") {
		return "zh"
	}
	if label == "english" || strings.HasPrefix(label, "en") {
		return "en"
	}
	return ""
}
func SetPreferredWebSearchLanguage(lang string) {
preferredWebSearchLanguage.Store(normalizePreferredWebSearchLanguage(lang))
}
// GetPreferredWebSearchLanguage returns the language code last stored by
// SetPreferredWebSearchLanguage, or "" when nothing has been stored yet.
func GetPreferredWebSearchLanguage() string {
	if lang, ok := preferredWebSearchLanguage.Load().(string); ok {
		return lang
	}
	return ""
}
type BraveSearchProvider struct {
keyPool *APIKeyPool
proxy string
@@ -425,6 +490,104 @@ func (p *TavilySearchProvider) Search(
return "", fmt.Errorf("all api keys failed, last error: %w", lastErr)
}
// SogouSearchProvider is the SearchProvider that scrapes Sogou's mobile web
// search result pages.
type SogouSearchProvider struct {
// proxy is the proxy setting the provider was constructed with. Search does
// not read it; requests go through client.
proxy string
// client performs the HTTP requests issued by Search.
client *http.Client
}
// Search scrapes Sogou's mobile (WAP) result pages for query and returns a
// numbered plain-text list of up to count results.
//
// rangeCode is turned into a recency phrase appended to the keyword by
// applySogouRangeHint. At most three pages are requested, and paging stops
// early once count results are collected or a page is too short to hold
// results. Hits are de-duplicated by destination URL.
//
// An error is returned when a request cannot be built or sent, when a body
// cannot be read, or when a page answers with a status other than 200. An
// empty result set is not an error; it yields a "No results for" message.
func (p *SogouSearchProvider) Search(
	ctx context.Context,
	query string,
	count int,
	rangeCode string,
) (string, error) {
	const (
		sogouWAPURL   = "https://wap.sogou.com/web/searchList.jsp"
		maxBodyBytes  = 1 << 20 // read cap per page
		minPageBytes  = 200     // shorter pages are treated as "no more results"
		snippetWindow = 2000    // bytes after a title in which a snippet may appear
	)

	// A negative count would make the capacity argument to make panic.
	if count < 0 {
		count = 0
	}

	keyword := applySogouRangeHint(query, rangeCode)
	results := make([]SearchResultItem, 0, count)
	seenURLs := make(map[string]bool)
	maxPages := min(3, (count+1)/2+1)

	for page := 1; page <= maxPages && len(results) < count; page++ {
		params := url.Values{}
		params.Set("keyword", keyword)
		params.Set("v", "5")
		params.Set("p", fmt.Sprintf("%d", page))

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, sogouWAPURL+"?"+params.Encode(), nil)
		if err != nil {
			return "", fmt.Errorf("failed to create request: %w", err)
		}
		req.Header.Set("User-Agent", sogouUserAgent)

		resp, err := p.client.Do(req)
		if err != nil {
			return "", fmt.Errorf("request failed: %w", err)
		}
		// Closed right after reading rather than deferred, so bodies do not
		// pile up across loop iterations.
		body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes))
		resp.Body.Close()
		if err != nil {
			return "", fmt.Errorf("failed to read response: %w", err)
		}
		if resp.StatusCode != http.StatusOK {
			return "", fmt.Errorf("Sogou returned status %d", resp.StatusCode)
		}

		pageHTML := string(body)
		if len(pageHTML) < minPageBytes {
			break
		}

		// Index-based matching gives each title's position directly, so the
		// page does not have to be searched again for every hit.
		matches := reSogouTitle.FindAllStringSubmatchIndex(pageHTML, -1)
		for i, loc := range matches {
			// loc is [matchStart, matchEnd, hrefStart, hrefEnd, titleStart, titleEnd].
			if len(loc) < 6 || loc[2] < 0 || loc[4] < 0 {
				continue
			}
			title := stripTags(pageHTML[loc[4]:loc[5]])
			link := extractSogouURL(pageHTML[loc[2]:loc[3]])
			if title == "" || link == "" || seenURLs[link] {
				continue
			}
			seenURLs[link] = true

			// Look for the snippet only between this title and the next one.
			// Otherwise a hit without a snippet would pick up the snippet of
			// the following hit.
			windowEnd := min(len(pageHTML), loc[1]+snippetWindow)
			if i+1 < len(matches) {
				windowEnd = min(windowEnd, matches[i+1][0])
			}
			snippet := ""
			if m := reSogouSnippet.FindStringSubmatch(pageHTML[loc[1]:windowEnd]); len(m) > 1 {
				snippet = stripTags(m[1])
			}

			results = append(results, SearchResultItem{
				Title:   title,
				URL:     link,
				Snippet: snippet,
			})
			if len(results) >= count {
				break
			}
		}
	}

	if len(results) == 0 {
		return fmt.Sprintf("No results for: %s", query), nil
	}

	lines := make([]string, 0, 1+2*len(results))
	lines = append(lines, fmt.Sprintf("Results for: %s (via Sogou)", query))
	for i, item := range results {
		lines = append(lines, fmt.Sprintf("%d. %s\n   %s", i+1, item.Title, item.URL))
		if item.Snippet != "" {
			lines = append(lines, fmt.Sprintf("   %s", item.Snippet))
		}
	}
	return strings.Join(lines, "\n"), nil
}
type DuckDuckGoSearchProvider struct {
proxy string
client *http.Client
@@ -909,11 +1072,13 @@ func (p *BaiduSearchProvider) Search(
}
type WebSearchTool struct {
provider SearchProvider
maxResults int
provider SearchProvider
maxResults int
providerResolver func(query string) (SearchProvider, int)
}
type WebSearchToolOptions struct {
Provider string
BraveAPIKeys []string
BraveMaxResults int
BraveEnabled bool
@@ -921,6 +1086,8 @@ type WebSearchToolOptions struct {
TavilyBaseURL string
TavilyMaxResults int
TavilyEnabled bool
SogouMaxResults int
SogouEnabled bool
DuckDuckGoMaxResults int
DuckDuckGoEnabled bool
PerplexityAPIKeys []string
@@ -941,100 +1108,256 @@ type WebSearchToolOptions struct {
Proxy string
}
func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) {
var provider SearchProvider
maxResults := 10
// Priority: Perplexity > Brave > SearXNG > Tavily > DuckDuckGo > Baidu Search > GLM Search
if opts.PerplexityEnabled {
func (opts WebSearchToolOptions) providerByName(name string) (SearchProvider, int, error) {
switch strings.ToLower(strings.TrimSpace(name)) {
case "", "auto":
return nil, 0, nil
case "sogou":
if !opts.SogouEnabled {
return nil, 0, nil
}
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, 0, fmt.Errorf("failed to create HTTP client for Sogou: %w", err)
}
maxResults := 10
if opts.SogouMaxResults > 0 {
maxResults = min(opts.SogouMaxResults, 10)
}
return &SogouSearchProvider{
proxy: opts.Proxy,
client: client,
}, maxResults, nil
case "perplexity":
if !opts.PerplexityEnabled {
return nil, 0, nil
}
client, err := utils.CreateHTTPClient(opts.Proxy, perplexityTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err)
}
provider = &PerplexitySearchProvider{
keyPool: NewAPIKeyPool(opts.PerplexityAPIKeys),
proxy: opts.Proxy,
client: client,
return nil, 0, fmt.Errorf("failed to create HTTP client for Perplexity: %w", err)
}
maxResults := 10
if opts.PerplexityMaxResults > 0 {
maxResults = min(opts.PerplexityMaxResults, 10)
}
} else if opts.BraveEnabled {
return &PerplexitySearchProvider{
keyPool: NewAPIKeyPool(opts.PerplexityAPIKeys),
proxy: opts.Proxy,
client: client,
}, maxResults, nil
case "brave":
if !opts.BraveEnabled {
return nil, 0, nil
}
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Brave: %w", err)
return nil, 0, fmt.Errorf("failed to create HTTP client for Brave: %w", err)
}
provider = &BraveSearchProvider{keyPool: NewAPIKeyPool(opts.BraveAPIKeys), proxy: opts.Proxy, client: client}
maxResults := 10
if opts.BraveMaxResults > 0 {
maxResults = min(opts.BraveMaxResults, 10)
}
} else if opts.SearXNGEnabled {
provider = &SearXNGSearchProvider{baseURL: opts.SearXNGBaseURL}
return &BraveSearchProvider{
keyPool: NewAPIKeyPool(opts.BraveAPIKeys),
proxy: opts.Proxy,
client: client,
}, maxResults, nil
case "searxng":
if !opts.SearXNGEnabled {
return nil, 0, nil
}
maxResults := 10
if opts.SearXNGMaxResults > 0 {
maxResults = min(opts.SearXNGMaxResults, 10)
}
} else if opts.TavilyEnabled {
return &SearXNGSearchProvider{
baseURL: opts.SearXNGBaseURL,
}, maxResults, nil
case "tavily":
if !opts.TavilyEnabled {
return nil, 0, nil
}
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Tavily: %w", err)
return nil, 0, fmt.Errorf("failed to create HTTP client for Tavily: %w", err)
}
provider = &TavilySearchProvider{
maxResults := 10
if opts.TavilyMaxResults > 0 {
maxResults = min(opts.TavilyMaxResults, 10)
}
return &TavilySearchProvider{
keyPool: NewAPIKeyPool(opts.TavilyAPIKeys),
baseURL: opts.TavilyBaseURL,
proxy: opts.Proxy,
client: client,
}, maxResults, nil
case "duckduckgo":
if !opts.DuckDuckGoEnabled {
return nil, 0, nil
}
if opts.TavilyMaxResults > 0 {
maxResults = min(opts.TavilyMaxResults, 10)
}
} else if opts.DuckDuckGoEnabled {
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err)
return nil, 0, fmt.Errorf("failed to create HTTP client for DuckDuckGo: %w", err)
}
provider = &DuckDuckGoSearchProvider{proxy: opts.Proxy, client: client}
maxResults := 10
if opts.DuckDuckGoMaxResults > 0 {
maxResults = min(opts.DuckDuckGoMaxResults, 10)
}
} else if opts.BaiduSearchEnabled {
return &DuckDuckGoSearchProvider{
proxy: opts.Proxy,
client: client,
}, maxResults, nil
case "baidu_search":
if !opts.BaiduSearchEnabled {
return nil, 0, nil
}
client, err := utils.CreateHTTPClient(opts.Proxy, perplexityTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for Baidu Search: %w", err)
return nil, 0, fmt.Errorf("failed to create HTTP client for Baidu Search: %w", err)
}
provider = &BaiduSearchProvider{
maxResults := 10
if opts.BaiduSearchMaxResults > 0 {
maxResults = min(opts.BaiduSearchMaxResults, 10)
}
return &BaiduSearchProvider{
apiKey: opts.BaiduSearchAPIKey,
baseURL: opts.BaiduSearchBaseURL,
proxy: opts.Proxy,
client: client,
}, maxResults, nil
case "glm_search":
if !opts.GLMSearchEnabled {
return nil, 0, nil
}
if opts.BaiduSearchMaxResults > 0 {
maxResults = min(opts.BaiduSearchMaxResults, 10)
}
} else if opts.GLMSearchEnabled {
client, err := utils.CreateHTTPClient(opts.Proxy, searchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for GLM Search: %w", err)
return nil, 0, fmt.Errorf("failed to create HTTP client for GLM Search: %w", err)
}
searchEngine := opts.GLMSearchEngine
if searchEngine == "" {
searchEngine = "search_std"
}
provider = &GLMSearchProvider{
maxResults := 10
if opts.GLMSearchMaxResults > 0 {
maxResults = min(opts.GLMSearchMaxResults, 10)
}
return &GLMSearchProvider{
apiKey: opts.GLMSearchAPIKey,
baseURL: opts.GLMSearchBaseURL,
searchEngine: searchEngine,
proxy: opts.Proxy,
client: client,
}, maxResults, nil
default:
return nil, 0, fmt.Errorf("unknown web search provider %q", name)
}
}
func containsHan(text string) bool {
for _, r := range text {
if unicode.Is(unicode.Han, r) {
return true
}
if opts.GLMSearchMaxResults > 0 {
maxResults = min(opts.GLMSearchMaxResults, 10)
}
return false
}
func containsLatinLetter(text string) bool {
for _, r := range text {
if unicode.IsLetter(r) && unicode.In(r, unicode.Latin) {
return true
}
} else {
}
return false
}
func prefersDuckDuckGoQuery(text string) bool {
trimmed := strings.TrimSpace(text)
if trimmed == "" {
return GetPreferredWebSearchLanguage() == "en"
}
if containsHan(trimmed) {
return false
}
if containsLatinLetter(trimmed) {
return true
}
return GetPreferredWebSearchLanguage() == "en"
}
// buildProviderResolver returns a function that picks the search provider and
// its result limit for a given query.
//
// When opts.Provider names a specific provider, the resolver always returns
// it, or (nil, 0) when that provider is disabled. In auto mode ("" or "auto")
// the first enabled provider wins, in this order: perplexity, brave, searxng,
// tavily, then Sogou/DuckDuckGo, then baidu_search, glm_search. If both Sogou
// and DuckDuckGo are enabled, the choice between them is made per query by
// prefersDuckDuckGoQuery. With nothing enabled the resolver returns (nil, 0).
//
// An error is returned when a provider cannot be constructed or when
// opts.Provider is not a known name.
func (opts WebSearchToolOptions) buildProviderResolver() (func(query string) (SearchProvider, int), error) {
	type resolverFunc = func(query string) (SearchProvider, int)

	// fixed builds a resolver that ignores the query.
	fixed := func(p SearchProvider, limit int) resolverFunc {
		return func(string) (SearchProvider, int) { return p, limit }
	}
	none := func(string) (SearchProvider, int) { return nil, 0 }

	// firstEnabled returns a fixed resolver for the first enabled provider in
	// names, or nil when none of them is enabled.
	firstEnabled := func(names ...string) (resolverFunc, error) {
		for _, candidate := range names {
			p, limit, err := opts.providerByName(candidate)
			if err != nil {
				return nil, err
			}
			if p != nil {
				return fixed(p, limit), nil
			}
		}
		return nil, nil
	}

	// An explicitly named provider bypasses the auto priority order.
	if explicit := strings.ToLower(strings.TrimSpace(opts.Provider)); explicit != "" && explicit != "auto" {
		p, limit, err := opts.providerByName(explicit)
		if err != nil {
			return nil, err
		}
		if p == nil {
			return none, nil
		}
		return fixed(p, limit), nil
	}

	if resolver, err := firstEnabled("perplexity", "brave", "searxng", "tavily"); err != nil || resolver != nil {
		return resolver, err
	}

	sogou, sogouLimit, err := opts.providerByName("sogou")
	if err != nil {
		return nil, err
	}
	duck, duckLimit, err := opts.providerByName("duckduckgo")
	if err != nil {
		return nil, err
	}

	switch {
	case sogou != nil && duck != nil:
		// Both are available: route each query by its script.
		return func(query string) (SearchProvider, int) {
			if prefersDuckDuckGoQuery(query) {
				return duck, duckLimit
			}
			return sogou, sogouLimit
		}, nil
	case sogou != nil:
		return fixed(sogou, sogouLimit), nil
	case duck != nil:
		return fixed(duck, duckLimit), nil
	}

	if resolver, err := firstEnabled("baidu_search", "glm_search"); err != nil || resolver != nil {
		return resolver, err
	}
	return none, nil
}
func NewWebSearchTool(opts WebSearchToolOptions) (*WebSearchTool, error) {
resolver, err := opts.buildProviderResolver()
if err != nil {
return nil, err
}
provider, maxResults := resolver("")
if provider == nil {
return nil, nil
}
return &WebSearchTool{
provider: provider,
maxResults: maxResults,
provider: provider,
maxResults: maxResults,
providerResolver: resolver,
}, nil
}
@@ -1077,13 +1400,22 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolR
}
query = strings.TrimSpace(query)
count64, err := getInt64Arg(args, "count", int64(t.maxResults))
provider := t.provider
maxResults := t.maxResults
if t.providerResolver != nil {
provider, maxResults = t.providerResolver(query)
}
if provider == nil {
return ErrorResult("search provider is not configured")
}
count64, err := getInt64Arg(args, "count", int64(maxResults))
if err != nil {
return ErrorResult(err.Error())
}
count := t.maxResults
count := maxResults
if count64 > 0 && count64 <= 10 {
count = int(count64)
count = min(int(count64), maxResults)
}
rangeCode, err := normalizeSearchRange("")
@@ -1101,7 +1433,7 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolR
}
}
result, err := t.provider.Search(ctx, query, count, rangeCode)
result, err := provider.Search(ctx, query, count, rangeCode)
if err != nil {
return ErrorResult(fmt.Sprintf("search failed: %v", err))
}
+204 -5
View File
@@ -385,19 +385,24 @@ func TestWebFetchTool_PayloadTooLarge(t *testing.T) {
}
}
// TestWebTool_WebSearch_NoApiKey verifies that no tool is created when API key is missing
// TestWebTool_WebSearch_NoApiKey verifies missing credentials are surfaced at execution time.
func TestWebTool_WebSearch_NoApiKey(t *testing.T) {
tool, err := NewWebSearchTool(WebSearchToolOptions{BraveEnabled: true, BraveAPIKeys: nil})
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if tool == nil {
t.Fatalf("Expected tool to be created")
t.Fatalf("Expected tool when Brave is enabled, even without API keys")
}
ctx := context.Background()
result := tool.Execute(ctx, map[string]any{"query": "test"})
result := tool.Execute(context.Background(), map[string]any{
"query": "test query",
})
if !result.IsError {
t.Errorf("Expected error when API key is missing")
t.Fatalf("Expected missing Brave API key to return error")
}
if !strings.Contains(result.ForLLM, "no API key provided") {
t.Fatalf("Unexpected error message: %s", result.ForLLM)
}
// Also nil when nothing is enabled
@@ -1672,3 +1677,197 @@ func TestWebTool_GLMSearch_Priority(t *testing.T) {
t.Errorf("Expected GLMSearchProvider when only GLM enabled, got %T", tool2.provider)
}
}
func TestWebTool_SogouSearch_Success(t *testing.T) {
provider := &SogouSearchProvider{
client: &http.Client{
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
rec := httptest.NewRecorder()
fmt.Fprint(rec, `<html><body>
<a class=resultLink href="/link?url=https%3A%2F%2Fexample.com%2Fa" id="sogou_vr_0_0">Result A</a>
<div class="clamp3">Snippet A</div>
<a class=resultLink href="/link?url=https%3A%2F%2Fexample.com%2Fb" id="sogou_vr_0_1">Result B</a>
<div class="clamp3">Snippet B</div>
</body></html>`)
return rec.Result(), nil
}),
},
}
out, err := provider.Search(context.Background(), "test query", 2, "")
if err != nil {
t.Fatalf("Search() error: %v", err)
}
if !strings.Contains(out, "via Sogou") || !strings.Contains(out, "https://example.com/a") {
t.Fatalf("unexpected output: %s", out)
}
}
// TestApplySogouRangeHint checks the recency phrase appended for each
// supported range code and that an empty code leaves the query untouched.
func TestApplySogouRangeHint(t *testing.T) {
	type hintCase struct {
		name      string
		query     string
		rangeCode string
		want      string
	}

	cases := []hintCase{
		{"empty range", "golang", "", "golang"},
		{"day", "golang", "d", "golang 最近一天"},
		{"week", "golang", "w", "golang 最近一周"},
		{"month", "golang", "m", "golang 最近一个月"},
		{"year", "golang", "y", "golang 最近一年"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := applySogouRangeHint(tc.query, tc.rangeCode)
			if got == tc.want {
				return
			}
			t.Fatalf("applySogouRangeHint(%q, %q) = %q, want %q", tc.query, tc.rangeCode, got, tc.want)
		})
	}
}
// TestPrefersDuckDuckGoQuery checks query routing with no preferred language
// set: Latin-letter queries prefer DuckDuckGo, while queries containing Han
// characters, digits only, or nothing at all do not.
func TestPrefersDuckDuckGoQuery(t *testing.T) {
	// Clear the process-wide preference for the test and leave it cleared.
	resetLanguage := func() { SetPreferredWebSearchLanguage("") }
	resetLanguage()
	t.Cleanup(resetLanguage)

	type routeCase struct {
		name  string
		query string
		want  bool
	}
	cases := []routeCase{
		{"english words", "golang web search", true},
		{"english with numbers", "OpenAI o3 price 2026", true},
		{"chinese", "今天上海天气", false},
		{"mixed with han", "golang 中文 教程", false},
		{"numbers only", "2026 04 15", false},
		{"blank", "   ", false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := prefersDuckDuckGoQuery(tc.query)
			if got == tc.want {
				return
			}
			t.Fatalf("prefersDuckDuckGoQuery(%q) = %v, want %v", tc.query, got, tc.want)
		})
	}
}
// TestPrefersDuckDuckGoQuery_FallsBackToPreferredLanguage checks that a query
// with neither Han nor Latin letters is routed by the preferred language:
// DuckDuckGo for English, Sogou for Chinese.
func TestPrefersDuckDuckGoQuery_FallsBackToPreferredLanguage(t *testing.T) {
	const numericQuery = "2026 04 15"

	t.Cleanup(func() {
		SetPreferredWebSearchLanguage("")
	})

	SetPreferredWebSearchLanguage("en")
	if preferDuck := prefersDuckDuckGoQuery(numericQuery); !preferDuck {
		t.Fatal("numeric query should prefer DuckDuckGo when preferred language is English")
	}

	SetPreferredWebSearchLanguage("zh")
	if preferDuck := prefersDuckDuckGoQuery(numericQuery); preferDuck {
		t.Fatal("numeric query should prefer Sogou when preferred language is Chinese")
	}
}
func TestWebTool_SogouPriorityAndExplicitProvider(t *testing.T) {
tool, err := NewWebSearchTool(WebSearchToolOptions{
SogouEnabled: true,
SogouMaxResults: 5,
DuckDuckGoEnabled: true,
DuckDuckGoMaxResults: 5,
})
if err != nil {
t.Fatalf("NewWebSearchTool() error: %v", err)
}
if _, ok := tool.provider.(*SogouSearchProvider); !ok {
t.Fatalf("expected SogouSearchProvider, got %T", tool.provider)
}
tool, err = NewWebSearchTool(WebSearchToolOptions{
Provider: "duckduckgo",
SogouEnabled: true,
SogouMaxResults: 5,
DuckDuckGoEnabled: true,
DuckDuckGoMaxResults: 5,
})
if err != nil {
t.Fatalf("NewWebSearchTool() error: %v", err)
}
if _, ok := tool.provider.(*DuckDuckGoSearchProvider); !ok {
t.Fatalf("expected DuckDuckGoSearchProvider, got %T", tool.provider)
}
}
// TestWebTool_AutoProviderPrefersConfiguredProvidersBeforeSogou checks that,
// in auto mode, an enabled Brave provider is chosen ahead of Sogou and
// DuckDuckGo.
func TestWebTool_AutoProviderPrefersConfiguredProvidersBeforeSogou(t *testing.T) {
	tool, err := NewWebSearchTool(WebSearchToolOptions{
		SogouEnabled:         true,
		SogouMaxResults:      5,
		BraveEnabled:         true,
		BraveAPIKeys:         []string{"brave-key"},
		BraveMaxResults:      5,
		DuckDuckGoEnabled:    true,
		DuckDuckGoMaxResults: 5,
	})
	if err != nil {
		t.Fatalf("NewWebSearchTool() error: %v", err)
	}
	// NewWebSearchTool returns (nil, nil) when no provider resolves. Fail
	// cleanly instead of panicking on the field access below.
	if tool == nil {
		t.Fatal("NewWebSearchTool() returned nil tool with Brave, Sogou and DuckDuckGo enabled")
	}
	if _, ok := tool.provider.(*BraveSearchProvider); !ok {
		t.Fatalf("expected BraveSearchProvider, got %T", tool.provider)
	}
}
// stubSearchProvider is a test double for SearchProvider. It returns a canned
// result and records every query it is asked to search for.
type stubSearchProvider struct {
	// result is returned verbatim by every Search call.
	result string
	// calls lists the queries received, in call order.
	calls []string
}

// Search records query and returns the canned result with a nil error. The
// context, count and range code are ignored.
func (p *stubSearchProvider) Search(_ context.Context, query string, _ int, _ string) (string, error) {
	recorded := append(p.calls, query)
	p.calls = recorded
	return p.result, nil
}
// TestWebTool_AutoProviderRoutesQueryLanguageBetweenSogouAndDuckDuckGo checks
// that Execute consults the per-query resolver: an English query reaches only
// the DuckDuckGo stub and a Chinese query reaches the Sogou stub.
func TestWebTool_AutoProviderRoutesQueryLanguageBetweenSogouAndDuckDuckGo(t *testing.T) {
	const (
		englishQuery = "golang concurrency"
		chineseQuery = "今天上海天气"
	)

	sogou := &stubSearchProvider{result: "via sogou"}
	duck := &stubSearchProvider{result: "via duckduckgo"}

	route := func(query string) (SearchProvider, int) {
		if !prefersDuckDuckGoQuery(query) {
			return sogou, 5
		}
		return duck, 3
	}
	tool := &WebSearchTool{
		provider:         sogou,
		maxResults:       5,
		providerResolver: route,
	}
	ctx := context.Background()

	if res := tool.Execute(ctx, map[string]any{"query": englishQuery, "count": 10}); res.IsError {
		t.Fatalf("english Execute() returned error: %s", res.ForLLM)
	}
	if len(duck.calls) != 1 || duck.calls[0] != englishQuery {
		t.Fatalf("english query should use DuckDuckGo provider, calls=%v", duck.calls)
	}
	if len(sogou.calls) != 0 {
		t.Fatalf("english query should not call Sogou provider, calls=%v", sogou.calls)
	}

	if res := tool.Execute(ctx, map[string]any{"query": chineseQuery}); res.IsError {
		t.Fatalf("chinese Execute() returned error: %s", res.ForLLM)
	}
	if len(sogou.calls) != 1 || sogou.calls[0] != chineseQuery {
		t.Fatalf("chinese query should use Sogou provider, calls=%v", sogou.calls)
	}
}
// roundTripFunc adapts a plain function to http.RoundTripper so tests can
// stub the transport of an http.Client.
type roundTripFunc func(*http.Request) (*http.Response, error)

// RoundTrip calls the wrapped function with req and returns its results
// unchanged.
func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	resp, err := f(req)
	return resp, err
}