diff --git a/docs/tools_configuration.md b/docs/tools_configuration.md
index 08746e267..a38f0856f 100644
--- a/docs/tools_configuration.md
+++ b/docs/tools_configuration.md
@@ -158,7 +158,7 @@ and injected into the context for a configured number of turns (`ttl`).
 
 | Config               | Type | Default | Description                                                                                                                       |
 |----------------------|------|---------|-----------------------------------------------------------------------------------------------------------------------------------|
-| `enabled`            | bool | false   | If true, MCP tools are hidden and loaded on-demand via search. If false, all tools are loaded                                     |
+| `enabled`            | bool | false   | Global default: if `true`, all MCP tools are hidden and loaded on-demand via search; if `false`, all tools are loaded into context. Individual servers can override this with the per-server `deferred` field. |
 | `ttl`                | int  | 5       | Number of conversational turns a discovered tool remains unlocked                                                                 |
 | `max_search_results` | int  | 5       | Maximum number of tools returned per search query                                                                                 |
 | `use_bm25`           | bool | true    | Enable the natural language/keyword search tool (`tool_search_tool_bm25`). **Warning**: consumes more resources than regex search |
@@ -169,16 +169,17 @@ and injected into the context for a configured number of turns (`ttl`).
 
 ### Per-Server Config
 
-| Config     | Type   | Required | Description                                |
-|------------|--------|----------|--------------------------------------------|
-| `enabled`  | bool   | yes      | Enable this MCP server                     |
-| `type`     | string | no       | Transport type: `stdio`, `sse`, `http`     |
-| `command`  | string | stdio    | Executable command for stdio transport     |
-| `args`     | array  | no       | Command arguments for stdio transport      |
-| `env`      | object | no       | Environment variables for stdio process    |
-| `env_file` | string | no       | Path to environment file for stdio process |
-| `url`      | string | sse/http | Endpoint URL for `sse`/`http` transport    |
-| `headers`  | object | no       | HTTP headers for `sse`/`http` transport    |
+| Config     | Type    | Required | Description                                                                                                                                                     |
+|------------|---------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `enabled`  | bool    | yes      | Enable this MCP server                                                                                                                                          |
+| `deferred` | bool    | no       | Override deferred mode for this server only. `true` = tools are hidden and discoverable via search; `false` = tools are always visible in context. When omitted, the global `discovery.enabled` value applies. |
+| `type`     | string  | no       | Transport type: `stdio`, `sse`, `http`                                                                                                                          |
+| `command`  | string  | stdio    | Executable command for stdio transport                                                                                                                          |
+| `args`     | array   | no       | Command arguments for stdio transport                                                                                                                           |
+| `env`      | object  | no       | Environment variables for stdio process                                                                                                                         |
+| `env_file` | string  | no       | Path to environment file for stdio process                                                                                                                      |
+| `url`      | string  | sse/http | Endpoint URL for `sse`/`http` transport                                                                                                                         |
+| `headers`  | object  | no       | HTTP headers for `sse`/`http` transport                                                                                                                         |
 
 ### Transport Behavior
 
@@ -291,6 +292,50 @@ dynamically only when requested by the user.*
 }
 ```
 
+#### 4) Mixed setup: per-server deferred override
+
+*Discovery is enabled globally, but `filesystem` is pinned as always-visible while `context7` follows the global
+default (deferred). `aws` explicitly opts in to deferred mode even though it is the same as the global default.*
+
+```json
+{
+  "tools": {
+    "mcp": {
+      "enabled": true,
+      "discovery": {
+        "enabled": true,
+        "ttl": 5,
+        "max_search_results": 5,
+        "use_bm25": true
+      },
+      "servers": {
+        "filesystem": {
+          "enabled": true,
+          "command": "npx",
+          "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"],
+          "deferred": false
+        },
+        "context7": {
+          "enabled": true,
+          "command": "npx",
+          "args": ["-y", "@upstash/context7-mcp"]
+        },
+        "aws": {
+          "enabled": true,
+          "command": "npx",
+          "args": ["-y", "aws-mcp-server"],
+          "deferred": true
+        }
+      }
+    }
+  }
+}
+```
+
+> **Tip:** `deferred` on a per-server basis is independent of `discovery.enabled`. You can keep
+> `discovery.enabled: false` globally (all tools visible by default) and still mark individual
+> high-volume servers as `"deferred": true` to avoid polluting the context with their tools.
+
 ## Skills Tool
 
 The skills tool configures skill discovery and installation via registries like ClawHub.
diff --git a/pkg/agent/loop_mcp.go b/pkg/agent/loop_mcp.go
index 962789a06..97debbc33 100644
--- a/pkg/agent/loop_mcp.go
+++ b/pkg/agent/loop_mcp.go
@@ -11,6 +11,7 @@ import (
 	"fmt"
 	"sync"
 
+	"github.com/sipeed/picoclaw/pkg/config"
 	"github.com/sipeed/picoclaw/pkg/logger"
 	"github.com/sipeed/picoclaw/pkg/mcp"
 	"github.com/sipeed/picoclaw/pkg/tools"
@@ -111,6 +112,12 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
 
 		for serverName, conn := range servers {
 			uniqueTools += len(conn.Tools)
+
+			// Determine whether this server's tools should be deferred (hidden).
+			// Per-server "deferred" field takes precedence over the global Discovery.Enabled.
+			serverCfg := al.cfg.Tools.MCP.Servers[serverName]
+			registerAsHidden := serverIsDeferred(al.cfg.Tools.MCP.Discovery.Enabled, serverCfg)
+
 			for _, tool := range conn.Tools {
 				for _, agentID := range agentIDs {
 					agent, ok := al.registry.GetAgent(agentID)
@@ -120,7 +127,7 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
 
 					mcpTool := tools.NewMCPTool(mcpManager, serverName, tool)
 
-					if al.cfg.Tools.MCP.Discovery.Enabled {
+					if registerAsHidden {
 						agent.Tools.RegisterHidden(mcpTool)
 					} else {
 						agent.Tools.Register(mcpTool)
@@ -133,6 +140,7 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
 							"server":   serverName,
 							"tool":     tool.Name,
 							"name":     mcpTool.Name(),
+							"deferred": registerAsHidden,
 						})
 				}
 			}
@@ -198,3 +206,18 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
 
 	return al.mcp.getInitErr()
 }
+
+// serverIsDeferred reports whether an MCP server's tools should be registered
+// as hidden (deferred/discovery mode).
+//
+// The per-server Deferred field takes precedence over the global discoveryEnabled
+// default. When Deferred is nil, discoveryEnabled is used as the fallback.
+func serverIsDeferred(discoveryEnabled bool, serverCfg config.MCPServerConfig) bool {
+	if !discoveryEnabled {
+		return false
+	}
+	if serverCfg.Deferred != nil {
+		return *serverCfg.Deferred
+	}
+	return true
+}
diff --git a/pkg/agent/loop_mcp_test.go b/pkg/agent/loop_mcp_test.go
new file mode 100644
index 000000000..35c3e49c8
--- /dev/null
+++ b/pkg/agent/loop_mcp_test.go
@@ -0,0 +1,75 @@
+// PicoClaw - Ultra-lightweight personal AI agent
+// Inspired by and based on nanobot: https://github.com/HKUDS/nanobot
+// License: MIT
+//
+// Copyright (c) 2026 PicoClaw contributors
+
+package agent
+
+import (
+	"testing"
+
+	"github.com/sipeed/picoclaw/pkg/config"
+)
+
+func boolPtr(b bool) *bool { return &b }
+
+func TestServerIsDeferred(t *testing.T) {
+	tests := []struct {
+		name             string
+		discoveryEnabled bool
+		serverDeferred   *bool
+		want             bool
+	}{
+		// --- global false always wins: per-server deferred is ignored ---
+		{
+			name:             "global false: per-server deferred=true is ignored",
+			discoveryEnabled: false,
+			serverDeferred:   boolPtr(true),
+			want:             false,
+		},
+		{
+			name:             "global false: per-server deferred=false stays false",
+			discoveryEnabled: false,
+			serverDeferred:   boolPtr(false),
+			want:             false,
+		},
+		// --- global true: per-server override applies ---
+		{
+			name:             "global true: per-server deferred=false opts out",
+			discoveryEnabled: true,
+			serverDeferred:   boolPtr(false),
+			want:             false,
+		},
+		{
+			name:             "global true: per-server deferred=true stays true",
+			discoveryEnabled: true,
+			serverDeferred:   boolPtr(true),
+			want:             true,
+		},
+		// --- no per-server override: fall back to global ---
+		{
+			name:             "no per-server field, global discovery enabled",
+			discoveryEnabled: true,
+			serverDeferred:   nil,
+			want:             true,
+		},
+		{
+			name:             "no per-server field, global discovery disabled",
+			discoveryEnabled: false,
+			serverDeferred:   nil,
+			want:             false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			serverCfg := config.MCPServerConfig{Deferred: tt.serverDeferred}
+			got := serverIsDeferred(tt.discoveryEnabled, serverCfg)
+			if got != tt.want {
+				t.Errorf("serverIsDeferred(discoveryEnabled=%v, deferred=%v) = %v, want %v",
+					tt.discoveryEnabled, tt.serverDeferred, got, tt.want)
+			}
+		})
+	}
+}
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 0cb67acb2..ece2a7dbf 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -817,6 +817,10 @@ type ClawHubRegistryConfig struct {
 type MCPServerConfig struct {
 	// Enabled indicates whether this MCP server is active
 	Enabled bool `json:"enabled"`
+	// Deferred controls whether this server's tools are registered as hidden (deferred/discovery mode).
+	// When nil, the global Discovery.Enabled setting applies.
+	// When explicitly set to true or false, it overrides the global setting for this server only.
+	Deferred *bool `json:"deferred,omitempty"`
 	// Command is the executable to run (e.g., "npx", "python", "/path/to/server")
 	Command string `json:"command"`
 	// Args are the arguments to pass to the command
diff --git a/pkg/tools/web.go b/pkg/tools/web.go
index 810914f2e..42cf79578 100644
--- a/pkg/tools/web.go
+++ b/pkg/tools/web.go
@@ -16,12 +16,14 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/sipeed/picoclaw/pkg/config"
 	"github.com/sipeed/picoclaw/pkg/logger"
 	"github.com/sipeed/picoclaw/pkg/utils"
 )
 
 const (
-	userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+	userAgent       = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+	userAgentHonest = "picoclaw/%s (+https://github.com/sipeed/picoclaw; AI assistant bot)"
 
 	// HTTP client timeouts for web tool providers.
 	searchTimeout     = 10 * time.Second // Brave, Tavily, DuckDuckGo
@@ -913,28 +915,58 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
 		}
 	}
 
-	req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
-	if err != nil {
-		return ErrorResult(fmt.Sprintf("failed to create request: %v", err))
+	doFetch := func(ua string) (*http.Response, []byte, error) {
+		req, reqErr := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
+		if reqErr != nil {
+			return nil, nil, fmt.Errorf("failed to create request: %w", reqErr)
+		}
+		req.Header.Set("User-Agent", ua)
+		resp, doErr := t.client.Do(req)
+		if doErr != nil {
+			return nil, nil, fmt.Errorf("request failed: %w", doErr)
+		}
+		resp.Body = http.MaxBytesReader(nil, resp.Body, t.fetchLimitBytes)
+
+		b, readErr := io.ReadAll(resp.Body)
+		return resp, b, readErr
 	}
 
-	req.Header.Set("User-Agent", userAgent)
-	resp, err := t.client.Do(req)
-	if err != nil {
-		return ErrorResult(fmt.Sprintf("request failed: %v", err))
+	resp, body, err := doFetch(userAgent)
+	if resp != nil && resp.Body != nil {
+		defer resp.Body.Close()
 	}
 
-	resp.Body = http.MaxBytesReader(nil, resp.Body, t.fetchLimitBytes)
-
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
 	if err != nil {
 		var maxBytesErr *http.MaxBytesError
 		if errors.As(err, &maxBytesErr) {
 			return ErrorResult(fmt.Sprintf("failed to read response: size exceeded %d bytes limit", t.fetchLimitBytes))
 		}
-		return ErrorResult(fmt.Sprintf("failed to read response: %v", err))
+		return ErrorResult(err.Error())
+	}
+
+	// Cloudflare (and similar WAFs) signal bot challenges with 403 + cf-mitigated: challenge.
+	// Retry once with an honest User-Agent that identifies picoclaw, which some
+	// operators explicitly allow-list for AI assistants.
+	if resp.StatusCode == http.StatusForbidden && resp.Header.Get("Cf-Mitigated") == "challenge" {
+		logger.DebugCF("tool", "Cloudflare challenge detected, retrying with honest User-Agent",
+			map[string]any{"url": urlStr})
+		honestUA := fmt.Sprintf(userAgentHonest, config.Version)
+		resp2, body2, err2 := doFetch(honestUA)
+		if resp2 != nil && resp2.Body != nil {
+			defer resp2.Body.Close()
+		}
+
+		if err2 == nil {
+			resp, body = resp2, body2
+		} else {
+			var maxBytesErr *http.MaxBytesError
+			if errors.As(err2, &maxBytesErr) {
+				return ErrorResult(
+					fmt.Sprintf("failed to read response: size exceeded %d bytes limit", t.fetchLimitBytes),
+				)
+			}
+			return ErrorResult(err2.Error())
+		}
 	}
 
 	bodyStr := string(body)
@@ -1004,7 +1036,7 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
 
 	truncated := len(text) > maxChars
 	if truncated {
-		text = text[:maxChars]
+		text = text[:maxChars] + "\n[Content truncated due to size limit]"
 	}
 
 	result := map[string]any{
diff --git a/pkg/tools/web_test.go b/pkg/tools/web_test.go
index dfb33971a..98c763193 100644
--- a/pkg/tools/web_test.go
+++ b/pkg/tools/web_test.go
@@ -212,6 +212,132 @@ func TestWebTool_WebFetch_Truncation(t *testing.T) {
 	if truncated, ok := resultMap["truncated"].(bool); !ok || !truncated {
 		t.Errorf("Expected 'truncated' to be true in result")
 	}
+
+	// Text should end with the truncation notice
+	if text, ok := resultMap["text"].(string); ok {
+		if !strings.HasSuffix(text, "[Content truncated due to size limit]") {
+			t.Errorf("Expected text to end with truncation notice, got: %q", text[max(0, len(text)-60):])
+		}
+	}
+}
+
+// TestWebTool_WebFetch_TruncationNotice verifies the truncation notice is appended
+// for all content formats (text/plain, text/html, markdown, application/json).
+func TestWebTool_WebFetch_TruncationNotice(t *testing.T) {
+	withPrivateWebFetchHostsAllowed(t)
+
+	const truncationNotice = "[Content truncated due to size limit]"
+	const maxChars = 100
+
+	tests := []struct {
+		name        string
+		contentType string
+		body        string
+		format      string
+	}{
+		{
+			name:        "plain text",
+			contentType: "text/plain",
+			body:        strings.Repeat("a", 500),
+			format:      "plaintext",
+		},
+		{
+			name:        "html plaintext extractor",
+			contentType: "text/html",
+			body:        "<html><body>" + strings.Repeat("b", 500) + "</body></html>",
+			format:      "plaintext",
+		},
+		{
+			name:        "html markdown extractor",
+			contentType: "text/html",
+			body:        "<html><body>" + strings.Repeat("c", 500) + "</body></html>",
+			format:      "markdown",
+		},
+		{
+			name:        "json",
+			contentType: "application/json",
+			body:        `"` + strings.Repeat("d", 500) + `"`,
+			format:      "plaintext",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.Header().Set("Content-Type", tt.contentType)
+				w.WriteHeader(http.StatusOK)
+				w.Write([]byte(tt.body))
+			}))
+			defer server.Close()
+
+			tool, err := NewWebFetchTool(maxChars, tt.format, testFetchLimit)
+			if err != nil {
+				t.Fatalf("NewWebFetchTool() error: %v", err)
+			}
+
+			result := tool.Execute(context.Background(), map[string]any{"url": server.URL})
+			if result.IsError {
+				t.Fatalf("unexpected error: %s", result.ForLLM)
+			}
+
+			var resultMap map[string]any
+			if err := json.Unmarshal([]byte(result.ForLLM), &resultMap); err != nil {
+				t.Fatalf("failed to unmarshal result JSON: %v", err)
+			}
+
+			text, ok := resultMap["text"].(string)
+			if !ok {
+				t.Fatal("missing 'text' field in result")
+			}
+
+			if !strings.HasSuffix(text, truncationNotice) {
+				t.Errorf("expected text to end with %q, got suffix: %q", truncationNotice, text[max(0, len(text)-60):])
+			}
+
+			if truncated, ok := resultMap["truncated"].(bool); !ok || !truncated {
+				t.Errorf("expected truncated=true in result")
+			}
+		})
+	}
+}
+
+// TestWebTool_WebFetch_NoTruncationNoticeWhenFitsInLimit verifies that the notice
+// is NOT appended when the content fits within the limit.
+func TestWebTool_WebFetch_NoTruncationNoticeWhenFitsInLimit(t *testing.T) {
+	withPrivateWebFetchHostsAllowed(t)
+
+	const truncationNotice = "[Content truncated due to size limit]"
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("short content"))
+	}))
+	defer server.Close()
+
+	tool, err := NewWebFetchTool(50000, format, testFetchLimit)
+	if err != nil {
+		t.Fatalf("NewWebFetchTool() error: %v", err)
+	}
+
+	result := tool.Execute(context.Background(), map[string]any{"url": server.URL})
+	if result.IsError {
+		t.Fatalf("unexpected error: %s", result.ForLLM)
+	}
+
+	var resultMap map[string]any
+	if err := json.Unmarshal([]byte(result.ForLLM), &resultMap); err != nil {
+		t.Fatalf("failed to unmarshal result JSON: %v", err)
+	}
+
+	text, _ := resultMap["text"].(string)
+	if strings.Contains(text, truncationNotice) {
+		t.Errorf("expected no truncation notice for content within limit, got: %q", text)
+	}
+
+	if truncated, _ := resultMap["truncated"].(bool); truncated {
+		t.Errorf("expected truncated=false for content within limit")
+	}
 }
 
 func TestWebFetchTool_PayloadTooLarge(t *testing.T) {
@@ -943,6 +1069,119 @@ func TestWebTool_TavilySearch_Success(t *testing.T) {
 	}
 }
 
+// TestWebFetchTool_CloudflareChallenge_RetryWithHonestUA verifies that a 403 response
+// with cf-mitigated: challenge triggers a retry using the honest picoclaw User-Agent,
+// and that the retry response is returned when it succeeds.
+func TestWebFetchTool_CloudflareChallenge_RetryWithHonestUA(t *testing.T) {
+	withPrivateWebFetchHostsAllowed(t)
+
+	requestCount := 0
+	var receivedUAs []string
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestCount++
+		receivedUAs = append(receivedUAs, r.Header.Get("User-Agent"))
+
+		if requestCount == 1 {
+			// First request: simulate Cloudflare challenge
+			w.Header().Set("Cf-Mitigated", "challenge")
+			w.Header().Set("Content-Type", "text/html")
+			w.WriteHeader(http.StatusForbidden)
+			w.Write([]byte("<html><body>Cloudflare challenge</body></html>"))
+			return
+		}
+		// Second request (honest UA retry): success
+		w.Header().Set("Content-Type", "text/plain")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("real content"))
+	}))
+	defer server.Close()
+
+	tool, err := NewWebFetchTool(50000, format, testFetchLimit)
+	if err != nil {
+		t.Fatalf("NewWebFetchTool() error: %v", err)
+	}
+
+	result := tool.Execute(context.Background(), map[string]any{"url": server.URL})
+
+	if result.IsError {
+		t.Fatalf("expected success after retry, got error: %s", result.ForLLM)
+	}
+	if !strings.Contains(result.ForLLM, "real content") {
+		t.Errorf("expected retry response content, got: %s", result.ForLLM)
+	}
+	if requestCount != 2 {
+		t.Errorf("expected exactly 2 requests, got %d", requestCount)
+	}
+
+	// First request must use the generic user agent
+	if receivedUAs[0] != userAgent {
+		t.Errorf("first request UA = %q, want %q", receivedUAs[0], userAgent)
+	}
+	// Second request must use the honest picoclaw user agent
+	if !strings.Contains(receivedUAs[1], "picoclaw") {
+		t.Errorf("retry request UA = %q, want it to contain 'picoclaw'", receivedUAs[1])
+	}
+}
+
+// TestWebFetchTool_CloudflareChallenge_NoRetryOnOtherErrors verifies that a plain 403
+// (without cf-mitigated: challenge) does NOT trigger a retry.
+func TestWebFetchTool_CloudflareChallenge_NoRetryOnOtherErrors(t *testing.T) {
+	withPrivateWebFetchHostsAllowed(t)
+
+	requestCount := 0
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestCount++
+		w.Header().Set("Content-Type", "text/plain")
+		w.WriteHeader(http.StatusForbidden)
+		w.Write([]byte("plain forbidden"))
+	}))
+	defer server.Close()
+
+	tool, err := NewWebFetchTool(50000, format, testFetchLimit)
+	if err != nil {
+		t.Fatalf("NewWebFetchTool() error: %v", err)
+	}
+
+	tool.Execute(context.Background(), map[string]any{"url": server.URL})
+
+	if requestCount != 1 {
+		t.Errorf("expected exactly 1 request for plain 403, got %d", requestCount)
+	}
+}
+
+// TestWebFetchTool_CloudflareChallenge_RetryFailsToo verifies that if the honest-UA
+// retry also fails (e.g. still blocked), the error from the retry is returned.
+func TestWebFetchTool_CloudflareChallenge_RetryFailsToo(t *testing.T) {
+	withPrivateWebFetchHostsAllowed(t)
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Always return CF challenge regardless of UA
+		w.Header().Set("Cf-Mitigated", "challenge")
+		w.Header().Set("Content-Type", "text/html")
+		w.WriteHeader(http.StatusForbidden)
+		w.Write([]byte("<html><body>still blocked</body></html>"))
+	}))
+	defer server.Close()
+
+	tool, err := NewWebFetchTool(50000, format, testFetchLimit)
+	if err != nil {
+		t.Fatalf("NewWebFetchTool() error: %v", err)
+	}
+
+	result := tool.Execute(context.Background(), map[string]any{"url": server.URL})
+
+	// Should not be an error — the retry response is used as-is (403 is a valid HTTP response)
+	if result.IsError {
+		t.Fatalf("expected non-error result even when retry is also blocked, got: %s", result.ForLLM)
+	}
+	// Status in the JSON result should reflect the 403
+	if !strings.Contains(result.ForLLM, "403") {
+		t.Errorf("expected status 403 in result, got: %s", result.ForLLM)
+	}
+}
+
 func TestAPIKeyPool(t *testing.T) {
 	pool := NewAPIKeyPool([]string{"key1", "key2", "key3"})
 	if len(pool.keys) != 3 {