Merge pull request #999 from yinwm/fix/mcp-race-condition-and-resource-leak

fix(mcp): resolve TOCTOU race condition and resource leak
This commit is contained in:
daming大铭
2026-03-03 15:06:57 +08:00
committed by GitHub
4 changed files with 29 additions and 22 deletions
+11 -10
View File
@@ -178,6 +178,17 @@ func (al *AgentLoop) Run(ctx context.Context) error {
// Initialize MCP servers for all agents
if al.cfg.Tools.MCP.Enabled {
mcpManager := mcp.NewManager()
// Ensure MCP connections are cleaned up on exit, regardless of initialization success
// This fixes resource leak when LoadFromMCPConfig partially succeeds then fails
defer func() {
if err := mcpManager.Close(); err != nil {
logger.ErrorCF("agent", "Failed to close MCP manager",
map[string]any{
"error": err.Error(),
})
}
}()
defaultAgent := al.registry.GetDefaultAgent()
var workspacePath string
if defaultAgent != nil && defaultAgent.Workspace != "" {
@@ -192,16 +203,6 @@ func (al *AgentLoop) Run(ctx context.Context) error {
"error": err.Error(),
})
} else {
// Ensure MCP connections are cleaned up on exit, only if initialization succeeded
defer func() {
if err := mcpManager.Close(); err != nil {
logger.ErrorCF("agent", "Failed to close MCP manager",
map[string]any{
"error": err.Error(),
})
}
}()
// Register MCP tools for all agents
servers := mcpManager.GetServers()
uniqueTools := 0
+15 -9
View File
@@ -11,6 +11,7 @@ import (
"path/filepath"
"strings"
"sync"
"sync/atomic"
"github.com/modelcontextprotocol/go-sdk/mcp"
@@ -108,7 +109,7 @@ type ServerConnection struct {
type Manager struct {
servers map[string]*ServerConnection
mu sync.RWMutex
closed bool
closed atomic.Bool // changed from bool to atomic.Bool to avoid TOCTOU race
wg sync.WaitGroup // tracks in-flight CallTool calls
}
@@ -440,14 +441,20 @@ func (m *Manager) CallTool(
serverName, toolName string,
arguments map[string]any,
) (*mcp.CallToolResult, error) {
// Check if closed before acquiring lock (fast path)
if m.closed.Load() {
return nil, fmt.Errorf("manager is closed")
}
m.mu.RLock()
if m.closed {
// Double-check after acquiring lock to prevent TOCTOU race
if m.closed.Load() {
m.mu.RUnlock()
return nil, fmt.Errorf("manager is closed")
}
conn, ok := m.servers[serverName]
if ok {
m.wg.Add(1)
m.wg.Add(1) // Add to WaitGroup while holding the lock
}
m.mu.RUnlock()
@@ -471,15 +478,14 @@ func (m *Manager) CallTool(
// Close closes all server connections
func (m *Manager) Close() error {
m.mu.Lock()
if m.closed {
m.mu.Unlock()
return nil
// Use Swap to atomically set closed=true and get the previous value
// This prevents TOCTOU race with CallTool's closed check
if m.closed.Swap(true) {
return nil // already closed
}
m.closed = true
m.mu.Unlock()
// Wait for all in-flight CallTool calls to finish before closing sessions
// After closed=true is set, no new CallTool can start (they check closed first)
m.wg.Wait()
m.mu.Lock()
+1 -1
View File
@@ -268,7 +268,7 @@ func TestGetAllTools_FiltersEmptyTools(t *testing.T) {
func TestCallTool_ErrorsForClosedOrMissingServer(t *testing.T) {
t.Run("manager closed", func(t *testing.T) {
mgr := NewManager()
mgr.closed = true
mgr.closed.Store(true)
_, err := mgr.CallTool(context.Background(), "s1", "tool", nil)
if err == nil || !strings.Contains(err.Error(), "manager is closed") {