fix(mcp): prevent use-after-close race between CallTool and Close

A race could occur when Close() called conn.Session.Close() concurrently
with an in-flight conn.Session.CallTool(), leading to undefined behavior.

Fix by adding a sync.WaitGroup to Manager:
- CallTool increments the WaitGroup while still holding the read lock
  (after checking m.closed and finding the server), ensuring no new
  calls are counted after Close sets the flag; a deferred wg.Done()
  marks the call as finished when CallTool returns
- Close sets m.closed=true, releases the write lock, waits for all
  in-flight calls to finish via wg.Wait(), then re-acquires the lock
  before closing sessions
This commit is contained in:
yuchou87
2026-02-21 14:10:48 +08:00
parent 11dbc301f9
commit cfc29a1383
+13 -2
View File
@@ -108,6 +108,7 @@ type Manager struct {
servers map[string]*ServerConnection
mu sync.RWMutex
closed bool
wg sync.WaitGroup // tracks in-flight CallTool calls
}
// NewManager creates a new MCP manager
@@ -414,11 +415,15 @@ func (m *Manager) CallTool(ctx context.Context, serverName, toolName string, arg
return nil, fmt.Errorf("manager is closed")
}
conn, ok := m.servers[serverName]
if ok {
m.wg.Add(1)
}
m.mu.RUnlock()
if !ok {
return nil, fmt.Errorf("server %s not found", serverName)
}
defer m.wg.Done()
params := &mcp.CallToolParams{
Name: toolName,
@@ -436,12 +441,18 @@ func (m *Manager) CallTool(ctx context.Context, serverName, toolName string, arg
// Close closes all server connections
func (m *Manager) Close() error {
m.mu.Lock()
defer m.mu.Unlock()
if m.closed {
m.mu.Unlock()
return nil
}
m.closed = true
m.mu.Unlock()
// Wait for all in-flight CallTool calls to finish before closing sessions
m.wg.Wait()
m.mu.Lock()
defer m.mu.Unlock()
logger.InfoCF("mcp", "Closing all MCP server connections",
map[string]interface{}{