fix: Avoid failure of the main agent process due to tool call failures (#1023)

* Avoid failure of the main agent process due to tool call failures or abnormal returns

* rename recover
This commit is contained in:
linhaolin1
2026-03-19 00:10:26 +08:00
committed by GitHub
parent 54654d2794
commit f93d2b4533
4 changed files with 295 additions and 9 deletions
+41 -8
View File
@@ -188,15 +188,48 @@ func (r *ToolRegistry) ExecuteWithContext(
// The callback is a call parameter, not mutable state on the tool instance.
var result *ToolResult
start := time.Now()
if asyncExec, ok := tool.(AsyncExecutor); ok && asyncCallback != nil {
logger.DebugCF("tool", "Executing async tool via ExecuteAsync",
map[string]any{
"tool": name,
})
result = asyncExec.ExecuteAsync(ctx, args, asyncCallback)
} else {
result = tool.Execute(ctx, args)
// Use recover to catch any panics during tool execution
// This prevents tool crashes from killing the entire agent
func() {
defer func() {
if re := recover(); re != nil {
errMsg := fmt.Sprintf("Tool '%s' crashed with panic: %v", name, re)
logger.ErrorCF("tool", "Tool execution panic recovered",
map[string]any{
"tool": name,
"panic": fmt.Sprintf("%v", re),
})
result = &ToolResult{
ForLLM: errMsg,
ForUser: errMsg,
IsError: true,
Err: fmt.Errorf("panic: %v", re),
}
}
}()
if asyncExec, ok := tool.(AsyncExecutor); ok && asyncCallback != nil {
logger.DebugCF("tool", "Executing async tool via ExecuteAsync",
map[string]any{
"tool": name,
})
result = asyncExec.ExecuteAsync(ctx, args, asyncCallback)
} else {
result = tool.Execute(ctx, args)
}
}()
// Handle nil result (should not happen, but defensive)
if result == nil {
result = &ToolResult{
ForLLM: fmt.Sprintf("Tool '%s' returned nil result unexpectedly", name),
ForUser: fmt.Sprintf("Tool '%s' returned nil result unexpectedly", name),
IsError: true,
Err: fmt.Errorf("nil result from tool"),
}
}
duration := time.Since(start)
// Log based on result type
+173
View File
@@ -2,6 +2,7 @@ package tools
import (
"context"
"errors"
"strings"
"sync"
"testing"
@@ -358,3 +359,175 @@ func TestToolRegistry_ConcurrentAccess(t *testing.T) {
t.Error("expected tools to be registered after concurrent access")
}
}
// --- Panic and abnormal exit tests ---
// mockPanicTool is a tool that panics during execution
type mockPanicTool struct {
name string
panicValue any
}
func (m *mockPanicTool) Name() string { return m.name }
func (m *mockPanicTool) Description() string { return "a tool that panics" }
func (m *mockPanicTool) Parameters() map[string]any { return map[string]any{"type": "object"} }
func (m *mockPanicTool) Execute(_ context.Context, _ map[string]any) *ToolResult {
panic(m.panicValue)
}
// mockNilResultTool is a tool that returns nil
type mockNilResultTool struct {
name string
}
func (m *mockNilResultTool) Name() string { return m.name }
func (m *mockNilResultTool) Description() string { return "a tool that returns nil" }
func (m *mockNilResultTool) Parameters() map[string]any { return map[string]any{"type": "object"} }
func (m *mockNilResultTool) Execute(_ context.Context, _ map[string]any) *ToolResult {
return nil
}
func TestToolRegistry_Execute_PanicRecovery(t *testing.T) {
r := NewToolRegistry()
r.Register(&mockPanicTool{
name: "panic_tool",
panicValue: "something went terribly wrong",
})
// Should not panic, should return error result
result := r.Execute(context.Background(), "panic_tool", nil)
if result == nil {
t.Fatal("expected non-nil result after panic recovery")
}
if !result.IsError {
t.Error("expected IsError=true after panic")
}
if !strings.Contains(result.ForLLM, "panic") {
t.Errorf("expected 'panic' in error message, got %q", result.ForLLM)
}
if !strings.Contains(result.ForLLM, "panic_tool") {
t.Errorf("expected tool name in error message, got %q", result.ForLLM)
}
if !strings.Contains(result.ForLLM, "something went terribly wrong") {
t.Errorf("expected panic value in error message, got %q", result.ForLLM)
}
if result.Err == nil {
t.Error("expected Err to be set")
}
}
func TestToolRegistry_Execute_PanicRecovery_ErrorType(t *testing.T) {
r := NewToolRegistry()
// Test with error type panic
r.Register(&mockPanicTool{
name: "error_panic_tool",
panicValue: errors.New("custom error panic"),
})
result := r.Execute(context.Background(), "error_panic_tool", nil)
if !result.IsError {
t.Error("expected IsError=true")
}
if !strings.Contains(result.ForLLM, "custom error panic") {
t.Errorf("expected error message in ForLLM, got %q", result.ForLLM)
}
}
func TestToolRegistry_Execute_PanicRecovery_IntType(t *testing.T) {
r := NewToolRegistry()
// Test with int type panic
r.Register(&mockPanicTool{
name: "int_panic_tool",
panicValue: 42,
})
result := r.Execute(context.Background(), "int_panic_tool", nil)
if !result.IsError {
t.Error("expected IsError=true")
}
if !strings.Contains(result.ForLLM, "42") {
t.Errorf("expected panic value '42' in ForLLM, got %q", result.ForLLM)
}
}
func TestToolRegistry_Execute_NilResultHandling(t *testing.T) {
r := NewToolRegistry()
r.Register(&mockNilResultTool{name: "nil_tool"})
result := r.Execute(context.Background(), "nil_tool", nil)
if result == nil {
t.Fatal("expected non-nil result when tool returns nil")
}
if !result.IsError {
t.Error("expected IsError=true for nil result")
}
if !strings.Contains(result.ForLLM, "nil_tool") {
t.Errorf("expected tool name in error message, got %q", result.ForLLM)
}
if !strings.Contains(result.ForLLM, "nil result") {
t.Errorf("expected 'nil result' in error message, got %q", result.ForLLM)
}
if result.Err == nil {
t.Error("expected Err to be set")
}
}
func TestToolRegistry_ExecuteWithContext_PanicRecovery(t *testing.T) {
r := NewToolRegistry()
r.Register(&mockPanicTool{
name: "ctx_panic_tool",
panicValue: "context panic test",
})
// Should not panic even with context
result := r.ExecuteWithContext(
context.Background(),
"ctx_panic_tool",
map[string]any{"key": "value"},
"telegram",
"chat-123",
nil,
)
if result == nil {
t.Fatal("expected non-nil result")
}
if !result.IsError {
t.Error("expected IsError=true")
}
if !strings.Contains(result.ForLLM, "context panic test") {
t.Errorf("expected panic message, got %q", result.ForLLM)
}
}
func TestToolRegistry_Execute_PanicDoesNotAffectOtherTools(t *testing.T) {
r := NewToolRegistry()
r.Register(&mockPanicTool{name: "bad_tool", panicValue: "boom"})
r.Register(&mockRegistryTool{
name: "good_tool",
desc: "works fine",
params: map[string]any{},
result: SilentResult("success"),
})
// First, trigger the panic
result1 := r.Execute(context.Background(), "bad_tool", nil)
if !result1.IsError {
t.Error("expected error from panic tool")
}
// Then, verify the good tool still works
result2 := r.Execute(context.Background(), "good_tool", nil)
if result2.IsError {
t.Errorf("expected success from good tool, got error: %s", result2.ForLLM)
}
if result2.ForLLM != "success" {
t.Errorf("expected 'success', got %q", result2.ForLLM)
}
}
+18 -1
View File
@@ -311,13 +311,30 @@ func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *ToolResult
if err != nil {
if errors.Is(cmdCtx.Err(), context.DeadlineExceeded) {
msg := fmt.Sprintf("Command timed out after %v", t.timeout)
if output != "" {
msg += "\n\nPartial output before timeout:\n" + output
}
return &ToolResult{
ForLLM: msg,
ForUser: msg,
IsError: true,
Err: fmt.Errorf("command timeout: %w", err),
}
}
output += fmt.Sprintf("\nExit code: %v", err)
// Extract detailed exit information
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
exitCode := exitErr.ExitCode()
output += fmt.Sprintf("\n\n[Command exited with code %d]", exitCode)
// Add signal information if killed by signal (Unix)
if exitCode == -1 {
output += " (killed by signal)"
}
} else {
output += fmt.Sprintf("\n\n[Command failed: %v]", err)
}
}
if output == "" {
+63
View File
@@ -489,6 +489,69 @@ func TestShellTool_SafePathsInWorkspaceRestriction(t *testing.T) {
}
}
// TestShellTool_ExitCodeDetails verifies that exit codes are captured with details
func TestShellTool_ExitCodeDetails(t *testing.T) {
tool, err := NewExecTool("", false)
if err != nil {
t.Fatalf("unable to configure exec tool: %s", err)
}
ctx := context.Background()
args := map[string]any{
"command": "sh -c 'exit 42'",
}
result := tool.Execute(ctx, args)
if !result.IsError {
t.Error("expected error for non-zero exit code")
}
// Should contain the exit code in the message (new format: "exited with code 42")
if !strings.Contains(result.ForLLM, "42") {
t.Errorf("expected exit code 42 in error message, got: %s", result.ForLLM)
}
// Verify the new detailed message format
if !strings.Contains(result.ForLLM, "exited with code") {
t.Errorf("expected 'exited with code' in message, got: %s", result.ForLLM)
}
// Err field is set by the exec system (may or may not be set depending on implementation)
// The important thing is that IsError=true
t.Logf("Exit code result: %s", result.ForLLM)
}
// TestShellTool_TimeoutWithPartialOutput verifies timeout includes partial output
func TestShellTool_TimeoutWithPartialOutput(t *testing.T) {
tool, err := NewExecTool("", false)
if err != nil {
t.Fatalf("unable to configure exec tool: %s", err)
}
tool.SetTimeout(1 * time.Second) // Give more time for echo to complete
ctx := context.Background()
// Use a command that outputs immediately then sleeps
args := map[string]any{
"command": "echo 'partial output before timeout' && sleep 30",
}
result := tool.Execute(ctx, args)
if !result.IsError {
t.Error("expected error for timeout")
}
// Should mention timeout
if !strings.Contains(result.ForLLM, "timed out") {
t.Errorf("expected 'timed out' in message, got: %s", result.ForLLM)
}
// Log the result for debugging (partial output depends on shell behavior)
t.Logf("Timeout result: %s", result.ForLLM)
}
// TestShellTool_CustomAllowPatterns verifies that custom allow patterns exempt
// commands from deny pattern checks.
func TestShellTool_CustomAllowPatterns(t *testing.T) {