feat(tools): add exec tool enhancement with background execution and PTY support (#1752)

- Unified exec tool with actions: run/list/poll/read/write/send-keys/kill
- PTY support using creack/pty library
- Process session management with background execution
- Process group kill for cleaning up child processes
- Session cleanup: 30-minute TTL for old sessions
- Output buffer: 100MB limit with truncation

Actions:
- run: execute command (sync or background)
- list: list all sessions
- poll: check session status
- read: read session output
- write: send input to session stdin
- send-keys: send special keys (up, down, ctrl-c, enter, etc.)
- kill: terminate session

Tests:
- PTY: allowed commands, write/read, poll, kill, process group kill
- Non-PTY: background execution, list, read, write, poll, kill, process group kill
- Session management: add/get/remove/list/cleanup
This commit is contained in:
Liu Yuan
2026-03-21 22:38:03 +08:00
committed by GitHub
parent 6148ccc529
commit f901af8cbc
11 changed files with 2082 additions and 31 deletions
+730 -12
View File
@@ -3,20 +3,37 @@ package tools
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"sync"
"syscall"
"time"
"github.com/creack/pty"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/constants"
)
// Package-level session state shared by the exec tool.
var (
	// globalSessionManager is the SessionManager instance returned by getSessionManager.
	globalSessionManager = NewSessionManager()
	// sessionManagerMu is the lock getSessionManager takes (in read mode) while
	// reading globalSessionManager.
	sessionManagerMu sync.RWMutex
)
// getSessionManager returns the package-wide SessionManager. The pointer is
// read while holding sessionManagerMu in read mode.
func getSessionManager() *SessionManager {
	sessionManagerMu.RLock()
	mgr := globalSessionManager
	sessionManagerMu.RUnlock()
	return mgr
}
type ExecTool struct {
workingDir string
timeout time.Duration
@@ -26,6 +43,7 @@ type ExecTool struct {
allowedPathPatterns []*regexp.Regexp
restrictToWorkspace bool
allowRemote bool
sessionManager *SessionManager
}
var (
@@ -145,7 +163,7 @@ func NewExecToolWithConfig(
denyPatterns = append(denyPatterns, defaultDenyPatterns...)
}
timeout := 60 * time.Second
var timeout time.Duration
if config != nil && config.Tools.Exec.TimeoutSeconds > 0 {
timeout = time.Duration(config.Tools.Exec.TimeoutSeconds) * time.Second
}
@@ -159,6 +177,7 @@ func NewExecToolWithConfig(
allowedPathPatterns: allowedPathPatterns,
restrictToWorkspace: restrict,
allowRemote: allowRemote,
sessionManager: getSessionManager(),
}, nil
}
@@ -167,27 +186,146 @@ func (t *ExecTool) Name() string {
}
// Description returns the human/LLM-facing summary of the exec tool.
//
// The text covers the action-based interface (run plus the session actions
// poll/read/write/send-keys/kill), background and PTY execution, the session
// cleanup window and the output buffer limit. The previous one-line
// description was left in front of this one as a second return statement,
// which made the up-to-date text unreachable; only the current text remains.
func (t *ExecTool) Description() string {
	return `Execute shell commands. Use background=true for long-running commands (returns sessionId). Use pty=true for interactive commands (can combine with background=true). Use poll/read/write/send-keys/kill with sessionId to manage background sessions. Sessions auto-cleanup 30 minutes after process exits; use kill to terminate early. Output buffer limit: 100MB.`
}
// Parameters returns the argument schema advertised for the exec tool as a
// nested map: an object schema whose "oneOf" list holds one object schema per
// action (run, list, poll, read, write, kill, send-keys), each with its own
// "required" list.
//
// NOTE(review): this listing appears to interleave lines of the earlier
// single-command schema (the "command" / "working_dir" properties and the
// trailing `"required": []string{"command"}`) with the per-action schema, and
// the braces do not balance as shown — confirm against the committed file.
// NOTE(review): "background" and "pty" are declared as "string" here while the
// run handler accepts both bool and the string "true" — confirm this is intended.
func (t *ExecTool) Parameters() map[string]any {
return map[string]any{
"type": "object",
"properties": map[string]any{
"command": map[string]any{
"type": "string",
"description": "The shell command to execute",
"oneOf": []map[string]any{
// action=run: start a command, optionally in the background and/or in a PTY.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{"const": "run", "description": "Execute a shell command"},
"command": map[string]any{"type": "string", "description": "Shell command to execute"},
"background": map[string]any{
"type": "string",
"description": "Run in background immediately",
},
"pty": map[string]any{
"type": "string",
"description": "Run in a pseudo-terminal (PTY) when available",
},
"cwd": map[string]any{
"type": "string",
"description": "Working directory for the command",
},
"timeout": map[string]any{
"type": "integer",
"description": "Timeout in seconds (default: 0 = no timeout, kills process on expiry)",
},
},
"required": []string{"action", "command"},
},
"working_dir": map[string]any{
"type": "string",
"description": "Optional working directory for the command",
// action=list: takes no further arguments.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{"const": "list", "description": "List all active sessions"},
},
"required": []string{"action"},
},
// action=poll: status query for one session.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{
"const": "poll",
"description": "Check session status. Returns: {sessionId, status: running|done, exitCode}. exitCode only meaningful when status=done",
},
"sessionId": map[string]any{
"type": "string",
"description": "Session ID returned from background command",
},
},
"required": []string{"action", "sessionId"},
},
// action=read: fetch captured output of one session.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{
"const": "read",
"description": "Read output from session. Returns: {sessionId, output, status: running|done}",
},
"sessionId": map[string]any{
"type": "string",
"description": "Session ID returned from background command",
},
},
"required": []string{"action", "sessionId"},
},
// action=write: raw text input for one session.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{
"const": "write",
"description": "Send input to session stdin (only when status=running)",
},
"sessionId": map[string]any{
"type": "string",
"description": "Session ID returned from background command",
},
"data": map[string]any{"type": "string", "description": "Data to write to session stdin."},
},
"required": []string{"action", "sessionId", "data"},
},
// action=kill: terminate one session.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{"const": "kill", "description": "Terminate session"},
"sessionId": map[string]any{
"type": "string",
"description": "Session ID returned from background command",
},
},
"required": []string{"action", "sessionId"},
},
// action=send-keys: named special keys for one session.
// NOTE(review): the two key lists below differ (ctrl-z only in the first;
// home/end/pageup/pagedown/f1-f12 only in the second) — confirm which is authoritative.
{
"type": "object",
"properties": map[string]any{
"action": map[string]any{
"const": "send-keys",
"description": "Send special keys to PTY session. Keys: down/up/left/right/enter/escape/tab/backspace/ctrl-c/ctrl-d/ctrl-z. Multiple keys separated by comma",
},
"sessionId": map[string]any{
"type": "string",
"description": "Session ID returned from background command",
},
"keys": map[string]any{
"type": "string",
"description": "Comma-separated key names (optional spaces around comma). Valid keys: up, down, left, right, enter, tab, escape, backspace, ctrl-c, ctrl-d, home, end, pageup, pagedown, f1-f12.",
},
},
"required": []string{"action", "sessionId", "keys"},
},
},
"required": []string{"command"},
}
}
// Execute is the tool entry point. It reads the "action" argument and routes
// the call to the matching handler; a missing/non-string/empty action and an
// unrecognised action both produce an error result.
func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
	// A failed type assertion leaves requested empty, which is reported the
	// same way as an explicitly empty action.
	requested, _ := args["action"].(string)

	switch requested {
	case "":
		return ErrorResult("action is required")
	case "run":
		return t.executeRun(ctx, args)
	case "list":
		return t.executeList()
	case "poll":
		return t.executePoll(args)
	case "read":
		return t.executeRead(args)
	case "write":
		return t.executeWrite(args)
	case "kill":
		return t.executeKill(args)
	case "send-keys":
		return t.executeSendKeys(args)
	}

	return ErrorResult(fmt.Sprintf("unknown action: %s", requested))
}
func (t *ExecTool) executeRun(ctx context.Context, args map[string]any) *ToolResult {
command, ok := args["command"].(string)
if !ok {
return ErrorResult("command is required")
@@ -206,8 +344,26 @@ func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *ToolResult
}
}
getBoolArg := func(key string) bool {
switch v := args[key].(type) {
case bool:
return v
case string:
return v == "true"
}
return false
}
isPty := getBoolArg("pty")
isBackground := getBoolArg("background")
if isPty {
if runtime.GOOS == "windows" {
return ErrorResult("PTY is not supported on Windows. Use background=true without pty.")
}
}
cwd := t.workingDir
if wd, ok := args["working_dir"].(string); ok && wd != "" {
if wd, ok := args["cwd"].(string); ok && wd != "" {
if t.restrictToWorkspace && t.workingDir != "" {
resolvedWD, err := validatePathWithAllowPaths(wd, t.workingDir, true, t.allowedPathPatterns)
if err != nil {
@@ -253,6 +409,14 @@ func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *ToolResult
}
}
if isBackground {
return t.runBackground(ctx, command, cwd, isPty)
}
return t.runSync(ctx, command, cwd)
}
func (t *ExecTool) runSync(ctx context.Context, command, cwd string) *ToolResult {
// timeout == 0 means no timeout
var cmdCtx context.Context
var cancel context.CancelFunc
@@ -361,6 +525,560 @@ func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *ToolResult
}
}
func (t *ExecTool) runBackground(ctx context.Context, command, cwd string, ptyEnabled bool) *ToolResult {
sessionID := generateSessionID()
session := &ProcessSession{
ID: sessionID,
Command: command,
PTY: ptyEnabled,
Background: true,
StartTime: time.Now().Unix(),
Status: "running",
ptyKeyMode: PtyKeyModeCSI,
}
var cmd *exec.Cmd
if runtime.GOOS == "windows" {
cmd = exec.Command("powershell", "-NoProfile", "-NonInteractive", "-Command", command)
} else {
cmd = exec.Command("sh", "-c", command)
}
if cwd != "" {
cmd.Dir = cwd
}
prepareCommandForTermination(cmd)
var stdoutReader io.ReadCloser
var stderrReader io.ReadCloser
var stdinWriter io.WriteCloser
if ptyEnabled {
ptmx, tty, err := pty.Open()
if err != nil {
return ErrorResult(fmt.Sprintf("failed to create PTY: %v", err))
}
cmd.Stdin = tty
cmd.Stdout = tty
cmd.Stderr = tty
// For PTY, we need Setsid to create a new session.
// Note: Setsid and Setpgid conflict, so we must replace SysProcAttr entirely.
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
session.ptyMaster = ptmx
} else {
var err error
stdoutReader, err = cmd.StdoutPipe()
if err != nil {
return ErrorResult(fmt.Sprintf("failed to create stdout pipe: %v", err))
}
stderrReader, err = cmd.StderrPipe()
if err != nil {
return ErrorResult(fmt.Sprintf("failed to create stderr pipe: %v", err))
}
stdinWriter, err = cmd.StdinPipe()
if err != nil {
return ErrorResult(fmt.Sprintf("failed to create stdin pipe: %v", err))
}
session.stdoutPipe = io.MultiReader(stdoutReader, stderrReader)
session.stdinWriter = stdinWriter
}
if err := cmd.Start(); err != nil {
if session.ptyMaster != nil {
session.ptyMaster.Close()
}
return ErrorResult(fmt.Sprintf("failed to start command: %v", err))
}
session.PID = cmd.Process.Pid
t.sessionManager.Add(session)
session.outputBuffer = &bytes.Buffer{}
// PTY mode: read from ptyMaster and wait for process
// Note: On Linux, closing ptyMaster doesn't interrupt blocking Read() calls,
// so we need cmd.Wait() in a separate goroutine to detect process exit.
if session.PTY && session.ptyMaster != nil {
go func() {
cmd.Wait() // Wait for process to exit
session.mu.Lock()
if cmd.ProcessState != nil {
session.ExitCode = cmd.ProcessState.ExitCode()
}
session.Status = "done"
session.mu.Unlock()
}()
go func() {
buf := make([]byte, 4096)
for {
n, err := session.ptyMaster.Read(buf)
if n > 0 {
raw := string(buf[:n])
if mode := detectPtyKeyMode(raw); mode != PtyKeyModeNotFound && mode != session.GetPtyKeyMode() {
session.SetPtyKeyMode(mode)
}
session.mu.Lock()
if session.outputBuffer.Len() >= maxOutputBufferSize {
if !session.outputTruncated {
session.outputBuffer.WriteString(outputTruncateMarker)
session.outputTruncated = true
}
} else {
session.outputBuffer.Write(buf[:n])
}
session.mu.Unlock()
}
if err != nil {
break
}
}
}()
} else {
// Non-PTY mode: single goroutine reads pipes.
// When Read() returns EOF (pipe closed), we break.
// When process exits, OS closes pipe write end → Read() returns EOF → we exit.
go func() {
buf := make([]byte, 4096)
// Read stdout
for {
n, err := stdoutReader.Read(buf)
if n > 0 {
session.mu.Lock()
if session.outputBuffer.Len() >= maxOutputBufferSize {
if !session.outputTruncated {
session.outputBuffer.WriteString(outputTruncateMarker)
session.outputTruncated = true
}
} else {
session.outputBuffer.Write(buf[:n])
}
session.mu.Unlock()
}
if err != nil {
break
}
}
// Read stderr
for {
n, err := stderrReader.Read(buf)
if n > 0 {
session.mu.Lock()
if session.outputBuffer.Len() >= maxOutputBufferSize {
if !session.outputTruncated {
session.outputBuffer.WriteString(outputTruncateMarker)
session.outputTruncated = true
}
} else {
session.outputBuffer.Write(buf[:n])
}
session.mu.Unlock()
}
if err != nil {
break
}
}
// All pipes closed, get exit status
if stdinWriter != nil {
stdinWriter.Close()
}
cmd.Wait()
session.mu.Lock()
if cmd.ProcessState != nil {
session.ExitCode = cmd.ProcessState.ExitCode()
}
session.Status = "done"
session.mu.Unlock()
}()
}
resp := ExecResponse{
SessionID: sessionID,
Status: "running",
}
data, _ := json.Marshal(resp)
return &ToolResult{
ForLLM: string(data),
ForUser: fmt.Sprintf("Session %s started", sessionID),
IsError: false,
}
}
// executeList handles action "list": it returns every session known to the
// session manager, JSON-encoded for the model, plus a short count for the user.
func (t *ExecTool) executeList() *ToolResult {
	active := t.sessionManager.List()

	payload, _ := json.Marshal(ExecResponse{Sessions: active})

	result := &ToolResult{IsError: false}
	result.ForLLM = string(payload)
	result.ForUser = fmt.Sprintf("%d active sessions", len(active))
	return result
}
// executePoll handles action "poll": it looks up the session named by the
// "sessionId" argument and reports its status and exit code as JSON.
// A missing/non-string sessionId or an unknown session yields an error result.
func (t *ExecTool) executePoll(args map[string]any) *ToolResult {
	id, isString := args["sessionId"].(string)
	if !isString {
		return ErrorResult("sessionId is required")
	}

	sess, lookupErr := t.sessionManager.Get(id)
	switch {
	case lookupErr == nil:
		// found
	case errors.Is(lookupErr, ErrSessionNotFound):
		return ErrorResult(fmt.Sprintf("session not found: %s", id))
	default:
		return ErrorResult(lookupErr.Error())
	}

	payload, _ := json.Marshal(ExecResponse{
		SessionID: id,
		Status:    sess.GetStatus(),
		ExitCode:  sess.GetExitCode(),
	})
	return &ToolResult{ForLLM: string(payload), IsError: false}
}
// executeRead handles action "read": it returns the result of the session's
// Read method together with the session's current status, JSON-encoded.
// A missing/non-string sessionId or an unknown session yields an error result.
func (t *ExecTool) executeRead(args map[string]any) *ToolResult {
	id, isString := args["sessionId"].(string)
	if !isString {
		return ErrorResult("sessionId is required")
	}

	sess, lookupErr := t.sessionManager.Get(id)
	switch {
	case lookupErr == nil:
		// found
	case errors.Is(lookupErr, ErrSessionNotFound):
		return ErrorResult(fmt.Sprintf("session not found: %s", id))
	default:
		return ErrorResult(lookupErr.Error())
	}

	// Read the output first, then sample the status, in that order.
	captured := sess.Read()
	payload, _ := json.Marshal(ExecResponse{
		SessionID: id,
		Output:    captured,
		Status:    sess.GetStatus(),
	})
	return &ToolResult{ForLLM: string(payload), IsError: false}
}
// executeWrite handles action "write": it passes the "data" argument to the
// Write method of the session named by "sessionId".
//
// Error results are returned when either argument is missing or not a string,
// when the session is unknown, when the process has already exited (checked
// both before writing and via ErrSessionDone from Write), or when the write
// itself fails. On success the session ID and current status are returned.
func (t *ExecTool) executeWrite(args map[string]any) *ToolResult {
	id, hasID := args["sessionId"].(string)
	if !hasID {
		return ErrorResult("sessionId is required")
	}
	input, hasData := args["data"].(string)
	if !hasData {
		return ErrorResult("data is required")
	}

	sess, lookupErr := t.sessionManager.Get(id)
	switch {
	case lookupErr == nil:
		// found
	case errors.Is(lookupErr, ErrSessionNotFound):
		return ErrorResult(fmt.Sprintf("session not found: %s", id))
	default:
		return ErrorResult(lookupErr.Error())
	}

	alreadyExited := func() *ToolResult {
		return ErrorResult(fmt.Sprintf("process already exited with code %d", sess.GetExitCode()))
	}

	if sess.IsDone() {
		return alreadyExited()
	}

	switch writeErr := sess.Write(input); {
	case writeErr == nil:
		// written
	case errors.Is(writeErr, ErrSessionDone):
		return alreadyExited()
	default:
		return ErrorResult(fmt.Sprintf("failed to write to session: %v", writeErr))
	}

	payload, _ := json.Marshal(ExecResponse{
		SessionID: id,
		Status:    sess.GetStatus(),
	})
	return &ToolResult{ForLLM: string(payload), IsError: false}
}
// executeKill handles action "kill": it calls Kill on the session named by
// "sessionId" and, when that succeeds, removes the session from the session
// manager and reports status "done".
//
// Error results are returned for a missing/non-string sessionId, an unknown
// session, a process that has already exited, or a failing Kill.
func (t *ExecTool) executeKill(args map[string]any) *ToolResult {
	id, isString := args["sessionId"].(string)
	if !isString {
		return ErrorResult("sessionId is required")
	}

	sess, lookupErr := t.sessionManager.Get(id)
	switch {
	case lookupErr == nil:
		// found
	case errors.Is(lookupErr, ErrSessionNotFound):
		return ErrorResult(fmt.Sprintf("session not found: %s", id))
	default:
		return ErrorResult(lookupErr.Error())
	}

	if sess.IsDone() {
		return ErrorResult(fmt.Sprintf("process already exited with code %d", sess.GetExitCode()))
	}

	if killErr := sess.Kill(); killErr != nil {
		return ErrorResult(fmt.Sprintf("failed to kill session: %v", killErr))
	}
	t.sessionManager.Remove(id)

	payload, _ := json.Marshal(ExecResponse{SessionID: id, Status: "done"})

	result := &ToolResult{IsError: false}
	result.ForLLM = string(payload)
	result.ForUser = fmt.Sprintf("Session %s killed", id)
	return result
}
// keyMap is the default lookup table from a lower-case key name to the byte
// sequence written to the session for that key. Several names are aliases
// for the same sequence.
var keyMap = map[string]string{
	// Single-byte keys.
	"enter":     "\r",
	"return":    "\r",
	"tab":       "\t",
	"space":     " ",
	"escape":    "\x1b",
	"esc":       "\x1b",
	"backspace": "\x7f",
	"bspace":    "\x7f",

	// Cursor keys (CSI form; ss3KeysMap holds the SS3 form).
	"up":    "\x1b[A",
	"down":  "\x1b[B",
	"right": "\x1b[C",
	"left":  "\x1b[D",

	// Navigation and editing keys.
	"home":     "\x1b[1~",
	"insert":   "\x1b[2~",
	"ic":       "\x1b[2~",
	"delete":   "\x1b[3~",
	"del":      "\x1b[3~",
	"dc":       "\x1b[3~",
	"end":      "\x1b[4~",
	"pageup":   "\x1b[5~",
	"pgup":     "\x1b[5~",
	"pagedown": "\x1b[6~",
	"pgdn":     "\x1b[6~",
	"btab":     "\x1b[Z",

	// Function keys.
	"f1":  "\x1bOP",
	"f2":  "\x1bOQ",
	"f3":  "\x1bOR",
	"f4":  "\x1bOS",
	"f5":  "\x1b[15~",
	"f6":  "\x1b[17~",
	"f7":  "\x1b[18~",
	"f8":  "\x1b[19~",
	"f9":  "\x1b[20~",
	"f10": "\x1b[21~",
	"f11": "\x1b[23~",
	"f12": "\x1b[24~",
}
// ss3KeysMap holds the SS3 ("ESC O x") encodings of the cursor and home/end
// keys, keyed by the same lower-case names used in keyMap.
var ss3KeysMap = map[string]string{
	// Arrows.
	"up":    "\x1bOA",
	"down":  "\x1bOB",
	"right": "\x1bOC",
	"left":  "\x1bOD",

	// Line navigation.
	"end":  "\x1bOF",
	"home": "\x1bOH",
}
// detectPtyKeyMode scans a chunk of terminal output for the "ESC [ ? 1 h"
// (set) and "ESC [ ? 1 l" (reset) sequences and reports which key mode the
// chunk leaves in effect:
//
//   - PtyKeyModeNotFound when neither sequence occurs,
//   - PtyKeyModeSS3 when the last set sequence comes after the last reset,
//   - PtyKeyModeCSI otherwise.
func detectPtyKeyMode(raw string) PtyKeyMode {
	const (
		setSeq   = "\x1b[?1h"
		resetSeq = "\x1b[?1l"
	)

	setAt := strings.LastIndex(raw, setSeq)
	resetAt := strings.LastIndex(raw, resetSeq)

	switch {
	case setAt < 0 && resetAt < 0:
		return PtyKeyModeNotFound
	case setAt > resetAt:
		return PtyKeyModeSS3
	default:
		return PtyKeyModeCSI
	}
}
// encodeKeyToken encodes a single key token into the byte sequence to write
// to the session.
//
// The token is trimmed and lower-cased first; an empty token encodes to the
// empty string with no error. Supported forms, checked in this order:
//   - Ctrl modifier: "c-x" or "ctrl-x", where x is exactly one letter a-z
//     (sends the corresponding control character).
//   - Alt modifier: "m-x" or "alt-x", where x is exactly one byte (sends ESC + x).
//   - Shift modifier: "s-<key>" or "shift-<key>" for a name in keyMap.
//     Single-byte sequences are upper-cased; multi-byte escape sequences are
//     returned unchanged because shift cannot be added to them here.
//   - Named keys: looked up in ss3KeysMap first when ptyKeyMode is
//     PtyKeyModeSS3, then in keyMap.
//
// An error is returned for a malformed modifier token or an unknown key name.
// A bare "c-" / "ctrl-" prefix, or extra characters after the control letter,
// is reported as an invalid ctrl key rather than indexing past the end of the
// token or silently ignoring the trailing characters.
func encodeKeyToken(token string, ptyKeyMode PtyKeyMode) (string, error) {
	token = strings.ToLower(strings.TrimSpace(token))
	if token == "" {
		return "", nil
	}

	// stripPrefix returns the remainder of token after the first matching
	// prefix, and whether any prefix matched.
	stripPrefix := func(prefixes ...string) (string, bool) {
		for _, p := range prefixes {
			if strings.HasPrefix(token, p) {
				return token[len(p):], true
			}
		}
		return "", false
	}

	// Ctrl modifier (c-x / ctrl-x).
	if rest, ok := stripPrefix("c-", "ctrl-"); ok {
		if len(rest) == 1 && rest[0] >= 'a' && rest[0] <= 'z' {
			// Masking with 0x1f maps 'a'..'z' to the control codes 0x01..0x1a.
			return string(rune(rest[0]) & 0x1f), nil
		}
		return "", fmt.Errorf("invalid ctrl key: %s", token)
	}

	// Alt modifier (m-x / alt-x).
	if rest, ok := stripPrefix("m-", "alt-"); ok {
		if len(rest) == 1 {
			return "\x1b" + rest, nil
		}
		return "", fmt.Errorf("invalid alt key: %s", token)
	}

	// Shift modifier for named keys (s-up / shift-up, ...).
	if key, ok := stripPrefix("s-", "shift-"); ok {
		seq, known := keyMap[key]
		if !known {
			return "", fmt.Errorf("unknown key with shift: %s", key)
		}
		if len(seq) == 1 {
			return strings.ToUpper(seq), nil
		}
		return seq, nil
	}

	if ptyKeyMode == PtyKeyModeSS3 {
		if seq, ok := ss3KeysMap[token]; ok {
			return seq, nil
		}
	}

	if seq, ok := keyMap[token]; ok {
		return seq, nil
	}

	return "", fmt.Errorf("unknown key: %s (use write action for text input)", token)
}
// encodeKeySequence encodes each token with encodeKeyToken, in order, and
// returns the concatenation of the results. The first token that fails to
// encode aborts the whole call: the error is returned with an empty string.
func encodeKeySequence(tokens []string, ptyKeyMode PtyKeyMode) (string, error) {
	var encoded strings.Builder
	for i := range tokens {
		piece, err := encodeKeyToken(tokens[i], ptyKeyMode)
		if err != nil {
			return "", err
		}
		encoded.WriteString(piece)
	}
	return encoded.String(), nil
}
// executeSendKeys handles action "send-keys": it parses the comma-separated
// "keys" argument into key names, encodes them for the session's current key
// mode and writes the resulting bytes to the session named by "sessionId".
//
// Error results are returned when an argument is missing or not a string,
// when no key name remains after trimming, when the session is unknown, when
// a key name cannot be encoded, when the process has already exited, or when
// the write fails. On success the response echoes the parsed key names.
func (t *ExecTool) executeSendKeys(args map[string]any) *ToolResult {
	id, hasID := args["sessionId"].(string)
	if !hasID {
		return ErrorResult("sessionId is required")
	}

	rawKeys, isString := args["keys"].(string)
	switch {
	case !isString:
		return ErrorResult("keys must be a string")
	case rawKeys == "":
		return ErrorResult("keys cannot be empty")
	}

	// Split on commas, trim surrounding whitespace and drop blank entries.
	var names []string
	for _, part := range strings.Split(rawKeys, ",") {
		if trimmed := strings.TrimSpace(part); trimmed != "" {
			names = append(names, trimmed)
		}
	}
	if len(names) == 0 {
		return ErrorResult("keys cannot be empty")
	}

	sess, lookupErr := t.sessionManager.Get(id)
	switch {
	case lookupErr == nil:
		// found
	case errors.Is(lookupErr, ErrSessionNotFound):
		return ErrorResult(fmt.Sprintf("session not found: %s", id))
	default:
		return ErrorResult(lookupErr.Error())
	}

	encoded, encodeErr := encodeKeySequence(names, sess.GetPtyKeyMode())
	if encodeErr != nil {
		return ErrorResult(fmt.Sprintf("invalid key: %v", encodeErr))
	}

	alreadyExited := func() *ToolResult {
		return ErrorResult(fmt.Sprintf("process already exited with code %d", sess.GetExitCode()))
	}

	if sess.IsDone() {
		return alreadyExited()
	}

	switch writeErr := sess.Write(encoded); {
	case writeErr == nil:
		// sent
	case errors.Is(writeErr, ErrSessionDone):
		return alreadyExited()
	default:
		return ErrorResult(fmt.Sprintf("failed to send keys: %v", writeErr))
	}

	payload, _ := json.Marshal(ExecResponse{
		SessionID: id,
		Status:    "running",
		Output:    fmt.Sprintf("Sent keys: %v", names),
	})
	return &ToolResult{ForLLM: string(payload), IsError: false}
}
func (t *ExecTool) guardCommand(command, cwd string) string {
cmd := strings.TrimSpace(command)
lower := strings.ToLower(cmd)