Files
picoclaw/pkg/tools/shell.go
T
2026-03-25 10:14:16 +08:00

484 lines
13 KiB
Go

package tools
import (
"bytes"
"context"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"time"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/constants"
)
type ExecTool struct {
workingDir string
timeout time.Duration
denyPatterns []*regexp.Regexp
allowPatterns []*regexp.Regexp
customAllowPatterns []*regexp.Regexp
allowedPathPatterns []*regexp.Regexp
restrictToWorkspace bool
allowRemote bool
}
var (
defaultDenyPatterns = []*regexp.Regexp{
regexp.MustCompile(`\brm\s+-[rf]{1,2}\b`),
regexp.MustCompile(`\bdel\s+/[fq]\b`),
regexp.MustCompile(`\brmdir\s+/s\b`),
// Match disk wiping commands (must be followed by space/args)
regexp.MustCompile(
`(^|[^-\w])\b(format|mkfs|diskpart)\b\s`,
),
regexp.MustCompile(`\bdd\s+if=`),
// Block writes to block devices (all common naming schemes).
regexp.MustCompile(
`>\s*/dev/(sd[a-z]|hd[a-z]|vd[a-z]|xvd[a-z]|nvme\d|mmcblk\d|loop\d|dm-\d|md\d|sr\d|nbd\d)`,
),
regexp.MustCompile(`\b(shutdown|reboot|poweroff)\b`),
regexp.MustCompile(`:\(\)\s*\{.*\};\s*:`),
regexp.MustCompile(`\$\([^)]+\)`),
regexp.MustCompile(`\$\{[^}]+\}`),
regexp.MustCompile("`[^`]+`"),
regexp.MustCompile(`\|\s*sh\b`),
regexp.MustCompile(`\|\s*bash\b`),
regexp.MustCompile(`;\s*rm\s+-[rf]`),
regexp.MustCompile(`&&\s*rm\s+-[rf]`),
regexp.MustCompile(`\|\|\s*rm\s+-[rf]`),
regexp.MustCompile(`<<\s*EOF`),
regexp.MustCompile(`\$\(\s*cat\s+`),
regexp.MustCompile(`\$\(\s*curl\s+`),
regexp.MustCompile(`\$\(\s*wget\s+`),
regexp.MustCompile(`\$\(\s*which\s+`),
regexp.MustCompile(`\bsudo\b`),
regexp.MustCompile(`\bchmod\s+[0-7]{3,4}\b`),
regexp.MustCompile(`\bchown\b`),
regexp.MustCompile(`\bpkill\b`),
regexp.MustCompile(`\bkillall\b`),
regexp.MustCompile(`\bkill\b`),
regexp.MustCompile(`\bcurl\b.*\|\s*(sh|bash)`),
regexp.MustCompile(`\bwget\b.*\|\s*(sh|bash)`),
regexp.MustCompile(`\bnpm\s+install\s+-g\b`),
regexp.MustCompile(`\bpip\s+install\s+--user\b`),
regexp.MustCompile(`\bapt\s+(install|remove|purge)\b`),
regexp.MustCompile(`\byum\s+(install|remove)\b`),
regexp.MustCompile(`\bdnf\s+(install|remove)\b`),
regexp.MustCompile(`\bdocker\s+run\b`),
regexp.MustCompile(`\bdocker\s+exec\b`),
regexp.MustCompile(`\bgit\s+push\b`),
regexp.MustCompile(`\bgit\s+force\b`),
regexp.MustCompile(`\bssh\b.*@`),
regexp.MustCompile(`\beval\b`),
regexp.MustCompile(`\bsource\s+.*\.sh\b`),
}
// absolutePathPattern matches absolute file paths in commands (Unix and Windows).
absolutePathPattern = regexp.MustCompile(`[A-Za-z]:\\[^\\\"']+|/[^\s\"']+`)
// safePaths are kernel pseudo-devices that are always safe to reference in
// commands, regardless of workspace restriction. They contain no user data
// and cannot cause destructive writes.
safePaths = map[string]bool{
"/dev/null": true,
"/dev/zero": true,
"/dev/random": true,
"/dev/urandom": true,
"/dev/stdin": true,
"/dev/stdout": true,
"/dev/stderr": true,
}
)
func NewExecTool(workingDir string, restrict bool, allowPaths ...[]*regexp.Regexp) (*ExecTool, error) {
return NewExecToolWithConfig(workingDir, restrict, nil, allowPaths...)
}
func NewExecToolWithConfig(
workingDir string,
restrict bool,
config *config.Config,
allowPaths ...[]*regexp.Regexp,
) (*ExecTool, error) {
denyPatterns := make([]*regexp.Regexp, 0)
customAllowPatterns := make([]*regexp.Regexp, 0)
var allowedPathPatterns []*regexp.Regexp
allowRemote := true
if len(allowPaths) > 0 {
allowedPathPatterns = allowPaths[0]
}
if config != nil {
execConfig := config.Tools.Exec
enableDenyPatterns := execConfig.EnableDenyPatterns
allowRemote = execConfig.AllowRemote
if enableDenyPatterns {
denyPatterns = append(denyPatterns, defaultDenyPatterns...)
if len(execConfig.CustomDenyPatterns) > 0 {
fmt.Printf("Using custom deny patterns: %v\n", execConfig.CustomDenyPatterns)
for _, pattern := range execConfig.CustomDenyPatterns {
re, err := regexp.Compile(pattern)
if err != nil {
return nil, fmt.Errorf("invalid custom deny pattern %q: %w", pattern, err)
}
denyPatterns = append(denyPatterns, re)
}
}
} else {
// If deny patterns are disabled, we won't add any patterns, allowing all commands.
fmt.Println("Warning: deny patterns are disabled. All commands will be allowed.")
}
for _, pattern := range execConfig.CustomAllowPatterns {
re, err := regexp.Compile(pattern)
if err != nil {
return nil, fmt.Errorf("invalid custom allow pattern %q: %w", pattern, err)
}
customAllowPatterns = append(customAllowPatterns, re)
}
} else {
denyPatterns = append(denyPatterns, defaultDenyPatterns...)
}
timeout := 60 * time.Second
if config != nil && config.Tools.Exec.TimeoutSeconds > 0 {
timeout = time.Duration(config.Tools.Exec.TimeoutSeconds) * time.Second
}
return &ExecTool{
workingDir: workingDir,
timeout: timeout,
denyPatterns: denyPatterns,
allowPatterns: nil,
customAllowPatterns: customAllowPatterns,
allowedPathPatterns: allowedPathPatterns,
restrictToWorkspace: restrict,
allowRemote: allowRemote,
}, nil
}
func (t *ExecTool) Name() string {
return "exec"
}
func (t *ExecTool) Description() string {
return "Execute a shell command and return its output. Use with caution."
}
func (t *ExecTool) Parameters() map[string]any {
return map[string]any{
"type": "object",
"properties": map[string]any{
"command": map[string]any{
"type": "string",
"description": "The shell command to execute",
},
"working_dir": map[string]any{
"type": "string",
"description": "Optional working directory for the command",
},
},
"required": []string{"command"},
}
}
func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
command, ok := args["command"].(string)
if !ok {
return ErrorResult("command is required")
}
// GHSA-pv8c-p6jf-3fpp: block exec from remote channels (e.g. Telegram webhooks)
// unless explicitly opted-in via config. Fail-closed: empty channel = blocked.
if !t.allowRemote {
channel := ToolChannel(ctx)
if channel == "" {
channel, _ = args["__channel"].(string)
}
channel = strings.TrimSpace(channel)
if channel == "" || !constants.IsInternalChannel(channel) {
return ErrorResult("exec is restricted to internal channels")
}
}
cwd := t.workingDir
if wd, ok := args["working_dir"].(string); ok && wd != "" {
if t.restrictToWorkspace && t.workingDir != "" {
resolvedWD, err := validatePathWithAllowPaths(wd, t.workingDir, true, t.allowedPathPatterns)
if err != nil {
return ErrorResult("Command blocked by safety guard (" + err.Error() + ")")
}
cwd = resolvedWD
} else {
cwd = wd
}
}
if cwd == "" {
wd, err := os.Getwd()
if err == nil {
cwd = wd
}
}
if guardError := t.guardCommand(command, cwd); guardError != "" {
return ErrorResult(guardError)
}
// Re-resolve symlinks immediately before execution to shrink the TOCTOU window
// between validation and cmd.Dir assignment.
if t.restrictToWorkspace && t.workingDir != "" && cwd != t.workingDir {
resolved, err := filepath.EvalSymlinks(cwd)
if err != nil {
return ErrorResult(fmt.Sprintf("Command blocked by safety guard (path resolution failed: %v)", err))
}
if isAllowedPath(resolved, t.allowedPathPatterns) {
cwd = resolved
} else {
absWorkspace, _ := filepath.Abs(t.workingDir)
wsResolved, _ := filepath.EvalSymlinks(absWorkspace)
if wsResolved == "" {
wsResolved = absWorkspace
}
rel, err := filepath.Rel(wsResolved, resolved)
if err != nil || !filepath.IsLocal(rel) {
return ErrorResult("Command blocked by safety guard (working directory escaped workspace)")
}
cwd = resolved
}
}
// timeout == 0 means no timeout
var cmdCtx context.Context
var cancel context.CancelFunc
if t.timeout > 0 {
cmdCtx, cancel = context.WithTimeout(ctx, t.timeout)
} else {
cmdCtx, cancel = context.WithCancel(ctx)
}
defer cancel()
var cmd *exec.Cmd
if runtime.GOOS == "windows" {
cmd = exec.CommandContext(cmdCtx, "powershell", "-NoProfile", "-NonInteractive", "-Command", command)
} else {
cmd = exec.CommandContext(cmdCtx, "sh", "-c", command)
}
if cwd != "" {
cmd.Dir = cwd
}
prepareCommandForTermination(cmd)
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Start(); err != nil {
return ErrorResult(fmt.Sprintf("failed to start command: %v", err))
}
done := make(chan error, 1)
go func() {
done <- cmd.Wait()
}()
var err error
select {
case err = <-done:
case <-cmdCtx.Done():
_ = terminateProcessTree(cmd)
select {
case err = <-done:
case <-time.After(2 * time.Second):
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
err = <-done
}
}
output := stdout.String()
if stderr.Len() > 0 {
output += "\nSTDERR:\n" + stderr.String()
}
if err != nil {
if errors.Is(cmdCtx.Err(), context.DeadlineExceeded) {
msg := fmt.Sprintf("Command timed out after %v", t.timeout)
if output != "" {
msg += "\n\nPartial output before timeout:\n" + output
}
return &ToolResult{
ForLLM: msg,
ForUser: msg,
IsError: true,
Err: fmt.Errorf("command timeout: %w", err),
}
}
// Extract detailed exit information
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
exitCode := exitErr.ExitCode()
output += fmt.Sprintf("\n\n[Command exited with code %d]", exitCode)
// Add signal information if killed by signal (Unix)
if exitCode == -1 {
output += " (killed by signal)"
}
} else {
output += fmt.Sprintf("\n\n[Command failed: %v]", err)
}
}
if output == "" {
output = "(no output)"
}
maxLen := 10000
if len(output) > maxLen {
output = output[:maxLen] + fmt.Sprintf("\n... (truncated, %d more chars)", len(output)-maxLen)
}
if err != nil {
return &ToolResult{
ForLLM: output,
ForUser: output,
IsError: true,
}
}
return &ToolResult{
ForLLM: output,
ForUser: output,
IsError: false,
}
}
func (t *ExecTool) guardCommand(command, cwd string) string {
cmd := strings.TrimSpace(command)
lower := strings.ToLower(cmd)
// Custom allow patterns exempt a command from deny checks.
explicitlyAllowed := false
for _, pattern := range t.customAllowPatterns {
if pattern.MatchString(lower) {
explicitlyAllowed = true
break
}
}
if !explicitlyAllowed {
for _, pattern := range t.denyPatterns {
if pattern.MatchString(lower) {
return "Command blocked by safety guard (dangerous pattern detected)"
}
}
}
if len(t.allowPatterns) > 0 {
allowed := false
for _, pattern := range t.allowPatterns {
if pattern.MatchString(lower) {
allowed = true
break
}
}
if !allowed {
return "Command blocked by safety guard (not in allowlist)"
}
}
if t.restrictToWorkspace {
if strings.Contains(cmd, "..\\") || strings.Contains(cmd, "../") {
return "Command blocked by safety guard (path traversal detected)"
}
cwdPath, err := filepath.Abs(cwd)
if err != nil {
return ""
}
// Web URL schemes whose path components (starting with //) should be exempt
// from workspace sandbox checks. file: is intentionally excluded so that
// file:// URIs are still validated against the workspace boundary.
webSchemes := []string{"http:", "https:", "ftp:", "ftps:", "sftp:", "ssh:", "git:"}
matchIndices := absolutePathPattern.FindAllStringIndex(cmd, -1)
for _, loc := range matchIndices {
raw := cmd[loc[0]:loc[1]]
// Skip URL path components that look like they're from web URLs.
// When a URL like "https://github.com" is parsed, the regex captures
// "//github.com" as a match (the path portion after "https:").
// Use the exact match position (loc[0]) so that duplicate //path substrings
// in the same command are each evaluated at their own position.
if strings.HasPrefix(raw, "//") && loc[0] > 0 {
before := cmd[:loc[0]]
isWebURL := false
for _, scheme := range webSchemes {
if strings.HasSuffix(before, scheme) {
isWebURL = true
break
}
}
if isWebURL {
continue
}
}
p, err := filepath.Abs(raw)
if err != nil {
continue
}
if safePaths[p] {
continue
}
if isAllowedPath(p, t.allowedPathPatterns) {
continue
}
rel, err := filepath.Rel(cwdPath, p)
if err != nil {
continue
}
if strings.HasPrefix(rel, "..") {
return "Command blocked by safety guard (path outside working dir)"
}
}
}
return ""
}
func (t *ExecTool) SetTimeout(timeout time.Duration) {
t.timeout = timeout
}
func (t *ExecTool) SetRestrictToWorkspace(restrict bool) {
t.restrictToWorkspace = restrict
}
func (t *ExecTool) SetAllowPatterns(patterns []string) error {
t.allowPatterns = make([]*regexp.Regexp, 0, len(patterns))
for _, p := range patterns {
re, err := regexp.Compile(p)
if err != nil {
return fmt.Errorf("invalid allow pattern %q: %w", p, err)
}
t.allowPatterns = append(t.allowPatterns, re)
}
return nil
}