mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-05-25 16:00:35 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e20ac34f35 |
@@ -84,6 +84,16 @@ func createToolRegistry(workspace string, restrict bool, cfg *config.Config, msg
|
||||
}
|
||||
registry.Register(tools.NewWebFetchTool(50000))
|
||||
|
||||
// Browser automation tool (agent-browser CLI)
|
||||
if cfg.Tools.Browser.Enabled {
|
||||
registry.Register(tools.NewBrowserTool(tools.BrowserToolOptions{
|
||||
Session: cfg.Tools.Browser.Session,
|
||||
Headless: cfg.Tools.Browser.Headless,
|
||||
Timeout: cfg.Tools.Browser.Timeout,
|
||||
CDPPort: cfg.Tools.Browser.CDPPort,
|
||||
}))
|
||||
}
|
||||
|
||||
// Hardware tools (I2C, SPI) - Linux only, returns error on other platforms
|
||||
registry.Register(tools.NewI2CTool())
|
||||
registry.Register(tools.NewSPITool())
|
||||
|
||||
+16
-1
@@ -211,8 +211,17 @@ type WebToolsConfig struct {
|
||||
DuckDuckGo DuckDuckGoConfig `json:"duckduckgo"`
|
||||
}
|
||||
|
||||
// BrowserConfig holds the settings for the browser automation tool.
// The json tags give the config-file keys and the env tags name the
// environment variable associated with each field.
type BrowserConfig struct {
	// Enabled controls whether the browser tool is registered at all.
	Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_BROWSER_ENABLED"`
	// Session is the agent-browser session name; empty means no --session flag.
	Session string `json:"session" env:"PICOCLAW_TOOLS_BROWSER_SESSION"`
	// Headless selects headless mode; when false the tool passes --headed.
	Headless bool `json:"headless" env:"PICOCLAW_TOOLS_BROWSER_HEADLESS"`
	// Timeout is the per-command timeout in seconds.
	Timeout int `json:"timeout" env:"PICOCLAW_TOOLS_BROWSER_TIMEOUT"`
	// CDPPort is the Chrome DevTools Protocol port handed to agent-browser.
	CDPPort int `json:"cdp_port" env:"PICOCLAW_TOOLS_BROWSER_CDP_PORT"`
}
|
||||
|
||||
type ToolsConfig struct {
|
||||
Web WebToolsConfig `json:"web"`
|
||||
Web WebToolsConfig `json:"web"`
|
||||
Browser BrowserConfig `json:"browser"`
|
||||
}
|
||||
|
||||
func DefaultConfig() *Config {
|
||||
@@ -322,6 +331,12 @@ func DefaultConfig() *Config {
|
||||
MaxResults: 5,
|
||||
},
|
||||
},
|
||||
Browser: BrowserConfig{
|
||||
Enabled: false,
|
||||
Headless: true,
|
||||
Timeout: 30,
|
||||
CDPPort: 9222,
|
||||
},
|
||||
},
|
||||
Heartbeat: HeartbeatConfig{
|
||||
Enabled: true,
|
||||
|
||||
@@ -0,0 +1,229 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// BrowserToolOptions configures the BrowserTool.
//
// Timeout and CDPPort fall back to built-in defaults in NewBrowserTool when
// they are zero or negative. Headless has no such fallback: it is used as
// given, so the zero value runs the browser headed.
type BrowserToolOptions struct {
	Session  string // Session name for isolation; empty omits the --session flag
	Headless bool   // Run in headless mode; false adds --headed to every command
	Timeout  int    // Command timeout in seconds (30 when <= 0)
	CDPPort  int    // Chrome DevTools Protocol port (9222 when <= 0)
}
|
||||
|
||||
// BrowserTool wraps the agent-browser CLI for headless browser automation.
// It delegates all browser complexity to the external `agent-browser` binary;
// the struct only remembers the global flags to pass on every invocation.
type BrowserTool struct {
	session  string        // value for --session; empty means the flag is omitted
	headless bool          // when false, --headed is passed
	timeout  time.Duration // upper bound for a single agent-browser invocation
	cdpPort  int           // value for --cdp
}
|
||||
|
||||
// NewBrowserTool creates a new BrowserTool with the given options.
|
||||
func NewBrowserTool(opts BrowserToolOptions) *BrowserTool {
|
||||
timeout := 30
|
||||
if opts.Timeout > 0 {
|
||||
timeout = opts.Timeout
|
||||
}
|
||||
cdpPort := 9222
|
||||
if opts.CDPPort > 0 {
|
||||
cdpPort = opts.CDPPort
|
||||
}
|
||||
return &BrowserTool{
|
||||
session: opts.Session,
|
||||
headless: opts.Headless,
|
||||
timeout: time.Duration(timeout) * time.Second,
|
||||
cdpPort: cdpPort,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *BrowserTool) Name() string {
|
||||
return "browser"
|
||||
}
|
||||
|
||||
func (t *BrowserTool) Description() string {
|
||||
return `Automate a headless browser via agent-browser CLI. Pass the subcommand as 'command'.
|
||||
The browser daemon persists between calls — open a page first, then interact with it.
|
||||
|
||||
Core workflow:
|
||||
browser open <url> → Navigate to URL
|
||||
browser snapshot -i → Get interactive elements with refs (@e1, @e2, ...)
|
||||
browser click @e2 → Click element by ref
|
||||
browser fill @e3 "text" → Fill input by ref
|
||||
browser type @e3 "text" → Type into element
|
||||
browser press Enter → Press a key
|
||||
browser screenshot [path] → Take screenshot
|
||||
browser get text @e1 → Get text content of element
|
||||
browser get title → Get page title
|
||||
browser get url → Get current URL
|
||||
browser eval "js code" → Run JavaScript
|
||||
browser scroll down [px] → Scroll page
|
||||
browser wait <selector|ms> → Wait for element or time
|
||||
browser close → Close browser
|
||||
|
||||
CSS selectors also work: browser click "#submit"
|
||||
|
||||
Examples:
|
||||
command: "open https://example.com"
|
||||
command: "snapshot -i"
|
||||
command: "click @e2"
|
||||
command: "fill @e3 \"user@example.com\""
|
||||
command: "get title"
|
||||
command: "screenshot /tmp/page.png"
|
||||
command: "close"`
|
||||
}
|
||||
|
||||
func (t *BrowserTool) Parameters() map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"command": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "The agent-browser subcommand to execute (e.g. 'open https://example.com', 'snapshot -i', 'click @e2')",
|
||||
},
|
||||
},
|
||||
"required": []string{"command"},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *BrowserTool) Execute(ctx context.Context, args map[string]interface{}) *ToolResult {
|
||||
command, ok := args["command"].(string)
|
||||
if !ok || strings.TrimSpace(command) == "" {
|
||||
return ErrorResult("command is required (e.g. 'open https://example.com')")
|
||||
}
|
||||
|
||||
// Build the full agent-browser command line
|
||||
cmdArgs := t.buildArgs(command)
|
||||
|
||||
cmdCtx, cancel := context.WithTimeout(ctx, t.timeout)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(cmdCtx, "agent-browser", cmdArgs...)
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
err := cmd.Run()
|
||||
output := stdout.String()
|
||||
if stderr.Len() > 0 {
|
||||
errOut := stderr.String()
|
||||
// Filter out noise from stderr (daemon startup messages, etc.)
|
||||
if !strings.Contains(errOut, "Daemon started") {
|
||||
if output != "" {
|
||||
output += "\n"
|
||||
}
|
||||
output += errOut
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if cmdCtx.Err() == context.DeadlineExceeded {
|
||||
msg := fmt.Sprintf("Browser command timed out after %v: %s", t.timeout, command)
|
||||
return &ToolResult{
|
||||
ForLLM: msg,
|
||||
ForUser: msg,
|
||||
IsError: true,
|
||||
}
|
||||
}
|
||||
// Include output even on error — agent-browser often puts useful info in stdout
|
||||
if output == "" {
|
||||
output = fmt.Sprintf("command failed: %v", err)
|
||||
} else {
|
||||
output += fmt.Sprintf("\nExit code: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if output == "" {
|
||||
output = "(no output)"
|
||||
}
|
||||
|
||||
// Truncate long output
|
||||
maxLen := 10000
|
||||
if len(output) > maxLen {
|
||||
output = output[:maxLen] + fmt.Sprintf("\n... (truncated, %d more chars)", len(output)-maxLen)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return &ToolResult{
|
||||
ForLLM: output,
|
||||
ForUser: output,
|
||||
IsError: true,
|
||||
}
|
||||
}
|
||||
|
||||
return &ToolResult{
|
||||
ForLLM: output,
|
||||
ForUser: output,
|
||||
IsError: false,
|
||||
}
|
||||
}
|
||||
|
||||
// buildArgs constructs the argument list for the agent-browser command.
|
||||
// It splits the user command string and prepends global flags.
|
||||
func (t *BrowserTool) buildArgs(command string) []string {
|
||||
var globalArgs []string
|
||||
|
||||
// Add CDP port
|
||||
globalArgs = append(globalArgs, "--cdp", fmt.Sprintf("%d", t.cdpPort))
|
||||
|
||||
// Add session flag if configured
|
||||
if t.session != "" {
|
||||
globalArgs = append(globalArgs, "--session", t.session)
|
||||
}
|
||||
|
||||
// Add --headed if not headless (agent-browser defaults to headless)
|
||||
if !t.headless {
|
||||
globalArgs = append(globalArgs, "--headed")
|
||||
}
|
||||
|
||||
// Add --json for machine-readable output
|
||||
globalArgs = append(globalArgs, "--json")
|
||||
|
||||
// Parse the command string into arguments, respecting quotes
|
||||
cmdArgs := splitCommand(command)
|
||||
|
||||
return append(globalArgs, cmdArgs...)
|
||||
}
|
||||
|
||||
// splitCommand splits a command string into arguments, respecting quoted strings.
//
// Rules:
//   - Arguments are separated by spaces, tabs, newlines or carriage returns.
//   - Text inside single or double quotes is kept together; the quotes are removed.
//   - Quoted and unquoted text that touch each other form one argument.
//   - An empty quoted string ("" or '') yields an empty argument instead of
//     being dropped, so a field can be filled with an empty value.
//   - Inside double quotes, \" and \\ produce a literal quote and backslash.
//     Any other backslash is kept as written.
//   - An unterminated quote runs to the end of the input.
//
// The result is nil when the input contains no arguments.
func splitCommand(command string) []string {
	var (
		args    []string
		current strings.Builder
		quote   byte // active quote character, 0 while outside quotes
		started bool // an argument has begun, even if it is still empty
	)

	// flush ends the argument in progress, if there is one.
	flush := func() {
		if !started {
			return
		}
		args = append(args, current.String())
		current.Reset()
		started = false
	}

	for i := 0; i < len(command); i++ {
		ch := command[i]
		switch {
		case quote != 0:
			switch {
			case ch == quote:
				quote = 0
			case ch == '\\' && quote == '"' && i+1 < len(command) &&
				(command[i+1] == '"' || command[i+1] == '\\'):
				// Escaped quote or backslash: emit the escaped byte only.
				i++
				current.WriteByte(command[i])
			default:
				current.WriteByte(ch)
			}
		case ch == '"' || ch == '\'':
			quote = ch
			started = true // remember the argument even if the quotes are empty
		case ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r':
			flush()
		default:
			current.WriteByte(ch)
			started = true
		}
	}
	flush()

	return args
}
|
||||
@@ -0,0 +1,150 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBrowserTool_Name(t *testing.T) {
|
||||
tool := NewBrowserTool(BrowserToolOptions{})
|
||||
if tool.Name() != "browser" {
|
||||
t.Errorf("Expected name 'browser', got %q", tool.Name())
|
||||
}
|
||||
}
|
||||
|
||||
func TestBrowserTool_Description(t *testing.T) {
|
||||
tool := NewBrowserTool(BrowserToolOptions{})
|
||||
desc := tool.Description()
|
||||
if !strings.Contains(desc, "agent-browser") {
|
||||
t.Error("Description should mention agent-browser")
|
||||
}
|
||||
if !strings.Contains(desc, "snapshot") {
|
||||
t.Error("Description should mention snapshot command")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBrowserTool_Parameters(t *testing.T) {
|
||||
tool := NewBrowserTool(BrowserToolOptions{})
|
||||
params := tool.Parameters()
|
||||
|
||||
props, ok := params["properties"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatal("Expected properties map")
|
||||
}
|
||||
|
||||
if _, ok := props["command"]; !ok {
|
||||
t.Error("Expected 'command' in properties")
|
||||
}
|
||||
|
||||
required, ok := params["required"].([]string)
|
||||
if !ok {
|
||||
t.Fatal("Expected required slice")
|
||||
}
|
||||
if len(required) != 1 || required[0] != "command" {
|
||||
t.Errorf("Expected required=['command'], got %v", required)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBrowserTool_MissingCommand(t *testing.T) {
|
||||
tool := NewBrowserTool(BrowserToolOptions{})
|
||||
ctx := context.Background()
|
||||
|
||||
// Empty args
|
||||
result := tool.Execute(ctx, map[string]interface{}{})
|
||||
if !result.IsError {
|
||||
t.Error("Expected error for missing command")
|
||||
}
|
||||
|
||||
// Empty string
|
||||
result = tool.Execute(ctx, map[string]interface{}{"command": ""})
|
||||
if !result.IsError {
|
||||
t.Error("Expected error for empty command")
|
||||
}
|
||||
|
||||
// Whitespace only
|
||||
result = tool.Execute(ctx, map[string]interface{}{"command": " "})
|
||||
if !result.IsError {
|
||||
t.Error("Expected error for whitespace-only command")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBrowserTool_BuildArgs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
session string
|
||||
command string
|
||||
wantArgs []string
|
||||
}{
|
||||
{
|
||||
name: "simple command",
|
||||
command: "open https://example.com",
|
||||
wantArgs: []string{"--cdp", "9222", "--headed", "--json", "open", "https://example.com"},
|
||||
},
|
||||
{
|
||||
name: "with session",
|
||||
session: "test-session",
|
||||
command: "snapshot -i",
|
||||
wantArgs: []string{"--cdp", "9222", "--session", "test-session", "--headed", "--json", "snapshot", "-i"},
|
||||
},
|
||||
{
|
||||
name: "quoted arguments",
|
||||
command: `fill @e3 "hello world"`,
|
||||
wantArgs: []string{"--cdp", "9222", "--headed", "--json", "fill", "@e3", "hello world"},
|
||||
},
|
||||
{
|
||||
name: "single quoted",
|
||||
command: `fill @e3 'hello world'`,
|
||||
wantArgs: []string{"--cdp", "9222", "--headed", "--json", "fill", "@e3", "hello world"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tool := NewBrowserTool(BrowserToolOptions{Session: tt.session})
|
||||
got := tool.buildArgs(tt.command)
|
||||
|
||||
if len(got) != len(tt.wantArgs) {
|
||||
t.Errorf("buildArgs(%q) = %v (len %d), want %v (len %d)",
|
||||
tt.command, got, len(got), tt.wantArgs, len(tt.wantArgs))
|
||||
return
|
||||
}
|
||||
|
||||
for i := range got {
|
||||
if got[i] != tt.wantArgs[i] {
|
||||
t.Errorf("buildArgs(%q)[%d] = %q, want %q",
|
||||
tt.command, i, got[i], tt.wantArgs[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitCommand(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want []string
|
||||
}{
|
||||
{"open https://example.com", []string{"open", "https://example.com"}},
|
||||
{`fill @e3 "test@example.com"`, []string{"fill", "@e3", "test@example.com"}},
|
||||
{"snapshot -i -c -d 3", []string{"snapshot", "-i", "-c", "-d", "3"}},
|
||||
{`eval "document.title"`, []string{"eval", "document.title"}},
|
||||
{" click @e2 ", []string{"click", "@e2"}},
|
||||
{`get text @e1`, []string{"get", "text", "@e1"}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
got := splitCommand(tt.input)
|
||||
if len(got) != len(tt.want) {
|
||||
t.Errorf("splitCommand(%q) = %v, want %v", tt.input, got, tt.want)
|
||||
return
|
||||
}
|
||||
for i := range got {
|
||||
if got[i] != tt.want[i] {
|
||||
t.Errorf("splitCommand(%q)[%d] = %q, want %q", tt.input, i, got[i], tt.want[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user