Merge branch 'upstream-main' into feat/subturn-poc

This commit is contained in:
Administrator
2026-03-18 22:57:01 +08:00
117 changed files with 14857 additions and 7091 deletions
+3
View File
@@ -52,6 +52,9 @@ dist/
# Windows Application Icon/Resource
*.syso
# Test telegram integration
cmd/telegram/
# Keep embedded backend dist directory placeholder in VCS
!web/backend/dist/
web/backend/dist/*
+12
View File
@@ -297,6 +297,18 @@ docker-clean:
docker compose -f docker/docker-compose.full.yml down -v
docker rmi picoclaw:latest picoclaw:full 2>/dev/null || true
## build-macos-app: Build PicoClaw macOS .app bundle (no terminal window)
build-macos-app:
@echo "Building macOS .app bundle..."
@if [ "$(UNAME_S)" != "Darwin" ]; then \
echo "Error: This target is only available on macOS"; \
exit 1; \
fi
@cd web && $(MAKE) build && cd ..
@./scripts/build-macos-app.sh $(BINARY_NAME)-$(PLATFORM)-$(ARCH)
@echo "macOS .app bundle created: $(BUILD_DIR)/PicoClaw.app"
## help: Show this help message
help:
@echo "picoclaw Makefile"
+82 -1074
View File
File diff suppressed because it is too large Load Diff
+129 -1047
View File
File diff suppressed because it is too large Load Diff
+58 -1386
View File
File diff suppressed because it is too large Load Diff
+116 -1105
View File
File diff suppressed because it is too large Load Diff
+128 -1085
View File
File diff suppressed because it is too large Load Diff
+94 -785
View File
@@ -3,10 +3,10 @@
<h1>PicoClaw: 基于Go语言的超高效 AI 助手</h1>
<h3>10$硬件 · 10MB内存 · 1秒启动 · 皮皮虾,我们走!</h3>
<h3>$10 硬件 · <10MB 内存 · <1s 启动 · 皮皮虾,我们走!</h3>
<p>
<img src="https://img.shields.io/badge/Go-1.21+-00ADD8?style=flat&logo=go&logoColor=white" alt="Go">
<img src="https://img.shields.io/badge/Arch-x86__64%2C%20ARM64%2C%20MIPS%2C%20RISC--V-blue" alt="Hardware">
<img src="https://img.shields.io/badge/Go-1.25+-00ADD8?style=flat&logo=go&logoColor=white" alt="Go">
<img src="https://img.shields.io/badge/Arch-x86__64%2C%20ARM64%2C%20MIPS%2C%20RISC--V%2C%20LoongArch-blue" alt="Hardware">
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
<br>
<a href="https://picoclaw.io"><img src="https://img.shields.io/badge/Website-picoclaw.io-blue?style=flat&logo=google-chrome&logoColor=white" alt="Website"></a>
@@ -26,7 +26,7 @@
> **PicoClaw** 是由 [矽速科技 (Sipeed)](https://sipeed.com) 发起的独立开源项目,完全使用 **Go 语言**从零编写——不是 OpenClaw、NanoBot 或其他项目的分支。
🦐 **PicoClaw** 是一个受 [NanoBot](https://github.com/HKUDS/nanobot) 启发的超轻量级个人 AI 助手。它采用 **Go 语言** 从零重构,经历了一个自举过程——即由 AI Agent 自身驱动了整个架构迁移和代码优化。
🦐 **PicoClaw** 是一个受 [NanoBot](https://github.com/HKUDS/nanobot) 启发的超轻量级个人 AI 助手。它采用 **Go 语言** 从零重构,经历了一个"自举"过程——即由 AI Agent 自身驱动了整个架构迁移和代码优化。
⚡️ **极致轻量**:可在 **10 美元** 的硬件上运行,内存占用 **<10MB**。这意味着比 OpenClaw 节省 99% 的内存,比 Mac mini 便宜 98%
@@ -45,42 +45,60 @@
</tr>
</table>
注意:人手有限,中文文档可能略有滞后,请优先查看英文文档。
> [!CAUTION]
> **🚨 SECURITY & OFFICIAL CHANNELS / 安全声明**
> **🚨 安全声明**
>
> - **无加密货币 (NO CRYPTO):** PicoClaw **没有** 发行任何官方代币、Token 或虚拟货币。所有在 `pump.fun` 或其他交易平台上的相关声称均为 **诈骗**。
> - **官方域名:** 唯一的官方网站是 **[picoclaw.io](https://picoclaw.io)**,公司官网是 **[sipeed.com](https://sipeed.com)**。
> - **警惕:** 许多 `.ai/.org/.com/.net/...` 后缀的域名被第三方抢注,请勿轻信。
> - **注意:** picoclaw正在初期的快速功能开发阶段,可能有尚未修复的网络安全问题,在1.0正式版发布前,请不要将其部署到生产环境中
> - **注意:** picoclaw最近合并了大量PRs,近期版本可能内存占用较大(10~20MB),我们将在功能较为收敛后进行资源占用优化.
> - **注意:** PicoClaw 正在初期的快速功能开发阶段,可能有尚未修复的网络安全问题,在 1.0 正式版发布前,请不要将其部署到生产环境中
> - **注意:** PicoClaw 最近合并了大量 PR,近期版本可能内存占用较大 (10~20MB),我们将在功能较为收敛后进行资源占用优化
## 📢 新闻 (News)
## 📢 新闻
2026-02-16 🎉 PicoClaw 在一周内突破了12K star! 感谢大家的关注!PicoClaw 的成长速度超乎我们预期. 由于PR数量的快速膨胀,我们亟需社区开发者参与维护. 我们需要的志愿者角色和roadmap已经发布到了[这里](docs/ROADMAP.md), 期待你的参与
2026-03-17 🚀 **v0.2.3 发布!** 系统托盘 UI(Windows & Linux)、子 Agent 状态查询 (`spawn_status`)、实验性 Gateway 热重载、Cron 安全门控,以及 2 项安全修复。PicoClaw 已达 **25K ⭐**
2026-02-13 🎉 **PicoClaw 在 4 天内突破 5000 Stars** 感谢社区的支持!由于正值中国春节假期,PR 和 Issue 涌入较多,我们正在利用这段时间敲定 **项目路线图 (Roadmap)** 并组建 **开发者群组**,以便加速 PicoClaw 的开发
🚀 **行动号召:** 请在 GitHub Discussions 中提交您的功能请求 (Feature Requests)。我们将在接下来的周会上进行审查和优先级排序。
2026-03-09 🎉 **v0.2.1 — 史上最大更新!** MCP 协议支持、4 个新频道 (Matrix/IRC/WeCom/Discord Proxy)、3 个新 Provider (Kimi/Minimax/Avian)、视觉管线、JSONL 记忆存储、模型路由
2026-02-09 🎉 **PicoClaw 正式发布!** 仅用 1 天构建,旨在将 AI Agent 带入 10 美元硬件与 <10MB 内存的世界。🦐 PicoClaw(皮皮虾),我们走!
2026-02-28 📦 **v0.2.0** 发布,支持 Docker Compose 和 Web UI 启动器。
2026-02-26 🎉 PicoClaw 仅 17 天突破 **20K Stars**!频道自动编排和能力接口上线。
<details>
<summary>更早的新闻...</summary>
2026-02-16 🎉 PicoClaw 一周内突破 12K Stars!社区维护者角色和 [路线图](ROADMAP.md) 正式发布。
2026-02-13 🎉 PicoClaw 4 天内突破 5000 Stars!项目路线图和开发者群组筹建中。
2026-02-09 🎉 **PicoClaw 正式发布!** 仅用 1 天构建,将 AI Agent 带入 $10 硬件与 <10MB 内存的世界。🦐 皮皮虾,我们走!
</details>
## ✨ 特性
🪶 **超轻量级**: 核心功能内存占用 <10MB — 比 Clawdbot 小 99%。
🪶 **超轻量级**: 核心功能内存占用 <10MB — 比 OpenClaw 小 99%。*
💰 **极低成本**: 高效到足以在 10 美元的硬件上运行 — 比 Mac mini 便宜 98%。
💰 **极低成本**: 高效到足以在 $10 的硬件上运行 — 比 Mac mini 便宜 98%。
⚡️ **闪电启动**: 启动速度快 400 倍,即使在 0.6GHz 单核处理器上也能在 1 秒内启动。
🌍 **真正可移植**: 跨 RISC-V、ARM、MIPS 和 x86 架构的单二进制文件,一键运行!
🤖 **AI 自举**: 纯 Go 语言原生实现 — 95% 的核心代码由 Agent 生成,并经由“人机回环 (Human-in-the-loop)”微调。
🤖 **AI 自举**: 纯 Go 语言原生实现 — 95% 的核心代码由 Agent 生成,并经由"人机回环"微调。
🔌 **MCP 支持**: 原生 [Model Context Protocol](https://modelcontextprotocol.io/) 集成 — 连接任意 MCP 服务器扩展 Agent 能力。
👁️ **视觉管线**: 直接向 Agent 发送图片和文件 — 自动 base64 编码对接多模态 LLM。
🧠 **智能路由**: 基于规则的模型路由 — 简单查询走轻量模型,节省 API 成本。
_*近期版本因快速合并 PR 可能占用 10–20MB,资源优化已列入计划。启动速度对比基于 0.8GHz 单核实测(见下方对比表)。_
| | OpenClaw | NanoBot | **PicoClaw** |
| ------------------------------ | ------------- | ------------------------ | -------------------------------------- |
| **语言** | TypeScript | Python | **Go** |
| **RAM** | >1GB | >100MB | **< 10MB** |
| **RAM** | >1GB | >100MB | **< 10MB*** |
| **启动时间**</br>(0.8GHz core) | >500s | >30s | **<1s** |
| **成本** | Mac Mini $599 | 大多数 Linux 开发板 ~$50 | **任意 Linux 开发板**</br>**低至 $10** |
@@ -110,31 +128,32 @@
### 📱 在手机上轻松运行
picoclaw 可以将你10年前的老旧手机废物利用,变身成为你的AI助理!快速指南:
PicoClaw 可以将你 10 年前的老旧手机废物利用,变身成为你的 AI 助理!快速指南
1. 先去应用商店下载安装Termux
1. 安装 [Termux](https://github.com/termux/termux-app)(可从 [GitHub Releases](https://github.com/termux/termux-app/releases) 下载,或在 F-Droid 等应用商店搜索)
2. 打开后执行指令
```bash
# 注意: 下面的v0.1.1 可以换为你实际看到的最新版本
wget https://github.com/sipeed/picoclaw/releases/download/v0.1.1/picoclaw-linux-arm64
chmod +x picoclaw-linux-arm64
# 从 Release 页面下载最新版本
wget https://github.com/sipeed/picoclaw/releases/latest/download/picoclaw_Linux_arm64.tar.gz
tar xzf picoclaw_Linux_arm64.tar.gz
pkg install proot
termux-chroot ./picoclaw-linux-arm64 onboard
termux-chroot ./picoclaw onboard
```
然后跟随下面的快速开始章节继续配置picoclaw即可使用!
然后跟随下面的"快速开始"章节继续配置 PicoClaw 即可使用!
<img src="assets/termux.jpg" alt="PicoClaw" width="512">
### 🐜 创新的低占用部署
PicoClaw 几乎可以部署在任何 Linux 设备上!
- $9.9 [LicheeRV-Nano](https://www.aliexpress.com/item/1005006519668532.html) E(网口) 或 W(WiFi6) 版本,用于极简家庭助手
- $30~50 [NanoKVM](https://www.aliexpress.com/item/1005007369816019.html),或 $100 [NanoKVM-Pro](https://www.aliexpress.com/item/1005010048471263.html),用于自动化服务器运维
- $50 [MaixCAM](https://www.aliexpress.com/item/1005008053333693.html) 或 $100 [MaixCAM2](https://www.kickstarter.com/projects/zepan/maixcam2-build-your-next-gen-4k-ai-camera),用于智能监控
- $9.9 [LicheeRV-Nano](https://www.aliexpress.com/item/1005006519668532.html) E(网口) 或 W(WiFi6) 版本,用于极简家庭助手
- $30~50 [NanoKVM](https://www.aliexpress.com/item/1005007369816019.html),或 $100 [NanoKVM-Pro](https://www.aliexpress.com/item/1005010048471263.html),用于自动化服务器运维
- $50 [MaixCAM](https://www.aliexpress.com/item/1005008053333693.html) 或 $100 [MaixCAM2](https://www.kickstarter.com/projects/zepan/maixcam2-build-your-next-gen-4k-ai-camera),用于智能监控
[https://private-user-images.githubusercontent.com/83055338/547056448-e7b031ff-d6f5-4468-bcca-5726b6fecb5c.mp4](https://private-user-images.githubusercontent.com/83055338/547056448-e7b031ff-d6f5-4468-bcca-5726b6fecb5c.mp4)
<https://private-user-images.githubusercontent.com/83055338/547056448-e7b031ff-d6f5-4468-bcca-5726b6fecb5c.mp4>
🌟 更多部署案例敬请期待!
@@ -142,7 +161,7 @@ PicoClaw 几乎可以部署在任何 Linux 设备上!
### 使用预编译二进制文件安装
从 [Release 页面](https://github.com/sipeed/picoclaw/releases) 下载适用于您平台的件。
从 [Release 页面](https://github.com/sipeed/picoclaw/releases) 下载适用于您平台的二进制文件。
### 从源码安装(获取最新特性,开发推荐)
@@ -158,782 +177,72 @@ make build
# 为多平台构建
make build-all
# 为 Raspberry Pi Zero 2 W 构建(32位: make build-linux-arm; 64位: make build-linux-arm64)
make build-pi-zero
# 构建并安装
make install
```
## 🐳 Docker Compose
**Raspberry Pi Zero 2 W:** 请使用与系统匹配的二进制文件:32 位 Raspberry Pi OS → `make build-linux-arm`;64 位 → `make build-linux-arm64`。或运行 `make build-pi-zero` 同时构建两者。
您也可以使用 Docker Compose 运行 PicoClaw,无需在本地安装任何环境。
## 📚 文档
```bash
# 1. 克隆仓库
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
详细指南请参阅以下文档,README 仅涵盖快速入门。
# 2. 首次运行 — 自动生成 docker/data/config.json 后退出
docker compose -f docker/docker-compose.yml --profile gateway up
# 容器打印 "First-run setup complete." 后自动停止
# 3. 填写 API Key 等配置
vim docker/data/config.json # 设置 provider API key、Bot Token 等
# 4. 正式启动
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Docker 用户**: 默认情况下, Gateway 监听 `127.0.0.1`,该端口不会暴露到容器外。如果需要通过端口映射访问健康检查接口,请在环境变量中设置 `PICOCLAW_GATEWAY_HOST=0.0.0.0` 或修改 `config.json`。
```bash
# 5. 查看日志
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. 停止
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Agent 模式 (一次性运行)
```bash
# 提问
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "2+2 等于几?"
# 交互模式
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### 更新镜像
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
### 🚀 快速开始
> [!TIP]
> 在 `~/.picoclaw/config.json` 中设置您的 API Key。获取 API Key: [火山引擎 (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu (智谱)](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM)。网络搜索是 **可选的** — 获取免费的 [Tavily API](https://tavily.com) (每月 1000 次免费查询) 或 [Brave Search API](https://brave.com/search/api) (每月 2000 次免费查询)。
**1. 初始化 (Initialize)**
```bash
picoclaw onboard
```
**2. 配置 (Configure)** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
}
}
```
> **新功能**: `model_list` 配置格式支持零代码添加 provider。详见[模型配置](#模型配置-model_list)章节。
> `request_timeout` 为可选项,单位为秒。若省略或设置为 `<= 0`,PicoClaw 使用默认超时(120 秒)。
**3. 获取 API Key**
* **LLM 提供商**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **网络搜索** (可选): [Tavily](https://tavily.com) - 专为 AI Agent 优化 (1000 请求/月) · [Brave Search](https://brave.com/search/api) - 提供免费层级 (2000 请求/月)
> **注意**: 完整的配置模板请参考 `config.example.json`。
**4. 对话 (Chat)**
```bash
picoclaw agent -m "2+2 等于几?"
```
就是这样!您在 2 分钟内就拥有了一个可工作的 AI 助手。
---
## 💬 聊天应用集成 (Chat Apps)
PicoClaw 支持多种聊天平台,使您的 Agent 能够连接到任何地方。
> **注意**: 所有 Webhook 类渠道(LINE、WeCom 等)均挂载在同一个 Gateway HTTP 服务器上(`gateway.host`:`gateway.port`,默认 `127.0.0.1:18790`),无需为每个渠道单独配置端口。注意:飞书(Feishu)使用 WebSocket/SDK 模式,不通过该共享 HTTP webhook 服务器接收消息。
### 核心渠道
| 渠道 | 设置难度 | 特性说明 | 文档链接 |
| -------------------- | ----------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------- |
| **Telegram** | ⭐ 简单 | 推荐,支持语音转文字,长轮询无需公网 | [查看文档](docs/channels/telegram/README.zh.md) |
| **Discord** | ⭐ 简单 | Socket Mode,支持群组/私信,Bot 生态成熟 | [查看文档](docs/channels/discord/README.zh.md) |
| **Slack** | ⭐ 简单 | **Socket Mode** (无需公网 IP),企业级支持 | [查看文档](docs/channels/slack/README.zh.md) |
| **Matrix** | ⭐⭐ 中等 | 联邦协议,支持自建 homeserver 与公开服务器 | [查看文档](docs/channels/matrix/README.zh.md) |
| **QQ** | ⭐⭐ 中等 | 官方机器人 API,适合国内社群 | [查看文档](docs/channels/qq/README.zh.md) |
| **钉钉 (DingTalk)** | ⭐⭐ 中等 | Stream 模式无需公网,企业办公首选 | [查看文档](docs/channels/dingtalk/README.zh.md) |
| **企业微信 (WeCom)** | ⭐⭐⭐ 较难 | 支持群机器人(Webhook)、自建应用(API)和智能机器人(AI Bot) | [Bot 文档](docs/channels/wecom/wecom_bot/README.zh.md) / [App 文档](docs/channels/wecom/wecom_app/README.zh.md) / [AI Bot 文档](docs/channels/wecom/wecom_aibot/README.zh.md) |
| **飞书 (Feishu)** | ⭐⭐⭐ 较难 | 企业级协作,功能丰富 | [查看文档](docs/channels/feishu/README.zh.md) |
| **Line** | ⭐⭐⭐ 较难 | 需要 HTTPS Webhook | [查看文档](docs/channels/line/README.zh.md) |
| **OneBot** | ⭐⭐ 中等 | 兼容 NapCat/Go-CQHTTP,社区生态丰富 | [查看文档](docs/channels/onebot/README.zh.md) |
| **MaixCam** | ⭐ 简单 | 专为 AI 摄像头设计的硬件集成通道 | [查看文档](docs/channels/maixcam/README.zh.md) |
### Telegram 命令注册(启动时自动同步)
PicoClaw 现在使用统一的命令定义来源。启动时会自动将 Telegram 支持的命令(例如 `/start`、`/help`、`/show`、`/list`)注册到 Bot 命令菜单,确保菜单展示与实际行为一致。
Telegram 侧保留的是命令菜单注册能力;通用命令的实际执行统一走 Agent Loop 中的 commands executor。
如果注册因网络或 API 短暂异常失败,不会阻塞 channel 启动;系统会在后台自动重试。
| 主题 | 说明 |
|------|------|
| 🐳 [Docker 与快速开始](docs/zh/docker.md) | Docker Compose 配置、Launcher/Agent 模式、快速开始 |
| 💬 [聊天应用配置](docs/zh/chat-apps.md) | Telegram、Discord、WhatsApp、Matrix、QQ、Slack、IRC、钉钉、LINE、飞书、企业微信等 |
| ⚙️ [配置指南](docs/zh/configuration.md) | 环境变量、工作区布局、技能来源、安全沙箱、心跳任务 |
| 🔌 [提供商与模型配置](docs/zh/providers.md) | 20+ LLM 提供商、模型路由、model_list 配置、Provider 架构 |
| 🔄 [异步任务与 Spawn](docs/zh/spawn-tasks.md) | 快速任务、长任务与 Spawn、异步子 Agent 编排 |
| 🐛 [疑难解答](docs/zh/troubleshooting.md) | 常见问题与解决方案 |
| 🔧 [工具配置](docs/zh/tools_configuration.md) | 工具启用/禁用、执行策略 |
## <img src="assets/clawdchat-icon.png" width="24" height="24" alt="ClawdChat"> 加入 Agent 社交网络
只需通过 CLI 或任何集成的聊天应用发送一条消息,即可将 PicoClaw 连接到 Agent 社交网络。
\*\*阅读 `https://clawdchat.ai/skill.md` 并按照说明加入 [ClawdChat.ai](https://clawdchat.ai)
## 配置详解
配置文件路径: `~/.picoclaw/config.json`
### 环境变量
你可以使用环境变量覆盖默认路径。这对于便携安装、容器化部署或将 picoclaw 作为系统服务运行非常有用。这些变量是独立的,控制不同的路径。
| 变量 | 描述 | 默认路径 |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | 覆盖配置文件的路径。这直接告诉 picoclaw 加载哪个 `config.json`,忽略所有其他位置。 | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | 覆盖 picoclaw 数据根目录。这会更改 `workspace` 和其他数据目录的默认位置。 | `~/.picoclaw` |
**示例:**
```bash
# 使用特定的配置文件运行 picoclaw
# 工作区路径将从该配置文件中读取
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# 在 /opt/picoclaw 中存储所有数据运行 picoclaw
# 配置将从默认的 ~/.picoclaw/config.json 加载
# 工作区将在 /opt/picoclaw/workspace 创建
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# 同时使用两者进行完全自定义设置
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### 工作区布局 (Workspace Layout)
PicoClaw 将数据存储在您配置的工作区中(默认:`~/.picoclaw/workspace`):
```
~/.picoclaw/workspace/
├── sessions/ # 对话会话和历史
├── memory/ # 长期记忆 (MEMORY.md)
├── state/ # 持久化状态 (最后一次频道等)
├── cron/ # 定时任务数据库
├── skills/ # 自定义技能
├── AGENTS.md # Agent 行为指南
├── HEARTBEAT.md # 周期性任务提示词 (每 30 分钟检查一次)
├── IDENTITY.md # Agent 身份设定
├── SOUL.md # Agent 灵魂/性格
└── USER.md # 用户偏好
```
### 技能来源 (Skill Sources)
默认情况下,技能会按以下顺序加载:
1. `~/.picoclaw/workspace/skills`(工作区)
2. `~/.picoclaw/skills`(全局)
3. `<current-working-directory>/skills`(内置)
在高级/测试场景下,可通过以下环境变量覆盖内置技能目录:
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### 统一命令执行策略
- 通用斜杠命令通过 `pkg/agent/loop.go` 中的 `commands.Executor` 统一执行。
- Channel 适配器不再在本地消费通用命令;它们只负责把入站文本转发到 bus/agent 路径。Telegram 仍会在启动时自动注册其支持的命令菜单。
- 未注册的斜杠命令(例如 `/foo`)会透传给 LLM 按普通输入处理。
- 已注册但当前 channel 不支持的命令(例如 WhatsApp 上的 `/show`)会返回明确的用户可见错误,并停止后续处理。
### 心跳 / 周期性任务 (Heartbeat)
PicoClaw 可以自动执行周期性任务。在工作区创建 `HEARTBEAT.md` 文件:
```markdown
# Periodic Tasks
- Check my email for important messages
- Review my calendar for upcoming events
- Check the weather forecast
```
Agent 将每隔 30 分钟(可配置)读取此文件,并使用可用工具执行任务。
#### 使用 Spawn 的异步任务
对于耗时较长的任务(网络搜索、API 调用),使用 `spawn` 工具创建一个 **子 Agent (subagent)**
```markdown
# Periodic Tasks
## Quick Tasks (respond directly)
- Report current time
## Long Tasks (use spawn for async)
- Search the web for AI news and summarize
- Check email and report important messages
```
**关键行为:**
| 特性 | 描述 |
| ---------------- | ---------------------------------------- |
| **spawn** | 创建异步子 Agent,不阻塞主心跳进程 |
| **独立上下文** | 子 Agent 拥有独立上下文,无会话历史 |
| **message tool** | 子 Agent 通过 message 工具直接与用户通信 |
| **非阻塞** | spawn 后,心跳继续处理下一个任务 |
#### 子 Agent 通信原理
```
心跳触发 (Heartbeat triggers)
Agent 读取 HEARTBEAT.md
对于长任务: spawn 子 Agent
↓ ↓
继续下一个任务 子 Agent 独立工作
↓ ↓
所有任务完成 子 Agent 使用 "message" 工具
↓ ↓
响应 HEARTBEAT_OK 用户直接收到结果
```
子 Agent 可以访问工具(message, web_search 等),并且无需通过主 Agent 即可独立与用户通信。
**配置:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| 选项 | 默认值 | 描述 |
| ---------- | ------ | ---------------------------- |
| `enabled` | `true` | 启用/禁用心跳 |
| `interval` | `30` | 检查间隔,单位分钟 (最小: 5) |
**环境变量:**
- `PICOCLAW_HEARTBEAT_ENABLED=false` 禁用
- `PICOCLAW_HEARTBEAT_INTERVAL=60` 更改间隔
### 提供商 (Providers)
> [!NOTE]
> Groq 通过 Whisper 提供免费的语音转录。如果配置了 Groq,任意渠道的音频消息都将在 Agent 层面自动转录为文字。
| 提供商 | 用途 | 获取 API Key |
| -------------------- | ---------------------------- | -------------------------------------------------------------------- |
| `gemini` | LLM (Gemini 直连) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu`              | LLM (智谱直连)               | [bigmodel.cn](https://bigmodel.cn)                                   |
| `volcengine` | LLM (火山引擎直连) | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (推荐,可访问所有模型) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude 直连) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT 直连) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek 直连) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (通义千问) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **语音转录** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras 直连) | [cerebras.ai](https://cerebras.ai) |
### 模型配置 (model_list)
> **新功能!** PicoClaw 现在采用**以模型为中心**的配置方式。只需使用 `厂商/模型` 格式(如 `zhipu/glm-4.7`)即可添加新的 provider——**无需修改任何代码!**
该设计同时支持**多 Agent 场景**,提供灵活的 Provider 选择:
- **不同 Agent 使用不同 Provider**:每个 Agent 可以使用自己的 LLM provider
- **模型回退(Fallback)**:配置主模型和备用模型,提高可靠性
- **负载均衡**:在多个 API 端点之间分配请求
- **集中化配置**:在一个地方管理所有 provider
#### 📋 所有支持的厂商
| 厂商 | `model` 前缀 | 默认 API Base | 协议 | 获取 API Key |
| ------------------- | ----------------- | --------------------------------------------------- | --------- | ----------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [获取密钥](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [获取密钥](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [获取密钥](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [获取密钥](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [获取密钥](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [获取密钥](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [获取密钥](https://platform.moonshot.cn) |
| **通义千问 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [获取密钥](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [获取密钥](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | 本地(无需密钥) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [获取密钥](https://openrouter.ai/keys) |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | 本地 |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [获取密钥](https://cerebras.ai) |
| **火山引擎(Doubao)** | `volcengine/`     | `https://ark.cn-beijing.volces.com/api/v3`          | OpenAI    | [获取密钥](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw)                        |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [获取密钥](https://www.byteplus.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [获取密钥](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [获取 Token](https://modelscope.cn/my/tokens) |
| **Azure OpenAI** | `azure/` | `https://{resource}.openai.azure.com` | Azure | [获取密钥](https://portal.azure.com) |
| **Antigravity** | `antigravity/` | Google Cloud | 自定义 | 仅 OAuth |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### 基础配置示例
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### 各厂商配置示例
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**火山引擎(Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (使用 OAuth)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"auth_method": "oauth"
}
```
> 运行 `picoclaw auth login --provider anthropic` 来设置 OAuth 凭证。
**Anthropic Messages API(原生格式)**
用于直接访问 Anthropic API 或仅支持 Anthropic 原生消息格式的自定义端点:
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> 使用 `anthropic-messages` 协议的场景:
> - 使用仅支持 Anthropic 原生 `/v1/messages` 端点的第三方代理(不支持 OpenAI 兼容的 `/v1/chat/completions`)
> - 连接到 MiniMax、Synthetic 等需要 Anthropic 原生消息格式的服务
> - 现有的 `anthropic` 协议返回 404 错误(说明端点不支持 OpenAI 兼容格式)
>
> **注意:** `anthropic` 协议使用 OpenAI 兼容格式(`/v1/chat/completions`),而 `anthropic-messages` 使用 Anthropic 原生格式(`/v1/messages`)。请根据端点支持的格式选择。
**Ollama (本地)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**自定义代理/API**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
#### 负载均衡
为同一个模型名称配置多个端点——PicoClaw 会自动在它们之间轮询:
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### 从旧的 `providers` 配置迁移
旧的 `providers` 配置格式**已弃用**,但为向后兼容仍支持。
**旧配置(已弃用):**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**新配置(推荐):**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
详细的迁移指南请参考 [docs/migration/model-list-migration.md](docs/migration/model-list-migration.md)。
<details>
<summary><b>智谱 (Zhipu) 配置示例</b></summary>
**1. 获取 API key 和 base URL**
- 获取 [API key](https://bigmodel.cn/usercenter/proj-mgmt/apikeys)
**2. 配置**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. 运行**
```bash
picoclaw agent -m "你好"
```
</details>
<details>
<summary><b>完整配置示例</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
## CLI 命令行参考
| 命令 | 描述 |
| ------------------------- | ------------------ |
| `picoclaw onboard` | 初始化配置和工作区 |
| `picoclaw agent -m "..."` | 与 Agent 对话 |
| `picoclaw agent` | 交互式聊天模式 |
| `picoclaw gateway` | 启动网关 (Gateway) |
| `picoclaw status` | 显示状态 |
| `picoclaw cron list` | 列出所有定时任务 |
| `picoclaw cron add ...` | 添加定时任务 |
### 定时任务 / 提醒 (Scheduled Tasks)
通过 CLI 或任何集成的聊天应用发送一条消息,即可将 PicoClaw 连接到 Agent 社交网络。
**阅读 `https://clawdchat.ai/skill.md` 并按照说明加入 [ClawdChat.ai](https://clawdchat.ai)**
## 🖥 CLI 命令行参考
| 命令 | 说明 |
| ------------------------- | ---------------------- |
| `picoclaw onboard` | 初始化配置与工作区 |
| `picoclaw agent -m "..."` | 与 Agent 对话 |
| `picoclaw agent` | 交互式对话模式 |
| `picoclaw gateway` | 启动网关 |
| `picoclaw status` | 查看状态 |
| `picoclaw version` | 查看版本信息 |
| `picoclaw cron list` | 列出所有定时任务 |
| `picoclaw cron add ...` | 添加定时任务 |
| `picoclaw cron disable` | 禁用定时任务 |
| `picoclaw cron remove` | 删除定时任务 |
| `picoclaw skills list` | 列出已安装技能 |
| `picoclaw skills install` | 安装技能 |
| `picoclaw migrate` | 从旧版本迁移数据 |
| `picoclaw auth login` | 认证提供商 |
### 定时任务 / 提醒
PicoClaw 通过 `cron` 工具支持定时提醒和重复任务:
- **一次性提醒**: "Remind me in 10 minutes" (10分钟后提醒我) → 10分钟后触发一次
- **重复任务**: "Remind me every 2 hours" (每2小时提醒我) → 每2小时触发
- **Cron 表达式**: "Remind me at 9am daily" (每天上午9点提醒我) → 使用 cron 表达式
* **一次性提醒**: "10分钟后提醒我" → 10分钟后触发一次
* **重复任务**: "每2小时提醒我" → 每2小时触发
* **Cron 表达式**: "每天上午9点提醒我" → 使用 cron 表达式
任务存储在 `~/.picoclaw/workspace/cron/` 中并自动处理。
## 🤝 贡献与路线图 (Roadmap)
## 🤝 贡献与路线图
欢迎提交 PR!代码库刻意保持小巧和可读。🤗
路线图即将发布...
查看完整的 [社区路线图](https://github.com/sipeed/picoclaw/blob/main/ROADMAP.md)。
开发者群组正在组建中,入群门槛:至少合并过 1 个 PR。
用户群组:
Discord: [https://discord.gg/V4sAZ9XWpN](https://discord.gg/V4sAZ9XWpN)
Discord: <https://discord.gg/V4sAZ9XWpN>
<img src="assets/wechat.png" alt="PicoClaw" width="512">
## 🐛 疑难解答 (Troubleshooting)
### 网络搜索提示 "API 配置问题"
如果您尚未配置搜索 API Key,这是正常的。PicoClaw 会提供手动搜索的帮助链接。
启用网络搜索:
1. 在 [https://tavily.com](https://tavily.com) (1000 次免费) 或 [https://brave.com/search/api](https://brave.com/search/api) 获取免费 API Key (2000 次免费)
2. 添加到 `~/.picoclaw/config.json`:
```json
{
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
}
}
}
}
```
### 遇到内容过滤错误 (Content Filtering Errors)
某些提供商(如智谱)有严格的内容过滤。尝试改写您的问题或使用其他模型。
### Telegram bot 提示 "Conflict: terminated by other getUpdates"
这表示有另一个机器人实例正在运行。请确保同一时间只有一个 `picoclaw gateway` 进程在运行。
---
## 📝 API Key 对比
| 服务 | 免费层级 | 适用场景 |
| --- | --- | --- |
| **OpenRouter** | 200K tokens/月 | 多模型聚合 (Claude, GPT-4 等) |
| **火山引擎 CodingPlan** | 9.9 元/首月 | 最适合国内用户,多种 SOTA 模型(豆包、DeepSeek 等) |
| **智谱 (Zhipu)** | 200K tokens/月 | 适合中国用户 |
| **Brave Search** | 2000 次查询/月 | 网络搜索功能 |
| **Tavily** | 1000 次查询/月 | AI Agent 搜索优化 |
| **Groq** | 提供免费层级 | 极速推理 (Llama, Mixtral) |
| **LongCat** | 最多 5M tokens/天 | 推理速度快 (免费额度) |
| **ModelScope (魔搭)** | 2000 次请求/天 | 免费推理 (Qwen, GLM, DeepSeek 等) |
---
<div align="center">
<img src="assets/logo.jpg" alt="PicoClaw Meme" width="512">
</div>
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

After

Width:  |  Height:  |  Size: 158 KiB

+2 -2
View File
@@ -12,7 +12,7 @@ const Logo = "🦞"
// GetPicoclawHome returns the picoclaw home directory.
// Priority: $PICOCLAW_HOME > ~/.picoclaw
func GetPicoclawHome() string {
if home := os.Getenv("PICOCLAW_HOME"); home != "" {
if home := os.Getenv(config.EnvHome); home != "" {
return home
}
home, _ := os.UserHomeDir()
@@ -20,7 +20,7 @@ func GetPicoclawHome() string {
}
func GetConfigPath() string {
if configPath := os.Getenv("PICOCLAW_CONFIG"); configPath != "" {
if configPath := os.Getenv(config.EnvConfig); configPath != "" {
return configPath
}
return filepath.Join(GetPicoclawHome(), "config.json")
+7 -4
View File
@@ -78,9 +78,8 @@
"token": "YOUR_TELEGRAM_BOT_TOKEN",
"base_url": "",
"proxy": "",
"allow_from": [
"YOUR_USER_ID"
],
"allow_from": ["YOUR_USER_ID"],
"use_markdown_v2": false,
"reasoning_channel_id": ""
},
"discord": {
@@ -313,6 +312,9 @@
"allow_write_paths": null,
"web": {
"enabled": true,
"prefer_native": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
@@ -351,7 +353,8 @@
"search_engine": "search_std",
"max_results": 5
},
"fetch_limit_bytes": 10485760
"fetch_limit_bytes": 10485760,
"private_host_whitelist": []
},
"cron": {
"enabled": true,
+431
View File
@@ -0,0 +1,431 @@
# 💬 Chat Apps Configuration
> Back to [README](../README.md)
## 💬 Chat Apps
Talk to your picoclaw through Telegram, Discord, WhatsApp, Matrix, QQ, DingTalk, LINE, WeCom, Feishu, Slack, IRC, OneBot, MaixCam, or Pico (native protocol)
> **Note**: All webhook-based channels (LINE, WeCom, etc.) are served on a single shared Gateway HTTP server (`gateway.host`:`gateway.port`, default `127.0.0.1:18790`). There are no per-channel ports to configure. Note: Feishu uses WebSocket/SDK mode and does not use the shared HTTP webhook server.
| Channel | Setup |
| ------------ | ---------------------------------- |
| **Telegram** | Easy (just a token) |
| **Discord** | Easy (bot token + intents) |
| **WhatsApp** | Easy (native: QR scan; or bridge URL) |
| **Matrix** | Medium (homeserver + bot access token) |
| **QQ** | Easy (AppID + AppSecret) |
| **DingTalk** | Medium (app credentials) |
| **LINE** | Medium (credentials + webhook URL) |
| **WeCom AI Bot** | Medium (Token + AES key) |
| **Feishu** | Medium (App ID + Secret, WebSocket mode) |
| **Slack** | Medium (Bot token + App token) |
| **IRC** | Medium (server + TLS config) |
| **OneBot** | Medium (QQ via OneBot protocol) |
| **MaixCam** | Easy (Sipeed hardware integration) |
| **Pico** | Native PicoClaw protocol |
<details>
<summary><b>Telegram</b> (Recommended)</summary>
**1. Create a bot**
* Open Telegram, search `@BotFather`
* Send `/newbot`, follow prompts
* Copy the token
**2. Configure**
```json
{
"channels": {
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"],
"use_markdown_v2": false
}
}
}
```
> Get your user ID from `@userinfobot` on Telegram.
**3. Run**
```bash
picoclaw gateway
```
**4. Telegram command menu (auto-registered at startup)**
PicoClaw now keeps command definitions in one shared registry. On startup, Telegram will automatically register supported bot commands (for example `/start`, `/help`, `/show`, `/list`) so command menu and runtime behavior stay in sync.
Telegram command menu registration remains channel-local discovery UX; generic command execution is handled centrally in the agent loop via the commands executor.
If command registration fails (network/API transient errors), the channel still starts and PicoClaw retries registration in the background.
**5. Advanced Formatting**
Set `"use_markdown_v2": true` in the Telegram channel config to enable enhanced formatting. This allows the bot to use the full range of Telegram MarkdownV2 features, including nested styles, spoilers, and custom fixed-width blocks.
</details>
<details>
<summary><b>Discord</b></summary>
**1. Create a bot**
* Go to <https://discord.com/developers/applications>
* Create an application → Bot → Add Bot
* Copy the bot token
**2. Enable intents**
* In the Bot settings, enable **MESSAGE CONTENT INTENT**
* (Optional) Enable **SERVER MEMBERS INTENT** if you plan to use allow lists based on member data
**3. Get your User ID**
* Discord Settings → Advanced → enable **Developer Mode**
* Right-click your avatar → **Copy User ID**
**4. Configure**
```json
{
"channels": {
"discord": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
**5. Invite the bot**
* OAuth2 → URL Generator
* Scopes: `bot`
* Bot Permissions: `Send Messages`, `Read Message History`
* Open the generated invite URL and add the bot to your server
**Optional: Group trigger mode**
By default the bot responds to all messages in a server channel. To restrict responses to @-mentions only, add:
```json
{
"channels": {
"discord": {
"group_trigger": { "mention_only": true }
}
}
}
```
You can also trigger by keyword prefixes (e.g. `!bot`):
```json
{
"channels": {
"discord": {
"group_trigger": { "prefixes": ["!bot"] }
}
}
}
```
**6. Run**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WhatsApp</b> (native via whatsmeow)</summary>
PicoClaw can connect to WhatsApp in two ways:
- **Native (recommended):** In-process using [whatsmeow](https://github.com/tulir/whatsmeow). No separate bridge. Set `"use_native": true` and leave `bridge_url` empty. On first run, scan the QR code with WhatsApp (Linked Devices). Session is stored under your workspace (e.g. `workspace/whatsapp/`). The native channel is **optional** to keep the default binary small; build with `-tags whatsapp_native` (e.g. `make build-whatsapp-native` or `go build -tags whatsapp_native ./cmd/...`).
- **Bridge:** Connect to an external WebSocket bridge. Set `bridge_url` (e.g. `ws://localhost:3001`) and keep `use_native` false.
**Configure (native)**
```json
{
"channels": {
"whatsapp": {
"enabled": true,
"use_native": true,
"session_store_path": "",
"allow_from": []
}
}
}
```
If `session_store_path` is empty, the session is stored in `<workspace>/whatsapp/`. Run `picoclaw gateway`; on first run, scan the QR code printed in the terminal with WhatsApp → Linked Devices.
</details>
<details>
<summary><b>QQ</b></summary>
**1. Create a bot**
- Go to [QQ Open Platform](https://q.qq.com/#)
- Create an application → Get **AppID** and **AppSecret**
**2. Configure**
```json
{
"channels": {
"qq": {
"enabled": true,
"app_id": "YOUR_APP_ID",
"app_secret": "YOUR_APP_SECRET",
"allow_from": []
}
}
}
```
> Set `allow_from` to empty to allow all users, or specify QQ numbers to restrict access.
**3. Run**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>DingTalk</b></summary>
**1. Create a bot**
* Go to [Open Platform](https://open.dingtalk.com/)
* Create an internal app
* Copy Client ID and Client Secret
**2. Configure**
```json
{
"channels": {
"dingtalk": {
"enabled": true,
"client_id": "YOUR_CLIENT_ID",
"client_secret": "YOUR_CLIENT_SECRET",
"allow_from": []
}
}
}
```
> Set `allow_from` to empty to allow all users, or specify DingTalk user IDs to restrict access.
**3. Run**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Matrix</b></summary>
**1. Prepare bot account**
* Use your preferred homeserver (e.g. `https://matrix.org` or self-hosted)
* Create a bot user and obtain its access token
**2. Configure**
```json
{
"channels": {
"matrix": {
"enabled": true,
"homeserver": "https://matrix.org",
"user_id": "@your-bot:matrix.org",
"access_token": "YOUR_MATRIX_ACCESS_TOKEN",
"allow_from": []
}
}
}
```
**3. Run**
```bash
picoclaw gateway
```
For full options (`device_id`, `join_on_invite`, `group_trigger`, `placeholder`, `reasoning_channel_id`), see [Matrix Channel Configuration Guide](channels/matrix/README.md).
</details>
<details>
<summary><b>LINE</b></summary>
**1. Create a LINE Official Account**
- Go to [LINE Developers Console](https://developers.line.biz/)
- Create a provider → Create a Messaging API channel
- Copy **Channel Secret** and **Channel Access Token**
**2. Configure**
```json
{
"channels": {
"line": {
"enabled": true,
"channel_secret": "YOUR_CHANNEL_SECRET",
"channel_access_token": "YOUR_CHANNEL_ACCESS_TOKEN",
"webhook_path": "/webhook/line",
"allow_from": []
}
}
}
```
> LINE webhook is served on the shared Gateway server (`gateway.host`:`gateway.port`, default `127.0.0.1:18790`).
**3. Set up Webhook URL**
LINE requires HTTPS for webhooks. Use a reverse proxy or tunnel:
```bash
# Example with ngrok (gateway default port is 18790)
ngrok http 18790
```
Then set the Webhook URL in LINE Developers Console to `https://your-domain/webhook/line` and enable **Use webhook**.
**4. Run**
```bash
picoclaw gateway
```
> In group chats, the bot responds only when @mentioned. Replies quote the original message.
</details>
<details>
<summary><b>WeCom (企业微信)</b></summary>
PicoClaw supports three types of WeCom integration:
**Option 1: WeCom Bot (Bot)** - Easier setup, supports group chats
**Option 2: WeCom App (Custom App)** - More features, proactive messaging, private chat only
**Option 3: WeCom AI Bot (AI Bot)** - Official AI Bot, streaming replies, supports group & private chat
See [WeCom AI Bot Configuration Guide](channels/wecom/wecom_aibot/README.zh.md) for detailed setup instructions.
**Quick Setup - WeCom Bot:**
**1. Create a bot**
* Go to WeCom Admin Console → Group Chat → Add Group Bot
* Copy the webhook URL (format: `https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx`)
**2. Configure**
```json
{
"channels": {
"wecom": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_url": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY",
"webhook_path": "/webhook/wecom",
"allow_from": []
}
}
}
```
> WeCom webhook is served on the shared Gateway server (`gateway.host`:`gateway.port`, default `127.0.0.1:18790`).
**Quick Setup - WeCom App:**
**1. Create an app**
* Go to WeCom Admin Console → App Management → Create App
* Copy **AgentId** and **Secret**
* Go to "My Company" page, copy **CorpID**
**2. Configure receive message**
* In App details, click "Receive Message" → "Set API"
* Set URL to `http://your-server:18790/webhook/wecom-app`
* Generate **Token** and **EncodingAESKey**
**3. Configure**
```json
{
"channels": {
"wecom_app": {
"enabled": true,
"corp_id": "wwxxxxxxxxxxxxxxxx",
"corp_secret": "YOUR_CORP_SECRET",
"agent_id": 1000002,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-app",
"allow_from": []
}
}
}
```
**4. Run**
```bash
picoclaw gateway
```
> **Note**: WeCom webhook callbacks are served on the Gateway port (default 18790). Use a reverse proxy for HTTPS.
**Quick Setup - WeCom AI Bot:**
**1. Create an AI Bot**
* Go to WeCom Admin Console → App Management → AI Bot
* In the AI Bot settings, configure callback URL: `http://your-server:18790/webhook/wecom-aibot` (the shared Gateway port, default 18790)
* Copy **Token** and click "Random Generate" for **EncodingAESKey**
**2. Configure**
```json
{
"channels": {
"wecom_aibot": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_43_CHAR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-aibot",
"allow_from": [],
"welcome_message": "Hello! How can I help you?"
}
}
}
```
**3. Run**
```bash
picoclaw gateway
```
> **Note**: WeCom AI Bot uses streaming pull protocol — no reply timeout concerns. Long tasks (>30 seconds) automatically switch to `response_url` push delivery.
</details>
+218
View File
@@ -0,0 +1,218 @@
# ⚙️ Configuration Guide
> Back to [README](../README.md)
## ⚙️ Configuration
Config file: `~/.picoclaw/config.json`
### Environment Variables
You can override default paths using environment variables. This is useful for portable installations, containerized deployments, or running picoclaw as a system service. These variables are independent and control different paths.
| Variable | Description | Default Path |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | Overrides the path to the configuration file. This directly tells picoclaw which `config.json` to load, ignoring all other locations. | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | Overrides the root directory for picoclaw data. This changes the default location of the `workspace` and other data directories. | `~/.picoclaw` |
**Examples:**
```bash
# Run picoclaw using a specific config file
# The workspace path will be read from within that config file
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# Run picoclaw with all its data stored in /opt/picoclaw
# Config will be loaded from /opt/picoclaw/config.json (unless PICOCLAW_CONFIG is set)
# Workspace will be created at /opt/picoclaw/workspace
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# Use both for a fully customized setup
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### Workspace Layout
PicoClaw stores data in your configured workspace (default: `~/.picoclaw/workspace`):
```
~/.picoclaw/workspace/
├── sessions/ # Conversation sessions and history
├── memory/ # Long-term memory (MEMORY.md)
├── state/ # Persistent state (last channel, etc.)
├── cron/ # Scheduled jobs database
├── skills/ # Custom skills
├── AGENTS.md # Agent behavior guide
├── HEARTBEAT.md # Periodic task prompts (checked every 30 min)
├── IDENTITY.md # Agent identity
├── SOUL.md # Agent soul
└── USER.md # User preferences
```
### Skill Sources
By default, skills are loaded from:
1. `~/.picoclaw/workspace/skills` (workspace)
2. `~/.picoclaw/skills` (global)
3. `<current-working-directory>/skills` (builtin)
For advanced/test setups, you can override the builtin skills root with:
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### Unified Command Execution Policy
- Generic slash commands are executed through a single path in `pkg/agent/loop.go` via `commands.Executor`.
- Channel adapters no longer consume generic commands locally; they forward inbound text to the bus/agent path. Telegram still auto-registers supported commands at startup.
- An unknown slash command (for example `/foo`) passes through to normal LLM processing.
- A registered command that is unsupported on the current channel (for example `/show` on WhatsApp) returns an explicit user-facing error and stops further processing.
### 🔒 Security Sandbox
PicoClaw runs in a sandboxed environment by default. The agent can only access files and execute commands within the configured workspace.
#### Default Configuration
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"restrict_to_workspace": true
}
}
}
```
| Option | Default | Description |
| ----------------------- | ----------------------- | ----------------------------------------- |
| `workspace` | `~/.picoclaw/workspace` | Working directory for the agent |
| `restrict_to_workspace` | `true` | Restrict file/command access to workspace |
#### Protected Tools
When `restrict_to_workspace: true`, the following tools are sandboxed:
| Tool | Function | Restriction |
| ------------- | ---------------- | -------------------------------------- |
| `read_file` | Read files | Only files within workspace |
| `write_file` | Write files | Only files within workspace |
| `list_dir` | List directories | Only directories within workspace |
| `edit_file` | Edit files | Only files within workspace |
| `append_file` | Append to files | Only files within workspace |
| `exec` | Execute commands | Command paths must be within workspace |
#### Additional Exec Protection
Even with `restrict_to_workspace: false`, the `exec` tool blocks these dangerous commands:
* `rm -rf`, `del /f`, `rmdir /s` — Bulk deletion
* `format`, `mkfs`, `diskpart` — Disk formatting
* `dd if=` — Disk imaging
* Writing to `/dev/sd[a-z]` — Direct disk writes
* `shutdown`, `reboot`, `poweroff` — System shutdown
* Fork bomb `:(){ :|:& };:`
### File Access Control
| Config Key | Type | Default | Description |
|------------|------|---------|-------------|
| `tools.allow_read_paths` | string[] | `[]` | Additional paths allowed for reading outside workspace |
| `tools.allow_write_paths` | string[] | `[]` | Additional paths allowed for writing outside workspace |
### Exec Security
| Config Key | Type | Default | Description |
|------------|------|---------|-------------|
| `tools.exec.allow_remote` | bool | `false` | Allow exec tool from remote channels (Telegram/Discord etc.) |
| `tools.exec.enable_deny_patterns` | bool | `true` | Enable dangerous command interception |
| `tools.exec.custom_deny_patterns` | string[] | `[]` | Custom regex patterns to block |
| `tools.exec.custom_allow_patterns` | string[] | `[]` | Custom regex patterns to allow |
> **Security Note:** Symlink protection is enabled by default — all file paths are resolved through `filepath.EvalSymlinks` before whitelist matching, preventing symlink escape attacks.
#### Known Limitation: Child Processes From Build Tools
The exec safety guard only inspects the command line PicoClaw launches directly. It does not recursively inspect child
processes spawned by allowed developer tools such as `make`, `go run`, `cargo`, `npm run`, or custom build scripts.
That means a top-level command can still compile or launch other binaries after it passes the initial guard check. In
practice, treat build scripts, Makefiles, package scripts, and generated binaries as executable code that needs the same
level of review as a direct shell command.
For higher-risk environments:
* Review build scripts before execution.
* Prefer approval/manual review for compile-and-run workflows.
* Run PicoClaw inside a container or VM if you need stronger isolation than the built-in guard provides.
#### Error Examples
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (path outside working dir)}
```
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)}
```
#### Disabling Restrictions (Security Risk)
If you need the agent to access paths outside the workspace:
**Method 1: Config file**
```json
{
"agents": {
"defaults": {
"restrict_to_workspace": false
}
}
}
```
**Method 2: Environment variable**
```bash
export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false
```
> ⚠️ **Warning**: Disabling this restriction allows the agent to access any path on your system. Use with caution in controlled environments only.
#### Security Boundary Consistency
The `restrict_to_workspace` setting applies consistently across all execution paths:
| Execution Path | Security Boundary |
| ---------------- | ---------------------------- |
| Main Agent | `restrict_to_workspace` ✅ |
| Subagent / Spawn | Inherits same restriction ✅ |
| Heartbeat tasks | Inherits same restriction ✅ |
All paths share the same workspace restriction — there's no way to bypass the security boundary through subagents or scheduled tasks.
### Heartbeat (Periodic Tasks)
PicoClaw can perform periodic tasks automatically. Create a `HEARTBEAT.md` file in your workspace:
```markdown
# Periodic Tasks
- Check my email for important messages
- Review my calendar for upcoming events
- Check the weather forecast
```
The agent will read this file every 30 minutes (configurable) and execute any tasks using available tools.
#### Async Tasks with Spawn
For long-running tasks (web search, API calls), use the `spawn` tool to create a **subagent**:
```markdown
# Periodic Tasks
+166
View File
@@ -0,0 +1,166 @@
# 🐳 Docker & Quick Start Guide
> Back to [README](../README.md)
## 🐳 Docker Compose
You can also run PicoClaw using Docker Compose without installing anything locally.
```bash
# 1. Clone this repo
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
# 2. First run — auto-generates docker/data/config.json then exits
docker compose -f docker/docker-compose.yml --profile gateway up
# The container prints "First-run setup complete." and stops.
# 3. Set your API keys
vim docker/data/config.json # Set provider API keys, bot tokens, etc.
# 4. Start
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Docker Users**: By default, the Gateway listens on `127.0.0.1` which is not accessible from the host. If you need to access the health endpoints or expose ports, set `PICOCLAW_GATEWAY_HOST=0.0.0.0` in your environment or update `config.json`.
```bash
# 5. Check logs
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. Stop
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Launcher Mode (Web Console)
The `launcher` image includes all three binaries (`picoclaw`, `picoclaw-launcher`, `picoclaw-launcher-tui`) and starts the web console by default, which provides a browser-based UI for configuration and chat.
```bash
docker compose -f docker/docker-compose.yml --profile launcher up -d
```
Open http://localhost:18800 in your browser. The launcher manages the gateway process automatically.
> [!WARNING]
> The web console does not yet support authentication. Avoid exposing it to the public internet.
### Agent Mode (One-shot)
```bash
# Ask a question
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "What is 2+2?"
# Interactive mode
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### Update
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
### 🚀 Quick Start
> [!TIP]
> Set your API Key in `~/.picoclaw/config.json`. Get API Keys: [Volcengine (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM). Web search is optional — get a free [Tavily API](https://tavily.com) (1000 free queries/month) or [Brave Search API](https://brave.com/search/api) (2000 free queries/month).
**1. Initialize**
```bash
picoclaw onboard
```
**2. Configure** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"enabled": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "YOUR_PERPLEXITY_API_KEY",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://your-searxng-instance:8888",
"max_results": 5
}
}
}
}
```
> **New**: The `model_list` configuration format allows zero-code provider addition. See [Model Configuration](#model-configuration-model_list) for details.
> `request_timeout` is optional and uses seconds. If omitted or set to `<= 0`, PicoClaw uses the default timeout (120s).
**3. Get API Keys**
* **LLM Provider**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **Web Search** (optional):
* [Brave Search](https://brave.com/search/api) - Paid ($5/1000 queries, ~$5-6/month)
* [Perplexity](https://www.perplexity.ai) - AI-powered search with chat interface
* [SearXNG](https://github.com/searxng/searxng) - Self-hosted metasearch engine (free, no API key needed)
* [Tavily](https://tavily.com) - Optimized for AI Agents (1000 requests/month)
* DuckDuckGo - Built-in fallback (no API key required)
> **Note**: See `config.example.json` for a complete configuration template.
**4. Chat**
```bash
picoclaw agent -m "What is 2+2?"
```
That's it! You have a working AI assistant in 2 minutes.
---
+588
View File
@@ -0,0 +1,588 @@
# 💬 Configuration des Applications de Chat
> Retour au [README](../../README.fr.md)
## 💬 Applications de Chat
Communiquez avec votre PicoClaw via Telegram, Discord, WhatsApp, Matrix, QQ, DingTalk, LINE, WeCom, Feishu, Slack, IRC, OneBot ou MaixCam.
> **Note** : Tous les canaux basés sur les webhooks (LINE, WeCom, etc.) sont servis sur un seul serveur HTTP Gateway partagé (`gateway.host`:`gateway.port`, par défaut `127.0.0.1:18790`). Il n'y a pas de ports par canal à configurer. Note : Feishu utilise le mode WebSocket/SDK et n'utilise pas le serveur HTTP webhook partagé.
| Canal | Configuration |
| ------------ | -------------------------------------- |
| **Telegram** | Facile (juste un token) |
| **Discord** | Facile (bot token + intents) |
| **WhatsApp** | Facile (natif : scan QR ; ou bridge URL) |
| **Matrix** | Moyen (homeserver + bot access token) |
| **QQ** | Facile (AppID + AppSecret) |
| **DingTalk** | Moyen (identifiants de l'application) |
| **LINE** | Moyen (identifiants + webhook URL) |
| **WeCom AI Bot** | Moyen (Token + clé AES) |
| **Feishu** | Moyen (App ID + Secret, mode WebSocket) |
| **Slack** | Moyen (Bot token + App token) |
| **IRC** | Moyen (serveur + configuration TLS) |
| **OneBot** | Moyen (QQ via protocole OneBot) |
| **MaixCam** | Facile (intégration matérielle Sipeed) |
| **Pico** | Protocole natif PicoClaw |
<details>
<summary><b>Telegram</b> (Recommandé)</summary>
**1. Créer un bot**
* Ouvrez Telegram, recherchez `@BotFather`
* Envoyez `/newbot`, suivez les instructions
* Copiez le token
**2. Configurer**
```json
{
"channels": {
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
> Obtenez votre identifiant utilisateur via `@userinfobot` sur Telegram.
**3. Lancer**
```bash
picoclaw gateway
```
**4. Menu de commandes Telegram (enregistré automatiquement au démarrage)**
PicoClaw conserve les définitions de commandes dans un registre partagé unique. Au démarrage, Telegram enregistre automatiquement les commandes bot prises en charge (par exemple `/start`, `/help`, `/show`, `/list`) afin que le menu de commandes et le comportement à l'exécution restent synchronisés.
L'enregistrement du menu de commandes Telegram reste une découverte UX locale au canal ; l'exécution générique des commandes est gérée de manière centralisée dans la boucle agent via l'exécuteur de commandes.
Si l'enregistrement des commandes échoue (erreurs transitoires réseau/API), le canal démarre quand même et PicoClaw réessaie l'enregistrement en arrière-plan.
</details>
<details>
<summary><b>Discord</b></summary>
**1. Créer un bot**
* Allez sur <https://discord.com/developers/applications>
* Créez une application → Bot → Add Bot
* Copiez le token du bot
**2. Activer les intents**
* Dans les paramètres du Bot, activez **MESSAGE CONTENT INTENT**
* (Optionnel) Activez **SERVER MEMBERS INTENT** si vous prévoyez d'utiliser des listes d'autorisation basées sur les données des membres
**3. Obtenir votre identifiant utilisateur**
* Paramètres Discord → Avancé → activez **Developer Mode**
* Clic droit sur votre avatar → **Copy User ID**
**4. Configurer**
```json
{
"channels": {
"discord": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
**5. Inviter le bot**
* OAuth2 → URL Generator
* Scopes : `bot`
* Bot Permissions : `Send Messages`, `Read Message History`
* Ouvrez l'URL d'invitation générée et ajoutez le bot à votre serveur
**Mode déclenchement en groupe (optionnel)**
Par défaut, le bot répond à tous les messages dans un canal de serveur. Pour limiter les réponses aux @mentions uniquement, ajoutez :
```json
{
"channels": {
"discord": {
"group_trigger": { "mention_only": true }
}
}
}
```
Vous pouvez également déclencher par préfixes de mots-clés (par ex. `!bot`) :
```json
{
"channels": {
"discord": {
"group_trigger": { "prefixes": ["!bot"] }
}
}
}
```
**6. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WhatsApp</b> (natif via whatsmeow)</summary>
PicoClaw peut se connecter à WhatsApp de deux manières :
- **Natif (recommandé) :** En processus via [whatsmeow](https://github.com/tulir/whatsmeow). Pas de bridge séparé. Définissez `"use_native": true` et laissez `bridge_url` vide. Au premier lancement, scannez le code QR avec WhatsApp (Appareils liés). La session est stockée dans votre workspace (par ex. `workspace/whatsapp/`). Le canal natif est **optionnel** pour garder le binaire par défaut léger ; compilez avec `-tags whatsapp_native` (par ex. `make build-whatsapp-native` ou `go build -tags whatsapp_native ./cmd/...`).
- **Bridge :** Connectez-vous à un bridge WebSocket externe. Définissez `bridge_url` (par ex. `ws://localhost:3001`) et gardez `use_native` à false.
**Configurer (natif)**
```json
{
"channels": {
"whatsapp": {
"enabled": true,
"use_native": true,
"session_store_path": "",
"allow_from": []
}
}
}
```
Si `session_store_path` est vide, la session est stockée dans `<workspace>/whatsapp/`. Lancez `picoclaw gateway` ; au premier lancement, scannez le code QR affiché dans le terminal avec WhatsApp → Appareils liés.
</details>
<details>
<summary><b>QQ</b></summary>
**1. Créer un bot**
- Allez sur [QQ Open Platform](https://q.qq.com/#)
- Créez une application → Obtenez **AppID** et **AppSecret**
**2. Configurer**
```json
{
"channels": {
"qq": {
"enabled": true,
"app_id": "YOUR_APP_ID",
"app_secret": "YOUR_APP_SECRET",
"allow_from": []
}
}
}
```
> Définissez `allow_from` vide pour autoriser tous les utilisateurs, ou spécifiez des numéros QQ pour restreindre l'accès.
**3. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>DingTalk</b></summary>
**1. Créer un bot**
* Allez sur [Open Platform](https://open.dingtalk.com/)
* Créez une application interne
* Copiez le Client ID et le Client Secret
**2. Configurer**
```json
{
"channels": {
"dingtalk": {
"enabled": true,
"client_id": "YOUR_CLIENT_ID",
"client_secret": "YOUR_CLIENT_SECRET",
"allow_from": []
}
}
}
```
> Définissez `allow_from` vide pour autoriser tous les utilisateurs, ou spécifiez des identifiants DingTalk pour restreindre l'accès.
**3. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Matrix</b></summary>
**1. Préparer le compte bot**
* Utilisez votre homeserver préféré (par ex. `https://matrix.org` ou auto-hébergé)
* Créez un utilisateur bot et obtenez son access token
**2. Configurer**
```json
{
"channels": {
"matrix": {
"enabled": true,
"homeserver": "https://matrix.org",
"user_id": "@your-bot:matrix.org",
"access_token": "YOUR_MATRIX_ACCESS_TOKEN",
"allow_from": []
}
}
}
```
**3. Lancer**
```bash
picoclaw gateway
```
Pour toutes les options (`device_id`, `join_on_invite`, `group_trigger`, `placeholder`, `reasoning_channel_id`), voir le [Guide de Configuration du Canal Matrix](../channels/matrix/README.md).
</details>
<details>
<summary><b>LINE</b></summary>
**1. Créer un compte officiel LINE**
- Allez sur [LINE Developers Console](https://developers.line.biz/)
- Créez un provider → Créez un canal Messaging API
- Copiez le **Channel Secret** et le **Channel Access Token**
**2. Configurer**
```json
{
"channels": {
"line": {
"enabled": true,
"channel_secret": "YOUR_CHANNEL_SECRET",
"channel_access_token": "YOUR_CHANNEL_ACCESS_TOKEN",
"webhook_path": "/webhook/line",
"allow_from": []
}
}
}
```
> Le webhook LINE est servi sur le serveur Gateway partagé (`gateway.host`:`gateway.port`, par défaut `127.0.0.1:18790`).
**3. Configurer l'URL du Webhook**
LINE nécessite HTTPS pour les webhooks. Utilisez un reverse proxy ou un tunnel :
```bash
# Exemple avec ngrok (le port par défaut du gateway est 18790)
ngrok http 18790
```
Puis définissez l'URL du Webhook dans la console LINE Developers à `https://your-domain/webhook/line` et activez **Use webhook**.
**4. Lancer**
```bash
picoclaw gateway
```
> Dans les discussions de groupe, le bot ne répond que lorsqu'il est @mentionné. Les réponses citent le message original.
</details>
<details>
<summary><b>WeCom (企业微信)</b></summary>
PicoClaw prend en charge trois types d'intégration WeCom :
**Option 1 : WeCom Bot (Bot)** - Configuration plus facile, prend en charge les discussions de groupe
**Option 2 : WeCom App (Application personnalisée)** - Plus de fonctionnalités, messagerie proactive, chat privé uniquement
**Option 3 : WeCom AI Bot (Bot IA)** - Bot IA officiel, réponses en streaming, prend en charge les discussions de groupe et privées
Voir le [Guide de Configuration WeCom AI Bot](../channels/wecom/wecom_aibot/README.zh.md) pour les instructions détaillées.
**Configuration rapide - WeCom Bot :**
**1. Créer un bot**
* Allez dans la console d'administration WeCom → Discussion de groupe → Ajouter un bot de groupe
* Copiez l'URL du webhook (format : `https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx`)
**2. Configurer**
```json
{
"channels": {
"wecom": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_url": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY",
"webhook_path": "/webhook/wecom",
"allow_from": []
}
}
}
```
> Le webhook WeCom est servi sur le serveur Gateway partagé (`gateway.host`:`gateway.port`, par défaut `127.0.0.1:18790`).
**Configuration rapide - WeCom App :**
**1. Créer une application**
* Allez dans la console d'administration WeCom → Gestion des applications → Créer une application
* Copiez **AgentId** et **Secret**
* Allez sur la page "Mon entreprise", copiez **CorpID**
**2. Configurer la réception des messages**
* Dans les détails de l'application, cliquez sur "Recevoir les messages" → "Configurer l'API"
* Définissez l'URL à `http://your-server:18790/webhook/wecom-app`
* Générez **Token** et **EncodingAESKey**
**3. Configurer**
```json
{
"channels": {
"wecom_app": {
"enabled": true,
"corp_id": "wwxxxxxxxxxxxxxxxx",
"corp_secret": "YOUR_CORP_SECRET",
"agent_id": 1000002,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-app",
"allow_from": []
}
}
}
```
**4. Lancer**
```bash
picoclaw gateway
```
> **Note** : Les callbacks webhook WeCom sont servis sur le port Gateway (par défaut 18790). Utilisez un reverse proxy pour HTTPS.
**Configuration rapide - WeCom AI Bot :**
**1. Créer un AI Bot**
* Allez dans la console d'administration WeCom → Gestion des applications → AI Bot
* Dans les paramètres de l'AI Bot, configurez l'URL de callback : `http://your-server:18790/webhook/wecom-aibot`
* Copiez **Token** et cliquez sur "Générer aléatoirement" pour **EncodingAESKey**
**2. Configurer**
```json
{
"channels": {
"wecom_aibot": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_43_CHAR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-aibot",
"allow_from": [],
"welcome_message": "Hello! How can I help you?"
}
}
}
```
**3. Lancer**
```bash
picoclaw gateway
```
> **Note** : WeCom AI Bot utilise le protocole streaming pull — pas de problème de timeout de réponse. Les tâches longues (>30 secondes) basculent automatiquement vers la livraison push via `response_url`.
</details>
<details>
<summary><b>Feishu (飞书)</b></summary>
**1. Créer une application**
* Allez sur [Feishu Open Platform](https://open.feishu.cn/)
* Créez une application → Obtenez **App ID** et **App Secret**
**2. Configurer**
```json
{
"channels": {
"feishu": {
"enabled": true,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
}
}
}
```
> Feishu utilise le mode WebSocket/SDK et ne nécessite pas de serveur webhook.
**3. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Slack</b></summary>
**1. Créer une application Slack**
* Allez sur [Slack API](https://api.slack.com/apps)
* Créez une nouvelle application
* Obtenez le **Bot Token** et l'**App Token**
**2. Configurer**
```json
{
"channels": {
"slack": {
"enabled": true,
"bot_token": "xoxb-your-bot-token",
"app_token": "xapp-your-app-token",
"allow_from": []
}
}
}
```
**3. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>IRC</b></summary>
**1. Configurer le serveur IRC**
* Préparez les informations de votre serveur IRC (adresse, port, canal)
**2. Configurer**
```json
{
"channels": {
"irc": {
"enabled": true,
"server": "irc.example.com:6697",
"nick": "picoclaw-bot",
"channel": "#your-channel",
"use_tls": true,
"allow_from": []
}
}
}
```
**3. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>OneBot</b></summary>
**1. Configurer OneBot**
* Installez une implémentation OneBot compatible (par ex. go-cqhttp, Lagrange)
* Configurez la connexion WebSocket
**2. Configurer**
```json
{
"channels": {
"onebot": {
"enabled": true,
"ws_url": "ws://localhost:8080",
"allow_from": []
}
}
}
```
> OneBot permet d'utiliser QQ via le protocole OneBot standard.
**3. Lancer**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>MaixCam</b></summary>
**1. Préparer le matériel**
* Obtenez un appareil [Sipeed MaixCam](https://wiki.sipeed.com/maixcam)
**2. Configurer**
```json
{
"channels": {
"maixcam": {
"enabled": true,
"allow_from": []
}
}
}
```
> MaixCam est une intégration matérielle Sipeed pour l'interaction IA embarquée.
**3. Lancer**
```bash
picoclaw gateway
```
</details>
+217
View File
@@ -0,0 +1,217 @@
# ⚙️ Guide de Configuration
> Retour au [README](../../README.fr.md)
## ⚙️ Configuration
Fichier de configuration : `~/.picoclaw/config.json`
### Variables d'Environnement
Vous pouvez remplacer les chemins par défaut à l'aide de variables d'environnement. Ceci est utile pour les installations portables, les déploiements conteneurisés ou l'exécution de PicoClaw en tant que service système. Ces variables sont indépendantes et contrôlent des chemins différents.
| Variable | Description | Chemin par défaut |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | Remplace le chemin vers le fichier de configuration. Indique directement à PicoClaw quel `config.json` charger, en ignorant tous les autres emplacements. | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | Remplace le répertoire racine des données PicoClaw. Change l'emplacement par défaut du `workspace` et des autres répertoires de données. | `~/.picoclaw` |
**Exemples :**
```bash
# Exécuter picoclaw avec un fichier de configuration spécifique
# Le chemin du workspace sera lu depuis ce fichier de configuration
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# Exécuter picoclaw avec toutes ses données stockées dans /opt/picoclaw
# La configuration sera chargée depuis le chemin par défaut ~/.picoclaw/config.json
# Le workspace sera créé dans /opt/picoclaw/workspace
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# Utiliser les deux pour une configuration entièrement personnalisée
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### Structure du Workspace
PicoClaw stocke les données dans votre workspace configuré (par défaut : `~/.picoclaw/workspace`) :
```
~/.picoclaw/workspace/
├── sessions/ # Sessions de conversation et historique
├── memory/ # Mémoire à long terme (MEMORY.md)
├── state/ # État persistant (dernier canal, etc.)
├── cron/ # Base de données des tâches planifiées
├── skills/ # Compétences personnalisées
├── AGENTS.md # Guide de comportement de l'agent
├── HEARTBEAT.md # Invites de tâches périodiques (vérifiées toutes les 30 min)
├── IDENTITY.md # Identité de l'agent
├── SOUL.md # Âme de l'agent
└── USER.md # Préférences utilisateur
```
### Sources de Compétences
Par défaut, les compétences sont chargées depuis :
1. `~/.picoclaw/workspace/skills` (workspace)
2. `~/.picoclaw/skills` (global)
3. `<current-working-directory>/skills` (builtin)
Pour les configurations avancées/de test, vous pouvez remplacer la racine des compétences builtin avec :
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### Politique Unifiée d'Exécution des Commandes
- Les commandes slash génériques sont exécutées par un chemin unique dans `pkg/agent/loop.go`, via `commands.Executor`.
- Les adaptateurs de canaux ne consomment plus les commandes génériques localement ; ils transmettent le texte entrant au chemin bus/agent. Telegram enregistre toujours automatiquement les commandes prises en charge au démarrage.
- Une commande slash inconnue (par exemple `/foo`) passe au traitement LLM normal.
- Une commande enregistrée mais non prise en charge sur le canal actuel (par exemple `/show` sur WhatsApp) renvoie une erreur explicite à l'utilisateur et arrête le traitement ultérieur.
### 🔒 Sandbox de Sécurité
PicoClaw s'exécute dans un environnement sandboxé par défaut. L'agent ne peut accéder aux fichiers et exécuter des commandes que dans le workspace configuré.
#### Configuration par Défaut
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"restrict_to_workspace": true
}
}
}
```
| Option | Par défaut | Description |
| ----------------------- | ----------------------- | ------------------------------------------------- |
| `workspace` | `~/.picoclaw/workspace` | Répertoire de travail de l'agent |
| `restrict_to_workspace` | `true` | Restreindre l'accès fichiers/commandes au workspace |
#### Outils Protégés
Lorsque `restrict_to_workspace: true`, les outils suivants sont sandboxés :
| Outil | Fonction | Restriction |
| ------------- | --------------------- | ---------------------------------------------- |
| `read_file` | Lire des fichiers | Uniquement les fichiers dans le workspace |
| `write_file` | Écrire des fichiers | Uniquement les fichiers dans le workspace |
| `list_dir` | Lister les répertoires| Uniquement les répertoires dans le workspace |
| `edit_file` | Modifier des fichiers | Uniquement les fichiers dans le workspace |
| `append_file` | Ajouter aux fichiers | Uniquement les fichiers dans le workspace |
| `exec` | Exécuter des commandes| Les chemins de commande doivent être dans le workspace |
#### Protection Exec Supplémentaire
Même avec `restrict_to_workspace: false`, l'outil `exec` bloque ces commandes dangereuses :
* `rm -rf`, `del /f`, `rmdir /s` — Suppression en masse
* `format`, `mkfs`, `diskpart` — Formatage de disque
* `dd if=` — Imagerie de disque
* Écriture vers `/dev/sd[a-z]` — Écritures directes sur disque
* `shutdown`, `reboot`, `poweroff` — Arrêt du système
* Fork bomb `:(){ :|:& };:`
### Contrôle d'Accès aux Fichiers
| Clé de configuration | Type | Par défaut | Description |
|----------------------|------|------------|-------------|
| `tools.allow_read_paths` | string[] | `[]` | Chemins supplémentaires autorisés en lecture en dehors du workspace |
| `tools.allow_write_paths` | string[] | `[]` | Chemins supplémentaires autorisés en écriture en dehors du workspace |
### Sécurité Exec
| Clé de configuration | Type | Par défaut | Description |
|----------------------|------|------------|-------------|
| `tools.exec.allow_remote` | bool | `false` | Autoriser l'outil exec depuis les canaux distants (Telegram/Discord etc.) |
| `tools.exec.enable_deny_patterns` | bool | `true` | Activer l'interception des commandes dangereuses |
| `tools.exec.custom_deny_patterns` | string[] | `[]` | Patterns regex personnalisés à bloquer |
| `tools.exec.custom_allow_patterns` | string[] | `[]` | Patterns regex personnalisés à autoriser |
> **Note de sécurité :** La protection Symlink est activée par défaut — tous les chemins de fichiers sont résolus via `filepath.EvalSymlinks` avant la correspondance avec la liste blanche, empêchant les attaques d'évasion par symlink.
#### Limitation Connue : Processus Enfants des Outils de Build
Le garde de sécurité exec n'inspecte que la ligne de commande lancée directement par PicoClaw. Il n'inspecte pas récursivement les processus enfants générés par les outils de développement autorisés tels que `make`, `go run`, `cargo`, `npm run` ou les scripts de build personnalisés.
Cela signifie qu'une commande de niveau supérieur peut toujours compiler ou lancer d'autres binaires après avoir passé la vérification initiale du garde. En pratique, traitez les scripts de build, les Makefiles, les scripts de packages et les binaires générés comme du code exécutable nécessitant le même niveau de revue qu'une commande shell directe.
Pour les environnements à haut risque :
* Examinez les scripts de build avant l'exécution.
* Préférez l'approbation/revue manuelle pour les workflows de compilation et d'exécution.
* Exécutez PicoClaw dans un conteneur ou une VM si vous avez besoin d'une isolation plus forte que celle fournie par le garde intégré.
#### Exemples d'Erreurs
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (path outside working dir)}
```
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)}
```
#### Désactiver les Restrictions (Risque de Sécurité)
Si vous avez besoin que l'agent accède à des chemins en dehors du workspace :
**Méthode 1 : Fichier de configuration**
```json
{
"agents": {
"defaults": {
"restrict_to_workspace": false
}
}
}
```
**Méthode 2 : Variable d'environnement**
```bash
export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false
```
> ⚠️ **Avertissement** : Désactiver cette restriction permet à l'agent d'accéder à n'importe quel chemin sur votre système. À utiliser avec précaution dans des environnements contrôlés uniquement.
#### Cohérence des Limites de Sécurité
Le paramètre `restrict_to_workspace` s'applique de manière cohérente à tous les chemins d'exécution :
| Chemin d'exécution | Limite de sécurité |
| ------------------ | -------------------------------- |
| Main Agent | `restrict_to_workspace` ✅ |
| Subagent / Spawn | Hérite de la même restriction ✅ |
| Heartbeat tasks | Hérite de la même restriction ✅ |
Tous les chemins partagent la même restriction de workspace — il n'y a aucun moyen de contourner la limite de sécurité via les subagents ou les tâches planifiées.
### Heartbeat (Tâches Périodiques)
PicoClaw peut effectuer des tâches périodiques automatiquement. Créez un fichier `HEARTBEAT.md` dans votre workspace :
```markdown
# Periodic Tasks
- Check my email for important messages
- Review my calendar for upcoming events
- Check the weather forecast
```
L'agent lira ce fichier toutes les 30 minutes (configurable) et exécutera toutes les tâches en utilisant les outils disponibles.
#### Tâches Asynchrones avec Spawn
Pour les tâches longues (recherche web, appels API), utilisez l'outil `spawn` pour créer un **subagent** :
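```markdown
# Periodic Tasks
## Tâches Rapides (réponse directe)
- Rapporter l'heure actuelle
## Tâches Longues (utiliser spawn pour l'asynchrone)
- Rechercher sur le web des actualités IA et résumer
- Vérifier les emails et rapporter les messages importants
```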
+166
View File
@@ -0,0 +1,166 @@
# 🐳 Docker et Démarrage Rapide
> Retour au [README](../../README.fr.md)
## 🐳 Docker Compose
Vous pouvez également exécuter PicoClaw avec Docker Compose sans rien installer localement.
```bash
# 1. Cloner ce dépôt
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
# 2. Premier lancement — génère automatiquement docker/data/config.json puis s'arrête
docker compose -f docker/docker-compose.yml --profile gateway up
# Le conteneur affiche "First-run setup complete." et s'arrête.
# 3. Configurer vos clés API
vim docker/data/config.json # Définir les clés API des fournisseurs, les tokens de bot, etc.
# 4. Démarrer
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Utilisateurs Docker** : Par défaut, le Gateway écoute sur `127.0.0.1`, ce qui n'est pas accessible depuis l'hôte. Si vous devez accéder aux endpoints de santé ou exposer des ports, définissez `PICOCLAW_GATEWAY_HOST=0.0.0.0` dans votre environnement ou mettez à jour `config.json`.
```bash
# 5. Vérifier les logs
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. Arrêter
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Mode Launcher (Console Web)
L'image `launcher` inclut les trois binaires (`picoclaw`, `picoclaw-launcher`, `picoclaw-launcher-tui`) et démarre la console web par défaut, qui fournit une interface navigateur pour la configuration et le chat.
```bash
docker compose -f docker/docker-compose.yml --profile launcher up -d
```
Ouvrez http://localhost:18800 dans votre navigateur. Le launcher gère automatiquement le processus gateway.
> [!WARNING]
> La console web ne prend pas encore en charge l'authentification. Évitez de l'exposer sur Internet public.
### Mode Agent (One-shot)
```bash
# Poser une question
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "What is 2+2?"
# Mode interactif
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### Mise à jour
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
### 🚀 Démarrage Rapide
> [!TIP]
> Configurez votre clé API dans `~/.picoclaw/config.json`. Obtenir des clés API : [Volcengine (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM). La recherche web est optionnelle — obtenez gratuitement une [API Tavily](https://tavily.com) (1000 requêtes gratuites/mois) ou une [API Brave Search](https://brave.com/search/api) (2000 requêtes gratuites/mois).
**1. Initialiser**
```bash
picoclaw onboard
```
**2. Configurer** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"enabled": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "YOUR_PERPLEXITY_API_KEY",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://your-searxng-instance:8888",
"max_results": 5
}
}
}
}
```
> **Nouveau** : Le format de configuration `model_list` permet l'ajout de fournisseurs sans modification de code. Voir [Configuration des Modèles](#configuration-des-modèles-model_list) pour plus de détails.
> `request_timeout` est optionnel et s'exprime en secondes. S'il est omis ou défini à une valeur `<= 0`, PicoClaw utilise le timeout par défaut (120s).
**3. Obtenir des clés API**
* **Fournisseur LLM** : [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **Recherche Web** (optionnel) :
* [Brave Search](https://brave.com/search/api) - Payant ($5/1000 requêtes, ~$5-6/mois)
* [Perplexity](https://www.perplexity.ai) - Recherche alimentée par l'IA avec interface de chat
* [SearXNG](https://github.com/searxng/searxng) - Métamoteur auto-hébergé (gratuit, pas de clé API nécessaire)
* [Tavily](https://tavily.com) - Optimisé pour les agents IA (1000 requêtes gratuites/mois)
* DuckDuckGo - Solution de repli intégrée (pas de clé API requise)
> **Note** : Voir `config.example.json` pour un modèle de configuration complet.
**4. Discuter**
```bash
picoclaw agent -m "What is 2+2?"
```
C'est tout ! Vous avez un assistant IA fonctionnel en 2 minutes.
---
+434
View File
@@ -0,0 +1,434 @@
# 🔌 Fournisseurs et Configuration des Modèles
> Retour au [README](../../README.fr.md)
### Fournisseurs
> [!NOTE]
> Groq fournit la transcription vocale gratuite via Whisper. Si configuré, les messages audio de n'importe quel canal seront automatiquement transcrits au niveau de l'agent.
| Provider | Purpose | Get API Key |
| ------------ | --------------------------------------- | ------------------------------------------------------------ |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](https://bigmodel.cn) |
| `volcengine` | LLM (Volcengine direct) | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (Qwen direct) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras direct) | [cerebras.ai](https://cerebras.ai) |
| `vivgrid` | LLM (Vivgrid direct) | [vivgrid.com](https://vivgrid.com) |
| `moonshot` | LLM (Kimi/Moonshot direct) | [platform.moonshot.cn](https://platform.moonshot.cn) |
| `minimax` | LLM (Minimax direct) | [platform.minimaxi.com](https://platform.minimaxi.com) |
| `avian` | LLM (Avian direct) | [avian.io](https://avian.io) |
| `mistral` | LLM (Mistral direct) | [console.mistral.ai](https://console.mistral.ai) |
| `longcat` | LLM (Longcat direct) | [longcat.chat](https://longcat.chat/platform) |
| `modelscope` | LLM (ModelScope direct) | [modelscope.cn](https://modelscope.cn) |
### Configuration des Modèles (model_list)
> **Nouveauté :** PicoClaw utilise désormais une approche de configuration **centrée sur le modèle**. Spécifiez simplement le format `vendor/model` (par ex. `zhipu/glm-4.7`) pour ajouter de nouveaux fournisseurs — **aucune modification de code requise !**
Cette conception permet également le **support multi-agents** avec une sélection flexible de fournisseurs :
- **Différents agents, différents fournisseurs** : Chaque agent peut utiliser son propre fournisseur LLM
- **Modèles de repli** : Configurez des modèles principaux et de repli pour la résilience
- **Répartition de charge** : Distribuez les requêtes entre plusieurs endpoints
- **Configuration centralisée** : Gérez tous les fournisseurs en un seul endroit
#### 📋 Tous les Vendors Supportés
| Vendor | `model` Prefix | Default API Base | Protocol | API Key |
| ------------------- | ----------------- |-----------------------------------------------------| --------- | ---------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [Get Key](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [Get Key](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [Get Key](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [Get Key](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [Get Key](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [Get Key](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [Get Key](https://platform.moonshot.cn) |
| **通义千问 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [Get Key](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [Get Key](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | Local (no key needed) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [Get Key](https://openrouter.ai/keys) |
| **LiteLLM Proxy** | `litellm/` | `http://localhost:4000/v1` | OpenAI | Your LiteLLM proxy key |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | Local |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [Get Key](https://cerebras.ai) |
| **VolcEngine (Doubao)** | `volcengine/` | `https://ark.cn-beijing.volces.com/api/v3` | OpenAI | [Get Key](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [Get Key](https://www.byteplus.com) |
| **Vivgrid** | `vivgrid/` | `https://api.vivgrid.com/v1` | OpenAI | [Get Key](https://vivgrid.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [Get Key](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [Get Token](https://modelscope.cn/my/tokens) |
| **Antigravity** | `antigravity/` | Google Cloud | Custom | OAuth only |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### Configuration de Base
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### Exemples par Vendor
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**VolcEngine (Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (avec clé API)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
}
```
> Exécutez `picoclaw auth login --provider anthropic` pour coller votre token API.
**API Anthropic Messages (format natif)**
Pour l'accès direct à l'API Anthropic ou les endpoints personnalisés qui ne prennent en charge que le format de message natif d'Anthropic :
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> Utilisez le protocole `anthropic-messages` lorsque :
> - Vous utilisez des proxys tiers qui ne prennent en charge que l'endpoint natif `/v1/messages` d'Anthropic (pas le format compatible OpenAI `/v1/chat/completions`)
> - Vous vous connectez à des services comme MiniMax ou Synthetic, qui nécessitent le format de message natif d'Anthropic
> - Le protocole `anthropic` existant renvoie des erreurs 404 (indiquant que l'endpoint ne prend pas en charge le format compatible OpenAI)
>
> **Note :** Le protocole `anthropic` utilise le format compatible OpenAI (`/v1/chat/completions`), tandis que `anthropic-messages` utilise le format natif d'Anthropic (`/v1/messages`). Choisissez en fonction du format pris en charge par votre endpoint.
**Ollama (local)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**Proxy/API Personnalisé**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
**LiteLLM Proxy**
```json
{
"model_name": "lite-gpt4",
"model": "litellm/lite-gpt4",
"api_base": "http://localhost:4000/v1",
"api_key": "sk-..."
}
```
PicoClaw ne supprime que le préfixe externe `litellm/` avant d'envoyer la requête, donc les alias de proxy comme `litellm/lite-gpt4` envoient `lite-gpt4`, tandis que `litellm/openai/gpt-4o` envoie `openai/gpt-4o`.
#### Répartition de Charge
Configurez plusieurs endpoints pour le même nom de modèle — PicoClaw effectuera automatiquement un round-robin entre eux :
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### Migration depuis l'Ancienne Configuration `providers`
L'ancienne configuration `providers` est **dépréciée** mais reste prise en charge pour assurer la rétrocompatibilité.
**Ancienne configuration (dépréciée) :**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**Nouvelle configuration (recommandée) :**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
Pour un guide de migration détaillé, voir [docs/migration/model-list-migration.md](../migration/model-list-migration.md).
### Architecture des Fournisseurs
PicoClaw route les fournisseurs par famille de protocoles :
- Protocole compatible OpenAI : OpenRouter, passerelles compatibles OpenAI, Groq, Zhipu et endpoints de type vLLM.
- Protocole Anthropic : Comportement natif de l'API Claude.
- Chemin Codex/OAuth : Route d'authentification OAuth/token OpenAI.
Cela maintient le runtime léger tout en faisant des nouveaux backends compatibles OpenAI principalement une opération de configuration (`api_base` + `api_key`).
<details>
<summary><b>Zhipu</b></summary>
**1. Obtenir la clé API et l'URL de base**
* Obtenir la [clé API](https://bigmodel.cn/usercenter/proj-mgmt/apikeys)
**2. Configurer**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. Lancer**
```bash
picoclaw agent -m "Hello"
```
</details>
<details>
<summary><b>Exemple de configuration complète</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false,
"bridge_url": "ws://localhost:3001",
"use_native": false,
"session_store_path": "",
"allow_from": []
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "BSA...",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://localhost:8888",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
---
## 📝 Comparaison des Clés API
| Service | Pricing | Use Case |
| ---------------- | ------------------------ | ------------------------------------- |
| **OpenRouter** | Free: 200K tokens/month | Multiple models (Claude, GPT-4, etc.) |
| **Volcengine CodingPlan** | ¥9.9/first month | Best for Chinese users, multiple SOTA models (Doubao, DeepSeek, etc.) |
| **Zhipu** | Free: 200K tokens/month | Suitable for Chinese users |
| **Brave Search** | $5/1000 queries | Web search functionality |
| **SearXNG** | Free (self-hosted) | Privacy-focused metasearch (70+ engines) |
| **Groq** | Free tier available | Fast inference (Llama, Mixtral) |
| **Cerebras** | Free tier available | Fast inference (Llama, Qwen, etc.) |
| **LongCat** | Free: up to 5M tokens/day | Fast inference |
| **ModelScope** | Free: 2000 requests/day | Inference (Qwen, GLM, DeepSeek, etc.) |
---
<div align="center">
<img src="../../assets/logo.jpg" alt="PicoClaw Meme" width="512">
</div>
+61
View File
@@ -0,0 +1,61 @@
# 🔄 Tâches Asynchrones et Spawn
> Retour au [README](../../README.fr.md)
```markdown
## Tâches Rapides (réponse directe)
- Rapporter l'heure actuelle
## Tâches Longues (utiliser spawn pour l'asynchrone)
- Rechercher sur le web des actualités IA et résumer
- Vérifier les emails et rapporter les messages importants
```
**Comportements clés :**
| Fonctionnalité | Description |
| ----------------------- | --------------------------------------------------------------- |
| **spawn** | Crée un subagent asynchrone, ne bloque pas le heartbeat |
| **Contexte indépendant** | Le subagent a son propre contexte, pas d'historique de session |
| **message tool** | Le subagent communique directement avec l'utilisateur via l'outil message |
| **Non bloquant** | Après le spawn, le heartbeat continue à la tâche suivante |
#### Fonctionnement de la Communication du Subagent
```
Heartbeat se déclenche
    ↓
L'agent lit HEARTBEAT.md
    ↓
Pour une tâche longue : spawn subagent
↓ ↓
Continue à la tâche suivante Le subagent travaille indépendamment
↓ ↓
Toutes les tâches terminées Le subagent utilise l'outil "message"
↓ ↓
Répond HEARTBEAT_OK L'utilisateur reçoit le résultat directement
```
Le subagent a accès aux outils (message, web_search, etc.) et peut communiquer avec l'utilisateur indépendamment sans passer par l'agent principal.
**Configuration :**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| Option | Par défaut | Description |
| ---------- | ---------- | ---------------------------------------------- |
| `enabled` | `true` | Activer/désactiver le heartbeat |
| `interval` | `30` | Intervalle de vérification en minutes (min: 5) |
**Variables d'environnement :**
* `PICOCLAW_HEARTBEAT_ENABLED=false` pour désactiver
* `PICOCLAW_HEARTBEAT_INTERVAL=60` pour changer l'intervalle
+336
View File
@@ -0,0 +1,336 @@
# 🔧 Configuration des Outils
> Retour au [README](../../README.fr.md)
La configuration des outils de PicoClaw se trouve dans le champ `tools` de `config.json`.
## Structure du répertoire
```json
{
"tools": {
"web": {
...
},
"mcp": {
...
},
"exec": {
...
},
"cron": {
...
},
"skills": {
...
}
}
}
```
## Outils Web
Les outils web sont utilisés pour la recherche et la récupération de pages web.
### Web Fetcher
Paramètres généraux pour la récupération et le traitement du contenu des pages web.
| Config | Type | Par défaut | Description |
|---------------------|--------|---------------|-----------------------------------------------------------------------------------------------|
| `enabled` | bool | true | Activer la capacité de récupération de pages web. |
| `fetch_limit_bytes` | int | 10485760 | Taille maximale du contenu de la page web à récupérer, en octets (par défaut 10 Mo). |
| `format` | string | "plaintext" | Format de sortie du contenu récupéré. Options : `plaintext` ou `markdown` (recommandé). |
### Brave
| Config | Type | Par défaut | Description |
|---------------|--------|------------|---------------------------|
| `enabled` | bool | false | Activer la recherche Brave |
| `api_key` | string | - | Clé API Brave Search |
| `max_results` | int | 5 | Nombre maximum de résultats |
### DuckDuckGo
| Config | Type | Par défaut | Description |
|---------------|------|------------|--------------------------------|
| `enabled` | bool | true | Activer la recherche DuckDuckGo |
| `max_results` | int | 5 | Nombre maximum de résultats |
### Perplexity
| Config | Type | Par défaut | Description |
|---------------|--------|------------|--------------------------------|
| `enabled` | bool | false | Activer la recherche Perplexity |
| `api_key` | string | - | Clé API Perplexity |
| `max_results` | int | 5 | Nombre maximum de résultats |
## Outil Exec
L'outil exec est utilisé pour exécuter des commandes shell.
| Config | Type | Par défaut | Description |
|------------------------|-------|------------|------------------------------------------------|
| `enable_deny_patterns` | bool | true | Activer le blocage par défaut des commandes dangereuses |
| `custom_deny_patterns` | array | [] | Modèles de refus personnalisés (expressions régulières) |
### Fonctionnalité
- **`enable_deny_patterns`** : Définir à `false` pour désactiver complètement les modèles de blocage par défaut des commandes dangereuses
- **`custom_deny_patterns`** : Ajouter des modèles regex de refus personnalisés ; les commandes correspondantes seront bloquées
### Modèles de commandes bloquées par défaut
Par défaut, PicoClaw bloque les commandes dangereuses suivantes :
- Commandes de suppression : `rm -rf`, `del /f/q`, `rmdir /s`
- Opérations disque : `format`, `mkfs`, `diskpart`, `dd if=`, écriture vers `/dev/sd*`
- Opérations système : `shutdown`, `reboot`, `poweroff`
- Substitution de commandes : `$()`, `${}`, backticks
- Pipe vers shell : `| sh`, `| bash`
- Élévation de privilèges : `sudo`, `chmod`, `chown`
- Contrôle de processus : `pkill`, `killall`, `kill -9`
- Opérations distantes : `curl | sh`, `wget | sh`, `ssh`
- Gestion de paquets : `apt`, `yum`, `dnf`, `npm install -g`, `pip install --user`
- Conteneurs : `docker run`, `docker exec`
- Git : `git push`, `git force`
- Autres : `eval`, `source *.sh`
### Limitation architecturale connue
Le garde exec ne valide que la commande de niveau supérieur envoyée à PicoClaw. Il n'inspecte **pas** récursivement les processus enfants générés par les outils de build ou les scripts après le démarrage de cette commande.
Exemples de workflows pouvant contourner le garde de commande directe une fois la commande initiale autorisée :
- `make run`
- `go run ./cmd/...`
- `cargo run`
- `npm run build`
Cela signifie que le garde est utile pour bloquer les commandes directes manifestement dangereuses, mais ce n'est **pas** un bac à sable complet pour les pipelines de build non vérifiés. Si votre modèle de menace inclut du code non fiable dans l'espace de travail, utilisez une isolation plus forte comme des conteneurs, des VM ou un flux d'approbation autour des commandes de build et d'exécution.
### Exemple de configuration
```json
{
"tools": {
"exec": {
"enable_deny_patterns": true,
"custom_deny_patterns": [
"\\brm\\s+-r\\b",
"\\bkillall\\s+python"
]
}
}
}
```
## Outil Cron
L'outil cron est utilisé pour planifier des tâches périodiques.
| Config | Type | Par défaut | Description |
|------------------------|------|------------|----------------------------------------------------|
| `exec_timeout_minutes` | int | 5 | Délai d'expiration en minutes, 0 signifie sans limite |
## Outil MCP
L'outil MCP permet l'intégration avec des serveurs Model Context Protocol externes.
### Découverte d'outils (chargement paresseux)
Lors de la connexion à plusieurs serveurs MCP, exposer simultanément des centaines d'outils peut épuiser la fenêtre de contexte du LLM et augmenter les coûts API. La fonctionnalité **Discovery** résout ce problème en gardant les outils MCP *masqués* par défaut.
Au lieu de charger tous les outils, le LLM reçoit un outil de recherche léger (utilisant la correspondance par mots-clés BM25 ou les expressions régulières). Lorsque le LLM a besoin d'une capacité spécifique, il recherche dans la bibliothèque masquée. Les outils correspondants sont alors temporairement « déverrouillés » et injectés dans le contexte pour un nombre configuré de tours (`ttl`).
### Configuration globale
| Config | Type | Par défaut | Description |
|-------------|--------|------------|----------------------------------------------|
| `enabled` | bool | false | Activer l'intégration MCP globalement |
| `discovery` | object | `{}` | Configuration de la découverte d'outils (voir ci-dessous) |
| `servers` | object | `{}` | Mappage du nom de serveur à la configuration du serveur |
### Configuration Discovery (`discovery`)
| Config | Type | Par défaut | Description |
|----------------------|------|------------|-----------------------------------------------------------------------------------------------------------------------------------|
| `enabled` | bool | false | Si true, les outils MCP sont masqués et chargés à la demande via la recherche. Si false, tous les outils sont chargés |
| `ttl` | int | 5 | Nombre de tours de conversation pendant lesquels un outil découvert reste déverrouillé |
| `max_search_results` | int | 5 | Nombre maximum d'outils retournés par requête de recherche |
| `use_bm25` | bool | true | Activer l'outil de recherche par langage naturel/mots-clés (`tool_search_tool_bm25`). **Attention** : consomme plus de ressources que la recherche regex |
| `use_regex` | bool | false | Activer l'outil de recherche par motif regex (`tool_search_tool_regex`) |
> **Note :** Si `discovery.enabled` est `true`, vous **devez** activer au moins un moteur de recherche (`use_bm25` ou `use_regex`),
> sinon l'application ne démarrera pas.
### Configuration par serveur
| Config | Type | Requis | Description |
|------------|--------|----------|--------------------------------------------|
| `enabled` | bool | oui | Activer ce serveur MCP |
| `type` | string | non | Type de transport : `stdio`, `sse`, `http` |
| `command` | string | stdio | Commande exécutable pour le transport stdio |
| `args` | array | non | Arguments de commande pour le transport stdio |
| `env` | object | non | Variables d'environnement pour le processus stdio |
| `env_file` | string | non | Chemin vers le fichier d'environnement pour le processus stdio |
| `url` | string | sse/http | URL du point de terminaison pour le transport `sse`/`http` |
| `headers` | object | non | En-têtes HTTP pour le transport `sse`/`http` |
### Comportement du transport
- Si `type` est omis, le transport est détecté automatiquement :
- `url` est défini → `sse`
- `command` est défini → `stdio`
- `http` et `sse` utilisent tous deux `url` + `headers` optionnels.
- `env` et `env_file` ne sont appliqués qu'aux serveurs `stdio`.
### Exemples de configuration
#### 1) Serveur MCP Stdio
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"filesystem": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/tmp"
]
}
}
}
}
}
```
#### 2) Serveur MCP distant SSE/HTTP
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"remote-mcp": {
"enabled": true,
"type": "sse",
"url": "https://example.com/mcp",
"headers": {
"Authorization": "Bearer YOUR_TOKEN"
}
}
}
}
}
}
```
#### 3) Configuration MCP massive avec découverte d'outils activée
*Dans cet exemple, le LLM ne verra que `tool_search_tool_bm25`. Il recherchera et déverrouillera dynamiquement les outils Github ou Postgres uniquement lorsque l'utilisateur le demande.*
```json
{
"tools": {
"mcp": {
"enabled": true,
"discovery": {
"enabled": true,
"ttl": 5,
"max_search_results": 5,
"use_bm25": true,
"use_regex": false
},
"servers": {
"github": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-github"
],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
}
},
"postgres": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-postgres",
"postgresql://user:password@localhost/dbname"
]
},
"slack": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-slack"
],
"env": {
"SLACK_BOT_TOKEN": "YOUR_SLACK_BOT_TOKEN",
"SLACK_TEAM_ID": "YOUR_SLACK_TEAM_ID"
}
}
}
}
}
}
```
## Outil Skills
L'outil skills configure la découverte et l'installation de compétences via des registres comme ClawHub.
### Registres
| Config | Type | Par défaut | Description |
|------------------------------------|--------|----------------------|----------------------------------------------|
| `registries.clawhub.enabled` | bool | true | Activer le registre ClawHub |
| `registries.clawhub.base_url` | string | `https://clawhub.ai` | URL de base ClawHub |
| `registries.clawhub.auth_token` | string | `""` | Jeton Bearer optionnel pour des limites de débit plus élevées |
| `registries.clawhub.search_path` | string | `/api/v1/search` | Chemin de l'API de recherche |
| `registries.clawhub.skills_path` | string | `/api/v1/skills` | Chemin de l'API Skills |
| `registries.clawhub.download_path` | string | `/api/v1/download` | Chemin de l'API de téléchargement |
### Exemple de configuration
```json
{
"tools": {
"skills": {
"registries": {
"clawhub": {
"enabled": true,
"base_url": "https://clawhub.ai",
"auth_token": "",
"search_path": "/api/v1/search",
"skills_path": "/api/v1/skills",
"download_path": "/api/v1/download"
}
}
}
}
}
```
## Variables d'environnement
Toutes les options de configuration peuvent être remplacées via des variables d'environnement au format `PICOCLAW_TOOLS_<SECTION>_<KEY>` :
Par exemple :
- `PICOCLAW_TOOLS_WEB_BRAVE_ENABLED=true`
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
Note : La configuration de type map imbriquée (par exemple `tools.mcp.servers.<name>.*`) est configurée dans `config.json` plutôt que via des variables d'environnement.
+45
View File
@@ -0,0 +1,45 @@
# 🐛 Dépannage
> Retour au [README](../../README.fr.md)
## "model ... not found in model_list" ou OpenRouter "free is not a valid model ID"
**Symptôme :** Vous voyez l'une des erreurs suivantes :
- `Error creating provider: model "openrouter/free" not found in model_list`
- OpenRouter retourne 400 : `"free is not a valid model ID"`
**Cause :** Le champ `model` dans votre entrée `model_list` est ce qui est envoyé à l'API. Pour OpenRouter, vous devez utiliser l'identifiant de modèle **complet**, pas un raccourci.
- **Incorrect :** `"model": "free"` → OpenRouter reçoit `free` et le rejette.
- **Correct :** `"model": "openrouter/free"` → OpenRouter reçoit `openrouter/free` (routage automatique du niveau gratuit).
**Correction :** Dans `~/.picoclaw/config.json` (ou votre chemin de configuration) :
1. **agents.defaults.model** doit correspondre à un `model_name` dans `model_list` (par ex. `"openrouter-free"`).
2. Le **model** de cette entrée doit être un identifiant de modèle OpenRouter valide, par exemple :
- `"openrouter/free"` — niveau gratuit automatique
- `"google/gemini-2.0-flash-exp:free"`
- `"meta-llama/llama-3.1-8b-instruct:free"`
Exemple :
```json
{
"agents": {
"defaults": {
"model": "openrouter-free"
}
},
"model_list": [
{
"model_name": "openrouter-free",
"model": "openrouter/free",
"api_key": "sk-or-v1-YOUR_OPENROUTER_KEY",
"api_base": "https://openrouter.ai/api/v1"
}
]
}
```
Obtenez votre clé sur [OpenRouter Keys](https://openrouter.ai/keys).
+574
View File
@@ -0,0 +1,574 @@
# 💬 チャットアプリ設定
> [README](../../README.ja.md) に戻る
## 💬 チャットアプリ連携
PicoClaw は複数のチャットプラットフォームをサポートしており、Agent をどこにでも接続できます。
> **注意**: すべての Webhook ベースのチャネル(LINE、WeCom など)は、共有 Gateway HTTP サーバー(`gateway.host`:`gateway.port`、デフォルト `127.0.0.1:18790`)上で提供されます。チャネルごとにポートを設定する必要はありません。注意:飛書(Feishu)は WebSocket/SDK モードを使用し、共有 HTTP Webhook サーバーは使用しません。
### チャネル一覧
| チャネル | セットアップ難易度 | 特徴 | ドキュメント |
| -------------------- | ------------------ | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------- |
| **Telegram** | ⭐ 簡単 | 推奨、音声テキスト変換対応、ロングポーリング(公開 IP 不要) | [ドキュメント](../channels/telegram/README.zh.md) |
| **Discord** | ⭐ 簡単 | Socket Mode、グループ/DM 対応、Bot エコシステム充実 | [ドキュメント](../channels/discord/README.zh.md) |
| **WhatsApp** | ⭐ 簡単 | ネイティブ (QR スキャン) または Bridge URL | [ドキュメント](../channels/whatsapp/README.zh.md) |
| **Slack** | ⭐ 簡単 | **Socket Mode** (公開 IP 不要)、エンタープライズ対応 | [ドキュメント](../channels/slack/README.zh.md) |
| **Matrix** | ⭐⭐ 中程度 | フェデレーションプロトコル、セルフホスト対応 | [ドキュメント](../channels/matrix/README.zh.md) |
| **QQ** | ⭐⭐ 中程度 | 公式ボット API、中国コミュニティ向け | [ドキュメント](../channels/qq/README.zh.md) |
| **DingTalk** | ⭐⭐ 中程度 | Stream モード(公開 IP 不要)、企業向け | [ドキュメント](../channels/dingtalk/README.zh.md) |
| **LINE** | ⭐⭐⭐ やや難 | HTTPS Webhook が必要 | [ドキュメント](../channels/line/README.zh.md) |
| **WeCom (企業微信)** | ⭐⭐⭐ やや難 | グループ Bot (Webhook)、カスタムアプリ (API)、AI Bot 対応 | [Bot](../channels/wecom/wecom_bot/README.zh.md) / [App](../channels/wecom/wecom_app/README.zh.md) / [AI Bot](../channels/wecom/wecom_aibot/README.zh.md) |
| **Feishu (飛書)** | ⭐⭐⭐ やや難 | エンタープライズコラボレーション、機能豊富 | [ドキュメント](../channels/feishu/README.zh.md) |
| **IRC** | ⭐⭐ 中程度 | サーバー + TLS 設定 | - |
| **OneBot** | ⭐⭐ 中程度 | NapCat/Go-CQHTTP 互換、コミュニティエコシステム充実 | [ドキュメント](../channels/onebot/README.zh.md) |
| **MaixCam** | ⭐ 簡単 | Sipeed AI カメラハードウェア統合チャネル | [ドキュメント](../channels/maixcam/README.zh.md) |
| **Pico** | ⭐ 簡単 | PicoClaw ネイティブプロトコルチャネル | |
---
<details>
<summary><b>Telegram</b>(推奨)</summary>
**1. Bot を作成**
* Telegram を開き、`@BotFather` を検索
* `/newbot` を送信し、プロンプトに従う
* Token をコピー
**2. 設定**
```json
{
"channels": {
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
> Telegram の `@userinfobot` から User ID を取得できます。
**3. 実行**
```bash
picoclaw gateway
```
**4. Telegram コマンドメニュー(起動時に自動登録)**
PicoClaw は統一されたコマンド定義を使用します。起動時に Telegram がサポートするコマンド(例: `/start`、`/help`、`/show`、`/list`)を Bot コマンドメニューに自動登録し、メニュー表示と実際の動作を一致させます。
Telegram 側はコマンドメニュー登録機能を保持し、汎用コマンドの実行は Agent Loop 内の commands executor で統一的に処理されます。
ネットワークや API の一時的なエラーで登録に失敗しても、チャネルの起動はブロックされません。システムがバックグラウンドで自動リトライします。
</details>
<details>
<summary><b>Discord</b></summary>
**1. Bot を作成**
* <https://discord.com/developers/applications> にアクセス
* アプリケーションを作成 → Bot → Bot を追加
* Bot Token をコピー
**2. Intents を有効化**
* Bot 設定で **MESSAGE CONTENT INTENT** を有効化
* (オプション)メンバーデータに基づくホワイトリストが必要な場合は **SERVER MEMBERS INTENT** を有効化
**3. User ID を取得**
* Discord 設定 → 詳細設定 → **開発者モード** を有効化
* アバターを右クリック → **ユーザー ID をコピー**
**4. 設定**
```json
{
"channels": {
"discord": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
**5. Bot を招待**
* OAuth2 → URL Generator
* Scopes: `bot`
* Bot Permissions: `Send Messages`, `Read Message History`
* 生成された招待リンクを開き、Bot をサーバーに追加
**オプション:グループトリガーモード**
デフォルトでは Bot はサーバーチャネル内のすべてのメッセージに応答します。@メンション時のみ応答するには:
```json
{
"channels": {
"discord": {
"group_trigger": { "mention_only": true }
}
}
}
```
キーワードプレフィックスでトリガーすることもできます(例: `!bot`):
```json
{
"channels": {
"discord": {
"group_trigger": { "prefixes": ["!bot"] }
}
}
}
```
**6. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WhatsApp</b>(ネイティブ whatsmeow)</summary>
PicoClaw は 2 つの WhatsApp 接続方式をサポートしています:
- **ネイティブ(推奨):** プロセス内で [whatsmeow](https://github.com/tulir/whatsmeow) を使用。独立した Bridge は不要です。`"use_native": true` に設定し、`bridge_url` を空にします。初回実行時に WhatsApp で QR コードをスキャン(リンクデバイス)。セッションはワークスペース配下(例: `workspace/whatsapp/`)に保存されます。ネイティブチャネルは**オプション**ビルドで、`-tags whatsapp_native` でコンパイルします(例: `make build-whatsapp-native` または `go build -tags whatsapp_native ./cmd/...`)。
- **Bridge:** 外部 WebSocket Bridge に接続。`bridge_url`(例: `ws://localhost:3001`)を設定し、`use_native` を false のままにします。
**設定(ネイティブ)**
```json
{
"channels": {
"whatsapp": {
"enabled": true,
"use_native": true,
"session_store_path": "",
"allow_from": []
}
}
}
```
`session_store_path` が空の場合、セッションは `<workspace>/whatsapp/` に保存されます。`picoclaw gateway` を実行し、初回実行時にターミナルに表示される QR コードをスキャンしてください(WhatsApp → リンクデバイス)。
</details>
<details>
<summary><b>Matrix</b></summary>
**1. Bot アカウントを準備**
* お好みの homeserver(例: `https://matrix.org` またはセルフホスト)を使用
* Bot ユーザーを作成し、access token を取得
**2. 設定**
```json
{
"channels": {
"matrix": {
"enabled": true,
"homeserver": "https://matrix.org",
"user_id": "@your-bot:matrix.org",
"access_token": "YOUR_MATRIX_ACCESS_TOKEN",
"allow_from": []
}
}
}
```
**3. 実行**
```bash
picoclaw gateway
```
すべてのオプション(`device_id`、`join_on_invite`、`group_trigger`、`placeholder`、`reasoning_channel_id`)については [Matrix チャネル設定ガイド](../channels/matrix/README.md) を参照してください。
</details>
<details>
<summary><b>QQ</b></summary>
**1. Bot を作成**
- [QQ 開放プラットフォーム](https://q.qq.com/#) にアクセス
- アプリケーションを作成 → **AppID** と **AppSecret** を取得
**2. 設定**
```json
{
"channels": {
"qq": {
"enabled": true,
"app_id": "YOUR_APP_ID",
"app_secret": "YOUR_APP_SECRET",
"allow_from": []
}
}
}
```
> `allow_from` を空にするとすべてのユーザーを許可します。QQ 番号を指定してアクセスを制限することもできます。
**3. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Slack</b></summary>
**1. Slack App を作成**
* [Slack API](https://api.slack.com/apps) でアプリを作成
* **Socket Mode** を有効化
* **Bot Token** と **App-Level Token** を取得
**2. 設定**
```json
{
"channels": {
"slack": {
"enabled": true,
"bot_token": "xoxb-YOUR_BOT_TOKEN",
"app_token": "xapp-YOUR_APP_TOKEN",
"allow_from": []
}
}
}
```
**3. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>IRC</b></summary>
**1. 設定**
```json
{
"channels": {
"irc": {
"enabled": true,
"server": "irc.libera.chat:6697",
"nick": "picoclaw-bot",
"use_tls": true,
"channels_to_join": ["#your-channel"],
"allow_from": []
}
}
}
```
**2. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>DingTalk</b></summary>
**1. Bot を作成**
* [開放プラットフォーム](https://open.dingtalk.com/) にアクセス
* 内部アプリを作成
* Client ID と Client Secret をコピー
**2. 設定**
```json
{
"channels": {
"dingtalk": {
"enabled": true,
"client_id": "YOUR_CLIENT_ID",
"client_secret": "YOUR_CLIENT_SECRET",
"allow_from": []
}
}
}
```
> `allow_from` を空にするとすべてのユーザーを許可します。DingTalk ユーザー ID を指定してアクセスを制限することもできます。
**3. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>LINE</b></summary>
**1. LINE 公式アカウントを作成**
- [LINE Developers Console](https://developers.line.biz/) にアクセス
- Provider を作成 → Messaging API チャネルを作成
- **Channel Secret** と **Channel Access Token** をコピー
**2. 設定**
```json
{
"channels": {
"line": {
"enabled": true,
"channel_secret": "YOUR_CHANNEL_SECRET",
"channel_access_token": "YOUR_CHANNEL_ACCESS_TOKEN",
"webhook_path": "/webhook/line",
"allow_from": []
}
}
}
```
> LINE Webhook は共有 Gateway サーバー(`gateway.host`:`gateway.port`、デフォルト `127.0.0.1:18790`)上で提供されます。
**3. Webhook URL を設定**
LINE は HTTPS Webhook が必要です。リバースプロキシまたはトンネルを使用してください:
```bash
# 例:ngrok を使用(Gateway デフォルトポートは 18790)
ngrok http 18790
```
LINE Developers Console で Webhook URL を `https://your-domain/webhook/line` に設定し、**Use webhook** を有効にしてください。
**4. 実行**
```bash
picoclaw gateway
```
> グループチャットでは、Bot は @メンション時のみ応答します。返信は元のメッセージを引用します。
</details>
<details>
<summary><b>Feishu (飛書)</b></summary>
**1. アプリを作成**
* [飛書開放プラットフォーム](https://open.feishu.cn/) にアクセス
* 企業カスタムアプリを作成
* **App ID** と **App Secret** を取得
**2. 設定**
```json
{
"channels": {
"feishu": {
"enabled": true,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
}
}
}
```
**3. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WeCom (企業微信)</b></summary>
PicoClaw は 3 種類の WeCom 統合をサポートしています:
**方式 1: グループ Bot (Bot)** — セットアップ簡単、グループチャット対応
**方式 2: カスタムアプリ (App)** — より多機能、プロアクティブメッセージング、プライベートチャットのみ
**方式 3: AI Bot** — 公式 AI Bot、ストリーミング返信、グループ・プライベートチャット対応
詳細なセットアップ手順は [WeCom AI Bot 設定ガイド](../channels/wecom/wecom_aibot/README.zh.md) を参照してください。
**クイックセットアップ — グループ Bot:**
**1. Bot を作成**
* WeCom 管理コンソール → グループチャット → グループ Bot を追加
* Webhook URL をコピー(形式:`https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx`)
**2. 設定**
```json
{
"channels": {
"wecom": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_url": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY",
"webhook_path": "/webhook/wecom",
"allow_from": []
}
}
}
```
> WeCom Webhook は共有 Gateway サーバー(`gateway.host`:`gateway.port`、デフォルト `127.0.0.1:18790`)上で提供されます。
**クイックセットアップ — カスタムアプリ:**
**1. アプリを作成**
* WeCom 管理コンソール → アプリ管理 → アプリを作成
* **AgentId** と **Secret** をコピー
* 「マイ企業」ページで **CorpID** をコピー
**2. メッセージ受信を設定**
* アプリ詳細で「メッセージ受信」→「API を設定」をクリック
* URL を `http://your-server:18790/webhook/wecom-app` に設定
* **Token** と **EncodingAESKey** を生成
**3. 設定**
```json
{
"channels": {
"wecom_app": {
"enabled": true,
"corp_id": "wwxxxxxxxxxxxxxxxx",
"corp_secret": "YOUR_CORP_SECRET",
"agent_id": 1000002,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-app",
"allow_from": []
}
}
}
```
**4. 実行**
```bash
picoclaw gateway
```
> **注意**: WeCom Webhook コールバックは Gateway ポート(デフォルト 18790)で提供されます。HTTPS にはリバースプロキシを使用してください。
**クイックセットアップ — AI Bot:**
**1. AI Bot を作成**
* WeCom 管理コンソール → アプリ管理 → AI Bot
* AI Bot 設定でコールバック URL を設定:`http://your-server:18790/webhook/wecom-aibot`
* **Token** をコピーし、「ランダム生成」をクリックして **EncodingAESKey** を取得
**2. 設定**
```json
{
"channels": {
"wecom_aibot": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_43_CHAR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-aibot",
"allow_from": [],
"welcome_message": "こんにちは!何かお手伝いできますか?"
}
}
}
```
**3. 実行**
```bash
picoclaw gateway
```
> **注意**: WeCom AI Bot はストリーミングプルプロトコルを使用しており、返信タイムアウトの心配はありません。長時間タスク(30 秒超)は自動的に `response_url` プッシュ配信に切り替わります。
</details>
<details>
<summary><b>OneBot</b></summary>
**1. 設定**
NapCat / Go-CQHTTP などの OneBot 実装と互換性があります。
```json
{
"channels": {
"onebot": {
"enabled": true,
"allow_from": []
}
}
}
```
**2. 実行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>MaixCam</b></summary>
Sipeed AI カメラハードウェア向けの統合チャネルです。
```json
{
"channels": {
"maixcam": {
"enabled": true
}
}
}
```
```bash
picoclaw gateway
```
</details>
+256
View File
@@ -0,0 +1,256 @@
# ⚙️ 設定ガイド
> [README](../../README.ja.md) に戻る
## ⚙️ 設定詳細
設定ファイルパス: `~/.picoclaw/config.json`
### 環境変数
環境変数を使用してデフォルトパスを上書きできます。ポータブルインストール、コンテナ化デプロイ、または picoclaw をシステムサービスとして実行する場合に便利です。これらの変数は独立しており、異なるパスを制御します。
| 変数 | 説明 | デフォルトパス |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | 設定ファイルのパスを上書きします。picoclaw がどの `config.json` を読み込むかを直接指定し、他のすべての場所を無視します。 | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | picoclaw データのルートディレクトリを上書きします。`workspace` やその他のデータディレクトリのデフォルト場所を変更します。 | `~/.picoclaw` |
**例:**
```bash
# 特定の設定ファイルで picoclaw を実行
# ワークスペースパスはその設定ファイル内から読み込まれます
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# /opt/picoclaw にすべてのデータを保存して picoclaw を実行
# 設定はデフォルトの ~/.picoclaw/config.json から読み込まれます
# ワークスペースは /opt/picoclaw/workspace に作成されます
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# 両方を使用して完全にカスタマイズ
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### ワークスペースレイアウト
PicoClaw は設定されたワークスペース(デフォルト: `~/.picoclaw/workspace`)にデータを保存します:
```
~/.picoclaw/workspace/
├── sessions/ # 会話セッションと履歴
├── memory/ # 長期記憶 (MEMORY.md)
├── state/ # 永続化状態 (最後のチャネルなど)
├── cron/ # スケジュールジョブデータベース
├── skills/ # カスタムスキル
├── AGENTS.md # Agent 動作ガイド
├── HEARTBEAT.md # 定期タスクプロンプト (30 分ごとにチェック)
├── IDENTITY.md # Agent アイデンティティ
├── SOUL.md # Agent ソウル/性格
└── USER.md # ユーザー設定
```
### スキルソース
デフォルトでは、スキルは以下の順序で読み込まれます:
1. `~/.picoclaw/workspace/skills`(ワークスペース)
2. `~/.picoclaw/skills`(グローバル)
3. `<current-working-directory>/skills`(ビルトイン)
高度な/テスト用セットアップでは、以下の環境変数でビルトインスキルのルートを上書きできます:
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### 統一コマンド実行ポリシー
- 汎用スラッシュコマンドは `pkg/agent/loop.go` 内の `commands.Executor` を通じて統一的に実行されます。
- チャネルアダプターはローカルで汎用コマンドを消費しなくなりました。受信テキストを bus/agent パスに転送するだけです。Telegram は起動時にサポートするコマンドメニューを自動登録します。
- 未登録のスラッシュコマンド(例: `/foo`)は通常の LLM 処理にパススルーされます。
- 登録済みだが現在のチャネルでサポートされていないコマンド(例: WhatsApp での `/show`)は、明示的なユーザー向けエラーを返し、以降の処理を停止します。
### 🔒 セキュリティサンドボックス
PicoClaw はデフォルトでサンドボックス環境で実行されます。Agent は設定されたワークスペース内のファイルアクセスとコマンド実行のみが可能です。
#### デフォルト設定
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"restrict_to_workspace": true
}
}
}
```
| オプション | デフォルト値 | 説明 |
| ----------------------- | ----------------------- | ------------------------------------- |
| `workspace` | `~/.picoclaw/workspace` | Agent の作業ディレクトリ |
| `restrict_to_workspace` | `true` | ファイル/コマンドアクセスをワークスペース内に制限 |
#### 保護されたツール
`restrict_to_workspace: true` の場合、以下のツールがサンドボックス化されます:
| ツール | 機能 | 制限 |
| ------------- | ---------------- | ---------------------------------- |
| `read_file` | ファイル読み取り | ワークスペース内のファイルのみ |
| `write_file` | ファイル書き込み | ワークスペース内のファイルのみ |
| `list_dir` | ディレクトリ一覧 | ワークスペース内のディレクトリのみ |
| `edit_file` | ファイル編集 | ワークスペース内のファイルのみ |
| `append_file` | ファイル追記 | ワークスペース内のファイルのみ |
| `exec` | コマンド実行 | コマンドパスはワークスペース内必須 |
#### 追加の Exec 保護
`restrict_to_workspace: false` の場合でも、`exec` ツールは以下の危険なコマンドをブロックします:
* `rm -rf`、`del /f`、`rmdir /s` — 一括削除
* `format`、`mkfs`、`diskpart` — ディスクフォーマット
* `dd if=` — ディスクイメージング
* `/dev/sd[a-z]` への書き込み — 直接ディスク書き込み
* `shutdown`、`reboot`、`poweroff` — システムシャットダウン
* Fork bomb `:(){ :|:& };:`
### ファイルアクセス制御
| 設定キー | 型 | デフォルト値 | 説明 |
|----------|------|-------------|------|
| `tools.allow_read_paths` | string[] | `[]` | ワークスペース外で読み取りを許可する追加パス |
| `tools.allow_write_paths` | string[] | `[]` | ワークスペース外で書き込みを許可する追加パス |
### Exec セキュリティ設定
| 設定キー | 型 | デフォルト値 | 説明 |
|----------|------|-------------|------|
| `tools.exec.allow_remote` | bool | `false` | リモートチャネル(Telegram/Discord など)からの exec ツール実行を許可 |
| `tools.exec.enable_deny_patterns` | bool | `true` | 危険なコマンドのインターセプトを有効化 |
| `tools.exec.custom_deny_patterns` | string[] | `[]` | カスタムブロック正規表現パターン |
| `tools.exec.custom_allow_patterns` | string[] | `[]` | カスタム許可正規表現パターン |
> **セキュリティ注意:** Symlink 保護はデフォルトで有効です。すべてのファイルパスはホワイトリストマッチング前に `filepath.EvalSymlinks` で解決され、シンボリックリンクエスケープ攻撃を防止します。
#### 既知の制限:ビルドツールの子プロセス
exec セキュリティガードは PicoClaw が直接起動するコマンドラインのみを検査します。`make`、`go run`、`cargo`、`npm run`、またはカスタムビルドスクリプトなどの開発ツールが生成する子プロセスは再帰的に検査しません。
つまり、トップレベルのコマンドが初期ガードチェックを通過した後、他のバイナリをコンパイルまたは起動できます。実際には、ビルドスクリプト、Makefile、パッケージスクリプト、生成されたバイナリを、直接のシェルコマンドと同等レベルの実行可能コードとしてレビューする必要があります。
高リスク環境の場合:
* 実行前にビルドスクリプトをレビューしてください。
* コンパイル・実行ワークフローには承認/手動レビューを優先してください。
* ビルトインガードより強力な分離が必要な場合は、コンテナまたは VM 内で PicoClaw を実行してください。
#### エラー例
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (path outside working dir)}
```
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)}
```
#### 制限の無効化(セキュリティリスク)
Agent がワークスペース外のパスにアクセスする必要がある場合:
**方法 1: 設定ファイル**
```json
{
"agents": {
"defaults": {
"restrict_to_workspace": false
}
}
}
```
**方法 2: 環境変数**
```bash
export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false
```
> ⚠️ **警告**: この制限を無効にすると、Agent がシステム上の任意のパスにアクセスできるようになります。管理された環境でのみ慎重に使用してください。
#### セキュリティ境界の一貫性
`restrict_to_workspace` 設定はすべての実行パスで一貫して適用されます:
| 実行パス | セキュリティ境界 |
| ---------------- | ---------------------------- |
| メイン Agent | `restrict_to_workspace` ✅ |
| サブ Agent / Spawn | 同じ制限を継承 ✅ |
| ハートビートタスク | 同じ制限を継承 ✅ |
すべてのパスは同じワークスペース制限を共有しており、サブ Agent やスケジュールタスクを通じてセキュリティ境界を回避することはできません。
### ハートビート(定期タスク)
PicoClaw は定期タスクを自動実行できます。ワークスペースに `HEARTBEAT.md` ファイルを作成してください:
```markdown
# Periodic Tasks
- Check my email for important messages
- Review my calendar for upcoming events
- Check the weather forecast
```
Agent は 30 分ごと(設定可能)にこのファイルを読み取り、利用可能なツールを使用してタスクを実行します。
#### Spawn を使用した非同期タスク
長時間実行タスク(Web 検索、API 呼び出し)には、`spawn` ツールを使用して**サブ Agent (subagent)** を作成します:
```markdown
# Periodic Tasks
## Quick Tasks (respond directly)
- Report current time
## Long Tasks (use spawn for async)
- Search the web for AI news and summarize
- Check email and report important messages
```
**主な動作:**
| 特性 | 説明 |
| ---------------- | -------------------------------------------- |
| **spawn** | 非同期サブ Agent を作成、メインハートビートをブロックしない |
| **独立コンテキスト** | サブ Agent は独自のコンテキストを持ち、セッション履歴なし |
| **message tool** | サブ Agent は message ツールでユーザーと直接通信 |
| **ノンブロッキング** | spawn 後、ハートビートは次のタスクに進む |
**設定:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| オプション | デフォルト値 | 説明 |
| ---------- | ------------ | ------------------------------ |
| `enabled` | `true` | ハートビートの有効/無効 |
| `interval` | `30` | チェック間隔(分単位、最小: 5)|
**環境変数:**
- `PICOCLAW_HEARTBEAT_ENABLED=false` で無効化
- `PICOCLAW_HEARTBEAT_INTERVAL=60` で間隔を変更
+168
View File
@@ -0,0 +1,168 @@
# 🐳 Docker とクイックスタート
> [README](../../README.ja.md) に戻る
## 🐳 Docker Compose
Docker Compose を使用して PicoClaw を実行できます。ローカルに何もインストールする必要はありません。
```bash
# 1. リポジトリをクローン
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
# 2. 初回実行 — docker/data/config.json を自動生成して終了
docker compose -f docker/docker-compose.yml --profile gateway up
# コンテナが "First-run setup complete." と表示して停止します
# 3. API Key を設定
vim docker/data/config.json # provider API key、Bot Token などを設定
# 4. 起動
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Docker ユーザー**: デフォルトでは Gateway は `127.0.0.1` でリッスンしており、コンテナ外からはアクセスできません。ヘルスチェックエンドポイントへのアクセスやポート公開が必要な場合は、環境変数で `PICOCLAW_GATEWAY_HOST=0.0.0.0` を設定するか、`config.json` を更新してください。
```bash
# 5. ログを確認
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. 停止
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Launcher モード (Web コンソール)
`launcher` イメージには 3 つのバイナリ(`picoclaw`、`picoclaw-launcher`、`picoclaw-launcher-tui`)がすべて含まれており、デフォルトで Web コンソールを起動します。ブラウザベースの設定・チャット画面を提供します。
```bash
docker compose -f docker/docker-compose.yml --profile launcher up -d
```
ブラウザで http://localhost:18800 を開いてください。Launcher が Gateway プロセスを自動管理します。
> [!WARNING]
> Web コンソールはまだ認証をサポートしていません。公開インターネットに公開しないでください。
### Agent モード (ワンショット)
```bash
# 質問する
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "2+2は?"
# インタラクティブモード
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### イメージの更新
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
---
## 🚀 クイックスタート
> [!TIP]
> `~/.picoclaw/config.json` に API Key を設定してください。API Key の取得先: [Volcengine (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM)。Web 検索は**オプション**です — 無料の [Tavily API](https://tavily.com) (月 1000 回無料) または [Brave Search API](https://brave.com/search/api) (月 2000 回無料) を取得できます。
**1. 初期化**
```bash
picoclaw onboard
```
**2. 設定** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"enabled": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "YOUR_PERPLEXITY_API_KEY",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://your-searxng-instance:8888",
"max_results": 5
}
}
}
}
```
> **新機能**: `model_list` 設定形式により、コード変更なしで provider を追加できます。詳細は[モデル設定](providers.md#モデル設定-model_list)を参照してください。
> `request_timeout` はオプションで、単位は秒です。省略または `<= 0` に設定した場合、PicoClaw はデフォルトのタイムアウト(120 秒)を使用します。
**3. API Key の取得**
* **LLM プロバイダー**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **Web 検索** (オプション):
* [Brave Search](https://brave.com/search/api) - 有料 ($5/1000 queries, ~$5-6/month)
* [Perplexity](https://www.perplexity.ai) - AI 搭載の検索・チャットインターフェース
* [SearXNG](https://github.com/searxng/searxng) - セルフホスト型メタ検索エンジン(無料、API Key 不要)
* [Tavily](https://tavily.com) - AI Agent 向けに最適化 (1000 requests/month)
* DuckDuckGo - 組み込みフォールバック(API Key 不要)
> **注意**: 完全な設定テンプレートは `config.example.json` を参照してください。
**4. チャット**
```bash
picoclaw agent -m "2+2は?"
```
以上です!2 分で動作する AI アシスタントが手に入ります。
---
+434
View File
@@ -0,0 +1,434 @@
# 🔌 プロバイダーとモデル設定
> [README](../../README.ja.md) に戻る
### プロバイダー
> [!NOTE]
> Groq は Whisper による無料の音声文字起こしを提供しています。Groq を設定すると、任意のチャネルからの音声メッセージが Agent レベルで自動的にテキストに変換されます。
| プロバイダー | 用途 | API Key の取得 |
| -------------------- | ---------------------------- | -------------------------------------------------------------------- |
| `gemini` | LLM (Gemini 直接接続) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu` | LLM (Zhipu 直接接続) | [bigmodel.cn](https://bigmodel.cn) |
| `volcengine` | LLM (Volcengine 直接接続) | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (推奨、全モデルアクセス可) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude 直接接続) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT 直接接続) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek 直接接続) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (Qwen 直接接続) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **音声文字起こし** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras 直接接続) | [cerebras.ai](https://cerebras.ai) |
| `vivgrid` | LLM (Vivgrid 直接接続) | [vivgrid.com](https://vivgrid.com) |
| `moonshot` | LLM (Kimi/Moonshot 直接接続) | [platform.moonshot.cn](https://platform.moonshot.cn) |
| `minimax` | LLM (Minimax 直接接続) | [platform.minimaxi.com](https://platform.minimaxi.com) |
| `avian` | LLM (Avian 直接接続) | [avian.io](https://avian.io) |
| `mistral` | LLM (Mistral 直接接続) | [console.mistral.ai](https://console.mistral.ai) |
| `longcat` | LLM (Longcat 直接接続) | [longcat.ai](https://longcat.ai) |
| `modelscope` | LLM (ModelScope 直接接続) | [modelscope.cn](https://modelscope.cn) |
### モデル設定 (model_list)
> **新機能!** PicoClaw は**モデル中心**の設定方式を採用しました。`ベンダー/モデル` 形式(例: `zhipu/glm-4.7`)を指定するだけで新しい provider を追加できます——**コード変更は一切不要です!**
この設計は**マルチ Agent シナリオ**もサポートし、柔軟な Provider 選択を提供します:
- **Agent ごとに異なる Provider**: 各 Agent が独自の LLM provider を使用可能
- **モデルフォールバック**: プライマリモデルとフォールバックモデルを設定し、信頼性を向上
- **ロードバランシング**: 複数の API エンドポイント間でリクエストを分散
- **一元管理**: すべての provider を一箇所で管理
#### 📋 サポートされている全ベンダー
| ベンダー | `model` プレフィックス | デフォルト API Base | プロトコル | API Key の取得 |
| ------------------- | --------------------- | --------------------------------------------------- | ---------- | ----------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [キーを取得](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [キーを取得](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [キーを取得](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [キーを取得](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [キーを取得](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [キーを取得](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [キーを取得](https://platform.moonshot.cn) |
| **通義千問 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [キーを取得](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [キーを取得](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | ローカル(キー不要) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [キーを取得](https://openrouter.ai/keys) |
| **LiteLLM Proxy** | `litellm/` | `http://localhost:4000/v1` | OpenAI | LiteLLM プロキシキー |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | ローカル |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [キーを取得](https://cerebras.ai) |
| **VolcEngine (Doubao)** | `volcengine/` | `https://ark.cn-beijing.volces.com/api/v3` | OpenAI | [キーを取得](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [キーを取得](https://www.byteplus.com) |
| **Vivgrid** | `vivgrid/` | `https://api.vivgrid.com/v1` | OpenAI | [キーを取得](https://vivgrid.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [キーを取得](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [トークンを取得](https://modelscope.cn/my/tokens) |
| **Antigravity** | `antigravity/` | Google Cloud | カスタム | OAuth のみ |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### 基本設定
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### ベンダー別設定例
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**VolcEngine (Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (API キー使用)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
}
```
> `picoclaw auth login --provider anthropic` を実行して API トークンを設定してください。
**Anthropic Messages API(ネイティブ形式)**
Anthropic API への直接アクセスや、Anthropic のネイティブメッセージ形式のみをサポートするカスタムエンドポイント向け:
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> `anthropic-messages` プロトコルを使用するケース:
> - Anthropic のネイティブ `/v1/messages` エンドポイントのみをサポートするサードパーティプロキシを使用する場合(OpenAI 互換の `/v1/chat/completions` 非対応)
> - MiniMax、Synthetic など Anthropic のネイティブメッセージ形式を必要とするサービスに接続する場合
> - 既存の `anthropic` プロトコルが 404 エラーを返す場合(エンドポイントが OpenAI 互換形式をサポートしていないことを示す)
>
> **注意:** `anthropic` プロトコルは OpenAI 互換形式(`/v1/chat/completions`)を使用し、`anthropic-messages` は Anthropic のネイティブ形式(`/v1/messages`)を使用します。エンドポイントがサポートする形式に応じて選択してください。
**Ollama (ローカル)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**カスタムプロキシ/API**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
**LiteLLM Proxy**
```json
{
"model_name": "lite-gpt4",
"model": "litellm/lite-gpt4",
"api_base": "http://localhost:4000/v1",
"api_key": "sk-..."
}
```
PicoClaw はリクエスト送信前に外側の `litellm/` プレフィックスのみを除去するため、`litellm/lite-gpt4` は `lite-gpt4` を送信し、`litellm/openai/gpt-4o` は `openai/gpt-4o` を送信します。
#### ロードバランシング
同じモデル名に複数のエンドポイントを設定すると、PicoClaw が自動的にラウンドロビンで分散します:
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### レガシー `providers` 設定からの移行
`providers` 設定形式は**非推奨**ですが、後方互換性のためまだサポートされています。
**旧設定(非推奨):**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**新設定(推奨):**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
詳細な移行ガイドは [docs/migration/model-list-migration.md](../migration/model-list-migration.md) を参照してください。
### Provider アーキテクチャ
PicoClaw はプロトコルファミリーごとに Provider をルーティングします:
- OpenAI 互換プロトコル:OpenRouter、OpenAI 互換ゲートウェイ、Groq、Zhipu、vLLM スタイルのエンドポイント。
- Anthropic プロトコル:Claude ネイティブ API 動作。
- Codex/OAuth パス:OpenAI OAuth/Token 認証ルート。
これによりランタイムを軽量に保ちつつ、新しい OpenAI 互換バックエンドの追加をほぼ設定操作(`api_base` + `api_key`)のみで実現しています。
<details>
<summary><b>Zhipu 設定例</b></summary>
**1. API key と base URL を取得**
- [API key](https://bigmodel.cn/usercenter/proj-mgmt/apikeys) を取得
**2. 設定**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. 実行**
```bash
picoclaw agent -m "こんにちは"
```
</details>
<details>
<summary><b>完全な設定例</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false,
"bridge_url": "ws://localhost:3001",
"use_native": false,
"session_store_path": "",
"allow_from": []
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "BSA...",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://localhost:8888",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
---
## 📝 API Key 比較表
| サービス | 料金 | ユースケース |
| ---------------- | ------------------------ | ------------------------------------- |
| **OpenRouter** | Free: 200K tokens/month | マルチモデル (Claude, GPT-4 など) |
| **Volcengine CodingPlan** | ¥9.9/first month | 中国ユーザー向け、複数の SOTA モデル (Doubao, DeepSeek など) |
| **Zhipu** | Free: 200K tokens/month | 中国ユーザー向け |
| **Brave Search** | $5/1000 queries | Web 検索機能 |
| **SearXNG** | Free (self-hosted) | プライバシー重視のメタ検索 (70+ engines) |
| **Groq** | Free tier available | 高速推論 (Llama, Mixtral) |
| **Cerebras** | Free tier available | 高速推論 (Llama, Qwen など) |
| **LongCat** | Free: up to 5M tokens/day | 高速推論 |
| **ModelScope** | Free: 2000 requests/day | 推論 (Qwen, GLM, DeepSeek など) |
---
<div align="center">
<img src="assets/logo.jpg" alt="PicoClaw Meme" width="512">
</div>
+68
View File
@@ -0,0 +1,68 @@
# 🔄 非同期タスクと Spawn
> [README](../../README.ja.md) に戻る
### Spawn を使用した非同期タスク
長時間実行タスク(Web 検索、API 呼び出し)には、`spawn` ツールを使用して**サブ Agent (subagent)** を作成します:
```markdown
# Periodic Tasks
## Quick Tasks (respond directly)
- Report current time
## Long Tasks (use spawn for async)
- Search the web for AI news and summarize
- Check email and report important messages
```
**主な動作:**
| 特性 | 説明 |
| ---------------- | ------------------------------------------------ |
| **spawn** | 非同期サブ Agent を作成、メインハートビートをブロックしない |
| **独立コンテキスト** | サブ Agent は独自のコンテキストを持ち、セッション履歴なし |
| **message tool** | サブ Agent は message ツールでユーザーと直接通信 |
| **ノンブロッキング** | spawn 後、ハートビートは次のタスクに進む |
#### サブ Agent の通信の仕組み
```
ハートビートトリガー (Heartbeat triggers)
    ↓
Agent が HEARTBEAT.md を読み取り
    ↓
長時間タスクの場合: サブ Agent を spawn
    ↓                           ↓
次のタスクに進む            サブ Agent が独立して作業
    ↓                           ↓
すべてのタスク完了          サブ Agent が "message" ツールを使用
    ↓                           ↓
HEARTBEAT_OK を応答         ユーザーが直接結果を受信
```
サブ Agent はツール(message、web_search など)にアクセスでき、メイン Agent を経由せずにユーザーと独立して通信できます。
**設定:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| オプション | デフォルト値 | 説明 |
| ---------- | ------------ | ------------------------------ |
| `enabled` | `true` | ハートビートの有効/無効 |
| `interval` | `30` | チェック間隔(分単位、最小: 5)|
**環境変数:**
- `PICOCLAW_HEARTBEAT_ENABLED=false` で無効化
- `PICOCLAW_HEARTBEAT_INTERVAL=60` で間隔を変更
+336
View File
@@ -0,0 +1,336 @@
# 🔧 ツール設定
> [README](../../README.ja.md) に戻る
PicoClaw のツール設定は `config.json` の `tools` フィールドにあります。
## ディレクトリ構造
```json
{
"tools": {
"web": {
...
},
"mcp": {
...
},
"exec": {
...
},
"cron": {
...
},
"skills": {
...
}
}
}
```
## Web ツール
Web ツールはウェブ検索とフェッチに使用されます。
### Web Fetcher
ウェブページコンテンツの取得と処理に関する一般設定。
| 設定項目 | 型 | デフォルト | 説明 |
|---------------------|--------|---------------|----------------------------------------------------------------------------------------|
| `enabled` | bool | true | ウェブページ取得機能を有効にする。 |
| `fetch_limit_bytes` | int | 10485760 | 取得するウェブページペイロードの最大サイズ(バイト単位、デフォルトは10MB)。 |
| `format` | string | "plaintext" | 取得コンテンツの出力形式。オプション:`plaintext` または `markdown`(推奨)。 |
### Brave
| 設定項目 | 型 | デフォルト | 説明 |
|---------------|--------|------------|-----------------------|
| `enabled` | bool | false | Brave 検索を有効にする |
| `api_key` | string | - | Brave Search API キー |
| `max_results` | int | 5 | 最大結果数 |
### DuckDuckGo
| 設定項目 | 型 | デフォルト | 説明 |
|---------------|------|------------|---------------------------|
| `enabled` | bool | true | DuckDuckGo 検索を有効にする |
| `max_results` | int | 5 | 最大結果数 |
### Perplexity
| 設定項目 | 型 | デフォルト | 説明 |
|---------------|--------|------------|---------------------------|
| `enabled` | bool | false | Perplexity 検索を有効にする |
| `api_key` | string | - | Perplexity API キー |
| `max_results` | int | 5 | 最大結果数 |
## Exec ツール
Exec ツールはシェルコマンドの実行に使用されます。
| 設定項目 | 型 | デフォルト | 説明 |
|------------------------|-------|------------|------------------------------------|
| `enable_deny_patterns` | bool | true | デフォルトの危険コマンドブロックを有効にする |
| `custom_deny_patterns` | array | [] | カスタム拒否パターン(正規表現) |
### 機能
- **`enable_deny_patterns`**:`false` に設定すると、デフォルトの危険コマンドブロックパターンを完全に無効にします
- **`custom_deny_patterns`**:カスタム拒否正規表現パターンを追加します。一致するコマンドはブロックされます
### デフォルトでブロックされるコマンドパターン
デフォルトで、PicoClaw は以下の危険なコマンドをブロックします:
- 削除コマンド:`rm -rf`、`del /f/q`、`rmdir /s`
- ディスク操作:`format`、`mkfs`、`diskpart`、`dd if=`、`/dev/sd*` への書き込み
- システム操作:`shutdown`、`reboot`、`poweroff`
- コマンド置換:`$()`、`${}`、バッククォート
- シェルへのパイプ:`| sh`、`| bash`
- 権限昇格:`sudo`、`chmod`、`chown`
- プロセス制御:`pkill`、`killall`、`kill -9`
- リモート操作:`curl | sh`、`wget | sh`、`ssh`
- パッケージ管理:`apt`、`yum`、`dnf`、`npm install -g`、`pip install --user`
- コンテナ:`docker run`、`docker exec`
- Git:`git push`、`git force`
- その他:`eval`、`source *.sh`
### 既知のアーキテクチャ上の制限
exec ガードは PicoClaw に送信されたトップレベルのコマンドのみを検証します。そのコマンドの実行開始後にビルドツールやスクリプトが生成する子プロセスを再帰的に検査することは**ありません**。
初期コマンドが許可された後、直接コマンドガードをバイパスできるワークフローの例:
- `make run`
- `go run ./cmd/...`
- `cargo run`
- `npm run build`
これは、明らかに危険な直接コマンドのブロックには有用ですが、未レビューのビルドパイプラインに対する完全なサンドボックスでは**ありません**。脅威モデルにワークスペース内の信頼できないコードが含まれる場合は、コンテナ、VM、またはビルド・実行コマンドに対する承認フローなど、より強力な分離を使用してください。
### 設定例
```json
{
"tools": {
"exec": {
"enable_deny_patterns": true,
"custom_deny_patterns": [
"\\brm\\s+-r\\b",
"\\bkillall\\s+python"
]
}
}
}
```
## Cron ツール
Cron ツールは定期タスクのスケジューリングに使用されます。
| 設定項目 | 型 | デフォルト | 説明 |
|------------------------|-----|------------|-----------------------------------------|
| `exec_timeout_minutes` | int | 5 | 実行タイムアウト(分)、0 は無制限 |
## MCP ツール
MCP ツールは外部の Model Context Protocol サーバーとの統合を可能にします。
### ツールディスカバリ(遅延読み込み)
複数の MCP サーバーに接続する場合、数百のツールを同時に公開すると LLM のコンテキストウィンドウを使い果たし、API コストが増加する可能性があります。**Discovery** 機能は、MCP ツールをデフォルトで*非表示*にすることでこの問題を解決します。
すべてのツールを読み込む代わりに、LLM には軽量な検索ツール(BM25 キーワードマッチングまたは正規表現を使用)が提供されます。LLM が特定の機能を必要とする場合、非表示のライブラリを検索します。一致するツールは一時的に「アンロック」され、設定されたターン数(`ttl`)の間コンテキストに注入されます。
### グローバル設定
| 設定項目 | 型 | デフォルト | 説明 |
|-------------|--------|------------|--------------------------------------|
| `enabled` | bool | false | MCP 統合をグローバルに有効にする |
| `discovery` | object | `{}` | ツールディスカバリ設定(下記参照) |
| `servers` | object | `{}` | サーバー名からサーバー設定へのマップ |
### Discovery 設定(`discovery`)
| 設定項目 | 型 | デフォルト | 説明 |
|----------------------|------|------------|---------------------------------------------------------------------------------------------------------------|
| `enabled` | bool | false | true の場合、MCP ツールは非表示になり、検索を通じてオンデマンドで読み込まれます。false の場合、すべてのツールが読み込まれます |
| `ttl` | int | 5 | 発見されたツールがアンロック状態を維持する会話ターン数 |
| `max_search_results` | int | 5 | 検索クエリごとに返されるツールの最大数 |
| `use_bm25` | bool | true | 自然言語/キーワード検索ツール(`tool_search_tool_bm25`)を有効にする。**警告**:正規表現検索よりリソースを消費します |
| `use_regex` | bool | false | 正規表現パターン検索ツール(`tool_search_tool_regex`)を有効にする |
> **注意:** `discovery.enabled` が `true` の場合、少なくとも1つの検索エンジン(`use_bm25` または `use_regex`)を有効にする**必要があります**。
> そうしないとアプリケーションの起動に失敗します。
### サーバーごとの設定
| 設定項目 | 型 | 必須 | 説明 |
|------------|--------|----------|----------------------------------------|
| `enabled` | bool | はい | この MCP サーバーを有効にする |
| `type` | string | いいえ | トランスポートタイプ:`stdio`、`sse`、`http` |
| `command` | string | stdio | stdio トランスポートの実行コマンド |
| `args` | array | いいえ | stdio トランスポートのコマンド引数 |
| `env` | object | いいえ | stdio プロセスの環境変数 |
| `env_file` | string | いいえ | stdio プロセスの環境ファイルパス |
| `url` | string | sse/http | `sse`/`http` トランスポートのエンドポイント URL |
| `headers` | object | いいえ | `sse`/`http` トランスポートの HTTP ヘッダー |
### トランスポートの動作
- `type` を省略した場合、トランスポートは自動検出されます:
- `url` が設定されている → `sse`
- `command` が設定されている → `stdio`
- `http` と `sse` はどちらも `url` + オプションの `headers` を使用します。
- `env` と `env_file` は `stdio` サーバーにのみ適用されます。
### 設定例
#### 1) Stdio MCP サーバー
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"filesystem": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/tmp"
]
}
}
}
}
}
```
#### 2) リモート SSE/HTTP MCP サーバー
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"remote-mcp": {
"enabled": true,
"type": "sse",
"url": "https://example.com/mcp",
"headers": {
"Authorization": "Bearer YOUR_TOKEN"
}
}
}
}
}
}
```
#### 3) ツールディスカバリを有効にした大規模 MCP セットアップ
*この例では、LLM は `tool_search_tool_bm25` のみを認識します。ユーザーからリクエストがあった場合にのみ、GitHub や Postgres のツールを動的に検索してアンロックします。*
```json
{
"tools": {
"mcp": {
"enabled": true,
"discovery": {
"enabled": true,
"ttl": 5,
"max_search_results": 5,
"use_bm25": true,
"use_regex": false
},
"servers": {
"github": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-github"
],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
}
},
"postgres": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-postgres",
"postgresql://user:password@localhost/dbname"
]
},
"slack": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-slack"
],
"env": {
"SLACK_BOT_TOKEN": "YOUR_SLACK_BOT_TOKEN",
"SLACK_TEAM_ID": "YOUR_SLACK_TEAM_ID"
}
}
}
}
}
}
```
## Skills ツール
Skills ツールは ClawHub などのレジストリを通じたスキルの発見とインストールを設定します。
### レジストリ
| 設定項目 | 型 | デフォルト | 説明 |
|------------------------------------|--------|----------------------|----------------------------------------------|
| `registries.clawhub.enabled` | bool | true | ClawHub レジストリを有効にする |
| `registries.clawhub.base_url` | string | `https://clawhub.ai` | ClawHub ベース URL |
| `registries.clawhub.auth_token` | string | `""` | より高いレート制限のためのオプションの Bearer トークン |
| `registries.clawhub.search_path` | string | `/api/v1/search` | 検索 API パス |
| `registries.clawhub.skills_path` | string | `/api/v1/skills` | Skills API パス |
| `registries.clawhub.download_path` | string | `/api/v1/download` | ダウンロード API パス |
### 設定例
```json
{
"tools": {
"skills": {
"registries": {
"clawhub": {
"enabled": true,
"base_url": "https://clawhub.ai",
"auth_token": "",
"search_path": "/api/v1/search",
"skills_path": "/api/v1/skills",
"download_path": "/api/v1/download"
}
}
}
}
}
```
## 環境変数
すべての設定オプションは `PICOCLAW_TOOLS_<SECTION>_<KEY>` 形式の環境変数で上書きできます:
例:
- `PICOCLAW_TOOLS_WEB_BRAVE_ENABLED=true`
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
注意:ネストされたマップ形式の設定(例:`tools.mcp.servers.<name>.*`)は環境変数ではなく `config.json` で設定します。
+45
View File
@@ -0,0 +1,45 @@
# 🐛 トラブルシューティング
> [README](../../README.ja.md) に戻る
## "model ... not found in model_list" または OpenRouter "free is not a valid model ID"
**症状:** 以下のいずれかのエラーが表示されます:
- `Error creating provider: model "openrouter/free" not found in model_list`
- OpenRouter が 400 を返す:`"free is not a valid model ID"`
**原因:** `model_list` エントリの `model` フィールドは API に送信される値です。OpenRouter では省略形ではなく、**完全な**モデル ID を使用する必要があります。
- **誤り:** `"model": "free"` → OpenRouter は `free` を受け取り、拒否します。
- **正しい:** `"model": "openrouter/free"` → OpenRouter は `openrouter/free` を受け取ります(自動無料枠ルーティング)。
**修正方法:** `~/.picoclaw/config.json`(またはお使いの設定パス)で:
1. **agents.defaults.model** は `model_list` 内の `model_name` と一致する必要があります(例:`"openrouter-free"`)。
2. そのエントリの **model** は有効な OpenRouter モデル ID である必要があります。例:
- `"openrouter/free"` 自動無料枠
- `"google/gemini-2.0-flash-exp:free"`
- `"meta-llama/llama-3.1-8b-instruct:free"`
設定例:
```json
{
"agents": {
"defaults": {
"model": "openrouter-free"
}
},
"model_list": [
{
"model_name": "openrouter-free",
"model": "openrouter/free",
"api_key": "sk-or-v1-YOUR_OPENROUTER_KEY",
"api_base": "https://openrouter.ai/api/v1"
}
]
}
```
キーは [OpenRouter Keys](https://openrouter.ai/keys) で取得できます。
+436
View File
@@ -0,0 +1,436 @@
# 🔌 Providers & Model Configuration
> Back to [README](../README.md)
### Providers
> [!NOTE]
> Groq provides free voice transcription via Whisper. If configured, audio messages from any channel will be automatically transcribed at the agent level.
| Provider | Purpose | Get API Key |
| ------------ | --------------------------------------- | ------------------------------------------------------------ |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](https://bigmodel.cn) |
| `volcengine` | LLM (Volcengine direct)                 | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (Qwen direct) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras direct) | [cerebras.ai](https://cerebras.ai) |
| `vivgrid` | LLM (Vivgrid direct) | [vivgrid.com](https://vivgrid.com) |
| `nvidia` | LLM (NVIDIA NIM) | [build.nvidia.com](https://build.nvidia.com) |
| `moonshot` | LLM (Kimi/Moonshot direct) | [platform.moonshot.cn](https://platform.moonshot.cn) |
| `minimax` | LLM (Minimax direct) | [platform.minimaxi.com](https://platform.minimaxi.com) |
| `avian` | LLM (Avian direct) | [avian.io](https://avian.io) |
| `mistral` | LLM (Mistral direct) | [console.mistral.ai](https://console.mistral.ai) |
| `longcat` | LLM (Longcat direct) | [longcat.ai](https://longcat.ai) |
| `modelscope` | LLM (ModelScope direct) | [modelscope.cn](https://modelscope.cn) |
### Model Configuration (model_list)
> **What's New?** PicoClaw now uses a **model-centric** configuration approach. Simply specify `vendor/model` format (e.g., `zhipu/glm-4.7`) to add new providers—**zero code changes required!**
This design also enables **multi-agent support** with flexible provider selection:
- **Different agents, different providers**: Each agent can use its own LLM provider
- **Model fallbacks**: Configure primary and fallback models for resilience
- **Load balancing**: Distribute requests across multiple endpoints
- **Centralized configuration**: Manage all providers in one place
#### 📋 All Supported Vendors
| Vendor | `model` Prefix | Default API Base | Protocol | API Key |
| ------------------- | ----------------- |-----------------------------------------------------| --------- | ---------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [Get Key](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [Get Key](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [Get Key](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [Get Key](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [Get Key](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [Get Key](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [Get Key](https://platform.moonshot.cn) |
| **通义千问 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [Get Key](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [Get Key](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | Local (no key needed) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [Get Key](https://openrouter.ai/keys) |
| **LiteLLM Proxy** | `litellm/` | `http://localhost:4000/v1` | OpenAI | Your LiteLLM proxy key |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | Local |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [Get Key](https://cerebras.ai) |
| **VolcEngine (Doubao)** | `volcengine/` | `https://ark.cn-beijing.volces.com/api/v3` | OpenAI | [Get Key](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [Get Key](https://www.byteplus.com) |
| **Vivgrid** | `vivgrid/` | `https://api.vivgrid.com/v1` | OpenAI | [Get Key](https://vivgrid.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [Get Key](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [Get Token](https://modelscope.cn/my/tokens) |
| **Azure OpenAI** | `azure/` | `https://{resource}.openai.azure.com` | Azure | [Get Key](https://portal.azure.com) |
| **Antigravity** | `antigravity/` | Google Cloud | Custom | OAuth only |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### Basic Configuration
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### Vendor-Specific Examples
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**VolcEngine (Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (with API key)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
}
```
> Run `picoclaw auth login --provider anthropic` to paste your API token.
**Anthropic Messages API (native format)**
For direct Anthropic API access or custom endpoints that only support Anthropic's native message format:
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> Use `anthropic-messages` protocol when:
> - Using third-party proxies that only support Anthropic's native `/v1/messages` endpoint (not OpenAI-compatible `/v1/chat/completions`)
> - Connecting to services such as MiniMax or Synthetic that require Anthropic's native message format
> - The existing `anthropic` protocol returns 404 errors (indicating the endpoint doesn't support OpenAI-compatible format)
>
> **Note:** The `anthropic` protocol uses OpenAI-compatible format (`/v1/chat/completions`), while `anthropic-messages` uses Anthropic's native format (`/v1/messages`). Choose based on your endpoint's supported format.
**Ollama (local)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**Custom Proxy/API**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
**LiteLLM Proxy**
```json
{
"model_name": "lite-gpt4",
"model": "litellm/lite-gpt4",
"api_base": "http://localhost:4000/v1",
"api_key": "sk-..."
}
```
PicoClaw strips only the outer `litellm/` prefix before sending the request, so proxy aliases like `litellm/lite-gpt4` send `lite-gpt4`, while `litellm/openai/gpt-4o` sends `openai/gpt-4o`.
#### Load Balancing
Configure multiple endpoints for the same model name—PicoClaw will automatically round-robin between them:
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### Migration from Legacy `providers` Config
The old `providers` configuration is **deprecated** but still supported for backward compatibility.
**Old Config (deprecated):**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**New Config (recommended):**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
For a detailed migration guide, see [docs/migration/model-list-migration.md](migration/model-list-migration.md).
### Provider Architecture
PicoClaw routes providers by protocol family:
- OpenAI-compatible protocol: OpenRouter, OpenAI-compatible gateways, Groq, Zhipu, and vLLM-style endpoints.
- Anthropic protocol: Claude-native API behavior.
- Codex/OAuth path: OpenAI OAuth/token authentication route.
This keeps the runtime lightweight while making new OpenAI-compatible backends mostly a config operation (`api_base` + `api_key`).
<details>
<summary><b>Zhipu</b></summary>
**1. Get API key and base URL**
* Get [API key](https://bigmodel.cn/usercenter/proj-mgmt/apikeys)
**2. Configure**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. Run**
```bash
picoclaw agent -m "Hello"
```
</details>
<details>
<summary><b>Full config example</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false,
"bridge_url": "ws://localhost:3001",
"use_native": false,
"session_store_path": "",
"allow_from": []
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "BSA...",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://localhost:8888",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
---
## 📝 API Key Comparison
| Service | Pricing | Use Case |
| ---------------- | ------------------------ | ------------------------------------- |
| **OpenRouter** | Free: 200K tokens/month | Multiple models (Claude, GPT-4, etc.) |
| **Volcengine CodingPlan** | ¥9.9/first month | Best for Chinese users, multiple SOTA models (Doubao, DeepSeek, etc.) |
| **Zhipu** | Free: 200K tokens/month | Suitable for Chinese users |
| **Brave Search** | $5/1000 queries | Web search functionality |
| **SearXNG** | Free (self-hosted) | Privacy-focused metasearch (70+ engines) |
| **Groq** | Free tier available | Fast inference (Llama, Mixtral) |
| **Cerebras** | Free tier available | Fast inference (Llama, Qwen, etc.) |
| **LongCat** | Free: up to 5M tokens/day | Fast inference |
| **ModelScope** | Free: 2000 requests/day | Inference (Qwen, GLM, DeepSeek, etc.) |
---
<div align="center">
<img src="../assets/logo.jpg" alt="PicoClaw Meme" width="512">
</div>
+427
View File
@@ -0,0 +1,427 @@
# 💬 Configuração de Aplicativos de Chat
> Voltar ao [README](../../README.pt-br.md)
## 💬 Aplicativos de Chat
Converse com seu picoclaw através do Telegram, Discord, WhatsApp, Matrix, QQ, DingTalk, LINE, WeCom, Feishu, Slack, IRC, OneBot ou MaixCam.
> **Nota**: Todos os canais baseados em webhook (LINE, WeCom, etc.) são servidos em um único servidor HTTP Gateway compartilhado (`gateway.host`:`gateway.port`, padrão `127.0.0.1:18790`). Não há portas por canal para configurar. O Feishu é uma exceção: ele usa o modo WebSocket/SDK e não utiliza o servidor HTTP webhook compartilhado.
| Channel | Setup |
| ------------ | ---------------------------------- |
| **Telegram** | Easy (just a token) |
| **Discord** | Easy (bot token + intents) |
| **WhatsApp** | Easy (native: QR scan; or bridge URL) |
| **Matrix** | Medium (homeserver + bot access token) |
| **QQ** | Easy (AppID + AppSecret) |
| **DingTalk** | Medium (app credentials) |
| **LINE** | Medium (credentials + webhook URL) |
| **WeCom AI Bot** | Medium (Token + AES key) |
| **Feishu** | Medium (App ID + Secret, WebSocket mode) |
| **Slack** | Medium (Bot token + App token) |
| **IRC** | Medium (server + TLS config) |
| **OneBot** | Medium (QQ via OneBot protocol) |
| **MaixCam** | Easy (Sipeed hardware integration) |
| **Pico** | Native PicoClaw protocol |
<details>
<summary><b>Telegram</b> (Recomendado)</summary>
**1. Criar um bot**
* Abra o Telegram, pesquise `@BotFather`
* Envie `/newbot`, siga as instruções
* Copie o token
**2. Configurar**
```json
{
"channels": {
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
> Obtenha seu ID de usuário com `@userinfobot` no Telegram.
**3. Executar**
```bash
picoclaw gateway
```
**4. Menu de comandos do Telegram (registrado automaticamente na inicialização)**
O PicoClaw agora mantém definições de comandos em um registro compartilhado. Na inicialização, o Telegram registrará automaticamente os comandos de bot suportados (por exemplo `/start`, `/help`, `/show`, `/list`) para que o menu de comandos e o comportamento em tempo de execução permaneçam sincronizados.
O registro do menu de comandos do Telegram permanece como descoberta UX local do canal; a execução genérica de comandos é tratada centralmente no loop do agente via commands executor.
Se o registro de comandos falhar (erros transitórios de rede/API), o canal ainda inicia e o PicoClaw tenta novamente o registro em segundo plano.
</details>
<details>
<summary><b>Discord</b></summary>
**1. Criar um bot**
* Acesse <https://discord.com/developers/applications>
* Crie um aplicativo → Bot → Add Bot
* Copie o token do bot
**2. Habilitar intents**
* Nas configurações do Bot, habilite **MESSAGE CONTENT INTENT**
* (Opcional) Habilite **SERVER MEMBERS INTENT** se planeja usar listas de permissão baseadas em dados de membros
**3. Obter seu User ID**
* Configurações do Discord → Avançado → habilite **Developer Mode**
* Clique com o botão direito no seu avatar → **Copy User ID**
**4. Configurar**
```json
{
"channels": {
"discord": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
**5. Convidar o bot**
* OAuth2 → URL Generator
* Scopes: `bot`
* Bot Permissions: `Send Messages`, `Read Message History`
* Abra a URL de convite gerada e adicione o bot ao seu servidor
**Opcional: Modo de ativação em grupo**
Por padrão, o bot responde a todas as mensagens em um canal do servidor. Para restringir respostas apenas a @menções, adicione:
```json
{
"channels": {
"discord": {
"group_trigger": { "mention_only": true }
}
}
}
```
Você também pode ativar por prefixos de palavras-chave (ex.: `!bot`):
```json
{
"channels": {
"discord": {
"group_trigger": { "prefixes": ["!bot"] }
}
}
}
```
**6. Executar**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WhatsApp</b> (nativo via whatsmeow)</summary>
O PicoClaw pode se conectar ao WhatsApp de duas formas:
- **Nativo (recomendado):** In-process usando [whatsmeow](https://github.com/tulir/whatsmeow). Sem bridge separado. Defina `"use_native": true` e deixe `bridge_url` vazio. Na primeira execução, escaneie o QR code com o WhatsApp (Dispositivos Vinculados). A sessão é armazenada no seu workspace (ex.: `workspace/whatsapp/`). O canal nativo é **opcional** para manter o binário padrão pequeno; compile com `-tags whatsapp_native` (ex.: `make build-whatsapp-native` ou `go build -tags whatsapp_native ./cmd/...`).
- **Bridge:** Conecte-se a um bridge WebSocket externo. Defina `bridge_url` (ex.: `ws://localhost:3001`) e mantenha `use_native` como false.
**Configurar (nativo)**
```json
{
"channels": {
"whatsapp": {
"enabled": true,
"use_native": true,
"session_store_path": "",
"allow_from": []
}
}
}
```
Se `session_store_path` estiver vazio, a sessão é armazenada em `<workspace>/whatsapp/`. Execute `picoclaw gateway`; na primeira execução, escaneie o QR code impresso no terminal com WhatsApp → Dispositivos Vinculados.
</details>
<details>
<summary><b>QQ</b></summary>
**1. Criar um bot**
- Acesse a [QQ Open Platform](https://q.qq.com/#)
- Crie um aplicativo → Obtenha **AppID** e **AppSecret**
**2. Configurar**
```json
{
"channels": {
"qq": {
"enabled": true,
"app_id": "YOUR_APP_ID",
"app_secret": "YOUR_APP_SECRET",
"allow_from": []
}
}
}
```
> Defina `allow_from` como vazio para permitir todos os usuários, ou especifique números QQ para restringir o acesso.
**3. Executar**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>DingTalk</b></summary>
**1. Criar um bot**
* Acesse a [Open Platform](https://open.dingtalk.com/)
* Crie um aplicativo interno
* Copie o Client ID e o Client Secret
**2. Configurar**
```json
{
"channels": {
"dingtalk": {
"enabled": true,
"client_id": "YOUR_CLIENT_ID",
"client_secret": "YOUR_CLIENT_SECRET",
"allow_from": []
}
}
}
```
> Defina `allow_from` como vazio para permitir todos os usuários, ou especifique IDs de usuário DingTalk para restringir o acesso.
**3. Executar**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Matrix</b></summary>
**1. Preparar conta do bot**
* Use seu homeserver preferido (ex.: `https://matrix.org` ou auto-hospedado)
* Crie um usuário bot e obtenha seu access token
**2. Configurar**
```json
{
"channels": {
"matrix": {
"enabled": true,
"homeserver": "https://matrix.org",
"user_id": "@your-bot:matrix.org",
"access_token": "YOUR_MATRIX_ACCESS_TOKEN",
"allow_from": []
}
}
}
```
**3. Executar**
```bash
picoclaw gateway
```
Para opções completas (`device_id`, `join_on_invite`, `group_trigger`, `placeholder`, `reasoning_channel_id`), veja o [Guia de Configuração do Canal Matrix](../channels/matrix/README.md).
</details>
<details>
<summary><b>LINE</b></summary>
**1. Criar uma Conta Oficial LINE**
- Acesse o [LINE Developers Console](https://developers.line.biz/)
- Crie um provider → Crie um canal Messaging API
- Copie o **Channel Secret** e o **Channel Access Token**
**2. Configurar**
```json
{
"channels": {
"line": {
"enabled": true,
"channel_secret": "YOUR_CHANNEL_SECRET",
"channel_access_token": "YOUR_CHANNEL_ACCESS_TOKEN",
"webhook_path": "/webhook/line",
"allow_from": []
}
}
}
```
> O webhook do LINE é servido no servidor Gateway compartilhado (`gateway.host`:`gateway.port`, padrão `127.0.0.1:18790`).
**3. Configurar URL do Webhook**
O LINE requer HTTPS para webhooks. Use um proxy reverso ou túnel:
```bash
# Exemplo com ngrok (porta padrão do gateway é 18790)
ngrok http 18790
```
Em seguida, defina a URL do Webhook no LINE Developers Console como `https://your-domain/webhook/line` e habilite **Use webhook**.
**4. Executar**
```bash
picoclaw gateway
```
> Em chats de grupo, o bot responde apenas quando @mencionado. As respostas citam a mensagem original.
</details>
<details>
<summary><b>WeCom (企业微信)</b></summary>
O PicoClaw suporta três tipos de integração WeCom:
**Opção 1: WeCom Bot (Bot)** - Configuração mais fácil, suporta chats de grupo
**Opção 2: WeCom App (App Personalizado)** - Mais recursos, mensagens proativas, apenas chat privado
**Opção 3: WeCom AI Bot (AI Bot)** - AI Bot oficial, respostas em streaming, suporta chat de grupo e privado
Veja o [Guia de Configuração do WeCom AI Bot](../channels/wecom/wecom_aibot/README.zh.md) para instruções detalhadas de configuração.
**Configuração Rápida - WeCom Bot:**
**1. Criar um bot**
* Acesse o Console de Administração WeCom → Chat de Grupo → Adicionar Bot de Grupo
* Copie a URL do webhook (formato: `https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx`)
**2. Configurar**
```json
{
"channels": {
"wecom": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_url": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY",
"webhook_path": "/webhook/wecom",
"allow_from": []
}
}
}
```
> O webhook do WeCom é servido no servidor Gateway compartilhado (`gateway.host`:`gateway.port`, padrão `127.0.0.1:18790`).
**Configuração Rápida - WeCom App:**
**1. Criar um aplicativo**
* Acesse o Console de Administração WeCom → Gerenciamento de Apps → Criar App
* Copie o **AgentId** e o **Secret**
* Acesse a página "Minha Empresa", copie o **CorpID**
**2. Configurar recebimento de mensagens**
* Nos detalhes do App, clique em "Receber Mensagem" → "Configurar API"
* Defina a URL como `http://your-server:18790/webhook/wecom-app`
* Gere o **Token** e o **EncodingAESKey**
**3. Configurar**
```json
{
"channels": {
"wecom_app": {
"enabled": true,
"corp_id": "wwxxxxxxxxxxxxxxxx",
"corp_secret": "YOUR_CORP_SECRET",
"agent_id": 1000002,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-app",
"allow_from": []
}
}
}
```
**4. Executar**
```bash
picoclaw gateway
```
> **Nota**: Os callbacks de webhook do WeCom são servidos na porta do Gateway (padrão 18790). Use um proxy reverso para HTTPS.
**Configuração Rápida - WeCom AI Bot:**
**1. Criar um AI Bot**
* Acesse o Console de Administração WeCom → Gerenciamento de Apps → AI Bot
* Nas configurações do AI Bot, configure a URL de callback: `http://your-server:18790/webhook/wecom-aibot`
* Copie o **Token** e clique em "Gerar Aleatoriamente" para o **EncodingAESKey**
**2. Configurar**
```json
{
"channels": {
"wecom_aibot": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_43_CHAR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-aibot",
"allow_from": [],
"welcome_message": "Hello! How can I help you?"
}
}
}
```
**3. Executar**
```bash
picoclaw gateway
```
> **Nota**: O WeCom AI Bot usa protocolo de streaming pull — sem preocupações com timeout de resposta. Tarefas longas (>30 segundos) mudam automaticamente para entrega via `response_url` push.
</details>
+217
View File
@@ -0,0 +1,217 @@
# ⚙️ Guia de Configuração
> Voltar ao [README](../../README.pt-br.md)
## ⚙️ Configuração
Arquivo de configuração: `~/.picoclaw/config.json`
### Variáveis de Ambiente
Você pode substituir os caminhos padrão usando variáveis de ambiente. Isso é útil para instalações portáteis, implantações em contêineres ou execução do picoclaw como serviço do sistema. Essas variáveis são independentes e controlam caminhos diferentes.
| Variável | Descrição | Caminho Padrão |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | Substitui o caminho para o arquivo de configuração. Isso indica diretamente ao picoclaw qual `config.json` carregar, ignorando todos os outros locais. | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | Substitui o diretório raiz para dados do picoclaw. Isso altera o local padrão do `workspace` e outros diretórios de dados. | `~/.picoclaw` |
**Exemplos:**
```bash
# Executar picoclaw usando um arquivo de configuração específico
# O caminho do workspace será lido de dentro desse arquivo de configuração
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# Executar picoclaw com todos os dados armazenados em /opt/picoclaw
# A configuração será carregada do padrão ~/.picoclaw/config.json
# O workspace será criado em /opt/picoclaw/workspace
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# Usar ambos para uma configuração totalmente personalizada
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### Layout do Workspace
O PicoClaw armazena dados no seu workspace configurado (padrão: `~/.picoclaw/workspace`):
```
~/.picoclaw/workspace/
├── sessions/ # Sessões de conversa e histórico
├── memory/ # Memória de longo prazo (MEMORY.md)
├── state/ # Estado persistente (último canal, etc.)
├── cron/ # Banco de dados de tarefas agendadas
├── skills/ # Skills personalizadas
├── AGENTS.md # Guia de comportamento do agente
├── HEARTBEAT.md # Prompts de tarefas periódicas (verificados a cada 30 min)
├── IDENTITY.md # Identidade do agente
├── SOUL.md # Alma do agente
└── USER.md # Preferências do usuário
```
### Fontes de Skills
Por padrão, as skills são carregadas de:
1. `~/.picoclaw/workspace/skills` (workspace)
2. `~/.picoclaw/skills` (global)
3. `<current-working-directory>/skills` (builtin)
Para configurações avançadas/de teste, você pode substituir o diretório raiz de skills builtin com:
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### Política Unificada de Execução de Comandos
- Comandos slash genéricos são executados através de um único caminho em `pkg/agent/loop.go` via `commands.Executor`.
- Os adaptadores de canal não consomem mais comandos genéricos localmente; eles encaminham o texto de entrada para o caminho bus/agent. O Telegram ainda registra automaticamente os comandos suportados na inicialização.
- Comando slash desconhecido (por exemplo `/foo`) passa para o processamento normal do LLM.
- Comando registrado mas não suportado no canal atual (por exemplo `/show` no WhatsApp) retorna um erro explícito ao usuário e interrompe o processamento.
### 🔒 Sandbox de Segurança
O PicoClaw é executado em um ambiente sandbox por padrão. O agente só pode acessar arquivos e executar comandos dentro do workspace configurado.
#### Configuração Padrão
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"restrict_to_workspace": true
}
}
}
```
| Opção | Padrão | Descrição |
| ----------------------- | ----------------------- | ----------------------------------------- |
| `workspace` | `~/.picoclaw/workspace` | Diretório de trabalho do agente |
| `restrict_to_workspace` | `true` | Restringir acesso a arquivos/comandos ao workspace |
#### Ferramentas Protegidas
Quando `restrict_to_workspace: true`, as seguintes ferramentas são isoladas:
| Ferramenta | Função | Restrição |
| ------------- | ---------------- | -------------------------------------- |
| `read_file` | Ler arquivos | Apenas arquivos dentro do workspace |
| `write_file` | Escrever arquivos| Apenas arquivos dentro do workspace |
| `list_dir` | Listar diretórios| Apenas diretórios dentro do workspace |
| `edit_file` | Editar arquivos | Apenas arquivos dentro do workspace |
| `append_file` | Anexar a arquivos| Apenas arquivos dentro do workspace |
| `exec` | Executar comandos| Caminhos de comando devem estar dentro do workspace |
#### Proteção Adicional do Exec
Mesmo com `restrict_to_workspace: false`, a ferramenta `exec` bloqueia estes comandos perigosos:
* `rm -rf`, `del /f`, `rmdir /s` — Exclusão em massa
* `format`, `mkfs`, `diskpart` — Formatação de disco
* `dd if=` — Imagem de disco
* Escrita em `/dev/sd[a-z]` — Escritas diretas em disco
* `shutdown`, `reboot`, `poweroff` — Desligamento do sistema
* Fork bomb `:(){ :|:& };:`
### Controle de Acesso a Arquivos
| Config Key | Type | Default | Description |
|------------|------|---------|-------------|
| `tools.allow_read_paths` | string[] | `[]` | Additional paths allowed for reading outside workspace |
| `tools.allow_write_paths` | string[] | `[]` | Additional paths allowed for writing outside workspace |
### Segurança do Exec
| Config Key | Type | Default | Description |
|------------|------|---------|-------------|
| `tools.exec.allow_remote` | bool | `false` | Allow exec tool from remote channels (Telegram/Discord etc.) |
| `tools.exec.enable_deny_patterns` | bool | `true` | Enable dangerous command interception |
| `tools.exec.custom_deny_patterns` | string[] | `[]` | Custom regex patterns to block |
| `tools.exec.custom_allow_patterns` | string[] | `[]` | Custom regex patterns to allow |
> **Nota de Segurança:** A proteção contra symlinks é habilitada por padrão — todos os caminhos de arquivo são resolvidos através de `filepath.EvalSymlinks` antes da correspondência com a whitelist, prevenindo ataques de escape via symlink.
#### Limitação Conhecida: Processos Filhos de Ferramentas de Build
O guard de segurança do exec inspeciona apenas a linha de comando que o PicoClaw executa diretamente. Ele não inspeciona recursivamente processos filhos gerados por ferramentas de desenvolvimento permitidas como `make`, `go run`, `cargo`, `npm run` ou scripts de build personalizados.
Isso significa que um comando de nível superior ainda pode compilar ou executar outros binários após passar pela verificação inicial do guard. Na prática, trate scripts de build, Makefiles, scripts de pacotes e binários gerados como código executável que precisa do mesmo nível de revisão que um comando shell direto.
Para ambientes de maior risco:
* Revise scripts de build antes da execução.
* Prefira aprovação/revisão manual para fluxos de trabalho de compilação e execução.
* Execute o PicoClaw dentro de um contêiner ou VM se precisar de isolamento mais forte do que o guard integrado oferece.
#### Exemplos de Erro
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (path outside working dir)}
```
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)}
```
#### Desabilitando Restrições (Risco de Segurança)
Se você precisar que o agente acesse caminhos fora do workspace:
**Método 1: Arquivo de configuração**
```json
{
"agents": {
"defaults": {
"restrict_to_workspace": false
}
}
}
```
**Método 2: Variável de ambiente**
```bash
export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false
```
> ⚠️ **Aviso**: Desabilitar esta restrição permite que o agente acesse qualquer caminho no seu sistema. Use com cautela apenas em ambientes controlados.
#### Consistência do Limite de Segurança
A configuração `restrict_to_workspace` se aplica consistentemente em todos os caminhos de execução:
| Caminho de Execução | Limite de Segurança |
| -------------------- | ---------------------------- |
| Main Agent | `restrict_to_workspace` ✅ |
| Subagent / Spawn | Herda a mesma restrição ✅ |
| Heartbeat tasks | Herda a mesma restrição ✅ |
Todos os caminhos compartilham a mesma restrição de workspace — não há como contornar o limite de segurança através de subagentes ou tarefas agendadas.
### Heartbeat (Tarefas Periódicas)
O PicoClaw pode executar tarefas periódicas automaticamente. Crie um arquivo `HEARTBEAT.md` no seu workspace:
```markdown
# Tarefas Periódicas
- Verificar meu e-mail para mensagens importantes
- Revisar meu calendário para eventos próximos
- Verificar a previsão do tempo
```
O agente lerá este arquivo a cada 30 minutos (configurável) e executará quaisquer tarefas usando as ferramentas disponíveis.
#### Tarefas Assíncronas com Spawn
Para tarefas de longa duração (busca na web, chamadas de API), use a ferramenta `spawn` para criar um **subagente**:
```markdown
# Tarefas Periódicas
```
+166
View File
@@ -0,0 +1,166 @@
# 🐳 Docker e Início Rápido
> Voltar ao [README](../../README.pt-br.md)
## 🐳 Docker Compose
Você também pode executar o PicoClaw usando Docker Compose sem instalar nada localmente.
```bash
# 1. Clone este repositório
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
# 2. Primeira execução — gera automaticamente docker/data/config.json e encerra
docker compose -f docker/docker-compose.yml --profile gateway up
# O contêiner exibe "First-run setup complete." e para.
# 3. Configure suas chaves de API
vim docker/data/config.json # Defina as chaves de API dos provedores, tokens de bot, etc.
# 4. Iniciar
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Usuários Docker**: Por padrão, o Gateway escuta em `127.0.0.1`, que não é acessível a partir do host. Se você precisar acessar os endpoints de saúde ou expor portas, defina `PICOCLAW_GATEWAY_HOST=0.0.0.0` no seu ambiente ou atualize o `config.json`.
```bash
# 5. Verificar logs
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. Parar
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Modo Launcher (Console Web)
A imagem `launcher` inclui os três binários (`picoclaw`, `picoclaw-launcher`, `picoclaw-launcher-tui`) e inicia o console web por padrão, que fornece uma interface baseada em navegador para configuração e chat.
```bash
docker compose -f docker/docker-compose.yml --profile launcher up -d
```
Abra http://localhost:18800 no seu navegador. O launcher gerencia o processo do gateway automaticamente.
> [!WARNING]
> O console web ainda não suporta autenticação. Evite expô-lo na internet pública.
### Modo Agent (One-shot)
```bash
# Fazer uma pergunta
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "What is 2+2?"
# Modo interativo
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### Atualização
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
### 🚀 Início Rápido
> [!TIP]
> Configure sua chave de API em `~/.picoclaw/config.json`. Obtenha chaves de API: [Volcengine (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM). A busca na web é opcional — obtenha gratuitamente uma [API Tavily](https://tavily.com) (1000 consultas gratuitas/mês) ou [API Brave Search](https://brave.com/search/api) (2000 consultas gratuitas/mês).
**1. Inicializar**
```bash
picoclaw onboard
```
**2. Configurar** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"enabled": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "YOUR_PERPLEXITY_API_KEY",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://your-searxng-instance:8888",
"max_results": 5
}
}
}
}
```
> **Novo**: O formato de configuração `model_list` permite adicionar provedores sem alteração de código. Veja [Configuração de Modelos](#configuração-de-modelos-model_list) para detalhes.
> `request_timeout` é opcional e usa segundos. Se omitido ou definido como `<= 0`, o PicoClaw usa o timeout padrão (120s).
**3. Obter chaves de API**
* **Provedor LLM**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **Busca na Web** (opcional):
* [Brave Search](https://brave.com/search/api) - Pago ($5/1000 consultas, ~$5-6/mês)
* [Perplexity](https://www.perplexity.ai) - Busca com IA e interface de chat
* [SearXNG](https://github.com/searxng/searxng) - Metabuscador auto-hospedado (gratuito, sem necessidade de chave de API)
* [Tavily](https://tavily.com) - Otimizado para agentes de IA (1000 requisições/mês)
* DuckDuckGo - Fallback integrado (sem necessidade de chave de API)
> **Nota**: Veja `config.example.json` para um modelo de configuração completo.
**4. Conversar**
```bash
picoclaw agent -m "What is 2+2?"
```
Pronto! Você tem um assistente de IA funcionando em 2 minutos.
---
+434
View File
@@ -0,0 +1,434 @@
# 🔌 Provedores e Configuração de Modelos
> Voltar ao [README](../../README.pt-br.md)
### Provedores
> [!NOTE]
> O Groq fornece transcrição de voz gratuita via Whisper. Se configurado, mensagens de áudio de qualquer canal serão automaticamente transcritas no nível do agente.
| Provider | Purpose | Get API Key |
| ------------ | --------------------------------------- | ------------------------------------------------------------ |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](https://bigmodel.cn) |
| `volcengine` | LLM (Volcengine direct)                 | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (Qwen direct) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras direct) | [cerebras.ai](https://cerebras.ai) |
| `vivgrid` | LLM (Vivgrid direct) | [vivgrid.com](https://vivgrid.com) |
| `moonshot` | LLM (Kimi/Moonshot direct) | [platform.moonshot.cn](https://platform.moonshot.cn) |
| `minimax` | LLM (Minimax direct) | [platform.minimaxi.com](https://platform.minimaxi.com) |
| `avian` | LLM (Avian direct) | [avian.io](https://avian.io) |
| `mistral` | LLM (Mistral direct) | [console.mistral.ai](https://console.mistral.ai) |
| `longcat` | LLM (Longcat direct) | [longcat.ai](https://longcat.ai) |
| `modelscope` | LLM (ModelScope direct) | [modelscope.cn](https://modelscope.cn) |
### Configuração de Modelos (model_list)
> **Novidade?** O PicoClaw agora usa uma abordagem de configuração **centrada no modelo**. Basta especificar o formato `vendor/model` (ex.: `zhipu/glm-4.7`) para adicionar novos provedores — **sem necessidade de alteração de código!**
Este design também permite **suporte multi-agente** com seleção flexível de provedores:
- **Agentes diferentes, provedores diferentes**: Cada agente pode usar seu próprio provedor LLM
- **Fallback de modelos**: Configure modelos primários e de fallback para resiliência
- **Balanceamento de carga**: Distribua requisições entre múltiplos endpoints
- **Configuração centralizada**: Gerencie todos os provedores em um só lugar
#### 📋 Todos os Vendors Suportados
| Vendor | `model` Prefix | Default API Base | Protocol | API Key |
| ------------------- | ----------------- |-----------------------------------------------------| --------- | ---------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [Get Key](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [Get Key](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [Get Key](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [Get Key](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [Get Key](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [Get Key](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [Get Key](https://platform.moonshot.cn) |
| **通义千问 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [Get Key](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [Get Key](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | Local (no key needed) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [Get Key](https://openrouter.ai/keys) |
| **LiteLLM Proxy** | `litellm/` | `http://localhost:4000/v1` | OpenAI | Your LiteLLM proxy key |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | Local |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [Get Key](https://cerebras.ai) |
| **VolcEngine (Doubao)** | `volcengine/` | `https://ark.cn-beijing.volces.com/api/v3` | OpenAI | [Get Key](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [Get Key](https://www.byteplus.com) |
| **Vivgrid** | `vivgrid/` | `https://api.vivgrid.com/v1` | OpenAI | [Get Key](https://vivgrid.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [Get Key](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [Get Token](https://modelscope.cn/my/tokens) |
| **Antigravity** | `antigravity/` | Google Cloud | Custom | OAuth only |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### Configuração Básica
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### Exemplos por Vendor
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**VolcEngine (Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (com chave de API)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
}
```
> Execute `picoclaw auth login --provider anthropic` para colar seu token de API.
**Anthropic Messages API (formato nativo)**
Para acesso direto à API Anthropic ou endpoints personalizados que suportam apenas o formato de mensagem nativo da Anthropic:
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> Use o protocolo `anthropic-messages` quando:
> - Usar proxies de terceiros que suportam apenas o endpoint nativo `/v1/messages` da Anthropic (não o compatível com OpenAI `/v1/chat/completions`)
> - Conectar a serviços como MiniMax, Synthetic que requerem o formato de mensagem nativo da Anthropic
> - O protocolo `anthropic` existente retorna erros 404 (indicando que o endpoint não suporta formato compatível com OpenAI)
>
> **Nota:** O protocolo `anthropic` usa formato compatível com OpenAI (`/v1/chat/completions`), enquanto `anthropic-messages` usa o formato nativo da Anthropic (`/v1/messages`). Escolha com base no formato suportado pelo seu endpoint.
**Ollama (local)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**Proxy/API Personalizado**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
**LiteLLM Proxy**
```json
{
"model_name": "lite-gpt4",
"model": "litellm/lite-gpt4",
"api_base": "http://localhost:4000/v1",
"api_key": "sk-..."
}
```
O PicoClaw remove apenas o prefixo externo `litellm/` antes de enviar a requisição, então aliases de proxy como `litellm/lite-gpt4` enviam `lite-gpt4`, enquanto `litellm/openai/gpt-4o` envia `openai/gpt-4o`.
#### Balanceamento de Carga
Configure múltiplos endpoints para o mesmo nome de modelo — o PicoClaw fará automaticamente round-robin entre eles:
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### Migração da Configuração Legacy `providers`
A configuração antiga `providers` está **descontinuada** mas ainda é suportada para compatibilidade retroativa.
**Configuração Antiga (descontinuada):**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**Configuração Nova (recomendada):**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
Para um guia de migração detalhado, veja [docs/migration/model-list-migration.md](../migration/model-list-migration.md).
### Arquitetura de Provedores
O PicoClaw roteia provedores por família de protocolo:
- Protocolo compatível com OpenAI: OpenRouter, gateways compatíveis com OpenAI, Groq, Zhipu e endpoints estilo vLLM.
- Protocolo Anthropic: Comportamento nativo da API Claude.
- Caminho Codex/OAuth: Rota de autenticação OAuth/token da OpenAI.
Isso mantém o runtime leve enquanto torna novos backends compatíveis com OpenAI basicamente uma operação de configuração (`api_base` + `api_key`).
<details>
<summary><b>Zhipu</b></summary>
**1. Obter chave de API e URL base**
* Obtenha a [chave de API](https://bigmodel.cn/usercenter/proj-mgmt/apikeys)
**2. Configurar**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. Executar**
```bash
picoclaw agent -m "Hello"
```
</details>
<details>
<summary><b>Exemplo de configuração completa</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false,
"bridge_url": "ws://localhost:3001",
"use_native": false,
"session_store_path": "",
"allow_from": []
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "BSA...",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://localhost:8888",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
---
## 📝 Comparação de Chaves de API
| Service | Pricing | Use Case |
| ---------------- | ------------------------ | ------------------------------------- |
| **OpenRouter** | Free: 200K tokens/month | Multiple models (Claude, GPT-4, etc.) |
| **Volcengine CodingPlan** | ¥9.9/first month | Best for Chinese users, multiple SOTA models (Doubao, DeepSeek, etc.) |
| **Zhipu** | Free: 200K tokens/month | Suitable for Chinese users |
| **Brave Search** | $5/1000 queries | Web search functionality |
| **SearXNG** | Free (self-hosted) | Privacy-focused metasearch (70+ engines) |
| **Groq** | Free tier available | Fast inference (Llama, Mixtral) |
| **Cerebras** | Free tier available | Fast inference (Llama, Qwen, etc.) |
| **LongCat** | Free: up to 5M tokens/day | Fast inference |
| **ModelScope** | Free: 2000 requests/day | Inference (Qwen, GLM, DeepSeek, etc.) |
---
<div align="center">
<img src="../../assets/logo.jpg" alt="PicoClaw Meme" width="512">
</div>
+61
View File
@@ -0,0 +1,61 @@
# 🔄 Tarefas Assíncronas e Spawn
> Voltar ao [README](../../README.pt-br.md)
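Para tarefas de longa duração (busca na web, chamadas de API), use a ferramenta `spawn` para criar um **subagente**. Exemplo de `HEARTBEAT.md`:
```markdown
# Tarefas Periódicas
## Tarefas Rápidas (resposta direta)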
- Informar a hora atual
## Tarefas Longas (usar spawn para assíncrono)
- Pesquisar na web notícias sobre IA e resumir
- Verificar e-mail e relatar mensagens importantes
```
**Comportamentos principais:**
| Recurso                   | Descrição                                                                 |
| ------------------------- | ------------------------------------------------------------------------- |
| **spawn**                 | Cria um subagente assíncrono, sem bloquear o heartbeat                    |
| **Contexto independente** | O subagente tem seu próprio contexto, sem histórico de sessão             |
| **Ferramenta message**    | O subagente se comunica diretamente com o usuário via ferramenta message  |
| **Não bloqueante**        | Após o spawn, o heartbeat continua para a próxima tarefa                  |
#### Como Funciona a Comunicação do Subagente
```
Heartbeat é acionado
Agente lê HEARTBEAT.md
Para tarefa longa: spawn subagente
↓ ↓
Continua para próxima tarefa Subagente trabalha independentemente
↓ ↓
Todas as tarefas concluídas Subagente usa ferramenta "message"
↓ ↓
Responde HEARTBEAT_OK Usuário recebe resultado diretamente
```
O subagente tem acesso a ferramentas (message, web_search, etc.) e pode se comunicar com o usuário independentemente sem passar pelo agente principal.
**Configuração:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| Opção      | Padrão | Descrição                                      |
| ---------- | ------ | ---------------------------------------------- |
| `enabled`  | `true` | Habilitar/desabilitar o heartbeat              |
| `interval` | `30`   | Intervalo de verificação em minutos (mín.: 5)  |
**Variáveis de ambiente:**
* `PICOCLAW_HEARTBEAT_ENABLED=false` para desabilitar
* `PICOCLAW_HEARTBEAT_INTERVAL=60` para alterar o intervalo
+336
View File
@@ -0,0 +1,336 @@
# 🔧 Configuração de Ferramentas
> Voltar ao [README](../../README.pt-br.md)
A configuração de ferramentas do PicoClaw está localizada no campo `tools` do `config.json`.
## Estrutura de diretórios
```json
{
"tools": {
"web": {
...
},
"mcp": {
...
},
"exec": {
...
},
"cron": {
...
},
"skills": {
...
}
}
}
```
## Ferramentas Web
As ferramentas web são usadas para pesquisa e busca de páginas web.
### Web Fetcher
Configurações gerais para busca e processamento de conteúdo de páginas web.
| Config | Tipo | Padrão | Descrição |
|---------------------|--------|---------------|-----------------------------------------------------------------------------------------------|
| `enabled` | bool | true | Habilitar a capacidade de busca de páginas web. |
| `fetch_limit_bytes` | int | 10485760 | Tamanho máximo do payload da página web a ser buscado, em bytes (padrão é 10MB). |
| `format` | string | "plaintext" | Formato de saída do conteúdo buscado. Opções: `plaintext` ou `markdown` (recomendado). |
### Brave
| Config | Tipo | Padrão | Descrição |
|---------------|--------|--------|----------------------------|
| `enabled` | bool | false | Habilitar pesquisa Brave |
| `api_key` | string | - | Chave API do Brave Search |
| `max_results` | int | 5 | Número máximo de resultados |
### DuckDuckGo
| Config | Tipo | Padrão | Descrição |
|---------------|------|--------|--------------------------------|
| `enabled` | bool | true | Habilitar pesquisa DuckDuckGo |
| `max_results` | int | 5 | Número máximo de resultados |
### Perplexity
| Config | Tipo | Padrão | Descrição |
|---------------|--------|--------|--------------------------------|
| `enabled` | bool | false | Habilitar pesquisa Perplexity |
| `api_key` | string | - | Chave API do Perplexity |
| `max_results` | int | 5 | Número máximo de resultados |
## Ferramenta Exec
A ferramenta exec é usada para executar comandos shell.
| Config | Tipo | Padrão | Descrição |
|------------------------|-------|--------|-------------------------------------------------|
| `enable_deny_patterns` | bool | true | Habilitar bloqueio padrão de comandos perigosos |
| `custom_deny_patterns` | array | [] | Padrões de negação personalizados (expressões regulares) |
### Funcionalidade
- **`enable_deny_patterns`**: Defina como `false` para desabilitar completamente os padrões de bloqueio de comandos perigosos padrão
- **`custom_deny_patterns`**: Adicione padrões regex de negação personalizados; comandos correspondentes serão bloqueados
### Padrões de comandos bloqueados por padrão
Por padrão, o PicoClaw bloqueia os seguintes comandos perigosos:
- Comandos de exclusão: `rm -rf`, `del /f/q`, `rmdir /s`
- Operações de disco: `format`, `mkfs`, `diskpart`, `dd if=`, escrita em `/dev/sd*`
- Operações do sistema: `shutdown`, `reboot`, `poweroff`
- Substituição de comandos: `$()`, `${}`, crases
- Pipe para shell: `| sh`, `| bash`
- Escalação de privilégios: `sudo`, `chmod`, `chown`
- Controle de processos: `pkill`, `killall`, `kill -9`
- Operações remotas: `curl | sh`, `wget | sh`, `ssh`
- Gerenciamento de pacotes: `apt`, `yum`, `dnf`, `npm install -g`, `pip install --user`
- Contêineres: `docker run`, `docker exec`
- Git: `git push`, `git force`
- Outros: `eval`, `source *.sh`
### Limitação arquitetural conhecida
O guarda exec apenas valida o comando de nível superior enviado ao PicoClaw. Ele **não** inspeciona recursivamente processos filhos gerados por ferramentas de build ou scripts após o início desse comando.
Exemplos de fluxos de trabalho que podem contornar o guarda de comando direto uma vez que o comando inicial é permitido:
- `make run`
- `go run ./cmd/...`
- `cargo run`
- `npm run build`
Isso significa que o guarda é útil para bloquear comandos diretos obviamente perigosos, mas **não** é um sandbox completo para pipelines de build não revisados. Se seu modelo de ameaça inclui código não confiável no workspace, use isolamento mais forte, como contêineres, VMs ou um fluxo de aprovação em torno de comandos de build e execução.
### Exemplo de configuração
```json
{
"tools": {
"exec": {
"enable_deny_patterns": true,
"custom_deny_patterns": [
"\\brm\\s+-r\\b",
"\\bkillall\\s+python"
]
}
}
}
```
## Ferramenta Cron
A ferramenta cron é usada para agendar tarefas periódicas.
| Config | Tipo | Padrão | Descrição |
|------------------------|------|--------|-----------------------------------------------------|
| `exec_timeout_minutes` | int | 5 | Tempo limite de execução em minutos, 0 significa sem limite |
## Ferramenta MCP
A ferramenta MCP permite a integração com servidores Model Context Protocol externos.
### Descoberta de ferramentas (carregamento preguiçoso)
Ao conectar a vários servidores MCP, expor centenas de ferramentas simultaneamente pode esgotar a janela de contexto do LLM e aumentar os custos de API. O recurso **Discovery** resolve isso mantendo as ferramentas MCP *ocultas* por padrão.
Em vez de carregar todas as ferramentas, o LLM recebe uma ferramenta de pesquisa leve (usando correspondência de palavras-chave BM25 ou Regex). Quando o LLM precisa de uma capacidade específica, ele pesquisa a biblioteca oculta. As ferramentas correspondentes são então temporariamente "desbloqueadas" e injetadas no contexto por um número configurado de turnos (`ttl`).
### Configuração global
| Config | Tipo | Padrão | Descrição |
|-------------|--------|--------|----------------------------------------------|
| `enabled` | bool | false | Habilitar integração MCP globalmente |
| `discovery` | object | `{}` | Configuração de descoberta de ferramentas (veja abaixo) |
| `servers` | object | `{}` | Mapa de nome do servidor para configuração do servidor |
### Configuração Discovery (`discovery`)
| Config | Tipo | Padrão | Descrição |
|----------------------|------|--------|-----------------------------------------------------------------------------------------------------------------------------------|
| `enabled` | bool | false | Se true, as ferramentas MCP ficam ocultas e são carregadas sob demanda via pesquisa. Se false, todas as ferramentas são carregadas |
| `ttl` | int | 5 | Número de turnos de conversa que uma ferramenta descoberta permanece desbloqueada |
| `max_search_results` | int | 5 | Número máximo de ferramentas retornadas por consulta de pesquisa |
| `use_bm25` | bool | true | Habilitar a ferramenta de pesquisa por linguagem natural/palavras-chave (`tool_search_tool_bm25`). **Aviso**: consome mais recursos que a pesquisa regex |
| `use_regex` | bool | false | Habilitar a ferramenta de pesquisa por padrão regex (`tool_search_tool_regex`) |
> **Nota:** Se `discovery.enabled` for `true`, você **deve** habilitar pelo menos um mecanismo de pesquisa (`use_bm25` ou `use_regex`),
> caso contrário a aplicação falhará ao iniciar.
### Configuração por servidor
| Config | Tipo | Obrigatório | Descrição |
|------------|--------|-------------|--------------------------------------------|
| `enabled` | bool | sim | Habilitar este servidor MCP |
| `type` | string | não | Tipo de transporte: `stdio`, `sse`, `http` |
| `command` | string | stdio | Comando executável para transporte stdio |
| `args` | array | não | Argumentos do comando para transporte stdio |
| `env` | object | não | Variáveis de ambiente para processo stdio |
| `env_file` | string | não | Caminho para arquivo de ambiente para processo stdio |
| `url` | string | sse/http | URL do endpoint para transporte `sse`/`http` |
| `headers` | object | não | Cabeçalhos HTTP para transporte `sse`/`http` |
### Comportamento do transporte
- Se `type` for omitido, o transporte é detectado automaticamente:
- `url` está definido → `sse`
- `command` está definido → `stdio`
- `http` e `sse` ambos usam `url` + `headers` opcionais.
- `env` e `env_file` são aplicados apenas a servidores `stdio`.
### Exemplos de configuração
#### 1) Servidor MCP Stdio
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"filesystem": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/tmp"
]
}
}
}
}
}
```
#### 2) Servidor MCP remoto SSE/HTTP
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"remote-mcp": {
"enabled": true,
"type": "sse",
"url": "https://example.com/mcp",
"headers": {
"Authorization": "Bearer YOUR_TOKEN"
}
}
}
}
}
}
```
#### 3) Configuração MCP massiva com descoberta de ferramentas habilitada
*Neste exemplo, o LLM verá apenas o `tool_search_tool_bm25`. Ele pesquisará e desbloqueará ferramentas do GitHub ou Postgres dinamicamente apenas quando solicitado pelo usuário.*
```json
{
"tools": {
"mcp": {
"enabled": true,
"discovery": {
"enabled": true,
"ttl": 5,
"max_search_results": 5,
"use_bm25": true,
"use_regex": false
},
"servers": {
"github": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-github"
],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
}
},
"postgres": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-postgres",
"postgresql://user:password@localhost/dbname"
]
},
"slack": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-slack"
],
"env": {
"SLACK_BOT_TOKEN": "YOUR_SLACK_BOT_TOKEN",
"SLACK_TEAM_ID": "YOUR_SLACK_TEAM_ID"
}
}
}
}
}
}
```
## Ferramenta Skills
A ferramenta skills configura a descoberta e instalação de habilidades via registros como o ClawHub.
### Registros
| Config | Tipo | Padrão | Descrição |
|------------------------------------|--------|-----------------------|----------------------------------------------|
| `registries.clawhub.enabled` | bool | true | Habilitar registro ClawHub |
| `registries.clawhub.base_url` | string | `https://clawhub.ai` | URL base do ClawHub |
| `registries.clawhub.auth_token` | string | `""` | Token Bearer opcional para limites de taxa mais altos |
| `registries.clawhub.search_path` | string | `/api/v1/search` | Caminho da API de pesquisa |
| `registries.clawhub.skills_path` | string | `/api/v1/skills` | Caminho da API de Skills |
| `registries.clawhub.download_path` | string | `/api/v1/download` | Caminho da API de download |
### Exemplo de configuração
```json
{
"tools": {
"skills": {
"registries": {
"clawhub": {
"enabled": true,
"base_url": "https://clawhub.ai",
"auth_token": "",
"search_path": "/api/v1/search",
"skills_path": "/api/v1/skills",
"download_path": "/api/v1/download"
}
}
}
}
}
```
## Variáveis de ambiente
Todas as opções de configuração podem ser substituídas via variáveis de ambiente com o formato `PICOCLAW_TOOLS_<SECTION>_<KEY>`:
Por exemplo:
- `PICOCLAW_TOOLS_WEB_BRAVE_ENABLED=true`
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
Nota: Configuração de tipo mapa aninhado (por exemplo `tools.mcp.servers.<name>.*`) é configurada no `config.json` em vez de variáveis de ambiente.
+45
View File
@@ -0,0 +1,45 @@
# 🐛 Solução de Problemas
> Voltar ao [README](../../README.pt-br.md)
## "model ... not found in model_list" ou OpenRouter "free is not a valid model ID"
**Sintoma:** Você vê um dos seguintes erros:
- `Error creating provider: model "openrouter/free" not found in model_list`
- OpenRouter retorna 400: `"free is not a valid model ID"`
**Causa:** O campo `model` na sua entrada `model_list` é o que é enviado para a API. Para o OpenRouter, você deve usar o ID de modelo **completo**, não uma abreviação.
- **Errado:** `"model": "free"` → OpenRouter recebe `free` e rejeita.
- **Correto:** `"model": "openrouter/free"` → OpenRouter recebe `openrouter/free` (roteamento automático do nível gratuito).
**Correção:** Em `~/.picoclaw/config.json` (ou seu caminho de configuração):
1. **agents.defaults.model** deve corresponder a um `model_name` em `model_list` (ex.: `"openrouter-free"`).
2. O **model** dessa entrada deve ser um ID de modelo OpenRouter válido, por exemplo:
- `"openrouter/free"` — nível gratuito automático
- `"google/gemini-2.0-flash-exp:free"`
- `"meta-llama/llama-3.1-8b-instruct:free"`
Exemplo:
```json
{
"agents": {
"defaults": {
"model": "openrouter-free"
}
},
"model_list": [
{
"model_name": "openrouter-free",
"model": "openrouter/free",
"api_key": "sk-or-v1-YOUR_OPENROUTER_KEY",
"api_base": "https://openrouter.ai/api/v1"
}
]
}
```
Obtenha sua chave em [OpenRouter Keys](https://openrouter.ai/keys).
+61
View File
@@ -0,0 +1,61 @@
# 🔄 Spawn & Async Tasks
> Back to [README](../README.md)
```markdown
## Quick Tasks (respond directly)
- Report current time
## Long Tasks (use spawn for async)
- Search the web for AI news and summarize
- Check email and report important messages
```
**Key behaviors:**
| Feature | Description |
| ----------------------- | --------------------------------------------------------- |
| **spawn** | Creates async subagent, doesn't block heartbeat |
| **Independent context** | Subagent has its own context, no session history |
| **message tool** | Subagent communicates with user directly via message tool |
| **Non-blocking** | After spawning, heartbeat continues to next task |
#### How Subagent Communication Works
```
Heartbeat triggers
    ↓
Agent reads HEARTBEAT.md
    ↓
For long task: spawn subagent
    ↓                           ↓
Continue to next task      Subagent works independently
    ↓                           ↓
All tasks done             Subagent uses "message" tool
    ↓                           ↓
Respond HEARTBEAT_OK       User receives result directly
```
The subagent has access to tools (message, web_search, etc.) and can communicate with the user independently without going through the main agent.
**Configuration:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| Option | Default | Description |
| ---------- | ------- | ---------------------------------- |
| `enabled` | `true` | Enable/disable heartbeat |
| `interval` | `30` | Check interval in minutes (min: 5) |
**Environment variables:**
* `PICOCLAW_HEARTBEAT_ENABLED=false` to disable
* `PICOCLAW_HEARTBEAT_INTERVAL=60` to change interval
+280
View File
@@ -0,0 +1,280 @@
# 🔄 SubTurn Mechanism
> Back to [README](../README.md)
## Overview
The `SubTurn` mechanism is a core feature in PicoClaw that allows tools to spawn isolated, nested agent loops to handle complex sub-tasks.
By using a SubTurn, an agent can break down a problem and run a separate LLM invocation in an independent, ephemeral session. This ensures that intermediate reasoning, background tasks, or sub-agent outputs do not pollute the main conversation history.
## Core Capabilities
- **Context Isolation**: Each SubTurn uses an `ephemeralSessionStore`. Its message history does not leak into the parent task and is destroyed upon completion. The ephemeral session holds at most **50 messages**; older messages are automatically truncated when this limit is reached.
- **Depth & Concurrency Limits**: Prevents infinite loops and resource exhaustion.
- **Maximum Depth**: Up to 3 nested levels.
- **Maximum Concurrency**: Up to 5 concurrent sub-turns per parent turn (managed via a semaphore with a 30-second timeout).
- **Context Protection**: Supports soft context limits (`MaxContextRunes`). It proactively truncates old messages (while preserving system prompts and recent context) before hitting the provider's hard context window limit.
- **Error Recovery**: Automatically detects and recovers from provider context length exceeded errors and truncation errors by compressing history and retrying.
## Configuration (`SubTurnConfig`)
When spawning a SubTurn, you must provide a `SubTurnConfig`:
| Field | Type | Description |
| :--- | :--- | :--- |
| `Model` | `string` | The LLM model to use for the sub-turn (e.g., `gpt-4o-mini`). **Required.** |
| `Tools` | `[]tools.Tool` | Tools granted to the sub-turn. If empty, it inherits the parent's tools. |
| `SystemPrompt` | `string` | The system instruction for the sub-task. |
| `MaxTokens` | `int` | Maximum tokens for the generated response. |
| `Async` | `bool` | Controls the result delivery mode (Synchronous vs. Asynchronous). |
| `Critical` | `bool` | If `true`, the sub-turn continues running even if the parent finishes gracefully. |
| `Timeout` | `time.Duration` | Maximum execution time (default: 5 minutes). |
| `MaxContextRunes`| `int` | Soft context limit. `0` = auto-calculate (75% of model's context window, recommended), `-1` = no limit (disable soft truncation, rely only on hard context error recovery), `>0` = use specified rune limit. |
> **Note:** The `Async` flag does **not** make the call non-blocking. It only controls whether the result is also delivered to the parent's `pendingResults` channel. Both modes block the caller until the sub-turn completes. For true non-blocking execution, the caller must spawn the sub-turn in a separate goroutine.
## Execution Modes
### Synchronous (`Async: false`)
This is the standard mode where the caller needs the result immediately to proceed.
- The caller blocks until the sub-turn completes.
- The result is **only** returned directly via the function return value.
- It is **not** delivered to the parent's pending results channel.
**Example:**
```go
cfg := agent.SubTurnConfig{
Model: "gpt-4o-mini",
SystemPrompt: "Analyze the provided codebase...",
Async: false,
}
result, err := agent.SpawnSubTurn(ctx, cfg)
// Process result immediately
```
### Asynchronous (`Async: true`)
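Used for background or parallel work where the parent turn collects results later. The call itself still blocks until the sub-turn completes; for true "fire-and-forget" behavior, invoke it from a separate goroutine.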
- The result is delivered to the parent turn's `pendingResults` channel.
- The result is **also** returned via the function return value (for consistency).
- The parent's Agent Loop will poll this channel in subsequent iterations and automatically inject the results into the ongoing conversation context as `[SubTurn Result]`.
**Example:**
```go
cfg := agent.SubTurnConfig{
Model: "gpt-4o-mini",
SystemPrompt: "Run a background security scan...",
Async: true,
}
result, err := agent.SpawnSubTurn(ctx, cfg)
// The result will also be injected into the parent loop later via channel
```
## Error Recovery and Retries
SubTurns implement automatic retry mechanisms for transient errors:
| Error Type | Max Retries | Recovery Action |
|:-----------|:------------|:----------------|
| Context Length Exceeded | 2 | Force compress history and retry |
| Response Truncated (`finish_reason="truncated"`) | 2 | Inject recovery prompt and retry |
### Truncation Recovery
When the LLM response is truncated (`finish_reason="truncated"`), SubTurn automatically:
1. Detects the truncation from `turnState.lastFinishReason`
2. Injects a recovery prompt: "Your previous response was truncated due to length. Please provide a shorter, complete response..."
3. Retries up to 2 times
### Context Error Recovery
When the provider returns a context length error (e.g., `context_length_exceeded`):
1. Force compresses the message history (drops oldest 50% of conversation)
2. Retries with the compressed context
3. Up to 2 retries before failing
## Lifecycle and Cancellation
SubTurns operate within an independent context but maintain a structural link to their parent `turnState`.
### Graceful Parent Finish
When the parent task finishes naturally (`Finish(false)`):
- **Non-critical** sub-turns receive a signal to exit gracefully without throwing an error.
- **Critical** (`Critical: true`) sub-turns continue running in the background. Once finished, their results are emitted as **Orphan Results** so the data is not lost.
### Hard Abort
When the parent task is forcefully aborted (e.g., user interrupts with `/stop`):
- A cascading cancellation is triggered, instantly terminating all child and grandchild sub-turns.
- The root turn's session history rolls back to the snapshot taken at turn start (`initialHistoryLength`), preventing dirty context. SubTurns are not affected by this rollback as they use ephemeral sessions that are discarded anyway.
## Agent Loop Integration
### Bus Draining During Processing
When a message enters the `Run()` loop, the agent starts a `drainBusToSteering` goroutine before calling `processMessage`. This goroutine runs concurrently with the entire processing lifecycle and continuously consumes any new inbound messages from the bus, redirecting them into the **steering queue** instead of dropping them.
This ensures that if a user sends a follow-up message while the agent is processing (including during SubTurn execution), the message is not lost — it will be picked up between tool call iterations via `dequeueSteeringMessages`.
The drain goroutine stops automatically when `processMessage` returns (via a cancellable context).
### Pending Result Polling
The agent loop polls for async SubTurn results at three points (twice per iteration, plus once at the end of the turn):
1. **Before the LLM call**: injects any arrived results as `[SubTurn Result]` messages into the conversation context.
2. **After all tool executions**: polls again during the tool loop to catch results that arrived during tool execution.
3. **After the final iteration**: one last poll before the turn ends to avoid losing late-arriving results.
### Turn State Tracking
All active root turns are registered in `AgentLoop.activeTurnStates` (`sync.Map`, keyed by session key). This allows `HardAbort` and `/subagents` observability commands to find and operate on active turns.
## Event Bus Integration
SubTurns emit specific events to the PicoClaw `EventBus` for observability and debugging:
| Event | When Emitted | Payload |
|:------|:-------------|:--------|
| `SubTurnSpawnEvent` | Sub-turn successfully initialized | `ParentID`, `ChildID`, `Config` |
| `SubTurnEndEvent` | Sub-turn finishes (success or error) | `ChildID`, `Result`, `Err` |
| `SubTurnResultDeliveredEvent` | Async result successfully delivered to parent | `ParentID`, `ChildID`, `Result` |
| `SubTurnOrphanResultEvent` | Result cannot be delivered (parent finished or channel full) | `ParentID`, `ChildID`, `Result` |
> **⚠️ POC Note:** The current `EventBus` implementation is `MockEventBus`, a placeholder that only prints events to stdout via `fmt.Printf`. It is not a production-grade event system. Do not rely on it for programmatic event consumption; a real EventBus integration is planned.
## API Reference
### SpawnSubTurn (Public Entry Point)
```go
func SpawnSubTurn(ctx context.Context, cfg SubTurnConfig) (*tools.ToolResult, error)
```
This is the exported package-level entry point for agent-internal code (e.g., tests, direct invocations). It retrieves `AgentLoop` and `turnState` from context and delegates to the internal `spawnSubTurn`.
**Requirements:**
- `AgentLoop` must be injected into context via `WithAgentLoop()`
- Parent `turnState` must exist in context (automatically set when called from tools)
**Returns:**
- `*tools.ToolResult`: Contains `ForLLM` field with the sub-turn's output
- `error`: One of the defined error types or context errors
### AgentLoopSpawner (Interface Implementation)
```go
type AgentLoopSpawner struct { al *AgentLoop }
func (s *AgentLoopSpawner) SpawnSubTurn(ctx context.Context, cfg tools.SubTurnConfig) (*tools.ToolResult, error)
```
This implements the `tools.SubTurnSpawner` interface for use by tools that need to spawn sub-turns without a direct import of the `agent` package (avoiding circular dependencies). It converts `tools.SubTurnConfig` → `agent.SubTurnConfig` before delegating to the internal `spawnSubTurn`.
### NewSubTurnSpawner
```go
func NewSubTurnSpawner(al *AgentLoop) *AgentLoopSpawner
```
Creates a new spawner instance for the given AgentLoop. Pass the returned value to `SpawnTool.SetSpawner()` or `SubagentTool.SetSpawner()` during tool registration.
### Continue
```go
func (al *AgentLoop) Continue(ctx context.Context, sessionKey string) error
```
Resumes an idle agent turn by injecting any queued steering messages as a new LLM iteration. Used when the agent is waiting and a deferred steering message needs to be processed without a new inbound message arriving.
## Context Propagation
SubTurn relies on context values for proper operation:
| Context Key | Purpose |
|:------------|:--------|
| `agentLoopKey` | Stores `*AgentLoop` for tool access and SubTurn spawning |
| `turnStateKey` | Stores `*turnState` for hierarchy tracking and result delivery |
### Injecting Dependencies
```go
// Before calling tools that may spawn SubTurns
ctx = withTurnState(ctx, turnState)
ctx = WithAgentLoop(ctx, agentLoop)
```
### Independent Child Context
**Important**: The child SubTurn uses an **independent context** derived from `context.Background()`, not from the parent context. This design choice:
- Allows critical SubTurns to continue after parent cancellation
- Prevents parent timeout from affecting child execution
- Child has its own timeout for self-protection (`Timeout` config or 5 minutes default)
## Error Types
| Error | Condition |
|:------|:----------|
| `ErrDepthLimitExceeded` | SubTurn depth exceeds 3 levels |
| `ErrInvalidSubTurnConfig` | Required field `Model` is empty |
| `ErrConcurrencyTimeout` | All 5 concurrency slots occupied for 30+ seconds |
| Context errors | Parent context cancelled during semaphore acquisition |
## Thread Safety
SubTurns are designed for concurrent execution:
- **Parent-child relationships**: Managed under mutex (`parentTS.mu.Lock()`)
- **Active turn tracking**: Uses `sync.Map` for concurrent access to `activeTurnStates`
- **ID generation**: Uses `atomic.Int64` for unique SubTurn IDs (format: `subturn-N`, globally monotonic per `AgentLoop` instance)
- **Result delivery**: Reads parent state under lock, releases before channel send (small race window acceptable)
## Orphan Results
An orphan result occurs when:
1. Parent turn finishes before the SubTurn completes
2. The `pendingResults` channel is full (buffer size: 16)
When a result becomes orphan:
- `SubTurnOrphanResultEvent` is emitted to EventBus
- The result is **NOT** delivered to the LLM context
- External systems can listen to this event for custom handling
### Preventing Orphan Results
- Use `Critical: true` for important SubTurns that must complete
- Monitor `SubTurnOrphanResultEvent` for observability
- Consider the 16-buffer limit when spawning many async SubTurns
## Tool Inheritance
### When `cfg.Tools` is empty:
- SubTurn inherits **all** tools from the parent agent
- Tools are registered in a new `ToolRegistry` instance
- Tool TTL is managed independently from parent
### When `cfg.Tools` is specified:
- Only the specified tools are available to the SubTurn
- Parent tools are **NOT** merged
- Use this to restrict SubTurn capabilities for security or focus
**Example - Restricted SubTurn:**
```go
cfg := agent.SubTurnConfig{
Model: "gpt-4o-mini",
Tools: []tools.Tool{readOnlyTool}, // Only read-only access
SystemPrompt: "Analyze the file structure...",
}
```
## Reference
| Constant | Value |
|:---------|:------|
| `maxSubTurnDepth` | 3 |
| `maxConcurrentSubTurns` | 5 |
| `concurrencyTimeout` | 30s |
| `defaultSubTurnTimeout` | 5m |
| `maxEphemeralHistorySize` | 50 messages |
| `pendingResults` buffer | 16 |
| `MaxContextRunes` default | 75% of model context window |
+9
View File
@@ -30,6 +30,15 @@ PicoClaw's tools configuration is located in the `tools` field of `config.json`.
Web tools are used for web search and fetching.
### Web Fetcher
General settings for fetching and processing webpage content.
| Config | Type | Default | Description |
|---------------------|--------|---------------|-----------------------------------------------------------------------------------------------|
| `enabled` | bool | true | Enable the webpage fetching capability. |
| `fetch_limit_bytes` | int | 10485760 | Maximum size of the webpage payload to fetch, in bytes (default is 10MB). |
| `format` | string | "plaintext" | Output format of the fetched content. Options: `plaintext` or `markdown` (recommended). |
### Brave
| Config | Type | Default | Description |
+427
View File
@@ -0,0 +1,427 @@
# 💬 Cấu Hình Ứng Dụng Chat
> Quay lại [README](../../README.vi.md)
## 💬 Ứng Dụng Chat
Trò chuyện với picoclaw của bạn qua Telegram, Discord, WhatsApp, Matrix, QQ, DingTalk, LINE, WeCom, Feishu, Slack, IRC, OneBot hoặc MaixCam
> **Lưu ý**: Tất cả các kênh dựa trên webhook (LINE, WeCom, v.v.) được phục vụ trên một máy chủ HTTP Gateway chung (`gateway.host`:`gateway.port`, mặc định `127.0.0.1:18790`). Không có port riêng cho từng kênh. Lưu ý: Feishu sử dụng chế độ WebSocket/SDK và không sử dụng máy chủ HTTP webhook chung.
| Channel | Setup |
| ------------ | ---------------------------------- |
| **Telegram** | Easy (just a token) |
| **Discord** | Easy (bot token + intents) |
| **WhatsApp** | Easy (native: QR scan; or bridge URL) |
| **Matrix** | Medium (homeserver + bot access token) |
| **QQ** | Easy (AppID + AppSecret) |
| **DingTalk** | Medium (app credentials) |
| **LINE** | Medium (credentials + webhook URL) |
| **WeCom AI Bot** | Medium (Token + AES key) |
| **Feishu** | Medium (App ID + Secret, WebSocket mode) |
| **Slack** | Medium (Bot token + App token) |
| **IRC** | Medium (server + TLS config) |
| **OneBot** | Medium (QQ via OneBot protocol) |
| **MaixCam** | Easy (Sipeed hardware integration) |
| **Pico** | Native PicoClaw protocol |
<details>
<summary><b>Telegram</b> (Khuyến nghị)</summary>
**1. Tạo bot**
* Mở Telegram, tìm `@BotFather`
* Gửi `/newbot`, làm theo hướng dẫn
* Sao chép token
**2. Cấu hình**
```json
{
"channels": {
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
> Lấy user ID của bạn từ `@userinfobot` trên Telegram.
**3. Chạy**
```bash
picoclaw gateway
```
**4. Menu lệnh Telegram (tự động đăng ký khi khởi động)**
PicoClaw hiện lưu trữ định nghĩa lệnh trong một registry chung. Khi khởi động, Telegram sẽ tự động đăng ký các lệnh bot được hỗ trợ (ví dụ `/start`, `/help`, `/show`, `/list`) để menu lệnh và hành vi runtime luôn đồng bộ.
Đăng ký menu lệnh Telegram vẫn là UX khám phá cục bộ của kênh; thực thi lệnh chung được xử lý tập trung trong vòng lặp agent qua commands executor.
Nếu đăng ký lệnh thất bại (lỗi tạm thời mạng/API), kênh vẫn khởi động và PicoClaw thử lại đăng ký trong nền.
</details>
<details>
<summary><b>Discord</b></summary>
**1. Tạo bot**
* Truy cập <https://discord.com/developers/applications>
* Tạo ứng dụng → Bot → Add Bot
* Sao chép bot token
**2. Bật intents**
* Trong cài đặt Bot, bật **MESSAGE CONTENT INTENT**
* (Tùy chọn) Bật **SERVER MEMBERS INTENT** nếu bạn muốn sử dụng danh sách cho phép dựa trên dữ liệu thành viên
**3. Lấy User ID**
* Cài đặt Discord → Nâng cao → bật **Developer Mode**
* Nhấp chuột phải vào avatar → **Copy User ID**
**4. Cấu hình**
```json
{
"channels": {
"discord": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
**5. Mời bot**
* OAuth2 → URL Generator
* Scopes: `bot`
* Bot Permissions: `Send Messages`, `Read Message History`
* Mở URL mời được tạo và thêm bot vào server của bạn
**Tùy chọn: Chế độ kích hoạt nhóm**
Mặc định bot phản hồi tất cả tin nhắn trong kênh server. Để giới hạn phản hồi chỉ khi @mention, thêm:
```json
{
"channels": {
"discord": {
"group_trigger": { "mention_only": true }
}
}
}
```
Bạn cũng có thể kích hoạt bằng tiền tố từ khóa (ví dụ: `!bot`):
```json
{
"channels": {
"discord": {
"group_trigger": { "prefixes": ["!bot"] }
}
}
}
```
**6. Chạy**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WhatsApp</b> (native qua whatsmeow)</summary>
PicoClaw có thể kết nối WhatsApp theo hai cách:
- **Native (khuyến nghị):** In-process sử dụng [whatsmeow](https://github.com/tulir/whatsmeow). Không cần bridge riêng. Đặt `"use_native": true` và để trống `bridge_url`. Lần chạy đầu tiên, quét mã QR bằng WhatsApp (Thiết bị liên kết). Phiên được lưu trong workspace (ví dụ: `workspace/whatsapp/`). Kênh native là **tùy chọn** để giữ binary mặc định nhỏ; build với `-tags whatsapp_native` (ví dụ: `make build-whatsapp-native` hoặc `go build -tags whatsapp_native ./cmd/...`).
- **Bridge:** Kết nối đến bridge WebSocket bên ngoài. Đặt `bridge_url` (ví dụ: `ws://localhost:3001`) và giữ `use_native` là false.
**Cấu hình (native)**
```json
{
"channels": {
"whatsapp": {
"enabled": true,
"use_native": true,
"session_store_path": "",
"allow_from": []
}
}
}
```
Nếu `session_store_path` trống, phiên được lưu tại `<workspace>/whatsapp/`. Chạy `picoclaw gateway`; lần chạy đầu tiên, quét mã QR hiển thị trong terminal bằng WhatsApp → Thiết bị liên kết.
</details>
<details>
<summary><b>QQ</b></summary>
**1. Tạo bot**
- Truy cập [QQ Open Platform](https://q.qq.com/#)
- Tạo ứng dụng → Lấy **AppID** và **AppSecret**
**2. Cấu hình**
```json
{
"channels": {
"qq": {
"enabled": true,
"app_id": "YOUR_APP_ID",
"app_secret": "YOUR_APP_SECRET",
"allow_from": []
}
}
}
```
> Đặt `allow_from` trống để cho phép tất cả người dùng, hoặc chỉ định số QQ để giới hạn truy cập.
**3. Chạy**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>DingTalk</b></summary>
**1. Tạo bot**
* Truy cập [Open Platform](https://open.dingtalk.com/)
* Tạo ứng dụng nội bộ
* Sao chép Client ID và Client Secret
**2. Cấu hình**
```json
{
"channels": {
"dingtalk": {
"enabled": true,
"client_id": "YOUR_CLIENT_ID",
"client_secret": "YOUR_CLIENT_SECRET",
"allow_from": []
}
}
}
```
> Đặt `allow_from` trống để cho phép tất cả người dùng, hoặc chỉ định DingTalk user ID để giới hạn truy cập.
**3. Chạy**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Matrix</b></summary>
**1. Chuẩn bị tài khoản bot**
* Sử dụng homeserver ưa thích (ví dụ: `https://matrix.org` hoặc tự host)
* Tạo user bot và lấy access token
**2. Cấu hình**
```json
{
"channels": {
"matrix": {
"enabled": true,
"homeserver": "https://matrix.org",
"user_id": "@your-bot:matrix.org",
"access_token": "YOUR_MATRIX_ACCESS_TOKEN",
"allow_from": []
}
}
}
```
**3. Chạy**
```bash
picoclaw gateway
```
Để xem đầy đủ các tùy chọn (`device_id`, `join_on_invite`, `group_trigger`, `placeholder`, `reasoning_channel_id`), xem [Hướng Dẫn Cấu Hình Kênh Matrix](docs/channels/matrix/README.md).
</details>
<details>
<summary><b>LINE</b></summary>
**1. Tạo Tài Khoản LINE Official**
- Truy cập [LINE Developers Console](https://developers.line.biz/)
- Tạo provider → Tạo kênh Messaging API
- Sao chép **Channel Secret** và **Channel Access Token**
**2. Cấu hình**
```json
{
"channels": {
"line": {
"enabled": true,
"channel_secret": "YOUR_CHANNEL_SECRET",
"channel_access_token": "YOUR_CHANNEL_ACCESS_TOKEN",
"webhook_path": "/webhook/line",
"allow_from": []
}
}
}
```
> Webhook LINE được phục vụ trên máy chủ Gateway chung (`gateway.host`:`gateway.port`, mặc định `127.0.0.1:18790`).
**3. Thiết lập Webhook URL**
LINE yêu cầu HTTPS cho webhook. Sử dụng reverse proxy hoặc tunnel:
```bash
# Ví dụ với ngrok (port mặc định gateway là 18790)
ngrok http 18790
```
Sau đó đặt Webhook URL trong LINE Developers Console thành `https://your-domain/webhook/line` và bật **Use webhook**.
**4. Chạy**
```bash
picoclaw gateway
```
> Trong chat nhóm, bot chỉ phản hồi khi được @mention. Phản hồi trích dẫn tin nhắn gốc.
</details>
<details>
<summary><b>WeCom (企业微信)</b></summary>
PicoClaw hỗ trợ ba loại tích hợp WeCom:
**Tùy chọn 1: WeCom Bot (Bot)** - Thiết lập dễ hơn, hỗ trợ chat nhóm
**Tùy chọn 2: WeCom App (App Tùy chỉnh)** - Nhiều tính năng hơn, nhắn tin chủ động, chỉ chat riêng
**Tùy chọn 3: WeCom AI Bot (AI Bot)** - AI Bot chính thức, phản hồi streaming, hỗ trợ chat nhóm & riêng
Xem [Hướng Dẫn Cấu Hình WeCom AI Bot](docs/channels/wecom/wecom_aibot/README.zh.md) để biết hướng dẫn thiết lập chi tiết.
**Thiết Lập Nhanh - WeCom Bot:**
**1. Tạo bot**
* Truy cập Console Quản Trị WeCom → Chat Nhóm → Thêm Bot Nhóm
* Sao chép URL webhook (định dạng: `https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx`)
**2. Cấu hình**
```json
{
"channels": {
"wecom": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_url": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY",
"webhook_path": "/webhook/wecom",
"allow_from": []
}
}
}
```
> Webhook WeCom được phục vụ trên máy chủ Gateway chung (`gateway.host`:`gateway.port`, mặc định `127.0.0.1:18790`).
**Thiết Lập Nhanh - WeCom App:**
**1. Tạo ứng dụng**
* Truy cập Console Quản Trị WeCom → Quản Lý App → Tạo App
* Sao chép **AgentId** và **Secret**
* Truy cập trang "Công Ty Của Tôi", sao chép **CorpID**
**2. Cấu hình nhận tin nhắn**
* Trong chi tiết App, nhấp "Nhận Tin Nhắn" → "Cấu Hình API"
* Đặt URL thành `http://your-server:18790/webhook/wecom-app`
* Tạo **Token** và **EncodingAESKey**
**3. Cấu hình**
```json
{
"channels": {
"wecom_app": {
"enabled": true,
"corp_id": "wwxxxxxxxxxxxxxxxx",
"corp_secret": "YOUR_CORP_SECRET",
"agent_id": 1000002,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-app",
"allow_from": []
}
}
}
```
**4. Chạy**
```bash
picoclaw gateway
```
> **Lưu ý**: Callback webhook WeCom được phục vụ trên port Gateway (mặc định 18790). Sử dụng reverse proxy cho HTTPS.
**Thiết Lập Nhanh - WeCom AI Bot:**
**1. Tạo AI Bot**
* Truy cập Console Quản Trị WeCom → Quản Lý App → AI Bot
* Trong cài đặt AI Bot, cấu hình callback URL: `http://your-server:18790/webhook/wecom-aibot`
* Sao chép **Token** và nhấp "Tạo Ngẫu Nhiên" cho **EncodingAESKey**
**2. Cấu hình**
```json
{
"channels": {
"wecom_aibot": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_43_CHAR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-aibot",
"allow_from": [],
"welcome_message": "Hello! How can I help you?"
}
}
}
```
**3. Chạy**
```bash
picoclaw gateway
```
> **Lưu ý**: WeCom AI Bot sử dụng giao thức streaming pull — không lo timeout phản hồi. Tác vụ dài (>30 giây) tự động chuyển sang gửi qua `response_url` push.
</details>
+217
View File
@@ -0,0 +1,217 @@
# ⚙️ Hướng Dẫn Cấu Hình
> Quay lại [README](../../README.vi.md)
## ⚙️ Cấu Hình
File cấu hình: `~/.picoclaw/config.json`
### Biến Môi Trường
Bạn có thể ghi đè các đường dẫn mặc định bằng biến môi trường. Điều này hữu ích cho cài đặt portable, triển khai container, hoặc chạy picoclaw như dịch vụ hệ thống. Các biến này độc lập và kiểm soát các đường dẫn khác nhau.
| Biến | Mô tả | Đường Dẫn Mặc Định |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | Ghi đè đường dẫn đến file cấu hình. Chỉ định trực tiếp cho picoclaw file `config.json` nào cần tải, bỏ qua tất cả vị trí khác. | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | Ghi đè thư mục gốc cho dữ liệu picoclaw. Thay đổi vị trí mặc định của `workspace` và các thư mục dữ liệu khác. | `~/.picoclaw` |
**Ví dụ:**
```bash
# Chạy picoclaw với file cấu hình cụ thể
# Đường dẫn workspace sẽ được đọc từ trong file cấu hình đó
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# Chạy picoclaw với tất cả dữ liệu lưu tại /opt/picoclaw
# Cấu hình sẽ được tải từ mặc định ~/.picoclaw/config.json
# Workspace sẽ được tạo tại /opt/picoclaw/workspace
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# Sử dụng cả hai cho thiết lập tùy chỉnh hoàn toàn
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### Bố Cục Workspace
PicoClaw lưu trữ dữ liệu trong workspace đã cấu hình (mặc định: `~/.picoclaw/workspace`):
```
~/.picoclaw/workspace/
├── sessions/ # Phiên hội thoại và lịch sử
├── memory/ # Bộ nhớ dài hạn (MEMORY.md)
├── state/ # Trạng thái bền vững (kênh cuối, v.v.)
├── cron/ # Cơ sở dữ liệu tác vụ lên lịch
├── skills/ # Skill tùy chỉnh
├── AGENTS.md # Hướng dẫn hành vi agent
├── HEARTBEAT.md # Prompt tác vụ định kỳ (kiểm tra mỗi 30 phút)
├── IDENTITY.md # Danh tính agent
├── SOUL.md # Linh hồn agent
└── USER.md # Tùy chọn người dùng
```
### Nguồn Skill
Mặc định, skill được tải từ:
1. `~/.picoclaw/workspace/skills` (workspace)
2. `~/.picoclaw/skills` (global)
3. `<current-working-directory>/skills` (builtin)
Cho thiết lập nâng cao/test, bạn có thể ghi đè thư mục gốc skill builtin với:
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### Chính Sách Thực Thi Lệnh Thống Nhất
- Lệnh slash chung được thực thi qua một đường dẫn duy nhất trong `pkg/agent/loop.go` qua `commands.Executor`.
- Adapter kênh không còn xử lý lệnh chung cục bộ; chúng chuyển tiếp văn bản đầu vào đến đường dẫn bus/agent. Telegram vẫn tự động đăng ký lệnh được hỗ trợ khi khởi động.
- Lệnh slash không xác định (ví dụ `/foo`) được chuyển sang xử lý LLM bình thường.
- Lệnh đã đăng ký nhưng không được hỗ trợ trên kênh hiện tại (ví dụ `/show` trên WhatsApp) trả về lỗi rõ ràng cho người dùng và dừng xử lý tiếp.
### 🔒 Sandbox Bảo Mật
PicoClaw chạy trong môi trường sandbox mặc định. Agent chỉ có thể truy cập file và thực thi lệnh trong workspace đã cấu hình.
#### Cấu Hình Mặc Định
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"restrict_to_workspace": true
}
}
}
```
| Tùy chọn | Mặc định | Mô tả |
| ----------------------- | ----------------------- | ----------------------------------------- |
| `workspace` | `~/.picoclaw/workspace` | Thư mục làm việc của agent |
| `restrict_to_workspace` | `true` | Giới hạn truy cập file/lệnh trong workspace |
#### Công Cụ Được Bảo Vệ
Khi `restrict_to_workspace: true`, các công cụ sau được sandbox:
| Công cụ | Chức năng | Giới hạn |
| ------------- | ---------------- | -------------------------------------- |
| `read_file` | Đọc file | Chỉ file trong workspace |
| `write_file` | Ghi file | Chỉ file trong workspace |
| `list_dir` | Liệt kê thư mục | Chỉ thư mục trong workspace |
| `edit_file` | Sửa file | Chỉ file trong workspace |
| `append_file` | Nối vào file | Chỉ file trong workspace |
| `exec` | Thực thi lệnh | Đường dẫn lệnh phải trong workspace |
#### Bảo Vệ Exec Bổ Sung
Ngay cả khi `restrict_to_workspace: false`, công cụ `exec` chặn các lệnh nguy hiểm sau:
* `rm -rf`, `del /f`, `rmdir /s` — Xóa hàng loạt
* `format`, `mkfs`, `diskpart` — Định dạng đĩa
* `dd if=` — Tạo ảnh đĩa
* Ghi vào `/dev/sd[a-z]` — Ghi trực tiếp đĩa
* `shutdown`, `reboot`, `poweroff` — Tắt hệ thống
* Fork bomb `:(){ :|:& };:`
### Kiểm Soát Truy Cập File
| Config Key | Type | Default | Description |
|------------|------|---------|-------------|
| `tools.allow_read_paths` | string[] | `[]` | Additional paths allowed for reading outside workspace |
| `tools.allow_write_paths` | string[] | `[]` | Additional paths allowed for writing outside workspace |
### Bảo Mật Exec
| Config Key | Type | Default | Description |
|------------|------|---------|-------------|
| `tools.exec.allow_remote` | bool | `false` | Allow exec tool from remote channels (Telegram/Discord etc.) |
| `tools.exec.enable_deny_patterns` | bool | `true` | Enable dangerous command interception |
| `tools.exec.custom_deny_patterns` | string[] | `[]` | Custom regex patterns to block |
| `tools.exec.custom_allow_patterns` | string[] | `[]` | Custom regex patterns to allow |
> **Lưu ý Bảo Mật:** Bảo vệ symlink được bật mặc định — tất cả đường dẫn file được giải quyết qua `filepath.EvalSymlinks` trước khi so khớp whitelist, ngăn chặn tấn công thoát qua symlink.
#### Hạn Chế Đã Biết: Tiến Trình Con Từ Công Cụ Build
Guard bảo mật exec chỉ kiểm tra dòng lệnh mà PicoClaw khởi chạy trực tiếp. Nó không kiểm tra đệ quy các tiến trình con được tạo bởi công cụ phát triển được phép như `make`, `go run`, `cargo`, `npm run`, hoặc script build tùy chỉnh.
Điều này có nghĩa là lệnh cấp cao nhất vẫn có thể biên dịch hoặc khởi chạy binary khác sau khi vượt qua kiểm tra guard ban đầu. Trong thực tế, hãy coi script build, Makefile, script package, và binary được tạo như mã thực thi cần cùng mức độ review như lệnh shell trực tiếp.
Cho môi trường rủi ro cao hơn:
* Review script build trước khi thực thi.
* Ưu tiên phê duyệt/review thủ công cho quy trình biên dịch và chạy.
* Chạy PicoClaw trong container hoặc VM nếu bạn cần cách ly mạnh hơn guard tích hợp.
#### Ví Dụ Lỗi
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (path outside working dir)}
```
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)}
```
#### Tắt Giới Hạn (Rủi Ro Bảo Mật)
Nếu bạn cần agent truy cập đường dẫn ngoài workspace:
**Phương pháp 1: File cấu hình**
```json
{
"agents": {
"defaults": {
"restrict_to_workspace": false
}
}
}
```
**Phương pháp 2: Biến môi trường**
```bash
export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false
```
> ⚠️ **Cảnh báo**: Tắt giới hạn này cho phép agent truy cập bất kỳ đường dẫn nào trên hệ thống. Chỉ sử dụng cẩn thận trong môi trường được kiểm soát.
#### Tính Nhất Quán Ranh Giới Bảo Mật
Cài đặt `restrict_to_workspace` áp dụng nhất quán trên tất cả đường dẫn thực thi:
| Đường Dẫn Thực Thi | Ranh Giới Bảo Mật |
| -------------------- | ---------------------------- |
| Main Agent | `restrict_to_workspace` ✅ |
| Subagent / Spawn | Kế thừa cùng giới hạn ✅ |
| Heartbeat tasks | Kế thừa cùng giới hạn ✅ |
Tất cả đường dẫn chia sẻ cùng giới hạn workspace — không có cách nào vượt qua ranh giới bảo mật qua subagent hoặc tác vụ lên lịch.
### Heartbeat (Tác Vụ Định Kỳ)
PicoClaw có thể thực hiện tác vụ định kỳ tự động. Tạo file `HEARTBEAT.md` trong workspace:
```markdown
# Tác Vụ Định Kỳ
- Kiểm tra email cho tin nhắn quan trọng
- Xem lịch cho sự kiện sắp tới
- Kiểm tra dự báo thời tiết
```
Agent sẽ đọc file này mỗi 30 phút (có thể cấu hình) và thực thi các tác vụ sử dụng công cụ có sẵn.
#### Tác Vụ Bất Đồng Bộ Với Spawn
Cho tác vụ chạy lâu (tìm kiếm web, gọi API), sử dụng công cụ `spawn` để tạo **subagent**:
```markdown
# Tác Vụ Định Kỳ
```
+166
View File
@@ -0,0 +1,166 @@
# 🐳 Docker và Bắt Đầu Nhanh
> Quay lại [README](../../README.vi.md)
## 🐳 Docker Compose
Bạn cũng có thể chạy PicoClaw bằng Docker Compose mà không cần cài đặt gì trên máy.
```bash
# 1. Clone repo này
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
# 2. Lần chạy đầu tiên — tự động tạo docker/data/config.json rồi thoát
docker compose -f docker/docker-compose.yml --profile gateway up
# Container hiển thị "First-run setup complete." và dừng lại.
# 3. Cấu hình API key của bạn
vim docker/data/config.json # Set provider API keys, bot tokens, etc.
# 4. Khởi động
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Người dùng Docker**: Mặc định, Gateway lắng nghe trên `127.0.0.1`, không thể truy cập từ host. Nếu bạn cần truy cập các health endpoint hoặc mở port, hãy đặt `PICOCLAW_GATEWAY_HOST=0.0.0.0` trong môi trường hoặc cập nhật `config.json`.
```bash
# 5. Kiểm tra log
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. Dừng
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Chế Độ Launcher (Web Console)
Image `launcher` bao gồm cả ba binary (`picoclaw`, `picoclaw-launcher`, `picoclaw-launcher-tui`) và khởi động web console mặc định, cung cấp giao diện trình duyệt để cấu hình và chat.
```bash
docker compose -f docker/docker-compose.yml --profile launcher up -d
```
Mở http://localhost:18800 trong trình duyệt. Launcher tự động quản lý tiến trình gateway.
> [!WARNING]
> Web console chưa hỗ trợ xác thực. Tránh để lộ ra internet công cộng.
### Chế Độ Agent (One-shot)
```bash
# Đặt câu hỏi
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "What is 2+2?"
# Chế độ tương tác
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### Cập Nhật
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
### 🚀 Bắt Đầu Nhanh
> [!TIP]
> Cấu hình API Key trong `~/.picoclaw/config.json`. Lấy API Key: [Volcengine (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM). Tìm kiếm web là tùy chọn — lấy miễn phí [Tavily API](https://tavily.com) (1000 truy vấn miễn phí/tháng) hoặc [Brave Search API](https://brave.com/search/api) (2000 truy vấn miễn phí/tháng).
**1. Khởi tạo**
```bash
picoclaw onboard
```
**2. Cấu hình** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"enabled": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "YOUR_PERPLEXITY_API_KEY",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://your-searxng-instance:8888",
"max_results": 5
}
}
}
}
```
> **Mới**: Định dạng cấu hình `model_list` cho phép thêm provider mà không cần thay đổi code. Xem [Cấu Hình Mô Hình](#cấu-hình-mô-hình-model_list) để biết chi tiết.
> `request_timeout` là tùy chọn và tính bằng giây. Nếu bỏ qua hoặc đặt `<= 0`, PicoClaw sử dụng timeout mặc định (120s).
**3. Lấy API Key**
* **Nhà cung cấp LLM**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **Tìm kiếm Web** (tùy chọn):
* [Brave Search](https://brave.com/search/api) - Trả phí ($5/1000 truy vấn, ~$5-6/tháng)
* [Perplexity](https://www.perplexity.ai) - Tìm kiếm bằng AI với giao diện chat
* [SearXNG](https://github.com/searxng/searxng) - Công cụ tìm kiếm tổng hợp tự host (miễn phí, không cần API key)
* [Tavily](https://tavily.com) - Tối ưu cho AI Agent (1000 yêu cầu/tháng)
* DuckDuckGo - Fallback tích hợp (không cần API key)
> **Lưu ý**: Xem `config.example.json` để có mẫu cấu hình đầy đủ.
**4. Chat**
```bash
picoclaw agent -m "What is 2+2?"
```
Vậy là xong! Bạn có một trợ lý AI hoạt động trong 2 phút.
---
+434
View File
@@ -0,0 +1,434 @@
# 🔌 Nhà Cung Cấp và Cấu Hình Mô Hình
> Quay lại [README](../../README.vi.md)
### Nhà Cung Cấp
> [!NOTE]
> Groq cung cấp dịch vụ chuyển giọng nói thành văn bản miễn phí qua Whisper. Nếu được cấu hình, tin nhắn âm thanh từ bất kỳ kênh nào sẽ được tự động chuyển thành văn bản ở cấp agent.
| Provider | Purpose | Get API Key |
| ------------ | --------------------------------------- | ------------------------------------------------------------ |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](https://bigmodel.cn) |
| `volcengine` | LLM (Volcengine direct)                 | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (Qwen direct) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras direct) | [cerebras.ai](https://cerebras.ai) |
| `vivgrid` | LLM (Vivgrid direct) | [vivgrid.com](https://vivgrid.com) |
| `moonshot` | LLM (Kimi/Moonshot direct) | [platform.moonshot.cn](https://platform.moonshot.cn) |
| `minimax` | LLM (Minimax direct) | [platform.minimaxi.com](https://platform.minimaxi.com) |
| `avian` | LLM (Avian direct) | [avian.io](https://avian.io) |
| `mistral` | LLM (Mistral direct) | [console.mistral.ai](https://console.mistral.ai) |
| `longcat` | LLM (Longcat direct) | [longcat.ai](https://longcat.ai) |
| `modelscope` | LLM (ModelScope direct) | [modelscope.cn](https://modelscope.cn) |
### Cấu Hình Mô Hình (model_list)
> **Có gì mới?** PicoClaw hiện sử dụng cách tiếp cận cấu hình **tập trung vào mô hình**. Chỉ cần chỉ định định dạng `vendor/model` (ví dụ: `zhipu/glm-4.7`) để thêm provider mới — **không cần thay đổi code!**
Thiết kế này cũng cho phép **hỗ trợ đa agent** với lựa chọn provider linh hoạt:
- **Agent khác nhau, provider khác nhau**: Mỗi agent có thể sử dụng provider LLM riêng
- **Fallback mô hình**: Cấu hình mô hình chính và dự phòng cho khả năng phục hồi
- **Cân bằng tải**: Phân phối yêu cầu qua nhiều endpoint
- **Cấu hình tập trung**: Quản lý tất cả provider tại một nơi
#### 📋 Tất Cả Vendor Được Hỗ Trợ
| Vendor | `model` Prefix | Default API Base | Protocol | API Key |
| ------------------- | ----------------- |-----------------------------------------------------| --------- | ---------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [Get Key](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [Get Key](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [Get Key](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [Get Key](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [Get Key](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [Get Key](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [Get Key](https://platform.moonshot.cn) |
| **通义千问 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [Get Key](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [Get Key](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | Local (no key needed) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [Get Key](https://openrouter.ai/keys) |
| **LiteLLM Proxy** | `litellm/` | `http://localhost:4000/v1` | OpenAI | Your LiteLLM proxy key |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | Local |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [Get Key](https://cerebras.ai) |
| **VolcEngine (Doubao)** | `volcengine/` | `https://ark.cn-beijing.volces.com/api/v3` | OpenAI | [Get Key](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [Get Key](https://www.byteplus.com) |
| **Vivgrid** | `vivgrid/` | `https://api.vivgrid.com/v1` | OpenAI | [Get Key](https://vivgrid.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [Get Key](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [Get Token](https://modelscope.cn/my/tokens) |
| **Antigravity** | `antigravity/` | Google Cloud | Custom | OAuth only |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### Cấu Hình Cơ Bản
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### Ví Dụ Theo Vendor
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**VolcEngine (Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (với API key)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
}
```
> Chạy `picoclaw auth login --provider anthropic` để dán API token.
**Anthropic Messages API (định dạng native)**
Để truy cập trực tiếp API Anthropic hoặc endpoint tùy chỉnh chỉ hỗ trợ định dạng message native của Anthropic:
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> Sử dụng giao thức `anthropic-messages` khi:
> - Sử dụng proxy bên thứ ba chỉ hỗ trợ endpoint native `/v1/messages` của Anthropic (không tương thích OpenAI `/v1/chat/completions`)
> - Kết nối đến dịch vụ như MiniMax, Synthetic yêu cầu định dạng message native của Anthropic
> - Giao thức `anthropic` hiện tại trả về lỗi 404 (cho thấy endpoint không hỗ trợ định dạng tương thích OpenAI)
>
> **Lưu ý:** Giao thức `anthropic` sử dụng định dạng tương thích OpenAI (`/v1/chat/completions`), trong khi `anthropic-messages` sử dụng định dạng native của Anthropic (`/v1/messages`). Chọn dựa trên định dạng endpoint hỗ trợ.
**Ollama (local)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**Proxy/API Tùy Chỉnh**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
**LiteLLM Proxy**
```json
{
"model_name": "lite-gpt4",
"model": "litellm/lite-gpt4",
"api_base": "http://localhost:4000/v1",
"api_key": "sk-..."
}
```
PicoClaw chỉ loại bỏ tiền tố ngoài `litellm/` trước khi gửi yêu cầu, nên alias proxy như `litellm/lite-gpt4` gửi `lite-gpt4`, trong khi `litellm/openai/gpt-4o` gửi `openai/gpt-4o`.
#### Cân Bằng Tải
Cấu hình nhiều endpoint cho cùng tên mô hình — PicoClaw sẽ tự động round-robin giữa chúng:
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### Di Chuyển Từ Cấu Hình Legacy `providers`
Cấu hình `providers` cũ đã **lỗi thời (deprecated)** nhưng vẫn được hỗ trợ để tương thích ngược.
**Cấu hình cũ (lỗi thời):**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**Cấu hình mới (khuyến nghị):**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
Để biết hướng dẫn di chuyển chi tiết, xem [docs/migration/model-list-migration.md](../migration/model-list-migration.md).
### Kiến Trúc Provider
PicoClaw định tuyến provider theo họ giao thức:
- Giao thức tương thích OpenAI: OpenRouter, gateway tương thích OpenAI, Groq, Zhipu, và endpoint kiểu vLLM.
- Giao thức Anthropic: Hành vi API native của Claude.
- Đường dẫn Codex/OAuth: Tuyến xác thực OAuth/token của OpenAI.
Cách này giữ cho runtime gọn nhẹ, đồng thời việc thêm backend tương thích OpenAI mới chủ yếu chỉ là thao tác cấu hình (`api_base` + `api_key`).
<details>
<summary><b>Zhipu</b></summary>
**1. Lấy API key và URL base**
* Lấy [API key](https://bigmodel.cn/usercenter/proj-mgmt/apikeys)
**2. Cấu hình**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. Chạy**
```bash
picoclaw agent -m "Hello"
```
</details>
<details>
<summary><b>Ví dụ cấu hình đầy đủ</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false,
"bridge_url": "ws://localhost:3001",
"use_native": false,
"session_store_path": "",
"allow_from": []
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "BSA...",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://localhost:8888",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
---
## 📝 So Sánh API Key
| Service | Pricing | Use Case |
| ---------------- | ------------------------ | ------------------------------------- |
| **OpenRouter** | Free: 200K tokens/month | Multiple models (Claude, GPT-4, etc.) |
| **Volcengine CodingPlan** | ¥9.9/first month | Best for Chinese users, multiple SOTA models (Doubao, DeepSeek, etc.) |
| **Zhipu** | Free: 200K tokens/month | Suitable for Chinese users |
| **Brave Search** | $5/1000 queries | Web search functionality |
| **SearXNG** | Free (self-hosted) | Privacy-focused metasearch (70+ engines) |
| **Groq** | Free tier available | Fast inference (Llama, Mixtral) |
| **Cerebras** | Free tier available | Fast inference (Llama, Qwen, etc.) |
| **LongCat** | Free: up to 5M tokens/day | Fast inference |
| **ModelScope** | Free: 2000 requests/day | Inference (Qwen, GLM, DeepSeek, etc.) |
---
<div align="center">
<img src="assets/logo.jpg" alt="PicoClaw Meme" width="512">
</div>
+61
View File
@@ -0,0 +1,61 @@
# 🔄 Tác Vụ Bất Đồng Bộ và Spawn
> Quay lại [README](../../README.vi.md)
## Tác Vụ Nhanh (phản hồi trực tiếp)
- Báo cáo thời gian hiện tại
## Tác Vụ Dài (sử dụng spawn cho bất đồng bộ)
- Tìm kiếm web tin tức AI và tóm tắt
- Kiểm tra email và báo cáo tin nhắn quan trọng
```
**Hành vi chính:**
| Feature | Description |
| ----------------------- | --------------------------------------------------------- |
| **spawn** | Creates async subagent, doesn't block heartbeat |
| **Independent context** | Subagent has its own context, no session history |
| **message tool** | Subagent communicates with user directly via message tool |
| **Non-blocking** | After spawning, heartbeat continues to next task |
#### Cách Giao Tiếp Subagent Hoạt Động
```
Heartbeat được kích hoạt
Agent đọc HEARTBEAT.md
Cho tác vụ dài: spawn subagent
↓ ↓
Tiếp tục tác vụ tiếp theo Subagent làm việc độc lập
↓ ↓
Tất cả tác vụ hoàn thành Subagent sử dụng công cụ "message"
↓ ↓
Phản hồi HEARTBEAT_OK Người dùng nhận kết quả trực tiếp
```
Subagent có quyền truy cập công cụ (message, web_search, v.v.) và có thể giao tiếp với người dùng độc lập mà không cần qua agent chính.
**Cấu hình:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| Option | Default | Description |
| ---------- | ------- | ---------------------------------- |
| `enabled` | `true` | Enable/disable heartbeat |
| `interval` | `30` | Check interval in minutes (min: 5) |
**Biến môi trường:**
* `PICOCLAW_HEARTBEAT_ENABLED=false` để tắt
* `PICOCLAW_HEARTBEAT_INTERVAL=60` để thay đổi khoảng thời gian
+336
View File
@@ -0,0 +1,336 @@
# 🔧 Cấu Hình Công Cụ
> Quay lại [README](../../README.vi.md)
Cấu hình công cụ của PicoClaw nằm trong trường `tools` của `config.json`.
## Cấu trúc cấu hình
```json
{
"tools": {
"web": {
...
},
"mcp": {
...
},
"exec": {
...
},
"cron": {
...
},
"skills": {
...
}
}
}
```
## Công cụ Web
Các công cụ web được sử dụng để tìm kiếm và tải nội dung web.
### Web Fetcher
Cài đặt chung để tải và xử lý nội dung trang web.
| Cấu hình | Kiểu | Mặc định | Mô tả |
|----------------------|--------|---------------|-----------------------------------------------------------------------------------------------|
| `enabled` | bool | true | Bật khả năng tải trang web. |
| `fetch_limit_bytes` | int | 10485760 | Kích thước tối đa của payload trang web cần tải, tính bằng byte (mặc định là 10MB). |
| `format` | string | "plaintext" | Định dạng đầu ra của nội dung đã tải. Tùy chọn: `plaintext` hoặc `markdown` (khuyến nghị). |
### Brave
| Cấu hình | Kiểu | Mặc định | Mô tả |
|----------------|--------|----------|----------------------------|
| `enabled` | bool | false | Bật tìm kiếm Brave |
| `api_key` | string | - | Khóa API Brave Search |
| `max_results` | int | 5 | Số kết quả tối đa |
### DuckDuckGo
| Cấu hình | Kiểu | Mặc định | Mô tả |
|----------------|------|----------|-------------------------------|
| `enabled` | bool | true | Bật tìm kiếm DuckDuckGo |
| `max_results` | int | 5 | Số kết quả tối đa |
### Perplexity
| Cấu hình | Kiểu | Mặc định | Mô tả |
|----------------|--------|----------|-------------------------------|
| `enabled` | bool | false | Bật tìm kiếm Perplexity |
| `api_key` | string | - | Khóa API Perplexity |
| `max_results` | int | 5 | Số kết quả tối đa |
## Công cụ Exec
Công cụ exec được sử dụng để thực thi các lệnh shell.
| Cấu hình | Kiểu | Mặc định | Mô tả |
|--------------------------|-------|----------|------------------------------------------------|
| `enable_deny_patterns` | bool | true | Bật chặn lệnh nguy hiểm mặc định |
| `custom_deny_patterns` | array | [] | Mẫu từ chối tùy chỉnh (biểu thức chính quy) |
### Chức năng
- **`enable_deny_patterns`**: Đặt thành `false` để tắt hoàn toàn các mẫu chặn lệnh nguy hiểm mặc định
- **`custom_deny_patterns`**: Thêm các mẫu regex từ chối tùy chỉnh; các lệnh khớp sẽ bị chặn
### Các mẫu lệnh bị chặn mặc định
Theo mặc định, PicoClaw chặn các lệnh nguy hiểm sau:
- Lệnh xóa: `rm -rf`, `del /f/q`, `rmdir /s`
- Thao tác đĩa: `format`, `mkfs`, `diskpart`, `dd if=`, ghi vào `/dev/sd*`
- Thao tác hệ thống: `shutdown`, `reboot`, `poweroff`
- Thay thế lệnh: `$()`, `${}`, dấu backtick
- Pipe đến shell: `| sh`, `| bash`
- Leo thang đặc quyền: `sudo`, `chmod`, `chown`
- Điều khiển tiến trình: `pkill`, `killall`, `kill -9`
- Thao tác từ xa: `curl | sh`, `wget | sh`, `ssh`
- Quản lý gói: `apt`, `yum`, `dnf`, `npm install -g`, `pip install --user`
- Container: `docker run`, `docker exec`
- Git: `git push`, `git force`
- Khác: `eval`, `source *.sh`
### Hạn chế kiến trúc đã biết
Bộ bảo vệ exec chỉ xác thực lệnh cấp cao nhất được gửi đến PicoClaw. Nó **không** kiểm tra đệ quy các tiến trình con được tạo bởi các công cụ build hoặc script sau khi lệnh đó bắt đầu chạy.
Ví dụ về các quy trình có thể bỏ qua bộ bảo vệ lệnh trực tiếp sau khi lệnh ban đầu được cho phép:
- `make run`
- `go run ./cmd/...`
- `cargo run`
- `npm run build`
Điều này có nghĩa là bộ bảo vệ hữu ích để chặn các lệnh trực tiếp rõ ràng nguy hiểm, nhưng nó **không phải** là sandbox đầy đủ cho các pipeline build chưa được xem xét. Nếu mô hình mối đe dọa của bạn bao gồm mã không đáng tin cậy trong workspace, hãy sử dụng cách ly mạnh hơn như container, VM hoặc quy trình phê duyệt xung quanh các lệnh build và chạy.
### Ví dụ cấu hình
```json
{
"tools": {
"exec": {
"enable_deny_patterns": true,
"custom_deny_patterns": [
"\\brm\\s+-r\\b",
"\\bkillall\\s+python"
]
}
}
}
```
## Công cụ Cron
Công cụ cron được sử dụng để lên lịch các tác vụ định kỳ.
| Cấu hình | Kiểu | Mặc định | Mô tả |
|--------------------------|------|----------|-----------------------------------------------------|
| `exec_timeout_minutes` | int | 5 | Thời gian chờ thực thi tính bằng phút, 0 nghĩa là không giới hạn |
## Công cụ MCP
Công cụ MCP cho phép tích hợp với các máy chủ Model Context Protocol bên ngoài.
### Khám phá công cụ (tải theo yêu cầu – lazy loading)
Khi kết nối với nhiều máy chủ MCP, việc hiển thị hàng trăm công cụ cùng lúc có thể làm cạn kiệt cửa sổ ngữ cảnh của LLM và tăng chi phí API. Tính năng **Discovery** giải quyết vấn đề này bằng cách giữ các công cụ MCP *ẩn* theo mặc định.
Thay vì tải tất cả các công cụ, LLM được cung cấp một công cụ tìm kiếm nhẹ (sử dụng khớp từ khóa BM25 hoặc Regex). Khi LLM cần một khả năng cụ thể, nó tìm kiếm trong thư viện ẩn. Các công cụ khớp sau đó được tạm thời "mở khóa" và đưa vào ngữ cảnh trong số lượt được cấu hình (`ttl`).
### Cấu hình toàn cục
| Cấu hình | Kiểu | Mặc định | Mô tả |
|-------------|--------|----------|-----------------------------------------------|
| `enabled` | bool | false | Bật tích hợp MCP toàn cục |
| `discovery` | object | `{}` | Cấu hình khám phá công cụ (xem bên dưới) |
| `servers` | object | `{}` | Ánh xạ tên máy chủ đến cấu hình máy chủ |
### Cấu hình Discovery (`discovery`)
| Cấu hình | Kiểu | Mặc định | Mô tả |
|----------------------|------|----------|-----------------------------------------------------------------------------------------------------------------------------------|
| `enabled` | bool | false | Nếu true, các công cụ MCP bị ẩn và được tải theo yêu cầu qua tìm kiếm. Nếu false, tất cả công cụ được tải |
| `ttl` | int | 5 | Số lượt hội thoại mà một công cụ đã khám phá vẫn được mở khóa |
| `max_search_results` | int | 5 | Số công cụ tối đa được trả về cho mỗi truy vấn tìm kiếm |
| `use_bm25` | bool | true | Bật công cụ tìm kiếm ngôn ngữ tự nhiên/từ khóa (`tool_search_tool_bm25`). **Cảnh báo**: tiêu tốn nhiều tài nguyên hơn tìm kiếm regex |
| `use_regex` | bool | false | Bật công cụ tìm kiếm mẫu regex (`tool_search_tool_regex`) |
> **Lưu ý:** Nếu `discovery.enabled` là `true`, bạn **phải** bật ít nhất một công cụ tìm kiếm (`use_bm25` hoặc `use_regex`),
> nếu không ứng dụng sẽ không khởi động được.
### Cấu hình từng máy chủ
| Cấu hình | Kiểu | Bắt buộc | Mô tả |
|------------|--------|----------|--------------------------------------------|
| `enabled` | bool | có | Bật máy chủ MCP này |
| `type` | string | không | Loại truyền tải: `stdio`, `sse`, `http` |
| `command` | string | stdio | Lệnh thực thi cho truyền tải stdio |
| `args` | array | không | Đối số lệnh cho truyền tải stdio |
| `env` | object | không | Biến môi trường cho tiến trình stdio |
| `env_file` | string | không | Đường dẫn đến tệp môi trường cho tiến trình stdio |
| `url` | string | sse/http | URL endpoint cho truyền tải `sse`/`http` |
| `headers` | object | không | Header HTTP cho truyền tải `sse`/`http` |
### Hành vi truyền tải
- Nếu bỏ qua `type`, truyền tải được tự động phát hiện:
- `url` được đặt → `sse`
- `command` được đặt → `stdio`
- `http` và `sse` đều sử dụng `url` + `headers` tùy chọn.
- `env` và `env_file` chỉ được áp dụng cho máy chủ `stdio`.
### Ví dụ cấu hình
#### 1) Máy chủ MCP Stdio
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"filesystem": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/tmp"
]
}
}
}
}
}
```
#### 2) Máy chủ MCP từ xa SSE/HTTP
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"remote-mcp": {
"enabled": true,
"type": "sse",
"url": "https://example.com/mcp",
"headers": {
"Authorization": "Bearer YOUR_TOKEN"
}
}
}
}
}
}
```
#### 3) Thiết lập MCP quy mô lớn với khám phá công cụ được bật
*Trong ví dụ này, LLM chỉ thấy `tool_search_tool_bm25`. Nó sẽ tìm kiếm và mở khóa động các công cụ Github hoặc Postgres chỉ khi được người dùng yêu cầu.*
```json
{
"tools": {
"mcp": {
"enabled": true,
"discovery": {
"enabled": true,
"ttl": 5,
"max_search_results": 5,
"use_bm25": true,
"use_regex": false
},
"servers": {
"github": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-github"
],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
}
},
"postgres": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-postgres",
"postgresql://user:password@localhost/dbname"
]
},
"slack": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-slack"
],
"env": {
"SLACK_BOT_TOKEN": "YOUR_SLACK_BOT_TOKEN",
"SLACK_TEAM_ID": "YOUR_SLACK_TEAM_ID"
}
}
}
}
}
}
```
## Công cụ Skills
Công cụ skills cấu hình khám phá và cài đặt kỹ năng thông qua các registry như ClawHub.
### Registry
| Cấu hình | Kiểu | Mặc định | Mô tả |
|------------------------------------|--------|-----------------------|----------------------------------------------|
| `registries.clawhub.enabled` | bool | true | Bật registry ClawHub |
| `registries.clawhub.base_url` | string | `https://clawhub.ai` | URL cơ sở ClawHub |
| `registries.clawhub.auth_token` | string | `""` | Token Bearer tùy chọn để có giới hạn tốc độ cao hơn |
| `registries.clawhub.search_path` | string | `/api/v1/search` | Đường dẫn API tìm kiếm |
| `registries.clawhub.skills_path` | string | `/api/v1/skills` | Đường dẫn API Skills |
| `registries.clawhub.download_path` | string | `/api/v1/download` | Đường dẫn API tải xuống |
### Ví dụ cấu hình
```json
{
"tools": {
"skills": {
"registries": {
"clawhub": {
"enabled": true,
"base_url": "https://clawhub.ai",
"auth_token": "",
"search_path": "/api/v1/search",
"skills_path": "/api/v1/skills",
"download_path": "/api/v1/download"
}
}
}
}
}
```
## Biến môi trường
Tất cả các tùy chọn cấu hình có thể được ghi đè qua biến môi trường với định dạng `PICOCLAW_TOOLS_<SECTION>_<KEY>`:
Ví dụ:
- `PICOCLAW_TOOLS_WEB_BRAVE_ENABLED=true`
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
Lưu ý: Cấu hình kiểu map lồng nhau (ví dụ `tools.mcp.servers.<name>.*`) được cấu hình trong `config.json` thay vì qua biến môi trường.
+45
View File
@@ -0,0 +1,45 @@
# 🐛 Khắc Phục Sự Cố
> Quay lại [README](../../README.vi.md)
## "model ... not found in model_list" hoặc OpenRouter "free is not a valid model ID"
**Triệu chứng:** Bạn thấy một trong các lỗi sau:
- `Error creating provider: model "openrouter/free" not found in model_list`
- OpenRouter trả về 400: `"free is not a valid model ID"`
**Nguyên nhân:** Trường `model` trong mục `model_list` của bạn là giá trị được gửi đến API. Đối với OpenRouter, bạn phải sử dụng ID mô hình **đầy đủ**, không phải dạng viết tắt.
- **Sai:** `"model": "free"` → OpenRouter nhận được `free` và từ chối.
- **Đúng:** `"model": "openrouter/free"` → OpenRouter nhận được `openrouter/free` (định tuyến tự động tầng miễn phí).
**Cách sửa:** Trong `~/.picoclaw/config.json` (hoặc đường dẫn cấu hình của bạn):
1. **agents.defaults.model** phải khớp với một `model_name` trong `model_list` (ví dụ: `"openrouter-free"`).
2. **model** của mục đó phải là ID mô hình OpenRouter hợp lệ, ví dụ:
- `"openrouter/free"` tầng miễn phí tự động
- `"google/gemini-2.0-flash-exp:free"`
- `"meta-llama/llama-3.1-8b-instruct:free"`
Ví dụ:
```json
{
"agents": {
"defaults": {
"model": "openrouter-free"
}
},
"model_list": [
{
"model_name": "openrouter-free",
"model": "openrouter/free",
"api_key": "sk-or-v1-YOUR_OPENROUTER_KEY",
"api_base": "https://openrouter.ai/api/v1"
}
]
}
```
Lấy khóa của bạn tại [OpenRouter Keys](https://openrouter.ai/keys).
+574
View File
@@ -0,0 +1,574 @@
# 💬 聊天应用配置
> 返回 [README](../../README.zh.md)
## 💬 聊天应用集成 (Chat Apps)
PicoClaw 支持多种聊天平台,使您的 Agent 能够连接到任何地方。
> **注意**: 所有 Webhook 类渠道(LINE、WeCom 等)均挂载在同一个 Gateway HTTP 服务器上(`gateway.host`:`gateway.port`,默认 `127.0.0.1:18790`),无需为每个渠道单独配置端口。注意:飞书(Feishu)使用 WebSocket/SDK 模式,不通过该共享 HTTP webhook 服务器接收消息。
### 核心渠道
| 渠道 | 设置难度 | 特性说明 | 文档链接 |
| -------------------- | ----------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------- |
| **Telegram** | ⭐ 简单 | 推荐,支持语音转文字,长轮询无需公网 | [查看文档](../channels/telegram/README.zh.md) |
| **Discord** | ⭐ 简单 | Socket Mode,支持群组/私信,Bot 生态成熟 | [查看文档](../channels/discord/README.zh.md) |
| **WhatsApp** | ⭐ 简单 | 原生 (QR 扫码) 或 Bridge URL | [查看文档](../channels/whatsapp/README.zh.md) |
| **Slack** | ⭐ 简单 | **Socket Mode** (无需公网 IP),企业级支持 | [查看文档](../channels/slack/README.zh.md) |
| **Matrix** | ⭐⭐ 中等 | 联邦协议,支持自建 homeserver 与公开服务器 | [查看文档](../channels/matrix/README.zh.md) |
| **QQ** | ⭐⭐ 中等 | 官方机器人 API,适合国内社群 | [查看文档](../channels/qq/README.zh.md) |
| **钉钉 (DingTalk)** | ⭐⭐ 中等 | Stream 模式无需公网,企业办公首选 | [查看文档](../channels/dingtalk/README.zh.md) |
| **LINE** | ⭐⭐⭐ 较难 | 需要 HTTPS Webhook | [查看文档](../channels/line/README.zh.md) |
| **企业微信 (WeCom)** | ⭐⭐⭐ 较难 | 支持群机器人(Webhook)、自建应用(API)和智能机器人(AI Bot) | [Bot 文档](../channels/wecom/wecom_bot/README.zh.md) / [App 文档](../channels/wecom/wecom_app/README.zh.md) / [AI Bot 文档](../channels/wecom/wecom_aibot/README.zh.md) |
| **飞书 (Feishu)** | ⭐⭐⭐ 较难 | 企业级协作,功能丰富 | [查看文档](../channels/feishu/README.zh.md) |
| **IRC** | ⭐⭐ 中等 | 服务器 + TLS 配置 | - |
| **OneBot** | ⭐⭐ 中等 | 兼容 NapCat/Go-CQHTTP,社区生态丰富 | [查看文档](../channels/onebot/README.zh.md) |
| **MaixCam** | ⭐ 简单 | 专为 AI 摄像头设计的硬件集成通道 | [查看文档](../channels/maixcam/README.zh.md) |
| **Pico** | ⭐ 简单 | PicoClaw 原生协议通道 | |
---
<details>
<summary><b>Telegram</b>(推荐)</summary>
**1. 创建 Bot**
* 打开 Telegram,搜索 `@BotFather`
* 发送 `/newbot`,按提示操作
* 复制 Token
**2. 配置**
```json
{
"channels": {
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
> 通过 Telegram 上的 `@userinfobot` 获取你的 User ID。
**3. 运行**
```bash
picoclaw gateway
```
**4. Telegram 命令菜单(启动时自动注册)**
PicoClaw 使用统一的命令定义来源。启动时会自动将 Telegram 支持的命令(例如 `/start`、`/help`、`/show`、`/list`)注册到 Bot 命令菜单,确保菜单展示与实际行为一致。
Telegram 侧保留的是命令菜单注册能力;通用命令的实际执行统一走 Agent Loop 中的 commands executor。
如果注册因网络或 API 短暂异常失败,不会阻塞 channel 启动;系统会在后台自动重试。
</details>
<details>
<summary><b>Discord</b></summary>
**1. 创建 Bot**
* 前往 <https://discord.com/developers/applications>
* 创建应用 → Bot → 添加 Bot
* 复制 Bot Token
**2. 启用 Intents**
* 在 Bot 设置中启用 **MESSAGE CONTENT INTENT**
* (可选)启用 **SERVER MEMBERS INTENT**(如需基于成员数据的白名单)
**3. 获取 User ID**
* Discord 设置 → 高级 → 启用 **开发者模式**
* 右键点击头像 → **复制用户 ID**
**4. 配置**
```json
{
"channels": {
"discord": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
}
}
}
```
**5. 邀请 Bot**
* OAuth2 → URL Generator
* Scopes: `bot`
* Bot Permissions: `Send Messages`, `Read Message History`
* 打开生成的邀请链接,将 Bot 添加到服务器
**可选:群组触发模式**
默认情况下 Bot 会回复服务器频道中的所有消息。如需仅在 @提及时回复:
```json
{
"channels": {
"discord": {
"group_trigger": { "mention_only": true }
}
}
}
```
也可通过关键词前缀触发(如 `!bot`):
```json
{
"channels": {
"discord": {
"group_trigger": { "prefixes": ["!bot"] }
}
}
}
```
**6. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>WhatsApp</b>(原生 whatsmeow)</summary>
PicoClaw 支持两种 WhatsApp 连接方式:
- **原生(推荐):** 进程内使用 [whatsmeow](https://github.com/tulir/whatsmeow),无需独立 Bridge。设置 `"use_native": true` 并留空 `bridge_url`。首次运行时用 WhatsApp 扫描 QR 码(关联设备)。会话存储在工作区下(如 `workspace/whatsapp/`)。原生渠道为**可选**构建,使用 `-tags whatsapp_native` 编译(如 `make build-whatsapp-native` 或 `go build -tags whatsapp_native ./cmd/...`)。
- **Bridge:** 连接外部 WebSocket Bridge。设置 `bridge_url`(如 `ws://localhost:3001`),保持 `use_native` 为 false。
**配置(原生)**
```json
{
"channels": {
"whatsapp": {
"enabled": true,
"use_native": true,
"session_store_path": "",
"allow_from": []
}
}
}
```
如果 `session_store_path` 为空,会话存储在 `<workspace>/whatsapp/`。运行 `picoclaw gateway`;首次运行时在终端扫描 QR 码(WhatsApp → 关联设备)。
</details>
<details>
<summary><b>Matrix</b></summary>
**1. 准备 Bot 账号**
* 使用你的 homeserver(如 `https://matrix.org` 或自建)
* 创建 Bot 用户并获取 access token
**2. 配置**
```json
{
"channels": {
"matrix": {
"enabled": true,
"homeserver": "https://matrix.org",
"user_id": "@your-bot:matrix.org",
"access_token": "YOUR_MATRIX_ACCESS_TOKEN",
"allow_from": []
}
}
}
```
**3. 运行**
```bash
picoclaw gateway
```
完整选项(`device_id`、`join_on_invite`、`group_trigger`、`placeholder`、`reasoning_channel_id`)请参考 [Matrix 渠道配置指南](../channels/matrix/README.md)。
</details>
<details>
<summary><b>QQ</b></summary>
**1. 创建 Bot**
- 前往 [QQ 开放平台](https://q.qq.com/#)
- 创建应用 → 获取 **AppID** 和 **AppSecret**
**2. 配置**
```json
{
"channels": {
"qq": {
"enabled": true,
"app_id": "YOUR_APP_ID",
"app_secret": "YOUR_APP_SECRET",
"allow_from": []
}
}
}
```
> `allow_from` 留空表示允许所有用户,或指定 QQ 号限制访问。
**3. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>Slack</b></summary>
**1. 创建 Slack App**
* 前往 [Slack API](https://api.slack.com/apps) 创建应用
* 启用 **Socket Mode**
* 获取 **Bot Token** 和 **App-Level Token**
**2. 配置**
```json
{
"channels": {
"slack": {
"enabled": true,
"bot_token": "xoxb-YOUR_BOT_TOKEN",
"app_token": "xapp-YOUR_APP_TOKEN",
"allow_from": []
}
}
}
```
**3. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>IRC</b></summary>
**1. 配置**
```json
{
"channels": {
"irc": {
"enabled": true,
"server": "irc.libera.chat:6697",
"nick": "picoclaw-bot",
"use_tls": true,
"channels_to_join": ["#your-channel"],
"allow_from": []
}
}
}
```
**2. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>钉钉 (DingTalk)</b></summary>
**1. 创建 Bot**
* 前往 [开放平台](https://open.dingtalk.com/)
* 创建内部应用
* 复制 Client ID 和 Client Secret
**2. 配置**
```json
{
"channels": {
"dingtalk": {
"enabled": true,
"client_id": "YOUR_CLIENT_ID",
"client_secret": "YOUR_CLIENT_SECRET",
"allow_from": []
}
}
}
```
> `allow_from` 留空表示允许所有用户,或指定钉钉用户 ID 限制访问。
**3. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>LINE</b></summary>
**1. 创建 LINE Official Account**
- 前往 [LINE Developers Console](https://developers.line.biz/)
- 创建 Provider → 创建 Messaging API Channel
- 复制 **Channel Secret** 和 **Channel Access Token**
**2. 配置**
```json
{
"channels": {
"line": {
"enabled": true,
"channel_secret": "YOUR_CHANNEL_SECRET",
"channel_access_token": "YOUR_CHANNEL_ACCESS_TOKEN",
"webhook_path": "/webhook/line",
"allow_from": []
}
}
}
```
> LINE Webhook 挂载在共享 Gateway 服务器上(`gateway.host`:`gateway.port`,默认 `127.0.0.1:18790`)。
**3. 设置 Webhook URL**
LINE 要求 HTTPS Webhook。使用反向代理或隧道:
```bash
# 示例:使用 ngrok(Gateway 默认端口 18790)
ngrok http 18790
```
然后在 LINE Developers Console 中将 Webhook URL 设置为 `https://your-domain/webhook/line` 并启用 **Use webhook**。
**4. 运行**
```bash
picoclaw gateway
```
> 在群聊中,Bot 仅在被 @提及时回复。回复会引用原始消息。
</details>
<details>
<summary><b>飞书 (Feishu)</b></summary>
**1. 创建应用**
* 前往 [飞书开放平台](https://open.feishu.cn/)
* 创建企业自建应用
* 获取 **App ID** 和 **App Secret**
**2. 配置**
```json
{
"channels": {
"feishu": {
"enabled": true,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
}
}
}
```
**3. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>企业微信 (WeCom)</b></summary>
PicoClaw 支持三种企业微信集成方式:
**方式 1: 群机器人 (Bot)** — 设置简单,支持群聊
**方式 2: 自建应用 (App)** — 功能更多,支持主动推送,仅私聊
**方式 3: 智能机器人 (AI Bot)** — 官方 AI Bot,流式回复,支持群聊和私聊
详细设置请参考 [企业微信 AI Bot 配置指南](../channels/wecom/wecom_aibot/README.zh.md)。
**快速设置 — 群机器人:**
**1. 创建 Bot**
* 企业微信管理后台 → 群聊 → 添加群机器人
* 复制 Webhook URL(格式:`https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx`)
**2. 配置**
```json
{
"channels": {
"wecom": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_url": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY",
"webhook_path": "/webhook/wecom",
"allow_from": []
}
}
}
```
> WeCom Webhook 挂载在共享 Gateway 服务器上(`gateway.host`:`gateway.port`,默认 `127.0.0.1:18790`)。
**快速设置 — 自建应用:**
**1. 创建应用**
* 企业微信管理后台 → 应用管理 → 创建应用
* 复制 **AgentId** 和 **Secret**
* 前往"我的企业"页面,复制 **CorpID**
**2. 配置接收消息**
* 在应用详情中,点击"接收消息" → "设置 API"
* 设置 URL 为 `http://your-server:18790/webhook/wecom-app`
* 生成 **Token** 和 **EncodingAESKey**
**3. 配置**
```json
{
"channels": {
"wecom_app": {
"enabled": true,
"corp_id": "wwxxxxxxxxxxxxxxxx",
"corp_secret": "YOUR_CORP_SECRET",
"agent_id": 1000002,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-app",
"allow_from": []
}
}
}
```
**4. 运行**
```bash
picoclaw gateway
```
> **注意**: WeCom Webhook 回调挂载在 Gateway 端口(默认 18790)。使用反向代理配置 HTTPS。
**快速设置 — 智能机器人 (AI Bot):**
**1. 创建 AI Bot**
* 企业微信管理后台 → 应用管理 → AI Bot
* 在 AI Bot 设置中配置回调 URL:`http://your-server:18790/webhook/wecom-aibot`
* 复制 **Token** 并点击"随机生成" **EncodingAESKey**
**2. 配置**
```json
{
"channels": {
"wecom_aibot": {
"enabled": true,
"token": "YOUR_TOKEN",
"encoding_aes_key": "YOUR_43_CHAR_ENCODING_AES_KEY",
"webhook_path": "/webhook/wecom-aibot",
"allow_from": [],
"welcome_message": "你好!有什么可以帮你的?"
}
}
}
```
**3. 运行**
```bash
picoclaw gateway
```
> **注意**: 企业微信 AI Bot 使用流式拉取协议,无回复超时问题。长任务(>30 秒)会自动切换到 `response_url` 推送投递。
</details>
<details>
<summary><b>OneBot</b></summary>
**1. 配置**
兼容 NapCat / Go-CQHTTP 等 OneBot 实现。
```json
{
"channels": {
"onebot": {
"enabled": true,
"allow_from": []
}
}
}
```
**2. 运行**
```bash
picoclaw gateway
```
</details>
<details>
<summary><b>MaixCam</b></summary>
专为 Sipeed AI 摄像头硬件设计的集成通道。
```json
{
"channels": {
"maixcam": {
"enabled": true
}
}
}
```
```bash
picoclaw gateway
```
</details>
+256
View File
@@ -0,0 +1,256 @@
# ⚙️ 配置指南
> 返回 [README](../../README.zh.md)
## ⚙️ 配置详解
配置文件路径: `~/.picoclaw/config.json`
### 环境变量
你可以使用环境变量覆盖默认路径。这对于便携安装、容器化部署或将 picoclaw 作为系统服务运行非常有用。这些变量是独立的,控制不同的路径。
| 变量 | 描述 | 默认路径 |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| `PICOCLAW_CONFIG` | 覆盖配置文件的路径。这直接告诉 picoclaw 加载哪个 `config.json`,忽略所有其他位置。 | `~/.picoclaw/config.json` |
| `PICOCLAW_HOME` | 覆盖 picoclaw 数据根目录。这会更改 `workspace` 和其他数据目录的默认位置。 | `~/.picoclaw` |
**示例:**
```bash
# 使用特定的配置文件运行 picoclaw
# 工作区路径将从该配置文件中读取
PICOCLAW_CONFIG=/etc/picoclaw/production.json picoclaw gateway
# 在 /opt/picoclaw 中存储所有数据运行 picoclaw
# 配置将从默认的 ~/.picoclaw/config.json 加载
# 工作区将在 /opt/picoclaw/workspace 创建
PICOCLAW_HOME=/opt/picoclaw picoclaw agent
# 同时使用两者进行完全自定义设置
PICOCLAW_HOME=/srv/picoclaw PICOCLAW_CONFIG=/srv/picoclaw/main.json picoclaw gateway
```
### 工作区布局 (Workspace Layout)
PicoClaw 将数据存储在您配置的工作区中(默认:`~/.picoclaw/workspace`):
```
~/.picoclaw/workspace/
├── sessions/ # 对话会话和历史
├── memory/ # 长期记忆 (MEMORY.md)
├── state/ # 持久化状态 (最后一次频道等)
├── cron/ # 定时任务数据库
├── skills/ # 自定义技能
├── AGENTS.md # Agent 行为指南
├── HEARTBEAT.md # 周期性任务提示词 (每 30 分钟检查一次)
├── IDENTITY.md # Agent 身份设定
├── SOUL.md # Agent 灵魂/性格
└── USER.md # 用户偏好
```
### 技能来源 (Skill Sources)
默认情况下,技能会按以下顺序加载:
1. `~/.picoclaw/workspace/skills`(工作区)
2. `~/.picoclaw/skills`(全局)
3. `<current-working-directory>/skills`(内置)
在高级/测试场景下,可通过以下环境变量覆盖内置技能目录:
```bash
export PICOCLAW_BUILTIN_SKILLS=/path/to/skills
```
### 统一命令执行策略
- 通用斜杠命令通过 `pkg/agent/loop.go` 中的 `commands.Executor` 统一执行。
- Channel 适配器不再在本地消费通用命令;它们只负责把入站文本转发到 bus/agent 路径。Telegram 仍会在启动时自动注册其支持的命令菜单。
- 未注册的斜杠命令(例如 `/foo`)会透传给 LLM 按普通输入处理。
- 已注册但当前 channel 不支持的命令(例如 WhatsApp 上的 `/show`)会返回明确的用户可见错误,并停止后续处理。
### 🔒 安全沙箱 (Security Sandbox)
PicoClaw 默认在沙箱环境中运行。Agent 只能访问配置的工作区内的文件和执行命令。
#### 默认配置
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"restrict_to_workspace": true
}
}
}
```
| 选项 | 默认值 | 描述 |
| ----------------------- | ----------------------- | ----------------------------- |
| `workspace` | `~/.picoclaw/workspace` | Agent 的工作目录 |
| `restrict_to_workspace` | `true` | 限制文件/命令访问在工作区内 |
#### 受保护的工具
当 `restrict_to_workspace: true` 时,以下工具会被沙箱化:
| 工具 | 功能 | 限制 |
| ------------- | ------------ | ------------------------------ |
| `read_file` | 读取文件 | 仅限工作区内的文件 |
| `write_file` | 写入文件 | 仅限工作区内的文件 |
| `list_dir` | 列出目录 | 仅限工作区内的目录 |
| `edit_file` | 编辑文件 | 仅限工作区内的文件 |
| `append_file` | 追加文件 | 仅限工作区内的文件 |
| `exec` | 执行命令 | 命令路径必须在工作区内 |
#### 额外的 Exec 保护
即使 `restrict_to_workspace: false`,`exec` 工具也会阻止以下危险命令:
* `rm -rf`、`del /f`、`rmdir /s` — 批量删除
* `format`、`mkfs`、`diskpart` — 磁盘格式化
* `dd if=` — 磁盘镜像
* 写入 `/dev/sd[a-z]` — 直接磁盘写入
* `shutdown`、`reboot`、`poweroff` — 系统关机
* Fork bomb `:(){ :|:& };:`
### 文件访问控制
| 配置键 | 类型 | 默认值 | 描述 |
|--------|------|--------|------|
| `tools.allow_read_paths` | string[] | `[]` | 允许在工作区外读取的额外路径 |
| `tools.allow_write_paths` | string[] | `[]` | 允许在工作区外写入的额外路径 |
### Exec 安全配置
| 配置键 | 类型 | 默认值 | 描述 |
|--------|------|--------|------|
| `tools.exec.allow_remote` | bool | `false` | 允许从远程渠道(Telegram/Discord 等)执行 exec 工具 |
| `tools.exec.enable_deny_patterns` | bool | `true` | 启用危险命令拦截 |
| `tools.exec.custom_deny_patterns` | string[] | `[]` | 自定义阻止的正则表达式模式 |
| `tools.exec.custom_allow_patterns` | string[] | `[]` | 自定义允许的正则表达式模式 |
> **安全提示:** Symlink 保护默认启用——所有文件路径在白名单匹配前都会通过 `filepath.EvalSymlinks` 解析,防止符号链接逃逸攻击。
#### 已知限制:构建工具的子进程
exec 安全守卫仅检查 PicoClaw 直接启动的命令行。它不会递归检查由 `make`、`go run`、`cargo`、`npm run` 或自定义构建脚本等开发工具产生的子进程。
这意味着顶层命令通过初始守卫检查后,仍可以编译或启动其他二进制文件。实际上,应将构建脚本、Makefile、包脚本和生成的二进制文件视为与直接 shell 命令同等级别的可执行代码进行审查。
对于高风险环境:
* 执行前审查构建脚本。
* 对编译并运行的工作流优先使用审批/手动审查。
* 如果需要比内置守卫更强的隔离,请在容器或虚拟机中运行 PicoClaw。
#### 错误示例
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (path outside working dir)}
```
```
[ERROR] tool: Tool execution failed
{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)}
```
#### 禁用限制(安全风险)
如果需要 Agent 访问工作区外的路径:
**方法 1: 配置文件**
```json
{
"agents": {
"defaults": {
"restrict_to_workspace": false
}
}
}
```
**方法 2: 环境变量**
```bash
export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false
```
> ⚠️ **警告**: 禁用此限制将允许 Agent 访问系统上的任何路径。仅在受控环境中谨慎使用。
#### 安全边界一致性
`restrict_to_workspace` 设置在所有执行路径中一致应用:
| 执行路径 | 安全边界 |
| ---------------- | ---------------------------- |
| 主 Agent | `restrict_to_workspace` ✅ |
| 子 Agent / Spawn | 继承相同限制 ✅ |
| 心跳任务 | 继承相同限制 ✅ |
所有路径共享相同的工作区限制——无法通过子 Agent 或定时任务绕过安全边界。
### 心跳 / 周期性任务 (Heartbeat)
PicoClaw 可以自动执行周期性任务。在工作区创建 `HEARTBEAT.md` 文件:
```markdown
# Periodic Tasks
- Check my email for important messages
- Review my calendar for upcoming events
- Check the weather forecast
```
Agent 将每隔 30 分钟(可配置)读取此文件,并使用可用工具执行任务。
#### 使用 Spawn 的异步任务
对于耗时较长的任务(网络搜索、API 调用),使用 `spawn` 工具创建一个 **子 Agent (subagent)**:
```markdown
# Periodic Tasks
## Quick Tasks (respond directly)
- Report current time
## Long Tasks (use spawn for async)
- Search the web for AI news and summarize
- Check email and report important messages
```
**关键行为:**
| 特性 | 描述 |
| ---------------- | ---------------------------------------- |
| **spawn** | 创建异步子 Agent,不阻塞主心跳进程 |
| **独立上下文** | 子 Agent 拥有独立上下文,无会话历史 |
| **message tool** | 子 Agent 通过 message 工具直接与用户通信 |
| **非阻塞** | spawn 后,心跳继续处理下一个任务 |
**配置:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| 选项 | 默认值 | 描述 |
| ---------- | ------ | ---------------------------- |
| `enabled` | `true` | 启用/禁用心跳 |
| `interval` | `30` | 检查间隔,单位分钟 (最小: 5) |
**环境变量:**
- `PICOCLAW_HEARTBEAT_ENABLED=false` 禁用
- `PICOCLAW_HEARTBEAT_INTERVAL=60` 更改间隔
+168
View File
@@ -0,0 +1,168 @@
# 🐳 Docker 与快速开始
> 返回 [README](../../README.zh.md)
## 🐳 Docker Compose
您也可以使用 Docker Compose 运行 PicoClaw,无需在本地安装任何环境。
```bash
# 1. 克隆仓库
git clone https://github.com/sipeed/picoclaw.git
cd picoclaw
# 2. 首次运行 — 自动生成 docker/data/config.json 后退出
docker compose -f docker/docker-compose.yml --profile gateway up
# 容器打印 "First-run setup complete." 后自动停止
# 3. 填写 API Key 等配置
vim docker/data/config.json # 设置 provider API key、Bot Token 等
# 4. 正式启动
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
> [!TIP]
> **Docker 用户**: 默认情况下, Gateway 监听 `127.0.0.1`,该端口不会暴露到容器外。如果需要通过端口映射访问健康检查接口,请在环境变量中设置 `PICOCLAW_GATEWAY_HOST=0.0.0.0` 或修改 `config.json`。
```bash
# 5. 查看日志
docker compose -f docker/docker-compose.yml logs -f picoclaw-gateway
# 6. 停止
docker compose -f docker/docker-compose.yml --profile gateway down
```
### Launcher 模式 (Web 控制台)
`launcher` 镜像包含所有三个二进制文件(`picoclaw`、`picoclaw-launcher`、`picoclaw-launcher-tui`),默认启动 Web 控制台,提供基于浏览器的配置和聊天界面。
```bash
docker compose -f docker/docker-compose.yml --profile launcher up -d
```
在浏览器中打开 <http://localhost:18800>。Launcher 会自动管理 Gateway 进程。
> [!WARNING]
> Web 控制台尚不支持身份验证。请勿将其暴露到公网。
### Agent 模式 (一次性运行)
```bash
# 提问
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent -m "2+2 等于几?"
# 交互模式
docker compose -f docker/docker-compose.yml run --rm picoclaw-agent
```
### 更新镜像
```bash
docker compose -f docker/docker-compose.yml pull
docker compose -f docker/docker-compose.yml --profile gateway up -d
```
---
## 🚀 快速开始
> [!TIP]
> 在 `~/.picoclaw/config.json` 中设置您的 API Key。获取 API Key: [火山引擎 (CodingPlan)](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) (LLM) · [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu (智谱)](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM)。网络搜索是 **可选的** — 获取免费的 [Tavily API](https://tavily.com) (每月 1000 次免费查询) 或 [Brave Search API](https://brave.com/search/api) (每月 2000 次免费查询)。
**1. 初始化 (Initialize)**
```bash
picoclaw onboard
```
**2. 配置 (Configure)** (`~/.picoclaw/config.json`)
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model_name": "gpt-5.4",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key",
"api_base":"https://ark.cn-beijing.volces.com/api/coding/v3"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "your-api-key",
"request_timeout": 300
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "your-anthropic-key"
}
],
"tools": {
"web": {
"enabled": true,
"fetch_limit_bytes": 10485760,
"format": "plaintext",
"brave": {
"enabled": false,
"api_key": "YOUR_BRAVE_API_KEY",
"max_results": 5
},
"tavily": {
"enabled": false,
"api_key": "YOUR_TAVILY_API_KEY",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "YOUR_PERPLEXITY_API_KEY",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://your-searxng-instance:8888",
"max_results": 5
}
}
}
}
```
> **新功能**: `model_list` 配置格式支持零代码添加 provider。详见[模型配置](providers.md#模型配置-model_list)章节。
> `request_timeout` 为可选项,单位为秒。若省略或设置为 `<= 0`,PicoClaw 使用默认超时(120 秒)。
**3. 获取 API Key**
* **LLM 提供商**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys)
* **网络搜索** (可选):
* [Brave Search](https://brave.com/search/api) - 付费 ($5/1000 次查询,约 $5-6/月)
* [Perplexity](https://www.perplexity.ai) - AI 驱动的搜索与聊天界面
* [SearXNG](https://github.com/searxng/searxng) - 自建元搜索引擎(免费,无需 API Key)
* [Tavily](https://tavily.com) - 专为 AI Agent 优化 (1000 请求/月)
* DuckDuckGo - 内置回退(无需 API Key)
> **注意**: 完整的配置模板请参考 `config.example.json`。
**4. 对话 (Chat)**
```bash
picoclaw agent -m "2+2 等于几?"
```
就是这样!您在 2 分钟内就拥有了一个可工作的 AI 助手。
---
+428
View File
@@ -0,0 +1,428 @@
# 🔌 提供商与模型配置
> 返回 [README](../../README.zh.md)
### 提供商 (Providers)
> [!NOTE]
> Groq 通过 Whisper 提供免费的语音转录。如果配置了 Groq,任意渠道的音频消息都将在 Agent 层面自动转录为文字。
| 提供商 | 用途 | 获取 API Key |
| -------------------- | ---------------------------- | -------------------------------------------------------------------- |
| `gemini` | LLM (Gemini 直连) | [aistudio.google.com](https://aistudio.google.com) |
| `zhipu` | LLM (智谱直连) | [bigmodel.cn](https://bigmodel.cn) |
| `volcengine` | LLM (火山引擎直连) | [volcengine.com](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| `openrouter` | LLM (推荐,可访问所有模型) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude 直连) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT 直连) | [platform.openai.com](https://platform.openai.com) |
| `deepseek` | LLM (DeepSeek 直连) | [platform.deepseek.com](https://platform.deepseek.com) |
| `qwen` | LLM (通义千问) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
| `groq` | LLM + **语音转录** (Whisper) | [console.groq.com](https://console.groq.com) |
| `cerebras` | LLM (Cerebras 直连) | [cerebras.ai](https://cerebras.ai) |
| `vivgrid` | LLM (Vivgrid 直连) | [vivgrid.com](https://vivgrid.com) |
| `moonshot` | LLM (Kimi/Moonshot 直连) | [platform.moonshot.cn](https://platform.moonshot.cn) |
| `minimax` | LLM (Minimax 直连) | [platform.minimaxi.com](https://platform.minimaxi.com) |
| `avian` | LLM (Avian 直连) | [avian.io](https://avian.io) |
| `mistral` | LLM (Mistral 直连) | [console.mistral.ai](https://console.mistral.ai) |
| `longcat` | LLM (Longcat 直连) | [longcat.ai](https://longcat.ai) |
| `modelscope` | LLM (ModelScope 直连) | [modelscope.cn](https://modelscope.cn) |
### 模型配置 (model_list)
> **新功能!** PicoClaw 现在采用**以模型为中心**的配置方式。只需使用 `厂商/模型` 格式(如 `zhipu/glm-4.7`)即可添加新的 provider——**无需修改任何代码!**
该设计同时支持**多 Agent 场景**,提供灵活的 Provider 选择:
- **不同 Agent 使用不同 Provider**:每个 Agent 可以使用自己的 LLM provider
- **模型回退(Fallback)**:配置主模型和备用模型,提高可靠性
- **负载均衡**:在多个 API 端点之间分配请求
- **集中化配置**:在一个地方管理所有 provider
#### 📋 所有支持的厂商
| 厂商 | `model` 前缀 | 默认 API Base | 协议 | 获取 API Key |
| ------------------- | ----------------- | --------------------------------------------------- | --------- | ----------------------------------------------------------------- |
| **OpenAI** | `openai/` | `https://api.openai.com/v1` | OpenAI | [获取密钥](https://platform.openai.com) |
| **Anthropic** | `anthropic/` | `https://api.anthropic.com/v1` | Anthropic | [获取密钥](https://console.anthropic.com) |
| **智谱 AI (GLM)** | `zhipu/` | `https://open.bigmodel.cn/api/paas/v4` | OpenAI | [获取密钥](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| **DeepSeek** | `deepseek/` | `https://api.deepseek.com/v1` | OpenAI | [获取密钥](https://platform.deepseek.com) |
| **Google Gemini** | `gemini/` | `https://generativelanguage.googleapis.com/v1beta` | OpenAI | [获取密钥](https://aistudio.google.com/api-keys) |
| **Groq** | `groq/` | `https://api.groq.com/openai/v1` | OpenAI | [获取密钥](https://console.groq.com) |
| **Moonshot** | `moonshot/` | `https://api.moonshot.cn/v1` | OpenAI | [获取密钥](https://platform.moonshot.cn) |
| **通义千问 (Qwen)** | `qwen/` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | OpenAI | [获取密钥](https://dashscope.console.aliyun.com) |
| **NVIDIA** | `nvidia/` | `https://integrate.api.nvidia.com/v1` | OpenAI | [获取密钥](https://build.nvidia.com) |
| **Ollama** | `ollama/` | `http://localhost:11434/v1` | OpenAI | 本地(无需密钥) |
| **OpenRouter** | `openrouter/` | `https://openrouter.ai/api/v1` | OpenAI | [获取密钥](https://openrouter.ai/keys) |
| **LiteLLM Proxy** | `litellm/` | `http://localhost:4000/v1` | OpenAI | 你的 LiteLLM 代理密钥 |
| **VLLM** | `vllm/` | `http://localhost:8000/v1` | OpenAI | 本地 |
| **Cerebras** | `cerebras/` | `https://api.cerebras.ai/v1` | OpenAI | [获取密钥](https://cerebras.ai) |
| **火山引擎(Doubao)** | `volcengine/` | `https://ark.cn-beijing.volces.com/api/v3` | OpenAI | [获取密钥](https://www.volcengine.com/activity/codingplan?utm_campaign=PicoClaw&utm_content=PicoClaw&utm_medium=devrel&utm_source=OWO&utm_term=PicoClaw) |
| **神算云** | `shengsuanyun/` | `https://router.shengsuanyun.com/api/v1` | OpenAI | - |
| **BytePlus** | `byteplus/` | `https://ark.ap-southeast.bytepluses.com/api/v3` | OpenAI | [获取密钥](https://www.byteplus.com) |
| **Vivgrid** | `vivgrid/` | `https://api.vivgrid.com/v1` | OpenAI | [获取密钥](https://vivgrid.com) |
| **LongCat** | `longcat/` | `https://api.longcat.chat/openai` | OpenAI | [获取密钥](https://longcat.chat/platform) |
| **ModelScope (魔搭)**| `modelscope/` | `https://api-inference.modelscope.cn/v1` | OpenAI | [获取 Token](https://modelscope.cn/my/tokens) |
| **Antigravity** | `antigravity/` | Google Cloud | 自定义 | 仅 OAuth |
| **GitHub Copilot** | `github-copilot/` | `localhost:4321` | gRPC | - |
#### 基础配置示例
```json
{
"model_list": [
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-your-api-key"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-your-openai-key"
},
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key"
},
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-zhipu-key"
}
],
"agents": {
"defaults": {
"model": "gpt-5.4"
}
}
}
```
#### 各厂商配置示例
**OpenAI**
```json
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_key": "sk-..."
}
```
**火山引擎(Doubao)**
```json
{
"model_name": "ark-code-latest",
"model": "volcengine/ark-code-latest",
"api_key": "sk-..."
}
```
**智谱 AI (GLM)**
```json
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
```
**DeepSeek**
```json
{
"model_name": "deepseek-chat",
"model": "deepseek/deepseek-chat",
"api_key": "sk-..."
}
```
**Anthropic (使用 OAuth)**
```json
{
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"auth_method": "oauth"
}
```
> 运行 `picoclaw auth login --provider anthropic` 来设置 OAuth 凭证。
**Anthropic Messages API(原生格式)**
用于直接访问 Anthropic API 或仅支持 Anthropic 原生消息格式的自定义端点:
```json
{
"model_name": "claude-opus-4-6",
"model": "anthropic-messages/claude-opus-4-6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com"
}
```
> 使用 `anthropic-messages` 协议的场景:
> - 使用仅支持 Anthropic 原生 `/v1/messages` 端点的第三方代理(不支持 OpenAI 兼容的 `/v1/chat/completions`)
> - 连接到 MiniMax、Synthetic 等需要 Anthropic 原生消息格式的服务
> - 现有的 `anthropic` 协议返回 404 错误(说明端点不支持 OpenAI 兼容格式)
>
> **注意:** `anthropic` 协议使用 OpenAI 兼容格式(`/v1/chat/completions`),而 `anthropic-messages` 使用 Anthropic 原生格式(`/v1/messages`)。请根据端点支持的格式选择。
**Ollama (本地)**
```json
{
"model_name": "llama3",
"model": "ollama/llama3"
}
```
**自定义代理/API**
```json
{
"model_name": "my-custom-model",
"model": "openai/custom-model",
"api_base": "https://my-proxy.com/v1",
"api_key": "sk-...",
"request_timeout": 300
}
```
**LiteLLM Proxy**
```json
{
"model_name": "lite-gpt4",
"model": "litellm/lite-gpt4",
"api_base": "http://localhost:4000/v1",
"api_key": "sk-..."
}
```
PicoClaw 在发送请求前仅去除外层 `litellm/` 前缀,因此 `litellm/lite-gpt4` 会发送 `lite-gpt4`,而 `litellm/openai/gpt-4o` 会发送 `openai/gpt-4o`。
#### 负载均衡
为同一个模型名称配置多个端点——PicoClaw 会自动在它们之间轮询:
```json
{
"model_list": [
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api1.example.com/v1",
"api_key": "sk-key1"
},
{
"model_name": "gpt-5.4",
"model": "openai/gpt-5.4",
"api_base": "https://api2.example.com/v1",
"api_key": "sk-key2"
}
]
}
```
#### 从旧的 `providers` 配置迁移
旧的 `providers` 配置格式**已弃用**,但为向后兼容仍支持。
**旧配置(已弃用):**
```json
{
"providers": {
"zhipu": {
"api_key": "your-key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
},
"agents": {
"defaults": {
"provider": "zhipu",
"model": "glm-4.7"
}
}
}
```
**新配置(推荐):**
```json
{
"model_list": [
{
"model_name": "glm-4.7",
"model": "zhipu/glm-4.7",
"api_key": "your-key"
}
],
"agents": {
"defaults": {
"model": "glm-4.7"
}
}
}
```
详细的迁移指南请参考 [docs/migration/model-list-migration.md](../migration/model-list-migration.md)。
### Provider 架构
PicoClaw 按协议族路由 Provider:
- OpenAI 兼容协议:OpenRouter、OpenAI 兼容网关、Groq、智谱、vLLM 风格端点。
- Anthropic 协议:Claude 原生 API 行为。
- Codex/OAuth 路径:OpenAI OAuth/Token 认证路由。
这使得运行时保持轻量,同时让新的 OpenAI 兼容后端基本只需配置操作(`api_base` + `api_key`)。
<details>
<summary><b>智谱 (Zhipu) 配置示例</b></summary>
**1. 获取 API key 和 base URL**
- 获取 [API key](https://bigmodel.cn/usercenter/proj-mgmt/apikeys)
**2. 配置**
```json
{
"agents": {
"defaults": {
"workspace": "~/.picoclaw/workspace",
"model": "glm-4.7",
"max_tokens": 8192,
"temperature": 0.7,
"max_tool_iterations": 20
}
},
"providers": {
"zhipu": {
"api_key": "Your API Key",
"api_base": "https://open.bigmodel.cn/api/paas/v4"
}
}
}
```
**3. 运行**
```bash
picoclaw agent -m "你好"
```
</details>
<details>
<summary><b>完整配置示例</b></summary>
```json
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-5"
}
},
"session": {
"dm_scope": "per-channel-peer",
"backlog_limit": 20
},
"providers": {
"openrouter": {
"api_key": "sk-or-v1-xxx"
},
"groq": {
"api_key": "gsk_xxx"
}
},
"channels": {
"telegram": {
"enabled": true,
"token": "123456:ABC...",
"allow_from": ["123456789"]
},
"discord": {
"enabled": true,
"token": "",
"allow_from": [""]
},
"whatsapp": {
"enabled": false,
"bridge_url": "ws://localhost:3001",
"use_native": false,
"session_store_path": "",
"allow_from": []
},
"feishu": {
"enabled": false,
"app_id": "cli_xxx",
"app_secret": "xxx",
"encrypt_key": "",
"verification_token": "",
"allow_from": []
},
"qq": {
"enabled": false,
"app_id": "",
"app_secret": "",
"allow_from": []
}
},
"tools": {
"web": {
"brave": {
"enabled": false,
"api_key": "BSA...",
"max_results": 5
},
"duckduckgo": {
"enabled": true,
"max_results": 5
},
"perplexity": {
"enabled": false,
"api_key": "",
"max_results": 5
},
"searxng": {
"enabled": false,
"base_url": "http://localhost:8888",
"max_results": 5
}
},
"cron": {
"exec_timeout_minutes": 5
}
},
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
</details>
---
## 📝 API Key 对比
| 服务 | 价格 | 适用场景 |
| --- | --- | --- |
| **OpenRouter** | 免费: 200K tokens/月 | 多模型聚合 (Claude, GPT-4 等) |
| **火山引擎 CodingPlan** | ¥9.9/首月 | 最适合国内用户,多种 SOTA 模型(豆包、DeepSeek 等) |
| **智谱 (Zhipu)** | 免费: 200K tokens/月 | 适合中国用户 |
| **Brave Search** | $5/1000 次查询 | 网络搜索功能 |
| **SearXNG** | 免费(自建) | 隐私优先的元搜索引擎(70+ 搜索引擎) |
| **Groq** | 免费额度可用 | 极速推理 (Llama, Mixtral) |
| **Cerebras** | 免费额度可用 | 极速推理 (Llama, Qwen 等) |
| **LongCat** | 免费: 最多 5M tokens/天 | 极速推理 |
| **ModelScope (魔搭)** | 免费: 2000 次请求/天 | 推理 (Qwen, GLM, DeepSeek 等) |
+68
View File
@@ -0,0 +1,68 @@
# 🔄 异步任务与 Spawn
> 返回 [README](../../README.zh.md)
### 使用 Spawn 的异步任务
对于耗时较长的任务(网络搜索、API 调用),使用 `spawn` 工具创建一个 **子 Agent (subagent)**:
```markdown
# Periodic Tasks
## Quick Tasks (respond directly)
- Report current time
## Long Tasks (use spawn for async)
- Search the web for AI news and summarize
- Check email and report important messages
```
**关键行为:**
| 特性 | 描述 |
| ---------------- | ---------------------------------------- |
| **spawn** | 创建异步子 Agent,不阻塞主心跳进程 |
| **独立上下文** | 子 Agent 拥有独立上下文,无会话历史 |
| **message tool** | 子 Agent 通过 message 工具直接与用户通信 |
| **非阻塞** | spawn 后,心跳继续处理下一个任务 |
#### 子 Agent 通信原理
```
心跳触发 (Heartbeat triggers)
↓
Agent 读取 HEARTBEAT.md
↓
对于长任务: spawn 子 Agent
↓ ↓
继续下一个任务 子 Agent 独立工作
↓ ↓
所有任务完成 子 Agent 使用 "message" 工具
↓ ↓
响应 HEARTBEAT_OK 用户直接收到结果
```
子 Agent 可以访问工具(message, web_search 等),并且无需通过主 Agent 即可独立与用户通信。
**配置:**
```json
{
"heartbeat": {
"enabled": true,
"interval": 30
}
}
```
| 选项 | 默认值 | 描述 |
| ---------- | ------ | ---------------------------- |
| `enabled` | `true` | 启用/禁用心跳 |
| `interval` | `30` | 检查间隔,单位分钟 (最小: 5) |
**环境变量:**
- `PICOCLAW_HEARTBEAT_ENABLED=false` 禁用
- `PICOCLAW_HEARTBEAT_INTERVAL=60` 更改间隔
+336
View File
@@ -0,0 +1,336 @@
# 🔧 工具配置
> 返回 [README](../../README.zh.md)
PicoClaw 的工具配置位于 `config.json` 的 `tools` 字段中。
## 目录结构
```json
{
"tools": {
"web": {
...
},
"mcp": {
...
},
"exec": {
...
},
"cron": {
...
},
"skills": {
...
}
}
}
```
## Web 工具
Web 工具用于网页搜索和抓取。
### Web Fetcher
用于抓取和处理网页内容的通用设置。
| 配置项 | 类型 | 默认值 | 描述 |
|---------------------|--------|---------------|----------------------------------------------------------------------------------------|
| `enabled` | bool | true | 启用网页抓取功能。 |
| `fetch_limit_bytes` | int | 10485760 | 抓取网页负载的最大大小,单位为字节(默认 10MB)。 |
| `format` | string | "plaintext" | 抓取内容的输出格式。选项:`plaintext` 或 `markdown`(推荐)。 |
### Brave
| 配置项 | 类型 | 默认值 | 描述 |
|---------------|--------|--------|--------------------|
| `enabled` | bool | false | 启用 Brave 搜索 |
| `api_key` | string | - | Brave Search API 密钥 |
| `max_results` | int | 5 | 最大结果数 |
### DuckDuckGo
| 配置项 | 类型 | 默认值 | 描述 |
|---------------|------|--------|-----------------------|
| `enabled` | bool | true | 启用 DuckDuckGo 搜索 |
| `max_results` | int | 5 | 最大结果数 |
### Perplexity
| 配置项 | 类型 | 默认值 | 描述 |
|---------------|--------|--------|-----------------------|
| `enabled` | bool | false | 启用 Perplexity 搜索 |
| `api_key` | string | - | Perplexity API 密钥 |
| `max_results` | int | 5 | 最大结果数 |
## Exec 工具
Exec 工具用于执行 shell 命令。
| 配置项 | 类型 | 默认值 | 描述 |
|------------------------|-------|--------|--------------------------------|
| `enable_deny_patterns` | bool | true | 启用默认的危险命令拦截 |
| `custom_deny_patterns` | array | [] | 自定义拒绝模式(正则表达式) |
### 功能说明
- **`enable_deny_patterns`**:设为 `false` 可完全禁用默认的危险命令拦截模式
- **`custom_deny_patterns`**:添加自定义拒绝正则模式;匹配的命令将被拦截
### 默认拦截的命令模式
默认情况下,PicoClaw 会拦截以下危险命令:
- 删除命令:`rm -rf`、`del /f/q`、`rmdir /s`
- 磁盘操作:`format`、`mkfs`、`diskpart`、`dd if=`、写入 `/dev/sd*`
- 系统操作:`shutdown`、`reboot`、`poweroff`
- 命令替换:`$()`、`${}`、反引号
- 管道到 shell:`| sh`、`| bash`
- 权限提升:`sudo`、`chmod`、`chown`
- 进程控制:`pkill`、`killall`、`kill -9`
- 远程操作:`curl | sh`、`wget | sh`、`ssh`
- 包管理:`apt`、`yum`、`dnf`、`npm install -g`、`pip install --user`
- 容器:`docker run`、`docker exec`
- Git:`git push`、`git force`
- 其他:`eval`、`source *.sh`
### 已知架构限制
exec 守卫仅验证发送给 PicoClaw 的顶层命令。它**不会**递归检查该命令启动后由构建工具或脚本生成的子进程。
以下工作流在初始命令被允许后可以绕过直接命令守卫:
- `make run`
- `go run ./cmd/...`
- `cargo run`
- `npm run build`
这意味着守卫对于拦截明显危险的直接命令很有用,但它**不是**未审查构建管道的完整沙箱。如果你的威胁模型包括工作区中的不受信任代码,请使用更强的隔离措施,如容器、虚拟机或围绕构建和运行命令的审批流程。
### 配置示例
```json
{
"tools": {
"exec": {
"enable_deny_patterns": true,
"custom_deny_patterns": [
"\\brm\\s+-r\\b",
"\\bkillall\\s+python"
]
}
}
}
```
## Cron 工具
Cron 工具用于调度周期性任务。
| 配置项 | 类型 | 默认值 | 描述 |
|------------------------|------|--------|-------------------------------------|
| `exec_timeout_minutes` | int | 5 | 执行超时时间(分钟),0 表示无限制 |
## MCP 工具
MCP 工具支持与外部 Model Context Protocol 服务器集成。
### 工具发现(延迟加载)
当连接多个 MCP 服务器时,同时暴露数百个工具可能会耗尽 LLM 的上下文窗口并增加 API 成本。**Discovery** 功能通过默认*隐藏* MCP 工具来解决此问题。
LLM 不会加载所有工具,而是获得一个轻量级搜索工具(使用 BM25 关键词匹配或正则表达式)。当 LLM 需要特定功能时,它会搜索隐藏的工具库。匹配的工具随后被临时"解锁"并注入上下文中,持续配置的轮数(`ttl`)。
### 全局配置
| 配置项 | 类型 | 默认值 | 描述 |
|-------------|--------|--------|--------------------------------------|
| `enabled` | bool | false | 全局启用 MCP 集成 |
| `discovery` | object | `{}` | 工具发现配置(见下文) |
| `servers` | object | `{}` | 服务器名称到服务器配置的映射 |
### Discovery 配置(`discovery`)
| 配置项 | 类型 | 默认值 | 描述 |
|----------------------|------|--------|---------------------------------------------------------------------------------------------------------------|
| `enabled` | bool | false | 如果为 true,MCP 工具将被隐藏并按需通过搜索加载。如果为 false,所有工具都会被加载 |
| `ttl` | int | 5 | 已发现工具保持解锁状态的对话轮数 |
| `max_search_results` | int | 5 | 每次搜索查询返回的最大工具数 |
| `use_bm25` | bool | true | 启用自然语言/关键词搜索工具(`tool_search_tool_bm25`)。**警告**:比正则搜索消耗更多资源 |
| `use_regex` | bool | false | 启用正则模式搜索工具(`tool_search_tool_regex`) |
> **注意:** 如果 `discovery.enabled` 为 `true`,你**必须**启用至少一个搜索引擎(`use_bm25` 或 `use_regex`),
> 否则应用程序将无法启动。
### 单服务器配置
| 配置项 | 类型 | 必需 | 描述 |
|------------|--------|----------|------------------------------------|
| `enabled` | bool | 是 | 启用此 MCP 服务器 |
| `type` | string | 否 | 传输类型:`stdio`、`sse`、`http` |
| `command` | string | stdio | stdio 传输的可执行命令 |
| `args` | array | 否 | stdio 传输的命令参数 |
| `env` | object | 否 | stdio 进程的环境变量 |
| `env_file` | string | 否 | stdio 进程的环境文件路径 |
| `url` | string | sse/http | `sse`/`http` 传输的端点 URL |
| `headers` | object | 否 | `sse`/`http` 传输的 HTTP 头 |
### 传输行为
- 如果省略 `type`,传输方式将自动检测:
- 设置了 `url` → `sse`
- 设置了 `command` → `stdio`
- `http` 和 `sse` 都使用 `url` + 可选的 `headers`。
- `env` 和 `env_file` 仅应用于 `stdio` 服务器。
### 配置示例
#### 1) Stdio MCP 服务器
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"filesystem": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/tmp"
]
}
}
}
}
}
```
#### 2) 远程 SSE/HTTP MCP 服务器
```json
{
"tools": {
"mcp": {
"enabled": true,
"servers": {
"remote-mcp": {
"enabled": true,
"type": "sse",
"url": "https://example.com/mcp",
"headers": {
"Authorization": "Bearer YOUR_TOKEN"
}
}
}
}
}
}
```
#### 3) 启用工具发现的大规模 MCP 设置
*在此示例中,LLM 只会看到 `tool_search_tool_bm25`。它将仅在用户请求时动态搜索并解锁 Github 或 Postgres 工具。*
```json
{
"tools": {
"mcp": {
"enabled": true,
"discovery": {
"enabled": true,
"ttl": 5,
"max_search_results": 5,
"use_bm25": true,
"use_regex": false
},
"servers": {
"github": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-github"
],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
}
},
"postgres": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-postgres",
"postgresql://user:password@localhost/dbname"
]
},
"slack": {
"enabled": true,
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-slack"
],
"env": {
"SLACK_BOT_TOKEN": "YOUR_SLACK_BOT_TOKEN",
"SLACK_TEAM_ID": "YOUR_SLACK_TEAM_ID"
}
}
}
}
}
}
```
## Skills 工具
Skills 工具配置通过 ClawHub 等注册表进行技能发现和安装。
### 注册表
| 配置项 | 类型 | 默认值 | 描述 |
|------------------------------------|--------|----------------------|--------------------------------------|
| `registries.clawhub.enabled` | bool | true | 启用 ClawHub 注册表 |
| `registries.clawhub.base_url` | string | `https://clawhub.ai` | ClawHub 基础 URL |
| `registries.clawhub.auth_token` | string | `""` | 可选的 Bearer 令牌,用于更高速率限制 |
| `registries.clawhub.search_path` | string | `/api/v1/search` | 搜索 API 路径 |
| `registries.clawhub.skills_path` | string | `/api/v1/skills` | Skills API 路径 |
| `registries.clawhub.download_path` | string | `/api/v1/download` | 下载 API 路径 |
### 配置示例
```json
{
"tools": {
"skills": {
"registries": {
"clawhub": {
"enabled": true,
"base_url": "https://clawhub.ai",
"auth_token": "",
"search_path": "/api/v1/search",
"skills_path": "/api/v1/skills",
"download_path": "/api/v1/download"
}
}
}
}
}
```
## 环境变量
所有配置选项都可以通过格式为 `PICOCLAW_TOOLS_<SECTION>_<KEY>` 的环境变量覆盖:
例如:
- `PICOCLAW_TOOLS_WEB_BRAVE_ENABLED=true`
- `PICOCLAW_TOOLS_EXEC_ENABLE_DENY_PATTERNS=false`
- `PICOCLAW_TOOLS_CRON_EXEC_TIMEOUT_MINUTES=10`
- `PICOCLAW_TOOLS_MCP_ENABLED=true`
注意:嵌套的映射式配置(例如 `tools.mcp.servers.<name>.*`)在 `config.json` 中配置,而非通过环境变量。
+45
View File
@@ -0,0 +1,45 @@
# 🐛 疑难解答
> 返回 [README](../../README.zh.md)
## "model ... not found in model_list" 或 OpenRouter "free is not a valid model ID"
**症状:** 你看到以下任一错误:
- `Error creating provider: model "openrouter/free" not found in model_list`
- OpenRouter 返回 400:`"free is not a valid model ID"`
**原因:** `model_list` 条目中的 `model` 字段是发送给 API 的内容。对于 OpenRouter,你必须使用**完整的**模型 ID,而不是简写。
- **错误:** `"model": "free"` → OpenRouter 收到 `free` 并拒绝。
- **正确:** `"model": "openrouter/free"` → OpenRouter 收到 `openrouter/free`(自动免费层路由)。
**修复方法:** 在 `~/.picoclaw/config.json`(或你的配置路径)中:
1. **agents.defaults.model** 必须匹配 `model_list` 中的某个 `model_name`(例如 `"openrouter-free"`)。
2. 该条目的 **model** 必须是有效的 OpenRouter 模型 ID,例如:
- `"openrouter/free"` — 自动免费层
- `"google/gemini-2.0-flash-exp:free"`
- `"meta-llama/llama-3.1-8b-instruct:free"`
示例片段:
```json
{
"agents": {
"defaults": {
"model": "openrouter-free"
}
},
"model_list": [
{
"model_name": "openrouter-free",
"model": "openrouter/free",
"api_key": "sk-or-v1-YOUR_OPENROUTER_KEY",
"api_base": "https://openrouter.ai/api/v1"
}
]
}
```
在 [OpenRouter Keys](https://openrouter.ai/keys) 获取你的密钥。
+24 -5
View File
@@ -52,7 +52,7 @@ func (cb *ContextBuilder) WithToolDiscovery(useBM25, useRegex bool) *ContextBuil
}
func getGlobalConfigDir() string {
if home := os.Getenv("PICOCLAW_HOME"); home != "" {
if home := os.Getenv(config.EnvHome); home != "" {
return home
}
home, err := os.UserHomeDir()
@@ -65,7 +65,7 @@ func getGlobalConfigDir() string {
func NewContextBuilder(workspace string) *ContextBuilder {
// builtin skills: skills directory in current project
// Use the skills/ directory under the current working directory
builtinSkillsDir := strings.TrimSpace(os.Getenv("PICOCLAW_BUILTIN_SKILLS"))
builtinSkillsDir := strings.TrimSpace(os.Getenv(config.EnvBuiltinSkills))
if builtinSkillsDir == "" {
wd, _ := os.Getwd()
builtinSkillsDir = filepath.Join(wd, "skills")
@@ -458,7 +458,23 @@ func (cb *ContextBuilder) LoadBootstrapFiles() string {
//
// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
// See: https://platform.openai.com/docs/guides/prompt-caching
func (cb *ContextBuilder) buildDynamicContext(channel, chatID string) string {
// formatCurrentSenderLine builds the single "Current sender: ..." line that
// describes who sent the current message.
//
// Both arguments are whitespace-trimmed first. The result is:
//   - "Current sender: <name> (ID: <id>)" when both are non-empty,
//   - "Current sender: <name>" or "Current sender: <id>" when only one is set,
//   - "" when neither is set, so the caller can omit the section entirely.
func formatCurrentSenderLine(senderID, senderDisplayName string) string {
	id := strings.TrimSpace(senderID)
	name := strings.TrimSpace(senderDisplayName)

	// Nothing to report: signal "no sender line" with the empty string.
	if id == "" && name == "" {
		return ""
	}

	// Prefer the display name; fall back to the raw ID when no name exists,
	// and append the ID in parentheses when both are available.
	label := name
	if label == "" {
		label = id
	} else if id != "" {
		label = fmt.Sprintf("%s (ID: %s)", name, id)
	}
	return fmt.Sprintf("Current sender: %s", label)
}
func (cb *ContextBuilder) buildDynamicContext(channel, chatID, senderID, senderDisplayName string) string {
now := time.Now().Format("2006-01-02 15:04 (Monday)")
rt := fmt.Sprintf("%s %s, Go %s", runtime.GOOS, runtime.GOARCH, runtime.Version())
@@ -468,6 +484,9 @@ func (cb *ContextBuilder) buildDynamicContext(channel, chatID string) string {
if channel != "" && chatID != "" {
fmt.Fprintf(&sb, "\n\n## Current Session\nChannel: %s\nChat ID: %s", channel, chatID)
}
if senderLine := formatCurrentSenderLine(senderID, senderDisplayName); senderLine != "" {
fmt.Fprintf(&sb, "\n\n## Current Sender\n%s", senderLine)
}
return sb.String()
}
@@ -477,7 +496,7 @@ func (cb *ContextBuilder) BuildMessages(
summary string,
currentMessage string,
media []string,
channel, chatID string,
channel, chatID, senderID, senderDisplayName string,
) []providers.Message {
messages := []providers.Message{}
@@ -493,7 +512,7 @@ func (cb *ContextBuilder) BuildMessages(
staticPrompt := cb.BuildSystemPromptWithCache()
// Build short dynamic context (time, runtime, session) — changes per request
dynamicCtx := cb.buildDynamicContext(channel, chatID)
dynamicCtx := cb.buildDynamicContext(channel, chatID, senderID, senderDisplayName)
// Compose a single system message: static (cached) + dynamic + optional summary.
// Keeping all system content in one message ensures every provider adapter can
+65 -3
View File
@@ -82,7 +82,7 @@ func TestSingleSystemMessage(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
msgs := cb.BuildMessages(tt.history, tt.summary, tt.message, nil, "test", "chat1")
msgs := cb.BuildMessages(tt.history, tt.summary, tt.message, nil, "test", "chat1", "", "")
systemCount := 0
for _, m := range msgs {
@@ -126,6 +126,68 @@ func TestSingleSystemMessage(t *testing.T) {
}
}
// TestBuildMessages_CurrentSenderDynamicContext checks that BuildMessages adds a
// "## Current Sender" section to the first message's content when a sender ID
// and/or display name is supplied, and leaves the section out when both are empty.
func TestBuildMessages_CurrentSenderDynamicContext(t *testing.T) {
tmpDir := setupWorkspace(t, map[string]string{
"IDENTITY.md": "# Identity\nTest agent.",
})
defer os.RemoveAll(tmpDir)
cb := NewContextBuilder(tmpDir)
// Each case supplies a sender ID / display name combination together with
// the exact sender line expected in the prompt (if any).
tests := []struct {
name string
senderID string
senderDisplayName string
wantLine string
wantSection bool
}{
{
name: "both id and display name",
senderID: "feishu:ou_xxx",
senderDisplayName: "Zhang San",
wantLine: "Current sender: Zhang San (ID: feishu:ou_xxx)",
wantSection: true,
},
{
name: "display name only",
senderDisplayName: "Alice",
wantLine: "Current sender: Alice",
wantSection: true,
},
{
name: "id only",
senderID: "discord:123",
wantLine: "Current sender: discord:123",
wantSection: true,
},
{
name: "no sender info",
wantSection: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
msgs := cb.BuildMessages(nil, "", "hello", nil, "discord", "chat1", tt.senderID, tt.senderDisplayName)
// The first message is inspected as the system prompt.
sys := msgs[0].Content
if tt.wantSection {
if !strings.Contains(sys, "## Current Sender") {
t.Fatalf("system prompt missing Current Sender section:\n%s", sys)
}
if !strings.Contains(sys, tt.wantLine) {
t.Fatalf("system prompt missing sender line %q:\n%s", tt.wantLine, sys)
}
return
}
// No sender information: the section header must be absent.
if strings.Contains(sys, "## Current Sender") {
t.Fatalf("system prompt should omit Current Sender section:\n%s", sys)
}
})
}
}
// TestMtimeAutoInvalidation verifies that the cache detects source file changes
// via mtime without requiring explicit InvalidateCache().
// Fix: original implementation had no auto-invalidation — edits to bootstrap files,
@@ -576,7 +638,7 @@ func TestConcurrentBuildSystemPromptWithCache(t *testing.T) {
}
// Also exercise BuildMessages concurrently
msgs := cb.BuildMessages(nil, "", "hello", nil, "test", "chat")
msgs := cb.BuildMessages(nil, "", "hello", nil, "test", "chat", "", "")
if len(msgs) < 2 {
errs <- "BuildMessages returned fewer than 2 messages"
return
@@ -664,6 +726,6 @@ func BenchmarkBuildMessagesWithCache(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = cb.BuildMessages(history, "summary", "new message", nil, "cli", "test")
_ = cb.BuildMessages(history, "summary", "new message", nil, "cli", "test", "", "")
}
}
+72 -15
View File
@@ -61,6 +61,8 @@ type processOptions struct {
SessionKey string // Session identifier for history/context
Channel string // Target channel for tool execution
ChatID string // Target chat ID for tool execution
SenderID string // Current sender ID for dynamic context
SenderDisplayName string // Current sender display name for dynamic context
UserMessage string // User message content (may include prefix)
Media []string // media:// refs from inbound message
DefaultResponse string // Response when LLM returns empty
@@ -166,7 +168,12 @@ func registerSharedTools(
}
}
if cfg.Tools.IsToolEnabled("web_fetch") {
fetchTool, err := tools.NewWebFetchToolWithProxy(50000, cfg.Tools.Web.Proxy, cfg.Tools.Web.FetchLimitBytes)
fetchTool, err := tools.NewWebFetchToolWithProxy(
50000,
cfg.Tools.Web.Proxy,
cfg.Tools.Web.Format,
cfg.Tools.Web.FetchLimitBytes,
cfg.Tools.Web.PrivateHostWhitelist)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
} else {
@@ -338,10 +345,9 @@ func (al *AgentLoop) Run(ctx context.Context) error {
select {
case <-ctx.Done():
return nil
default:
msg, ok := al.bus.ConsumeInbound(ctx)
case msg, ok := <-al.bus.InboundChan():
if !ok {
continue
return nil
}
// Start a goroutine that drains the bus while processMessage is
@@ -408,6 +414,8 @@ func (al *AgentLoop) Run(ctx context.Context) error {
}
}
}()
default:
time.Sleep(time.Microsecond * 200)
}
}
@@ -419,9 +427,15 @@ func (al *AgentLoop) Run(ctx context.Context) error {
// is active and stops when drainCtx is canceled (i.e., processMessage returns).
func (al *AgentLoop) drainBusToSteering(ctx context.Context) {
for {
msg, ok := al.bus.ConsumeInbound(ctx)
if !ok {
var msg bus.InboundMessage
select {
case <-ctx.Done():
return
case m, ok := <-al.bus.InboundChan():
if !ok {
return
}
msg = m
}
// Transcribe audio if needed before steering, so the agent sees text.
@@ -861,14 +875,16 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage)
})
opts := processOptions{
SessionKey: sessionKey,
Channel: msg.Channel,
ChatID: msg.ChatID,
UserMessage: msg.Content,
Media: msg.Media,
DefaultResponse: defaultResponse,
EnableSummary: true,
SendResponse: false,
SessionKey: sessionKey,
Channel: msg.Channel,
ChatID: msg.ChatID,
SenderID: msg.SenderID,
SenderDisplayName: msg.Sender.DisplayName,
UserMessage: msg.Content,
Media: msg.Media,
DefaultResponse: defaultResponse,
EnableSummary: true,
SendResponse: false,
}
// context-dependent commands check their own Runtime fields and report
@@ -1039,6 +1055,8 @@ func (al *AgentLoop) runAgentLoop(
opts.Media,
opts.Channel,
opts.ChatID,
opts.SenderID,
opts.SenderDisplayName,
)
// Resolve media:// refs: images→base64 data URLs, non-images→local paths in content
@@ -1256,6 +1274,19 @@ func (al *AgentLoop) runLLMIteration(
// Build tool definitions
providerToolDefs := agent.Tools.ToProviderDefs()
// Determine whether the provider's native web search should replace
// the client-side web_search tool for this request. Only enable when web
// search is actually enabled and registered (so users who disabled web
// access do not get provider-side search or billing).
_, hasWebSearch := agent.Tools.Get("web_search")
useNativeSearch := al.cfg.Tools.Web.PreferNative &&
isNativeSearchProvider(agent.Provider) &&
hasWebSearch
if useNativeSearch {
providerToolDefs = filterClientWebSearch(providerToolDefs)
}
// Log LLM request details
logger.DebugCF("agent", "LLM request",
map[string]any{
@@ -1264,6 +1295,7 @@ func (al *AgentLoop) runLLMIteration(
"model": activeModel,
"messages_count": len(messages),
"tools_count": len(providerToolDefs),
"native_search": useNativeSearch,
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"system_prompt_len": len(messages[0].Content),
@@ -1286,6 +1318,9 @@ func (al *AgentLoop) runLLMIteration(
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
}
if useNativeSearch {
llmOpts["native_search"] = true
}
// parseThinkingLevel guarantees ThinkingOff for empty/unknown values,
// so checking != ThinkingOff is sufficient.
if agent.ThinkingLevel != ThinkingOff {
@@ -1387,7 +1422,7 @@ func (al *AgentLoop) runLLMIteration(
newSummary := agent.Sessions.GetSummary(opts.SessionKey)
messages = agent.ContextBuilder.BuildMessages(
newHistory, newSummary, "",
nil, opts.Channel, opts.ChatID,
nil, opts.Channel, opts.ChatID, opts.SenderID, opts.SenderDisplayName,
)
continue
}
@@ -2246,6 +2281,28 @@ func extractParentPeer(msg bus.InboundMessage) *routing.RoutePeer {
return &routing.RoutePeer{Kind: parentKind, ID: parentID}
}
// isNativeSearchProvider reports whether p both implements
// providers.NativeSearchCapable and answers true from SupportsNativeSearch.
// Providers that do not implement the interface yield false.
func isNativeSearchProvider(p providers.LLMProvider) bool {
	capable, implemented := p.(providers.NativeSearchCapable)
	return implemented && capable.SupportsNativeSearch()
}
// filterClientWebSearch copies tools into a new slice, leaving out every
// definition whose function name equals "web_search" (compared
// case-insensitively). The input slice is not modified. It is used when the
// provider's native search is preferred over the client-side tool.
func filterClientWebSearch(tools []providers.ToolDefinition) []providers.ToolDefinition {
	kept := make([]providers.ToolDefinition, 0, len(tools))
	for i := range tools {
		if !strings.EqualFold(tools[i].Function.Name, "web_search") {
			kept = append(kept, tools[i])
		}
	}
	return kept
}
// Helper to extract provider from registry for cleanup
func extractProvider(registry *AgentRegistry) (providers.LLMProvider, bool) {
if registry == nil {
+228 -39
View File
@@ -30,6 +30,28 @@ func (f *fakeChannel) IsAllowed(string) bool {
func (f *fakeChannel) IsAllowedSender(sender bus.SenderInfo) bool { return true }
func (f *fakeChannel) ReasoningChannelID() string { return f.id }
// recordingProvider is a test provider that remembers the messages passed to
// its most recent Chat call so tests can inspect what was sent.
type recordingProvider struct {
lastMessages []providers.Message
}
// Chat stores a copy of messages in r.lastMessages and returns a fixed
// response with content "Mock response" and no tool calls. The remaining
// arguments are ignored.
func (r *recordingProvider) Chat(
ctx context.Context,
messages []providers.Message,
tools []providers.ToolDefinition,
model string,
opts map[string]any,
) (*providers.LLMResponse, error) {
// Copy into a fresh slice so the recording does not share the caller's backing array.
r.lastMessages = append([]providers.Message(nil), messages...)
return &providers.LLMResponse{
Content: "Mock response",
ToolCalls: []providers.ToolCall{},
}, nil
}
// GetDefaultModel returns the fixed model name "mock-model".
func (r *recordingProvider) GetDefaultModel() string {
return "mock-model"
}
func newTestAgentLoop(
t *testing.T,
) (al *AgentLoop, cfg *config.Config, msgBus *bus.MessageBus, provider *mockProvider, cleanup func()) {
@@ -54,6 +76,59 @@ func newTestAgentLoop(
return al, cfg, msgBus, provider, func() { os.RemoveAll(tmpDir) }
}
// TestProcessMessage_IncludesCurrentSenderInDynamicContext runs processMessage
// with an inbound message carrying a sender ID and display name, then checks
// that the first message received by the provider contains the
// "## Current Sender" section and that the final message is the unmodified
// user message.
func TestProcessMessage_IncludesCurrentSenderInDynamicContext(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "agent-test-*")
if err != nil {
t.Fatalf("Failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
cfg := &config.Config{
Agents: config.AgentsConfig{
Defaults: config.AgentDefaults{
Workspace: tmpDir,
Model: "test-model",
MaxTokens: 4096,
MaxToolIterations: 10,
},
},
}
msgBus := bus.NewMessageBus()
// recordingProvider captures the messages handed to Chat for inspection below.
provider := &recordingProvider{}
al := NewAgentLoop(cfg, msgBus, provider)
response, err := al.processMessage(context.Background(), bus.InboundMessage{
Channel: "discord",
SenderID: "discord:123",
Sender: bus.SenderInfo{
DisplayName: "Alice",
},
ChatID: "group-1",
Content: "hello",
})
if err != nil {
t.Fatalf("processMessage() error = %v", err)
}
if response != "Mock response" {
t.Fatalf("processMessage() response = %q, want %q", response, "Mock response")
}
if len(provider.lastMessages) == 0 {
t.Fatal("provider did not receive any messages")
}
// The first recorded message is treated as the system prompt.
systemPrompt := provider.lastMessages[0].Content
wantSender := "## Current Sender\nCurrent sender: Alice (ID: discord:123)"
if !strings.Contains(systemPrompt, wantSender) {
t.Fatalf("system prompt missing sender context %q:\n%s", wantSender, systemPrompt)
}
// Sender details must not be injected into the user message itself.
lastMessage := provider.lastMessages[len(provider.lastMessages)-1]
if lastMessage.Role != "user" || lastMessage.Content != "hello" {
t.Fatalf("last provider message = %+v, want unchanged user message", lastMessage)
}
}
func TestRecordLastChannel(t *testing.T) {
al, cfg, msgBus, provider, cleanup := newTestAgentLoop(t)
defer cleanup()
@@ -922,10 +997,25 @@ func TestHandleReasoning(t *testing.T) {
al, msgBus := newLoop(t)
al.handleReasoning(context.Background(), "reasoning", "telegram", "")
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
if msg, ok := msgBus.SubscribeOutbound(ctx); ok {
t.Fatalf("expected no outbound message, got %+v", msg)
for {
select {
case msg, ok := <-msgBus.OutboundChan():
if !ok {
t.Fatalf("expected no outbound message, got %+v", msg)
}
if msg.Content == "reasoning" {
t.Fatalf("expected no message for empty chatID, got %+v", msg)
}
return
case <-ctx.Done():
t.Log("expected an outbound message, got none within timeout")
return
default:
// Continue to check for message
time.Sleep(5 * time.Millisecond) // Avoid busy loop
}
}
})
@@ -933,9 +1023,7 @@ func TestHandleReasoning(t *testing.T) {
al, msgBus := newLoop(t)
al.handleReasoning(context.Background(), "hello reasoning", "slack", "channel-1")
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel()
msg, ok := msgBus.SubscribeOutbound(ctx)
msg, ok := <-msgBus.OutboundChan()
if !ok {
t.Fatal("expected an outbound message")
}
@@ -949,35 +1037,52 @@ func TestHandleReasoning(t *testing.T) {
reasoning := "hello telegram reasoning"
al.handleReasoning(context.Background(), reasoning, "telegram", "tg-chat")
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
msg, ok := msgBus.SubscribeOutbound(ctx)
if !ok {
t.Fatal("expected outbound message")
}
for {
select {
case <-ctx.Done():
t.Fatal("expected an outbound message, got none within timeout")
return
case msg, ok := <-msgBus.OutboundChan():
if !ok {
t.Fatal("expected outbound message")
}
if msg.Channel != "telegram" {
t.Fatalf("expected telegram channel message, got %+v", msg)
}
if msg.ChatID != "tg-chat" {
t.Fatalf("expected chatID tg-chat, got %+v", msg)
}
if msg.Content != reasoning {
t.Fatalf("content mismatch: got %q want %q", msg.Content, reasoning)
if msg.Channel != "telegram" {
t.Fatalf("expected telegram channel message, got %+v", msg)
}
if msg.ChatID != "tg-chat" {
t.Fatalf("expected chatID tg-chat, got %+v", msg)
}
if msg.Content != reasoning {
t.Fatalf("content mismatch: got %q want %q", msg.Content, reasoning)
}
return
}
}
})
t.Run("expired ctx", func(t *testing.T) {
al, msgBus := newLoop(t)
reasoning := "hello telegram reasoning"
ctx, cancel := context.WithCancel(context.Background())
cancel()
al.handleReasoning(ctx, reasoning, "telegram", "tg-chat")
ctx, cancel = context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel()
msg, ok := msgBus.SubscribeOutbound(ctx)
if ok {
t.Fatalf("expected no outbound message, got %+v", msg)
al.handleReasoning(context.Background(), reasoning, "telegram", "tg-chat")
consumeCtx, consumeCancel := context.WithTimeout(context.Background(), 2*time.Second)
defer consumeCancel()
for {
select {
case msg, ok := <-msgBus.OutboundChan():
if !ok {
t.Fatalf("expected no outbound message, but received: %+v", msg)
}
t.Logf("Received unexpected outbound message: %+v", msg)
return
case <-consumeCtx.Done():
t.Fatalf("failed: no message received within timeout")
return
}
}
})
@@ -1017,20 +1122,23 @@ func TestHandleReasoning(t *testing.T) {
// Drain the bus and verify the reasoning message was NOT published
// (it should have been dropped due to timeout).
drainCtx, drainCancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer drainCancel()
foundReasoning := false
timeer := time.After(1 * time.Second)
for {
msg, ok := msgBus.SubscribeOutbound(drainCtx)
if !ok {
break
select {
case <-timeer:
t.Logf(
"no reasoning message received after draining bus for 1s, as expected,length=%d",
len(msgBus.OutboundChan()),
)
return
case msg, ok := <-msgBus.OutboundChan():
if !ok {
break
}
if msg.Content == "should timeout" {
t.Fatal("expected reasoning message to be dropped when bus is full, but it was published")
}
}
if msg.Content == "should timeout" {
foundReasoning = true
}
}
if foundReasoning {
t.Fatal("expected reasoning message to be dropped when bus is full, but it was published")
}
})
}
@@ -1318,3 +1426,84 @@ func TestResolveMediaRefs_MixedImageAndFile(t *testing.T) {
t.Fatalf("expected content %q, got %q", expectedContent, result[0].Content)
}
}
// --- Native search helper tests ---
// nativeSearchProvider is a test provider whose SupportsNativeSearch result is
// controlled by the supported field.
type nativeSearchProvider struct {
supported bool
}
// Chat ignores its arguments and returns a fixed response with content "ok".
func (p *nativeSearchProvider) Chat(
ctx context.Context, msgs []providers.Message, tools []providers.ToolDefinition,
model string, opts map[string]any,
) (*providers.LLMResponse, error) {
return &providers.LLMResponse{Content: "ok"}, nil
}
// GetDefaultModel returns the fixed model name "test-model".
func (p *nativeSearchProvider) GetDefaultModel() string { return "test-model" }
// SupportsNativeSearch returns the value of the supported field.
func (p *nativeSearchProvider) SupportsNativeSearch() bool { return p.supported }
// plainProvider is a test provider that defines only Chat and GetDefaultModel,
// i.e. it has no SupportsNativeSearch method.
type plainProvider struct{}
// Chat ignores its arguments and returns a fixed response with content "ok".
func (p *plainProvider) Chat(
ctx context.Context, msgs []providers.Message, tools []providers.ToolDefinition,
model string, opts map[string]any,
) (*providers.LLMResponse, error) {
return &providers.LLMResponse{Content: "ok"}, nil
}
// GetDefaultModel returns the fixed model name "test-model".
func (p *plainProvider) GetDefaultModel() string { return "test-model" }
// TestIsNativeSearchProvider_Supported expects true when the provider's
// SupportsNativeSearch method returns true.
func TestIsNativeSearchProvider_Supported(t *testing.T) {
if !isNativeSearchProvider(&nativeSearchProvider{supported: true}) {
t.Fatal("expected true for provider that supports native search")
}
}
// TestIsNativeSearchProvider_NotSupported expects false when the provider has a
// SupportsNativeSearch method that returns false.
func TestIsNativeSearchProvider_NotSupported(t *testing.T) {
if isNativeSearchProvider(&nativeSearchProvider{supported: false}) {
t.Fatal("expected false for provider that does not support native search")
}
}
// TestIsNativeSearchProvider_NoInterface expects false for a provider that has
// no SupportsNativeSearch method at all.
func TestIsNativeSearchProvider_NoInterface(t *testing.T) {
if isNativeSearchProvider(&plainProvider{}) {
t.Fatal("expected false for provider that does not implement NativeSearchCapable")
}
}
// TestFilterClientWebSearch_RemovesWebSearch checks that the "web_search"
// definition is dropped and the two other definitions are kept.
func TestFilterClientWebSearch_RemovesWebSearch(t *testing.T) {
defs := []providers.ToolDefinition{
{Type: "function", Function: providers.ToolFunctionDefinition{Name: "web_search"}},
{Type: "function", Function: providers.ToolFunctionDefinition{Name: "read_file"}},
{Type: "function", Function: providers.ToolFunctionDefinition{Name: "exec"}},
}
result := filterClientWebSearch(defs)
if len(result) != 2 {
t.Fatalf("len(result) = %d, want 2", len(result))
}
for _, td := range result {
if td.Function.Name == "web_search" {
t.Fatal("web_search should be filtered out")
}
}
}
// TestFilterClientWebSearch_NoWebSearch checks that the result length is
// unchanged when the input contains no "web_search" definition.
func TestFilterClientWebSearch_NoWebSearch(t *testing.T) {
defs := []providers.ToolDefinition{
{Type: "function", Function: providers.ToolFunctionDefinition{Name: "read_file"}},
{Type: "function", Function: providers.ToolFunctionDefinition{Name: "exec"}},
}
result := filterClientWebSearch(defs)
if len(result) != 2 {
t.Fatalf("len(result) = %d, want 2", len(result))
}
}
// TestFilterClientWebSearch_EmptyInput checks that a nil input yields a result
// of length zero.
func TestFilterClientWebSearch_EmptyInput(t *testing.T) {
result := filterClientWebSearch(nil)
if len(result) != 0 {
t.Fatalf("len(result) = %d, want 0", len(result))
}
}
+2 -1
View File
@@ -6,6 +6,7 @@ import (
"path/filepath"
"time"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/fileutil"
)
@@ -39,7 +40,7 @@ func (c *AuthCredential) NeedsRefresh() bool {
}
func authFilePath() string {
if home := os.Getenv("PICOCLAW_HOME"); home != "" {
if home := os.Getenv(config.EnvHome); home != "" {
return filepath.Join(home, "auth.json")
}
home, _ := os.UserHomeDir()
+60 -93
View File
@@ -3,6 +3,7 @@ package bus
import (
"context"
"errors"
"sync"
"sync/atomic"
"github.com/sipeed/picoclaw/pkg/logger"
@@ -17,8 +18,11 @@ type MessageBus struct {
inbound chan InboundMessage
outbound chan OutboundMessage
outboundMedia chan OutboundMediaMessage
done chan struct{}
closed atomic.Bool
closeOnce sync.Once
done chan struct{}
closed atomic.Bool
wg sync.WaitGroup
}
func NewMessageBus() *MessageBus {
@@ -30,128 +34,91 @@ func NewMessageBus() *MessageBus {
}
}
func (mb *MessageBus) PublishInbound(ctx context.Context, msg InboundMessage) error {
func publish[T any](ctx context.Context, mb *MessageBus, ch chan T, msg T) error {
// check bus closed before acquiring wg, to avoid unnecessary wg.Add and potential deadlock
if mb.closed.Load() {
return ErrBusClosed
}
if err := ctx.Err(); err != nil {
return err
}
// check again,before sending message, to avoid sending to closed channel
select {
case mb.inbound <- msg:
return nil
case <-mb.done:
return ErrBusClosed
case <-ctx.Done():
return ctx.Err()
case <-mb.done:
return ErrBusClosed
default:
}
mb.wg.Add(1)
defer mb.wg.Done()
select {
case ch <- msg:
return nil
case <-ctx.Done():
return ctx.Err()
case <-mb.done:
return ErrBusClosed
}
}
func (mb *MessageBus) ConsumeInbound(ctx context.Context) (InboundMessage, bool) {
select {
case msg, ok := <-mb.inbound:
return msg, ok
case <-mb.done:
return InboundMessage{}, false
case <-ctx.Done():
return InboundMessage{}, false
}
func (mb *MessageBus) PublishInbound(ctx context.Context, msg InboundMessage) error {
return publish(ctx, mb, mb.inbound, msg)
}
// InboundChan returns the bus's inbound message channel as a receive-only
// channel.
func (mb *MessageBus) InboundChan() <-chan InboundMessage {
return mb.inbound
}
func (mb *MessageBus) PublishOutbound(ctx context.Context, msg OutboundMessage) error {
if mb.closed.Load() {
return ErrBusClosed
}
if err := ctx.Err(); err != nil {
return err
}
select {
case mb.outbound <- msg:
return nil
case <-mb.done:
return ErrBusClosed
case <-ctx.Done():
return ctx.Err()
}
return publish(ctx, mb, mb.outbound, msg)
}
func (mb *MessageBus) SubscribeOutbound(ctx context.Context) (OutboundMessage, bool) {
select {
case msg, ok := <-mb.outbound:
return msg, ok
case <-mb.done:
return OutboundMessage{}, false
case <-ctx.Done():
return OutboundMessage{}, false
}
func (mb *MessageBus) OutboundChan() <-chan OutboundMessage {
return mb.outbound
}
func (mb *MessageBus) PublishOutboundMedia(ctx context.Context, msg OutboundMediaMessage) error {
if mb.closed.Load() {
return ErrBusClosed
}
if err := ctx.Err(); err != nil {
return err
}
select {
case mb.outboundMedia <- msg:
return nil
case <-mb.done:
return ErrBusClosed
case <-ctx.Done():
return ctx.Err()
}
return publish(ctx, mb, mb.outboundMedia, msg)
}
func (mb *MessageBus) SubscribeOutboundMedia(ctx context.Context) (OutboundMediaMessage, bool) {
select {
case msg, ok := <-mb.outboundMedia:
return msg, ok
case <-mb.done:
return OutboundMediaMessage{}, false
case <-ctx.Done():
return OutboundMediaMessage{}, false
}
func (mb *MessageBus) OutboundMediaChan() <-chan OutboundMediaMessage {
return mb.outboundMedia
}
func (mb *MessageBus) Close() {
if mb.closed.CompareAndSwap(false, true) {
mb.closeOnce.Do(func() {
// notify all blocked publishers to exit
close(mb.done)
// Drain buffered channels so messages aren't silently lost.
// Channels are NOT closed to avoid send-on-closed panics from concurrent publishers.
// because every publisher will check mb.closed before acquiring wg
// so we can be sure that new publishers will not be added new messages after this point
mb.closed.Store(true)
// wait for all ongoing Publish calls to finish, ensuring all messages have been sent to channels or exited
mb.wg.Wait()
// close channels safely
close(mb.inbound)
close(mb.outbound)
close(mb.outboundMedia)
// clean up any remaining messages in channels
drained := 0
for {
select {
case <-mb.inbound:
drained++
default:
goto doneInbound
}
for range mb.inbound {
drained++
}
doneInbound:
for {
select {
case <-mb.outbound:
drained++
default:
goto doneOutbound
}
for range mb.outbound {
drained++
}
doneOutbound:
for {
select {
case <-mb.outboundMedia:
drained++
default:
goto doneMedia
}
for range mb.outboundMedia {
drained++
}
doneMedia:
if drained > 0 {
logger.DebugCF("bus", "Drained buffered messages during close", map[string]any{
"count": drained,
})
}
}
})
}
+35 -17
View File
@@ -24,7 +24,7 @@ func TestPublishConsume(t *testing.T) {
t.Fatalf("PublishInbound failed: %v", err)
}
got, ok := mb.ConsumeInbound(ctx)
got, ok := <-mb.InboundChan()
if !ok {
t.Fatal("ConsumeInbound returned ok=false")
}
@@ -52,7 +52,7 @@ func TestPublishOutboundSubscribe(t *testing.T) {
t.Fatalf("PublishOutbound failed: %v", err)
}
got, ok := mb.SubscribeOutbound(ctx)
got, ok := <-mb.OutboundChan()
if !ok {
t.Fatal("SubscribeOutbound returned ok=false")
}
@@ -108,27 +108,48 @@ func TestPublishOutbound_BusClosed(t *testing.T) {
func TestConsumeInbound_ContextCancel(t *testing.T) {
mb := NewMessageBus()
defer mb.Close()
ctx, cancel := context.WithCancel(context.Background())
cancel()
for i := range defaultBusBufferSize {
if err := mb.PublishInbound(context.Background(), InboundMessage{Content: "fill"}); err != nil {
t.Fatalf("fill failed at %d: %v", i, err)
}
}
_, ok := mb.ConsumeInbound(ctx)
if ok {
t.Fatal("expected ok=false when context is canceled")
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
mb.PublishInbound(ctx, InboundMessage{Content: "ContextCancel"})
select {
case <-ctx.Done():
t.Log("context canceled, as expected")
case msg, ok := <-mb.InboundChan():
if !ok {
t.Fatal("expected ok=false when context is canceled")
}
if msg.Content == "ContextCancel" {
t.Fatalf("expected content 'ContextCancel', got %q", msg.Content)
}
}
}
func TestConsumeInbound_BusClosed(t *testing.T) {
mb := NewMessageBus()
mb.Close()
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
timer := time.AfterFunc(100*time.Millisecond, func() {
mb.Close()
})
_, ok := mb.ConsumeInbound(ctx)
if ok {
t.Fatal("expected ok=false when bus is closed")
select {
case <-timer.C:
t.Log("context canceled, as expected")
case _, ok := <-mb.InboundChan():
if ok {
t.Fatal("expected ok=false when context is canceled")
}
}
}
@@ -136,10 +157,7 @@ func TestSubscribeOutbound_BusClosed(t *testing.T) {
mb := NewMessageBus()
mb.Close()
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
_, ok := mb.SubscribeOutbound(ctx)
_, ok := <-mb.OutboundChan()
if ok {
t.Fatal("expected ok=false when bus is closed")
}
+33 -4
View File
@@ -29,11 +29,17 @@ import (
"github.com/sipeed/picoclaw/pkg/utils"
)
// errCodeTenantTokenInvalid is the Feishu API error code for an expired/revoked
// tenant_access_token. The Lark SDK's built-in retry does not clear its cache
// on this error, so we do it ourselves.
const errCodeTenantTokenInvalid = 99991663
type FeishuChannel struct {
*channels.BaseChannel
config config.FeishuConfig
client *lark.Client
wsClient *larkws.Client
config config.FeishuConfig
client *lark.Client
wsClient *larkws.Client
tokenCache *tokenCache // custom cache that supports invalidation
botOpenID atomic.Value // stores string; populated lazily for @mention detection
@@ -47,10 +53,12 @@ func NewFeishuChannel(cfg config.FeishuConfig, bus *bus.MessageBus) (*FeishuChan
channels.WithReasoningChannelID(cfg.ReasoningChannelID),
)
tc := newTokenCache()
ch := &FeishuChannel{
BaseChannel: base,
config: cfg,
client: lark.NewClient(cfg.AppID, cfg.AppSecret),
tokenCache: tc,
client: lark.NewClient(cfg.AppID, cfg.AppSecret, lark.WithTokenCache(tc)),
}
ch.SetOwner(ch)
return ch, nil
@@ -147,6 +155,7 @@ func (c *FeishuChannel) EditMessage(ctx context.Context, chatID, messageID, cont
return fmt.Errorf("feishu edit: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu edit api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
return nil
@@ -186,6 +195,7 @@ func (c *FeishuChannel) SendPlaceholder(ctx context.Context, chatID string) (str
return "", fmt.Errorf("feishu placeholder send: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return "", fmt.Errorf("feishu placeholder api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
@@ -226,6 +236,7 @@ func (c *FeishuChannel) ReactToMessage(ctx context.Context, chatID, messageID st
return func() {}, fmt.Errorf("feishu react: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
logger.ErrorCF("feishu", "Reaction API error", map[string]any{
"emoji": chosenEmoji,
"message_id": messageID,
@@ -451,6 +462,7 @@ func (c *FeishuChannel) fetchBotOpenID(ctx context.Context) error {
return fmt.Errorf("bot info parse: %w", err)
}
if result.Code != 0 {
c.invalidateTokenOnAuthError(result.Code)
return fmt.Errorf("bot info api error (code=%d)", result.Code)
}
if result.Bot.OpenID == "" {
@@ -593,6 +605,7 @@ func (c *FeishuChannel) downloadResource(
return ""
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
logger.ErrorCF("feishu", "Resource download api error", map[string]any{
"code": resp.Code,
"msg": resp.Msg,
@@ -705,6 +718,7 @@ func (c *FeishuChannel) sendCard(ctx context.Context, chatID, cardContent string
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu api error (code=%d msg=%s): %w", resp.Code, resp.Msg, channels.ErrTemporary)
}
@@ -730,6 +744,7 @@ func (c *FeishuChannel) sendImage(ctx context.Context, chatID string, file *os.F
return fmt.Errorf("feishu image upload: %w", err)
}
if !uploadResp.Success() {
c.invalidateTokenOnAuthError(uploadResp.Code)
return fmt.Errorf("feishu image upload api error (code=%d msg=%s)", uploadResp.Code, uploadResp.Msg)
}
if uploadResp.Data == nil || uploadResp.Data.ImageKey == nil {
@@ -754,6 +769,7 @@ func (c *FeishuChannel) sendImage(ctx context.Context, chatID string, file *os.F
return fmt.Errorf("feishu image send: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu image send api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
return nil
@@ -784,6 +800,7 @@ func (c *FeishuChannel) sendFile(ctx context.Context, chatID string, file *os.Fi
return fmt.Errorf("feishu file upload: %w", err)
}
if !uploadResp.Success() {
c.invalidateTokenOnAuthError(uploadResp.Code)
return fmt.Errorf("feishu file upload api error (code=%d msg=%s)", uploadResp.Code, uploadResp.Msg)
}
if uploadResp.Data == nil || uploadResp.Data.FileKey == nil {
@@ -808,6 +825,7 @@ func (c *FeishuChannel) sendFile(ctx context.Context, chatID string, file *os.Fi
return fmt.Errorf("feishu file send: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu file send api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
return nil
@@ -830,3 +848,14 @@ func extractFeishuSenderID(sender *larkim.EventSender) string {
return ""
}
// invalidateTokenOnAuthError drops every cached token when code equals
// errCodeTenantTokenInvalid (99991663), so that the next request has to obtain
// a fresh tenant_access_token. Any other code is ignored.
//
// Rationale (from the original author): the Lark SDK's built-in retry does not
// clear its cache on this error, so without this hook all API calls keep
// failing until the token naturally expires (~2 hours).
func (c *FeishuChannel) invalidateTokenOnAuthError(code int) {
	if code != errCodeTenantTokenInvalid {
		return
	}
	c.tokenCache.InvalidateAll()
	logger.WarnCF("feishu", "Invalidated cached token due to auth error", nil)
}
+52
View File
@@ -0,0 +1,52 @@
package feishu
import (
"context"
"sync"
"time"
)
// tokenCache is an in-memory key/value store with per-entry expiry. It
// implements larkcore.Cache and adds InvalidateAll, which the stock cache
// lacks. It exists to work around a bug in the Lark SDK v3 where the built-in
// token retry loop does not clear stale tokens from cache on auth errors.
type tokenCache struct {
	mu    sync.RWMutex           // guards store
	store map[string]*tokenEntry // cache key -> entry
}

// tokenEntry is one cached value together with the instant it stops being valid.
type tokenEntry struct {
	value    string
	expireAt time.Time
}

// newTokenCache returns an empty, ready-to-use cache.
func newTokenCache() *tokenCache {
	cache := new(tokenCache)
	cache.store = map[string]*tokenEntry{}
	return cache
}

// Set stores value under key, valid for ttl counted from now. An existing
// entry for the same key is overwritten. It always returns nil.
func (c *tokenCache) Set(_ context.Context, key, value string, ttl time.Duration) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	entry := &tokenEntry{
		value:    value,
		expireAt: time.Now().Add(ttl),
	}
	c.store[key] = entry
	return nil
}

// Get returns the value stored under key. A missing or expired entry yields
// the empty string and a nil error; expired entries are removed on the way.
// The write lock is taken because a lookup may delete.
func (c *tokenCache) Get(_ context.Context, key string) (string, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	entry, found := c.store[key]
	switch {
	case !found:
		return "", nil
	case entry.expireAt.Before(time.Now()):
		delete(c.store, key)
		return "", nil
	default:
		return entry.value, nil
	}
}

// InvalidateAll removes every cached entry, forcing fresh acquisition on the
// next lookup.
func (c *tokenCache) InvalidateAll() {
	c.mu.Lock()
	defer c.mu.Unlock()
	clear(c.store)
}
+33 -27
View File
@@ -585,7 +585,7 @@ func (m *Manager) sendWithRetry(ctx context.Context, name string, w *channelWork
func dispatchLoop[M any](
ctx context.Context,
m *Manager,
subscribe func(context.Context) (M, bool),
ch <-chan M,
getChannel func(M) string,
enqueue func(context.Context, *channelWorker, M) bool,
startMsg, stopMsg, unknownMsg, noWorkerMsg string,
@@ -593,35 +593,41 @@ func dispatchLoop[M any](
logger.InfoC("channels", startMsg)
for {
msg, ok := subscribe(ctx)
if !ok {
select {
case <-ctx.Done():
logger.InfoC("channels", stopMsg)
return
}
channel := getChannel(msg)
// Silently skip internal channels
if constants.IsInternalChannel(channel) {
continue
}
m.mu.RLock()
_, exists := m.channels[channel]
w, wExists := m.workers[channel]
m.mu.RUnlock()
if !exists {
logger.WarnCF("channels", unknownMsg, map[string]any{"channel": channel})
continue
}
if wExists && w != nil {
if !enqueue(ctx, w, msg) {
case msg, ok := <-ch:
if !ok {
logger.InfoC("channels", stopMsg)
return
}
} else if exists {
logger.WarnCF("channels", noWorkerMsg, map[string]any{"channel": channel})
channel := getChannel(msg)
// Silently skip internal channels
if constants.IsInternalChannel(channel) {
continue
}
m.mu.RLock()
_, exists := m.channels[channel]
w, wExists := m.workers[channel]
m.mu.RUnlock()
if !exists {
logger.WarnCF("channels", unknownMsg, map[string]any{"channel": channel})
continue
}
if wExists && w != nil {
if !enqueue(ctx, w, msg) {
return
}
} else if exists {
logger.WarnCF("channels", noWorkerMsg, map[string]any{"channel": channel})
}
}
}
}
@@ -629,7 +635,7 @@ func dispatchLoop[M any](
func (m *Manager) dispatchOutbound(ctx context.Context) {
dispatchLoop(
ctx, m,
m.bus.SubscribeOutbound,
m.bus.OutboundChan(),
func(msg bus.OutboundMessage) string { return msg.Channel },
func(ctx context.Context, w *channelWorker, msg bus.OutboundMessage) bool {
select {
@@ -649,7 +655,7 @@ func (m *Manager) dispatchOutbound(ctx context.Context) {
func (m *Manager) dispatchOutboundMedia(ctx context.Context) {
dispatchLoop(
ctx, m,
m.bus.SubscribeOutboundMedia,
m.bus.OutboundMediaChan(),
func(msg bus.OutboundMediaMessage) string { return msg.Channel },
func(ctx context.Context, w *channelWorker, msg bus.OutboundMediaMessage) bool {
select {
+14 -6
View File
@@ -34,11 +34,19 @@ func TestHandleC2CMessage_IncludesAccountIDMetadata(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
if !ok {
t.Fatal("expected inbound message")
}
if inbound.Metadata["account_id"] != "7750283E123456" {
t.Fatalf("account_id metadata = %q, want %q", inbound.Metadata["account_id"], "7750283E123456")
for {
select {
case <-ctx.Done():
t.Fatal("timeout waiting for inbound message")
return
case inbound, ok := <-messageBus.InboundChan():
if !ok {
t.Fatal("expected inbound message")
}
if inbound.Metadata["account_id"] != "7750283E123456" {
t.Fatalf("account_id metadata = %q, want %q", inbound.Metadata["account_id"], "7750283E123456")
}
return
}
}
}
@@ -0,0 +1,197 @@
package telegram
import (
"regexp"
"strings"
)
// mdV2SpecialChars is the set of runes that escapeMarkdownV2 prefixes with a
// backslash when they occur in plain text sent with Telegram's MarkdownV2
// parse mode. The backslash itself is part of the set.
var mdV2SpecialChars = func() map[rune]bool {
	const specials = "*_[]()~`><#+-=|{}.!\\"

	set := make(map[rune]bool, len(specials))
	for _, r := range specials {
		set[r] = true
	}
	return set
}()
// entityPattern describes one Telegram MarkdownV2 inline entity type.
type entityPattern struct {
	re    *regexp.Regexp // matches one complete entity span, delimiters included
	open  string         // opening delimiter; also the key used in verbatimEntities
	close string         // closing delimiter; empty when there is none to strip
}

// allEntityPatterns lists every recognized entity in priority order
// (longer / more-specific delimiters first so they win over shorter ones).
// processText asks each regex for its first occurrence in the remaining text
// and picks the leftmost (then longest) hit.
//
// The inline patterns share one shape: a delimiter, then any run of
// "ordinary character or backslash-escaped character", then the delimiter.
// For that to work the character class must exclude the backslash, otherwise
// an escaped delimiter such as \` is read as an ordinary backslash followed by
// a closing delimiter and the span is cut short.
var allEntityPatterns = []entityPattern{
	// fenced code block — content is completely verbatim
	{re: regexp.MustCompile("(?s)```(?:[\\w]*\\n)?[\\s\\S]*?```"), open: "```", close: "```"},
	// inline code — content is completely verbatim.
	// Written as an interpreted string because the pattern contains backticks;
	// the compiled regex is `(?:[^`\\\n]|\\.)*` (class excludes backtick,
	// backslash and newline), matching the sibling patterns below.
	{re: regexp.MustCompile("`(?:[^`\\\\\\n]|\\\\.)*`"), open: "`", close: "`"},
	// expandable block-quote opener **>…
	{re: regexp.MustCompile(`(?m)\*\*>(?:[^\n]*)`), open: "**>", close: ""},
	// block-quote line >…
	{re: regexp.MustCompile(`(?m)^>(?:[^\n]*)`), open: ">", close: ""},
	// custom emoji / timestamp ![…](…) — must come before plain link
	{re: regexp.MustCompile(`!\[[^\]]*\]\([^)]*\)`), open: "!", close: ""},
	// inline URL / user mention […](…)
	{re: regexp.MustCompile(`\[[^\]]*\]\([^)]*\)`), open: "[", close: ""},
	// spoiler ||…|| — before single | so it wins
	{re: regexp.MustCompile(`\|\|(?:[^|\\\n]|\\.)*\|\|`), open: "||", close: "||"},
	// underline __…__ — before single _ so it wins
	{re: regexp.MustCompile(`__(?:[^_\\\n]|\\.)*__`), open: "__", close: "__"},
	// bold *…*
	{re: regexp.MustCompile(`\*(?:[^*\\\n]|\\.)*\*`), open: "*", close: "*"},
	// italic _…_
	{re: regexp.MustCompile(`_(?:[^_\\\n]|\\.)*_`), open: "_", close: "_"},
	// strikethrough ~…~
	{re: regexp.MustCompile(`~(?:[^~\\\n]|\\.)*~`), open: "~", close: "~"},
}
// verbatimEntities is keyed by an entityPattern's opening delimiter and marks
// the entity kinds (code blocks, inline code, quotes, custom emoji, links)
// whose whole matched span processText copies to the output unchanged instead
// of recursing into it.
var verbatimEntities = func() map[string]bool {
	openers := []string{"```", "`", "**>", ">", "!", "["}

	set := make(map[string]bool, len(openers))
	for _, opener := range openers {
		set[opener] = true
	}
	return set
}()
// markdownToTelegramMarkdownV2 rewrites Markdown so it can be sent with
// Telegram's MarkdownV2 parse mode.
//
// It runs three passes, in this order:
//  1. Every reHeading match (# … ###### lines) becomes *bold*, with the
//     heading text fully escaped so the surrounding *…* stays a valid span.
//  2. Every reBoldStar match (**bold**) becomes *bold*.
//  3. processText walks the result, keeping recognized MarkdownV2 entities
//     and escaping special characters everywhere else.
//
// NOTE(review): passes 1 and 2 are applied to the whole input, so they also
// rewrite text that sits inside code spans — confirm that is acceptable.
//
// Reference: https://core.telegram.org/bots/api#formatting-options
func markdownToTelegramMarkdownV2(text string) string {
	boldHeading := func(line string) string {
		groups := reHeading.FindStringSubmatch(line)
		if len(groups) >= 2 {
			// Escape everything, '*' included, before wrapping in bold markers.
			return "*" + escapeMarkdownV2(groups[1]) + "*"
		}
		return line
	}

	withHeadings := reHeading.ReplaceAllStringFunc(text, boldHeading)
	withBold := reBoldStar.ReplaceAllString(withHeadings, "*$1*")
	return processText(withBold)
}
// processText converts text piece by piece. On each step it looks for the
// entity from allEntityPatterns that starts earliest in the remaining input
// (the longer match wins when two start at the same offset), escapes the
// plain text in front of it, emits the entity, and moves past it. When no
// entity is left, the remainder is escaped as plain text.
//
// Entities listed in verbatimEntities are copied unchanged. For all others
// the delimiters are kept and the inner content is processed recursively, so
// nested entities survive while stray special characters get escaped.
func processText(text string) string {
	var out strings.Builder

	for rest := text; rest != ""; {
		// Pick the leftmost (then longest) entity in what is left.
		var winner *entityPattern
		from, to := -1, -1
		for idx := range allEntityPatterns {
			candidate := &allEntityPatterns[idx]
			span := candidate.re.FindStringIndex(rest)
			if span == nil {
				continue
			}
			earlier := winner == nil || span[0] < from
			longer := winner != nil && span[0] == from && span[1]-span[0] > to-from
			if earlier || longer {
				winner, from, to = candidate, span[0], span[1]
			}
		}

		if winner == nil {
			// Nothing but plain text remains.
			out.WriteString(escapeMarkdownV2(rest))
			break
		}

		if from > 0 {
			out.WriteString(escapeMarkdownV2(rest[:from]))
		}

		span := rest[from:to]
		if verbatimEntities[winner.open] {
			out.WriteString(span)
		} else {
			body := span[len(winner.open) : len(span)-len(winner.close)]
			out.WriteString(winner.open)
			out.WriteString(processText(body))
			out.WriteString(winner.close)
		}

		rest = rest[to:]
	}

	return out.String()
}
// escapeMarkdownV2 returns s with a backslash inserted in front of every rune
// found in mdV2SpecialChars. It is meant for plain-text segments, i.e. text
// outside any recognized entity.
//
// A backslash followed by another rune is treated as an existing escape
// sequence and both runes are copied unchanged, which avoids double-escaping.
// A backslash that is the very last rune has no partner and is itself escaped.
func escapeMarkdownV2(s string) string {
	var out strings.Builder
	out.Grow(len(s) + 8)

	// afterBackslash is true while the previous rune was a backslash that is
	// still waiting for the rune it escapes.
	afterBackslash := false
	for _, r := range s {
		switch {
		case afterBackslash:
			// Second half of an existing escape sequence: copy as is.
			afterBackslash = false
		case r == '\\':
			// Copy the backslash now; what follows decides how it is treated.
			afterBackslash = true
		case mdV2SpecialChars[r]:
			out.WriteByte('\\')
		}
		out.WriteRune(r)
	}
	if afterBackslash {
		// Trailing backslash with nothing to escape: escape the backslash.
		out.WriteByte('\\')
	}

	return out.String()
}
@@ -0,0 +1,68 @@
package telegram
import (
_ "embed"
"testing"
"github.com/stretchr/testify/require"
)
//go:embed testdata/md2_all_formats.txt
var md2AllFormats string
// Test_markdownToTelegramMarkdownV2 runs markdownToTelegramMarkdownV2 over a
// table of inputs and compares the result with the expected MarkdownV2 text.
// Every case carries a distinct name so a failing subtest can be identified
// and re-run on its own with -run.
func Test_markdownToTelegramMarkdownV2(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "heading -> bolding",
			input:    `## HeadingH2 #`,
			expected: "*HeadingH2 \\#*",
		},
		{
			name:     "strikethrough",
			input:    "~strikethroughMD~",
			expected: "~strikethroughMD~",
		},
		{
			name:     "inline URL",
			input:    "[inline URL](http://www.example.com/)",
			expected: "[inline URL](http://www.example.com/)",
		},
		{
			// Already-valid MarkdownV2 must pass through unchanged.
			name:     "all telegram formats",
			input:    md2AllFormats,
			expected: md2AllFormats,
		},
		{
			name:     "empty",
			input:    "",
			expected: "",
		},
		{
			name:     "one letter",
			input:    "o",
			expected: "o",
		},
		{
			name:     "stray tilde inside bold is escaped",
			input:    "*Last update: ~10 24h*",
			expected: "*Last update: \\~10 24h*",
		},
		{
			name:     "angle brackets are escaped",
			input:    "<Market Capitalization>",
			expected: "\\<Market Capitalization\\>",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			actual := markdownToTelegramMarkdownV2(tc.input)
			require.EqualValues(t, tc.expected, actual)
		})
	}
}
@@ -0,0 +1,111 @@
package telegram
import (
"fmt"
"strings"
)
// markdownToTelegramHTML converts Markdown into the HTML subset used with
// Telegram's HTML parse mode. An empty input yields an empty output.
//
// The order of the steps matters:
//  1. Fenced code blocks, then inline code spans, are swapped for NUL-delimited
//     placeholders so the later passes cannot touch their content.
//  2. Heading and blockquote markers are stripped, keeping the captured text.
//  3. The remaining text is HTML-escaped.
//  4. Links, bold, italic, strikethrough and list items are rewritten.
//  5. The placeholders are replaced by the HTML-escaped code wrapped in
//     <code> (inline) or <pre><code> (fenced).
func markdownToTelegramHTML(text string) string {
	if text == "" {
		return ""
	}

	fenced := extractCodeBlocks(text)
	inline := extractInlineCodes(fenced.text)
	html := inline.text

	html = reHeading.ReplaceAllString(html, "$1")
	html = reBlockquote.ReplaceAllString(html, "$1")

	html = escapeHTML(html)

	html = reLink.ReplaceAllString(html, `<a href="$2">$1</a>`)
	html = reBoldStar.ReplaceAllString(html, "<b>$1</b>")
	html = reBoldUnder.ReplaceAllString(html, "<b>$1</b>")
	html = reItalic.ReplaceAllStringFunc(html, func(span string) string {
		if groups := reItalic.FindStringSubmatch(span); len(groups) >= 2 {
			return "<i>" + groups[1] + "</i>"
		}
		return span
	})
	html = reStrike.ReplaceAllString(html, "<s>$1</s>")
	html = reListItem.ReplaceAllString(html, "• ")

	// Put the protected code back, inline spans first, then fenced blocks.
	for idx, snippet := range inline.codes {
		placeholder := fmt.Sprintf("\x00IC%d\x00", idx)
		html = strings.ReplaceAll(html, placeholder, fmt.Sprintf("<code>%s</code>", escapeHTML(snippet)))
	}
	for idx, snippet := range fenced.codes {
		placeholder := fmt.Sprintf("\x00CB%d\x00", idx)
		html = strings.ReplaceAll(html, placeholder, fmt.Sprintf("<pre><code>%s</code></pre>", escapeHTML(snippet)))
	}

	return html
}
// codeBlockMatch is the result of extractCodeBlocks.
type codeBlockMatch struct {
	text  string   // input with each reCodeBlock match replaced by a placeholder
	codes []string // first capture group of each match, in order of appearance
}

// extractCodeBlocks replaces every reCodeBlock match in text with the
// placeholder "\x00CB<i>\x00", where <i> is the zero-based index of the match,
// and returns the rewritten text together with the captured contents so they
// can be restored later. codes[i] belongs to placeholder i.
func extractCodeBlocks(text string) codeBlockMatch {
	found := reCodeBlock.FindAllStringSubmatch(text, -1)
	bodies := make([]string, len(found))
	for idx, groups := range found {
		bodies[idx] = groups[1]
	}

	next := 0
	replaced := reCodeBlock.ReplaceAllStringFunc(text, func(string) string {
		token := fmt.Sprintf("\x00CB%d\x00", next)
		next++
		return token
	})

	return codeBlockMatch{text: replaced, codes: bodies}
}
// inlineCodeMatch is the result of extractInlineCodes.
type inlineCodeMatch struct {
	text  string   // input with each reInlineCode match replaced by a placeholder
	codes []string // first capture group of each match, in order of appearance
}

// extractInlineCodes replaces every reInlineCode match in text with the
// placeholder "\x00IC<i>\x00", where <i> is the zero-based index of the match,
// and returns the rewritten text together with the captured contents so they
// can be restored later. codes[i] belongs to placeholder i.
func extractInlineCodes(text string) inlineCodeMatch {
	found := reInlineCode.FindAllStringSubmatch(text, -1)
	spans := make([]string, len(found))
	for idx, groups := range found {
		spans[idx] = groups[1]
	}

	next := 0
	replaced := reInlineCode.ReplaceAllStringFunc(text, func(string) string {
		token := fmt.Sprintf("\x00IC%d\x00", next)
		next++
		return token
	})

	return inlineCodeMatch{text: replaced, codes: spans}
}
// escapeHTML replaces the characters &, < and > with their HTML entities
// (&amp;, &lt;, &gt;). Quotes are left as they are. The replacement is done in
// a single pass, so an ampersand produced by escaping is never escaped again.
func escapeHTML(text string) string {
	entities := strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
	)
	return entities.Replace(text)
}
+129 -128
View File
@@ -3,6 +3,7 @@ package telegram
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"os"
@@ -26,7 +27,7 @@ import (
)
var (
reHeading = regexp.MustCompile(`^#{1,6}\s+(.+)$`)
reHeading = regexp.MustCompile(`(?m)^#{1,6}\s+([^\n]+)`)
reBlockquote = regexp.MustCompile(`^>\s*(.*)$`)
reLink = regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
reBoldStar = regexp.MustCompile(`\*\*(.+?)\*\*`)
@@ -169,6 +170,8 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
return channels.ErrNotRunning
}
useMarkdownV2 := c.config.Channels.Telegram.UseMarkdownV2
chatID, threadID, err := parseTelegramChatID(msg.ChatID)
if err != nil {
return fmt.Errorf("invalid chat ID %s: %w", msg.ChatID, channels.ErrSendFailed)
@@ -187,22 +190,65 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
chunk := queue[0]
queue = queue[1:]
htmlContent := markdownToTelegramHTML(chunk)
content := parseContent(chunk, useMarkdownV2)
if len([]rune(htmlContent)) > 4096 {
ratio := float64(len([]rune(chunk))) / float64(len([]rune(htmlContent)))
if len([]rune(content)) > 4096 {
runeChunk := []rune(chunk)
ratio := float64(len(runeChunk)) / float64(len([]rune(content)))
smallerLen := int(float64(4096) * ratio * 0.95) // 5% safety margin
if smallerLen < 100 {
smallerLen = 100
// Guarantee progress: if estimated length is >= chunk length, force it smaller
if smallerLen >= len(runeChunk) {
smallerLen = len(runeChunk) - 1
}
// Push sub-chunks back to the front of the queue for
// re-validation instead of sending them blindly.
if smallerLen <= 0 {
if err := c.sendChunk(ctx, sendChunkParams{
chatID: chatID,
threadID: threadID,
content: content,
replyToID: replyToID,
mdFallback: chunk,
useMarkdownV2: useMarkdownV2,
}); err != nil {
return err
}
replyToID = ""
continue
}
// Use the estimated smaller length as a guide for SplitMessage.
// SplitMessage will find natural break points (newlines/spaces) and respect code blocks.
subChunks := channels.SplitMessage(chunk, smallerLen)
queue = append(subChunks, queue...)
// Safety fallback: If SplitMessage failed to shorten the chunk, force a manual hard split.
if len(subChunks) == 1 && subChunks[0] == chunk {
part1 := string(runeChunk[:smallerLen])
part2 := string(runeChunk[smallerLen:])
subChunks = []string{part1, part2}
}
// Filter out empty chunks to avoid sending empty messages to Telegram.
nonEmpty := make([]string, 0, len(subChunks))
for _, s := range subChunks {
if s != "" {
nonEmpty = append(nonEmpty, s)
}
}
// Push sub-chunks back to the front of the queue
queue = append(nonEmpty, queue...)
continue
}
if err := c.sendHTMLChunk(ctx, chatID, threadID, htmlContent, chunk, replyToID); err != nil {
if err := c.sendChunk(ctx, sendChunkParams{
chatID: chatID,
threadID: threadID,
content: content,
replyToID: replyToID,
mdFallback: chunk,
useMarkdownV2: useMarkdownV2,
}); err != nil {
return err
}
// Only the first chunk should be a reply; subsequent chunks are normal messages.
@@ -212,17 +258,31 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
return nil
}
// sendHTMLChunk sends a single HTML message, falling back to the original
// markdown as plain text on parse failure so users never see raw HTML tags.
func (c *TelegramChannel) sendHTMLChunk(
ctx context.Context, chatID int64, threadID int, htmlContent, mdFallback string, replyToID string,
) error {
tgMsg := tu.Message(tu.ID(chatID), htmlContent)
tgMsg.ParseMode = telego.ModeHTML
tgMsg.MessageThreadID = threadID
type sendChunkParams struct {
chatID int64
threadID int
content string
replyToID string
mdFallback string
useMarkdownV2 bool
}
if replyToID != "" {
if mid, parseErr := strconv.Atoi(replyToID); parseErr == nil {
// sendChunk sends a single HTML/MarkdownV2 message, falling back to the original
// markdown as plain text on parse failure so users never see raw HTML/MarkdownV2 tags.
func (c *TelegramChannel) sendChunk(
ctx context.Context,
params sendChunkParams,
) error {
tgMsg := tu.Message(tu.ID(params.chatID), params.content)
tgMsg.MessageThreadID = params.threadID
if params.useMarkdownV2 {
tgMsg.WithParseMode(telego.ModeMarkdownV2)
} else {
tgMsg.WithParseMode(telego.ModeHTML)
}
if params.replyToID != "" {
if mid, parseErr := strconv.Atoi(params.replyToID); parseErr == nil {
tgMsg.ReplyParameters = &telego.ReplyParameters{
MessageID: mid,
}
@@ -230,15 +290,15 @@ func (c *TelegramChannel) sendHTMLChunk(
}
if _, err := c.bot.SendMessage(ctx, tgMsg); err != nil {
logger.ErrorCF("telegram", "HTML parse failed, falling back to plain text", map[string]any{
"error": err.Error(),
})
tgMsg.Text = mdFallback
logParseFailed(err, params.useMarkdownV2)
tgMsg.Text = params.mdFallback
tgMsg.ParseMode = ""
if _, err = c.bot.SendMessage(ctx, tgMsg); err != nil {
return fmt.Errorf("telegram send: %w", channels.ErrTemporary)
}
}
return nil
}
@@ -279,6 +339,7 @@ func (c *TelegramChannel) StartTyping(ctx context.Context, chatID string) (func(
// EditMessage implements channels.MessageEditor.
func (c *TelegramChannel) EditMessage(ctx context.Context, chatID string, messageID string, content string) error {
useMarkdownV2 := c.config.Channels.Telegram.UseMarkdownV2
cid, _, err := parseTelegramChatID(chatID)
if err != nil {
return err
@@ -287,10 +348,19 @@ func (c *TelegramChannel) EditMessage(ctx context.Context, chatID string, messag
if err != nil {
return err
}
htmlContent := markdownToTelegramHTML(content)
editMsg := tu.EditMessageText(tu.ID(cid), mid, htmlContent)
editMsg.ParseMode = telego.ModeHTML
parsedContent := parseContent(content, useMarkdownV2)
editMsg := tu.EditMessageText(tu.ID(cid), mid, parsedContent)
if useMarkdownV2 {
editMsg.WithParseMode(telego.ModeMarkdownV2)
} else {
editMsg.WithParseMode(telego.ModeHTML)
}
_, err = c.bot.EditMessageText(ctx, editMsg)
if err != nil {
logParseFailed(err, useMarkdownV2)
_, err = c.bot.EditMessageText(ctx, tu.EditMessageText(tu.ID(cid), mid, content))
}
return err
}
@@ -367,6 +437,20 @@ func (c *TelegramChannel) SendMedia(ctx context.Context, msg bus.OutboundMediaMe
Caption: part.Caption,
}
_, err = c.bot.SendPhoto(ctx, params)
if err != nil && strings.Contains(err.Error(), "PHOTO_INVALID_DIMENSIONS") {
if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil {
file.Close()
return fmt.Errorf("telegram rewind media after photo failure: %w", channels.ErrTemporary)
}
docParams := &telego.SendDocumentParams{
ChatID: tu.ID(chatID),
MessageThreadID: threadID,
Document: telego.InputFile{File: file},
Caption: part.Caption,
}
_, err = c.bot.SendDocument(ctx, docParams)
}
case "audio":
params := &telego.SendAudioParams{
ChatID: tu.ID(chatID),
@@ -624,6 +708,14 @@ func (c *TelegramChannel) downloadFile(ctx context.Context, fileID, ext string)
return c.downloadFileWithInfo(file, ext)
}
func parseContent(text string, useMarkdownV2 bool) string {
if useMarkdownV2 {
return markdownToTelegramMarkdownV2(text)
}
return markdownToTelegramHTML(text)
}
// parseTelegramChatID splits "chatID/threadID" into its components.
// Returns threadID=0 when no "/" is present (non-forum messages).
func parseTelegramChatID(chatID string) (int64, int, error) {
@@ -643,109 +735,18 @@ func parseTelegramChatID(chatID string) (int64, int, error) {
return cid, tid, nil
}
func markdownToTelegramHTML(text string) string {
if text == "" {
return ""
func logParseFailed(err error, useMarkdownV2 bool) {
parsingName := "HTML"
if useMarkdownV2 {
parsingName = "MarkdownV2"
}
codeBlocks := extractCodeBlocks(text)
text = codeBlocks.text
inlineCodes := extractInlineCodes(text)
text = inlineCodes.text
text = reHeading.ReplaceAllString(text, "$1")
text = reBlockquote.ReplaceAllString(text, "$1")
text = escapeHTML(text)
text = reLink.ReplaceAllString(text, `<a href="$2">$1</a>`)
text = reBoldStar.ReplaceAllString(text, "<b>$1</b>")
text = reBoldUnder.ReplaceAllString(text, "<b>$1</b>")
text = reItalic.ReplaceAllStringFunc(text, func(s string) string {
match := reItalic.FindStringSubmatch(s)
if len(match) < 2 {
return s
}
return "<i>" + match[1] + "</i>"
})
text = reStrike.ReplaceAllString(text, "<s>$1</s>")
text = reListItem.ReplaceAllString(text, "• ")
for i, code := range inlineCodes.codes {
escaped := escapeHTML(code)
text = strings.ReplaceAll(text, fmt.Sprintf("\x00IC%d\x00", i), fmt.Sprintf("<code>%s</code>", escaped))
}
for i, code := range codeBlocks.codes {
escaped := escapeHTML(code)
text = strings.ReplaceAll(
text,
fmt.Sprintf("\x00CB%d\x00", i),
fmt.Sprintf("<pre><code>%s</code></pre>", escaped),
)
}
return text
}
type codeBlockMatch struct {
text string
codes []string
}
func extractCodeBlocks(text string) codeBlockMatch {
matches := reCodeBlock.FindAllStringSubmatch(text, -1)
codes := make([]string, 0, len(matches))
for _, match := range matches {
codes = append(codes, match[1])
}
i := 0
text = reCodeBlock.ReplaceAllStringFunc(text, func(m string) string {
placeholder := fmt.Sprintf("\x00CB%d\x00", i)
i++
return placeholder
})
return codeBlockMatch{text: text, codes: codes}
}
type inlineCodeMatch struct {
text string
codes []string
}
func extractInlineCodes(text string) inlineCodeMatch {
matches := reInlineCode.FindAllStringSubmatch(text, -1)
codes := make([]string, 0, len(matches))
for _, match := range matches {
codes = append(codes, match[1])
}
i := 0
text = reInlineCode.ReplaceAllStringFunc(text, func(m string) string {
placeholder := fmt.Sprintf("\x00IC%d\x00", i)
i++
return placeholder
})
return inlineCodeMatch{text: text, codes: codes}
}
func escapeHTML(text string) string {
text = strings.ReplaceAll(text, "&", "&amp;")
text = strings.ReplaceAll(text, "<", "&lt;")
text = strings.ReplaceAll(text, ">", "&gt;")
return text
logger.ErrorCF("telegram",
fmt.Sprintf("%s parse failed, falling back to plain text", parsingName),
map[string]any{
"error": err.Error(),
},
)
}
// isBotMentioned checks if the bot is mentioned in the message via entities.
@@ -3,7 +3,6 @@ package telegram
import (
"context"
"testing"
"time"
"github.com/mymmrac/telego"
@@ -36,10 +35,7 @@ func TestHandleMessage_DoesNotConsumeGenericCommandsLocally(t *testing.T) {
t.Fatalf("handleMessage error: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
inbound, ok := <-messageBus.InboundChan()
if !ok {
t.Fatal("expected inbound message to be forwarded")
}
@@ -108,22 +108,24 @@ func TestHandleMessage_GroupMentionOnly_BotCommandEntity(t *testing.T) {
t.Fatalf("handleMessage error: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 150*time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Microsecond)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
if tc.wantForwarded {
if !ok {
t.Fatal("expected inbound message to be forwarded")
select {
case <-ctx.Done():
if tc.wantForwarded {
t.Fatal("timeout waiting for message to be forwarded")
return
}
if inbound.Content != tc.wantContent {
t.Fatalf("content=%q want=%q", inbound.Content, tc.wantContent)
case inbound, ok := <-messageBus.InboundChan():
if tc.wantForwarded {
if !ok {
t.Fatal("expected inbound message to be forwarded")
}
if inbound.Content != tc.wantContent {
t.Fatalf("content=%q want=%q", inbound.Content, tc.wantContent)
}
return
}
return
}
if ok {
t.Fatalf("expected message to be filtered, got content=%q", inbound.Content)
}
})
}
+196 -15
View File
@@ -4,9 +4,11 @@ import (
"context"
"encoding/json"
"errors"
"io"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/mymmrac/telego"
ta "github.com/mymmrac/telego/telegoapi"
@@ -15,6 +17,8 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/channels"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/media"
)
const testToken = "1234567890:aaaabbbbaaaabbbbaaaabbbbaaaabbbbccc"
@@ -38,8 +42,20 @@ func (s *stubCaller) Call(ctx context.Context, url string, data *ta.RequestData)
// stubConstructor implements ta.RequestConstructor for testing.
type stubConstructor struct{}
// multipartCall is the record kept for one MultipartRequest invocation by the
// recording request constructor used in these tests.
type multipartCall struct {
	// Parameters holds a copy of the string parameters passed to the call.
	Parameters map[string]string
	// FileSizes maps each non-nil file field to the number of bytes read from it.
	FileSizes map[string]int
}
func (s *stubConstructor) JSONRequest(parameters any) (*ta.RequestData, error) {
return &ta.RequestData{}, nil
b, err := json.Marshal(parameters)
if err != nil {
return nil, err
}
return &ta.RequestData{
ContentType: "application/json",
BodyRaw: b,
}, nil
}
func (s *stubConstructor) MultipartRequest(
@@ -49,6 +65,36 @@ func (s *stubConstructor) MultipartRequest(
return &ta.RequestData{}, nil
}
// multipartRecordingConstructor embeds stubConstructor and additionally keeps
// a record of every MultipartRequest call it receives.
type multipartRecordingConstructor struct {
	stubConstructor
	calls []multipartCall
}

// MultipartRequest copies the parameters, drains every non-nil file reader to
// measure its size, appends the resulting record to s.calls and returns an
// empty request. A read error from any file is returned as-is and nothing is
// recorded for that call.
func (s *multipartRecordingConstructor) MultipartRequest(
	parameters map[string]string,
	files map[string]ta.NamedReader,
) (*ta.RequestData, error) {
	paramCopy := make(map[string]string, len(parameters))
	for key, value := range parameters {
		paramCopy[key] = value
	}

	sizes := make(map[string]int, len(files))
	for field, reader := range files {
		if reader == nil {
			continue
		}
		payload, err := io.ReadAll(reader)
		if err != nil {
			return nil, err
		}
		sizes[field] = len(payload)
	}

	s.calls = append(s.calls, multipartCall{Parameters: paramCopy, FileSizes: sizes})
	return &ta.RequestData{}, nil
}
// successResponse returns a ta.Response that telego will treat as a successful SendMessage.
func successResponse(t *testing.T) *ta.Response {
t.Helper()
@@ -60,11 +106,19 @@ func successResponse(t *testing.T) *ta.Response {
// newTestChannel creates a TelegramChannel with a mocked bot for unit testing.
func newTestChannel(t *testing.T, caller *stubCaller) *TelegramChannel {
return newTestChannelWithConstructor(t, caller, &stubConstructor{})
}
func newTestChannelWithConstructor(
t *testing.T,
caller *stubCaller,
constructor ta.RequestConstructor,
) *TelegramChannel {
t.Helper()
bot, err := telego.NewBot(testToken,
telego.WithAPICaller(caller),
telego.WithRequestConstructor(&stubConstructor{}),
telego.WithRequestConstructor(constructor),
telego.WithDiscardLogger(),
)
require.NoError(t, err)
@@ -78,9 +132,96 @@ func newTestChannel(t *testing.T, caller *stubCaller) *TelegramChannel {
BaseChannel: base,
bot: bot,
chatIDs: make(map[string]int64),
config: config.DefaultConfig(),
}
}
// TestSendMedia_ImageFallbacksToDocumentOnInvalidDimensions checks that when
// the sendPhoto call fails with PHOTO_INVALID_DIMENSIONS, SendMedia issues a
// second call to sendDocument with the same file bytes and the caption.
func TestSendMedia_ImageFallbacksToDocumentOnInvalidDimensions(t *testing.T) {
	const fileName = "woodstock-en-10s.png"
	payload := []byte("fake-png-content")

	recorder := &multipartRecordingConstructor{}
	caller := &stubCaller{
		callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
			if strings.Contains(url, "sendPhoto") {
				return nil, errors.New(`api: 400 "Bad Request: PHOTO_INVALID_DIMENSIONS"`)
			}
			if strings.Contains(url, "sendDocument") {
				return successResponse(t), nil
			}
			t.Fatalf("unexpected API call: %s", url)
			return nil, nil
		},
	}

	ch := newTestChannelWithConstructor(t, caller, recorder)
	store := media.NewFileMediaStore()
	ch.SetMediaStore(store)

	// Put a fake image on disk and register it with the media store.
	localPath := filepath.Join(t.TempDir(), fileName)
	require.NoError(t, os.WriteFile(localPath, payload, 0o644))
	ref, err := store.Store(
		localPath,
		media.MediaMeta{Filename: fileName, ContentType: "image/png"},
		"scope-1",
	)
	require.NoError(t, err)

	outbound := bus.OutboundMediaMessage{
		ChatID: "12345",
		Parts: []bus.MediaPart{{
			Type:    "image",
			Ref:     ref,
			Caption: "caption",
		}},
	}
	err = ch.SendMedia(context.Background(), outbound)
	require.NoError(t, err)

	// First the photo attempt, then the document fallback.
	require.Len(t, caller.calls, 2)
	assert.Contains(t, caller.calls[0].URL, "sendPhoto")
	assert.Contains(t, caller.calls[1].URL, "sendDocument")

	// Both attempts must have carried the full file; the fallback keeps the caption.
	require.Len(t, recorder.calls, 2)
	assert.Equal(t, len(payload), recorder.calls[0].FileSizes["photo"])
	assert.Equal(t, len(payload), recorder.calls[1].FileSizes["document"])
	assert.Equal(t, "caption", recorder.calls[1].Parameters["caption"])
}
// TestSendMedia_ImageNonDimensionErrorDoesNotFallback checks that an API error
// other than the invalid-dimensions one is surfaced as channels.ErrTemporary
// after a single sendPhoto attempt, with no sendDocument retry.
func TestSendMedia_ImageNonDimensionErrorDoesNotFallback(t *testing.T) {
	recorder := &multipartRecordingConstructor{}
	caller := &stubCaller{
		callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
			return nil, errors.New("api: 500 \"server exploded\"")
		},
	}

	ch := newTestChannelWithConstructor(t, caller, recorder)
	store := media.NewFileMediaStore()
	ch.SetMediaStore(store)

	// Put a fake image on disk and register it with the media store.
	localPath := filepath.Join(t.TempDir(), "image.png")
	require.NoError(t, os.WriteFile(localPath, []byte("fake-png-content"), 0o644))
	meta := media.MediaMeta{Filename: "image.png", ContentType: "image/png"}
	ref, err := store.Store(localPath, meta, "scope-1")
	require.NoError(t, err)

	outbound := bus.OutboundMediaMessage{
		ChatID: "12345",
		Parts: []bus.MediaPart{{
			Type: "image",
			Ref:  ref,
		}},
	}
	err = ch.SendMedia(context.Background(), outbound)

	require.Error(t, err)
	assert.ErrorIs(t, err, channels.ErrTemporary)

	// Exactly one API call and one multipart request: the photo attempt only.
	require.Len(t, caller.calls, 1)
	assert.Contains(t, caller.calls[0].URL, "sendPhoto")
	require.Len(t, recorder.calls, 1)
	assert.NotContains(t, caller.calls[0].URL, "sendDocument")
}
func TestSend_EmptyContent(t *testing.T) {
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
@@ -235,6 +376,55 @@ func TestSend_MarkdownShortButHTMLLong_MultipleCalls(t *testing.T) {
)
}
// TestSend_HTMLOverflow_WordBoundary sends markdown that fits in 4000 runes but
// grows once converted to HTML, and checks that at least one outgoing chunk
// still contains the marker word in one piece.
func TestSend_HTMLOverflow_WordBoundary(t *testing.T) {
	const targetWord = "TARGETWORDTHATSTAYSTOGETHER"

	caller := &stubCaller{
		callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
			return successResponse(t), nil
		},
	}
	ch := newTestChannel(t, caller)

	// Layout of the markdown input (3987 chars in total, so <= 4000):
	//   - 430 units of "**a** " (6 chars each)  = 2580 chars before the word
	//   - the 27-char target word
	//   - 230 units of " **b**" (6 chars each)  = 1380 chars after the word
	// Each bold unit gains 3 chars as HTML, i.e. (430+230)*3 = 1980 extra chars,
	// which pushes the HTML form past 4096 and forces a split near the word.
	content := strings.Repeat("**a** ", 430) + targetWord + strings.Repeat(" **b**", 230)

	// Guard the precondition the scenario relies on.
	assert.LessOrEqual(t, len([]rune(content)), 4000, "markdown content must not exceed chunk size for this test")

	sendErr := ch.Send(context.Background(), bus.OutboundMessage{
		ChatID:  "123456",
		Content: content,
	})
	assert.NoError(t, sendErr)

	// Scan the recorded request bodies until a chunk holding the whole word is found.
	wordIntact := false
	for idx := 0; idx < len(caller.calls) && !wordIntact; idx++ {
		var params map[string]any
		decodeErr := json.Unmarshal(caller.calls[idx].Data.BodyRaw, &params)
		require.NoError(t, decodeErr)

		text, _ := params["text"].(string)
		wordIntact = strings.Contains(text, targetWord)
		t.Logf("Chunk %d length: %d, contains target word: %v", idx, len(text), wordIntact)
	}

	assert.True(t, wordIntact, "The target word should not be split between chunks")
}
func TestSend_NotRunning(t *testing.T) {
caller := &stubCaller{
callFn: func(ctx context.Context, url string, data *ta.RequestData) (*ta.Response, error) {
@@ -355,10 +545,7 @@ func TestHandleMessage_ForumTopic_SetsMetadata(t *testing.T) {
err := ch.handleMessage(context.Background(), msg)
require.NoError(t, err)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
inbound, ok := <-messageBus.InboundChan()
require.True(t, ok, "expected inbound message")
// Composite chatID should include thread ID
@@ -397,10 +584,7 @@ func TestHandleMessage_NoForum_NoThreadMetadata(t *testing.T) {
err := ch.handleMessage(context.Background(), msg)
require.NoError(t, err)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
inbound, ok := <-messageBus.InboundChan()
require.True(t, ok)
// Plain chatID without thread suffix
@@ -443,10 +627,7 @@ func TestHandleMessage_ReplyThread_NonForum_NoIsolation(t *testing.T) {
err := ch.handleMessage(context.Background(), msg)
require.NoError(t, err)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
inbound, ok := <-messageBus.InboundChan()
require.True(t, ok)
// chatID should NOT include thread suffix for non-forum groups
+31
View File
@@ -0,0 +1,31 @@
*bold \*text*
_italic \*text_
__underline__
~strikethrough~
||spoiler||
*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold*
[inline URL](http://www.example.com/)
[inline mention of a user](tg://user?id=123456789)
![👍](tg://emoji?id=5368324170671202286)
![22:45 tomorrow](tg://time?unix=1647531900&format=wDT)
![22:45 tomorrow](tg://time?unix=1647531900&format=t)
![22:45 tomorrow](tg://time?unix=1647531900&format=r)
![22:45 tomorrow](tg://time?unix=1647531900)
`inline fixed-width code`
```
pre-formatted fixed-width code block
```
```python
pre-formatted fixed-width code block written in the Python programming language
```
>Block quotation started
>Block quotation continued
>Block quotation continued
>Block quotation continued
>The last line of the block quotation
**>The expandable block quotation started right after the previous block quotation
>It is separated from the previous block quotation by an empty bold entity
>Expandable block quotation continued
>Hidden by default part of the expandable block quotation started
>Expandable block quotation continued
>The last line of the expandable block quotation with the expandability mark||
@@ -3,7 +3,6 @@ package whatsapp
import (
"context"
"testing"
"time"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/channels"
@@ -25,10 +24,7 @@ func TestHandleIncomingMessage_DoesNotConsumeGenericCommandsLocally(t *testing.T
"content": "/help",
})
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
inbound, ok := <-messageBus.InboundChan()
if !ok {
t.Fatal("expected inbound message to be forwarded")
}
@@ -43,14 +43,19 @@ func TestHandleIncoming_DoesNotConsumeGenericCommandsLocally(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
inbound, ok := messageBus.ConsumeInbound(ctx)
if !ok {
t.Fatal("expected inbound message to be forwarded")
}
if inbound.Channel != "whatsapp_native" {
t.Fatalf("channel=%q", inbound.Channel)
}
if inbound.Content != "/new" {
t.Fatalf("content=%q", inbound.Content)
select {
case <-ctx.Done():
t.Fatal("timeout waiting for message to be forwarded")
return
case inbound, ok := <-messageBus.InboundChan():
if !ok {
t.Fatal("expected inbound message to be forwarded")
}
if inbound.Channel != "whatsapp_native" {
t.Fatalf("channel=%q", inbound.Channel)
}
if inbound.Content != "/new" {
t.Fatalf("content=%q", inbound.Content)
}
}
}
+18 -5
View File
@@ -312,6 +312,7 @@ type TelegramConfig struct {
Typing TypingConfig `json:"typing,omitempty"`
Placeholder PlaceholderConfig `json:"placeholder,omitempty"`
ReasoningChannelID string `json:"reasoning_channel_id" env:"PICOCLAW_CHANNELS_TELEGRAM_REASONING_CHANNEL_ID"`
UseMarkdownV2 bool `json:"use_markdown_v2" env:"PICOCLAW_CHANNELS_TELEGRAM_USE_MARKDOWN_V2"`
}
type FeishuConfig struct {
@@ -532,6 +533,7 @@ type ProvidersConfig struct {
Minimax ProviderConfig `json:"minimax"`
LongCat ProviderConfig `json:"longcat"`
ModelScope ProviderConfig `json:"modelscope"`
Novita ProviderConfig `json:"novita"`
}
// IsEmpty checks if all provider configs are empty (no API keys or API bases set)
@@ -560,7 +562,8 @@ func (p ProvidersConfig) IsEmpty() bool {
p.Avian.APIKey == "" && p.Avian.APIBase == "" &&
p.Minimax.APIKey == "" && p.Minimax.APIBase == "" &&
p.LongCat.APIKey == "" && p.LongCat.APIBase == "" &&
p.ModelScope.APIKey == "" && p.ModelScope.APIBase == ""
p.ModelScope.APIKey == "" && p.ModelScope.APIBase == "" &&
p.Novita.APIKey == "" && p.Novita.APIBase == ""
}
// MarshalJSON implements custom JSON marshaling for ProvidersConfig
@@ -590,7 +593,9 @@ type OpenAIProviderConfig struct {
// ModelConfig represents a model-centric provider configuration.
// It allows adding new providers (especially OpenAI-compatible ones) via configuration only.
// The model field uses protocol prefix format: [protocol/]model-identifier
// Supported protocols: openai, anthropic, antigravity, claude-cli, codex-cli, github-copilot
// Supported protocols include openai, anthropic, antigravity, claude-cli,
// codex-cli, github-copilot, and named OpenAI-compatible protocols such as
// groq, deepseek, modelscope, and novita.
// Default protocol is "openai" if no prefix is specified.
type ModelConfig struct {
// Required fields
@@ -694,10 +699,18 @@ type WebToolsConfig struct {
Perplexity PerplexityConfig ` json:"perplexity"`
SearXNG SearXNGConfig ` json:"searxng"`
GLMSearch GLMSearchConfig ` json:"glm_search"`
// PreferNative controls whether to use provider-native web search when
// the active LLM supports it (e.g. OpenAI web_search_preview). When true,
// the client-side web_search tool is hidden to avoid duplicate search surfaces,
// and the provider's built-in search is used instead. Falls back to client-side
// search when the provider does not support native search.
PreferNative bool `json:"prefer_native" env:"PICOCLAW_TOOLS_WEB_PREFER_NATIVE"`
// Proxy is an optional proxy URL for web tools (http/https/socks5/socks5h).
// For authenticated proxies, prefer HTTP_PROXY/HTTPS_PROXY env vars instead of embedding credentials in config.
Proxy string `json:"proxy,omitempty" env:"PICOCLAW_TOOLS_WEB_PROXY"`
FetchLimitBytes int64 `json:"fetch_limit_bytes,omitempty" env:"PICOCLAW_TOOLS_WEB_FETCH_LIMIT_BYTES"`
Proxy string `json:"proxy,omitempty" env:"PICOCLAW_TOOLS_WEB_PROXY"`
FetchLimitBytes int64 `json:"fetch_limit_bytes,omitempty" env:"PICOCLAW_TOOLS_WEB_FETCH_LIMIT_BYTES"`
Format string `json:"format,omitempty" env:"PICOCLAW_TOOLS_WEB_FORMAT"`
PrivateHostWhitelist FlexibleStringSlice `json:"private_host_whitelist,omitempty" env:"PICOCLAW_TOOLS_WEB_PRIVATE_HOST_WHITELIST"`
}
type CronToolsConfig struct {
@@ -1030,7 +1043,7 @@ func (c *Config) GetModelConfig(modelName string) (*ModelConfig, error) {
}
// Multiple configs - use round-robin for load balancing
idx := rrCounter.Add(1) % uint64(len(matches))
idx := (rrCounter.Add(1) - 1) % uint64(len(matches))
return &matches[idx], nil
}
+55
View File
@@ -77,6 +77,22 @@ func TestAgentModelConfig_MarshalObject(t *testing.T) {
}
}
// TestProvidersConfig_IsEmpty checks that the zero ProvidersConfig reports
// empty and that setting only the Novita API key makes it non-empty.
func TestProvidersConfig_IsEmpty(t *testing.T) {
	if zero := (ProvidersConfig{}); !zero.IsEmpty() {
		t.Fatal("empty ProvidersConfig should report empty")
	}

	withNovita := ProvidersConfig{Novita: ProviderConfig{APIKey: "test-key"}}
	if withNovita.IsEmpty() {
		t.Fatal("ProvidersConfig with novita settings should not report empty")
	}
}
func TestAgentConfig_FullParse(t *testing.T) {
jsonData := `{
"agents": {
@@ -401,6 +417,45 @@ func TestDefaultConfig_OpenAIWebSearchEnabled(t *testing.T) {
}
}
// TestDefaultConfig_WebPreferNativeEnabled checks that the default config has
// Tools.Web.PreferNative switched on.
func TestDefaultConfig_WebPreferNativeEnabled(t *testing.T) {
	if preferNative := DefaultConfig().Tools.Web.PreferNative; !preferNative {
		t.Fatal("DefaultConfig().Tools.Web.PreferNative should be true")
	}
}
// TestLoadConfig_WebPreferNativeDefaultsTrueWhenUnset loads a config file that
// does not mention prefer_native and checks that the loaded value is still true.
func TestLoadConfig_WebPreferNativeDefaultsTrueWhenUnset(t *testing.T) {
	configPath := filepath.Join(t.TempDir(), "config.json")
	raw := []byte(`{"tools":{"web":{"enabled":true}}}`)
	if err := os.WriteFile(configPath, raw, 0o600); err != nil {
		t.Fatalf("WriteFile() error: %v", err)
	}

	loaded, err := LoadConfig(configPath)
	if err != nil {
		t.Fatalf("LoadConfig() error: %v", err)
	}

	if !loaded.Tools.Web.PreferNative {
		t.Fatal("PreferNative should remain true when unset in config file")
	}
}
// TestLoadConfig_WebPreferNativeCanBeDisabled loads a config file that sets
// prefer_native to false and checks that the loaded value is false.
func TestLoadConfig_WebPreferNativeCanBeDisabled(t *testing.T) {
	configPath := filepath.Join(t.TempDir(), "config.json")
	raw := []byte(`{"tools":{"web":{"prefer_native":false}}}`)
	if err := os.WriteFile(configPath, raw, 0o600); err != nil {
		t.Fatalf("WriteFile() error: %v", err)
	}

	loaded, err := LoadConfig(configPath)
	if err != nil {
		t.Fatalf("LoadConfig() error: %v", err)
	}

	if loaded.Tools.Web.PreferNative {
		t.Fatal("PreferNative should be false when disabled in config file")
	}
}
func TestDefaultConfig_ExecAllowRemoteEnabled(t *testing.T) {
cfg := DefaultConfig()
if !cfg.Tools.Exec.AllowRemote {
+4 -1
View File
@@ -15,7 +15,7 @@ func DefaultConfig() *Config {
// Determine the base path for the workspace.
// Priority: $PICOCLAW_HOME > ~/.picoclaw
var homePath string
if picoclawHome := os.Getenv("PICOCLAW_HOME"); picoclawHome != "" {
if picoclawHome := os.Getenv(EnvHome); picoclawHome != "" {
homePath = picoclawHome
} else {
userHome, _ := os.UserHomeDir()
@@ -59,6 +59,7 @@ func DefaultConfig() *Config {
Enabled: true,
Text: "Thinking... 💭",
},
UseMarkdownV2: false,
},
Feishu: FeishuConfig{
Enabled: false,
@@ -412,8 +413,10 @@ func DefaultConfig() *Config {
ToolConfig: ToolConfig{
Enabled: true,
},
PreferNative: true,
Proxy: "",
FetchLimitBytes: 10 * 1024 * 1024, // 10MB by default
Format: "plaintext",
Brave: BraveConfig{
Enabled: false,
APIKey: "",
+37
View File
@@ -0,0 +1,37 @@
// PicoClaw - Ultra-lightweight personal AI agent
// License: MIT
//
// Copyright (c) 2026 PicoClaw contributors
package config
// Runtime environment variable keys for the picoclaw process.
// These control the location of files and binaries at runtime and are read
// directly via os.Getenv / os.LookupEnv. All picoclaw-specific keys use the
// PICOCLAW_ prefix. Reference these constants instead of inline string
// literals to keep all supported knobs visible in one place and to prevent
// typos.
const (
	// EnvHome overrides the base directory for all picoclaw data
	// (config, workspace, skills, auth store, ...).
	// Default: ~/.picoclaw
	//
	// pkg/credential keeps its own package-local copy of this key
	// (picoclawHome) to avoid an import cycle; keep the two values in sync.
	EnvHome = "PICOCLAW_HOME"
	// EnvConfig overrides the full path to the JSON config file.
	// Default: $PICOCLAW_HOME/config.json
	EnvConfig = "PICOCLAW_CONFIG"
	// EnvBuiltinSkills overrides the directory from which built-in
	// skills are loaded.
	// Default: <cwd>/skills
	EnvBuiltinSkills = "PICOCLAW_BUILTIN_SKILLS"
	// EnvBinary overrides the path to the picoclaw executable.
	// Used by the web launcher when spawning the gateway subprocess.
	// Default: resolved from the same directory as the current executable.
	EnvBinary = "PICOCLAW_BINARY"
	// EnvGatewayHost overrides the host address for the gateway server.
	// Default: "127.0.0.1"
	EnvGatewayHost = "PICOCLAW_GATEWAY_HOST"
)
+30
View File
@@ -80,6 +80,36 @@ func TestGetModelConfig_RoundRobin(t *testing.T) {
}
}
// TestGetModelConfig_RoundRobinStartsFromFirstMatch resets the round-robin
// counter and checks that successive lookups of a model name with three
// configs walk them in list order, starting at the first and wrapping around.
func TestGetModelConfig_RoundRobinStartsFromFirstMatch(t *testing.T) {
	// Start from a known counter value so the expected order is deterministic.
	rrCounter.Store(0)

	backends := []ModelConfig{
		{ModelName: "lb-model", Model: "openai/gpt-4o-1", APIKey: "key1"},
		{ModelName: "lb-model", Model: "openai/gpt-4o-2", APIKey: "key2"},
		{ModelName: "lb-model", Model: "openai/gpt-4o-3", APIKey: "key3"},
	}
	cfg := &Config{ModelList: backends}

	expected := [...]string{
		"openai/gpt-4o-1",
		"openai/gpt-4o-2",
		"openai/gpt-4o-3",
		"openai/gpt-4o-1",
		"openai/gpt-4o-2",
	}

	for call := 0; call < len(expected); call++ {
		got, err := cfg.GetModelConfig("lb-model")
		if err != nil {
			t.Fatalf("GetModelConfig() call %d error = %v", call, err)
		}
		if got.Model != expected[call] {
			t.Fatalf("GetModelConfig() call %d model = %q, want %q", call, got.Model, expected[call])
		}
	}
}
func TestGetModelConfig_Concurrent(t *testing.T) {
cfg := &Config{
ModelList: []ModelConfig{
+11 -4
View File
@@ -66,6 +66,14 @@ var ErrPassphraseRequired = errors.New("credential: enc:// passphrase required")
// indicating a wrong passphrase or SSH key. Callers can detect this with errors.Is.
var ErrDecryptionFailed = errors.New("credential: enc:// decryption failed (wrong passphrase or SSH key?)")
// SSHKeyPathEnvVar is the environment variable that specifies the path to the
// SSH private key used for enc:// credential encryption and decryption.
const SSHKeyPathEnvVar = "PICOCLAW_SSH_KEY_PATH"
// picoclawHome is a package-local copy of config.EnvHome. It is kept here to
// avoid a circular import between pkg/credential and pkg/config.
const picoclawHome = "PICOCLAW_HOME"
const (
fileScheme = "file://"
encScheme = "enc://"
@@ -73,7 +81,6 @@ const (
saltLen = 16
nonceLen = 12
keyLen = 32
sshKeyEnv = "PICOCLAW_SSH_KEY_PATH"
)
// Resolver resolves raw credential strings for model_list api_key fields.
@@ -248,14 +255,14 @@ func allowedSSHKeyPath(path string) bool {
clean := filepath.Clean(path)
// Exact match with PICOCLAW_SSH_KEY_PATH.
if envPath, ok := os.LookupEnv(sshKeyEnv); ok && envPath != "" {
if envPath, ok := os.LookupEnv(SSHKeyPathEnvVar); ok && envPath != "" {
if clean == filepath.Clean(envPath) {
return true
}
}
// Within PICOCLAW_HOME.
if picoHome := os.Getenv("PICOCLAW_HOME"); picoHome != "" {
if picoHome := os.Getenv(picoclawHome); picoHome != "" {
if isWithinDir(clean, picoHome) {
return true
}
@@ -316,7 +323,7 @@ func pickSSHKeyPath(override string) string {
if override != "" {
return override
}
if p, ok := os.LookupEnv(sshKeyEnv); ok {
if p, ok := os.LookupEnv(SSHKeyPathEnvVar); ok {
return p // respect explicit setting, even if ""
}
return findDefaultSSHKey()
+66 -11
View File
@@ -65,6 +65,7 @@ type CronService struct {
mu sync.RWMutex
running bool
stopChan chan struct{}
wakeChan chan struct{}
gronx *gronx.Gronx
}
@@ -73,6 +74,7 @@ func NewCronService(storePath string, onJob JobHandler) *CronService {
storePath: storePath,
onJob: onJob,
gronx: gronx.New(),
wakeChan: make(chan struct{}),
}
// Initialize and load store on creation
cs.loadStore()
@@ -97,6 +99,9 @@ func (cs *CronService) Start() error {
}
cs.stopChan = make(chan struct{})
if cs.wakeChan == nil {
cs.wakeChan = make(chan struct{})
}
cs.running = true
go cs.runLoop(cs.stopChan)
@@ -119,14 +124,47 @@ func (cs *CronService) Stop() {
}
func (cs *CronService) runLoop(stopChan chan struct{}) {
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
timer := time.NewTimer(time.Hour)
if !timer.Stop() {
<-timer.C
}
defer timer.Stop()
for {
// every loop, recalculate the next wake time
cs.mu.RLock()
nextWake := cs.getNextWakeMS()
cs.mu.RUnlock()
var delay time.Duration
now := time.Now().UnixMilli()
if nextWake == nil {
// no jobs, sleep for a long time (or until a new job is added)
delay = time.Hour
} else {
diff := *nextWake - now
if diff <= 0 {
delay = 0
} else {
delay = time.Duration(diff) * time.Millisecond
}
}
timer.Reset(delay)
select {
case <-stopChan:
return
case <-ticker.C:
case <-cs.wakeChan: // wake on new job or update
if !timer.Stop() {
select {
case <-timer.C:
default:
}
}
continue
case <-timer.C:
cs.checkJobs()
}
}
@@ -264,22 +302,19 @@ func (cs *CronService) executeJobByID(jobID string) {
}
func (cs *CronService) computeNextRun(schedule *CronSchedule, nowMS int64) *int64 {
if schedule.Kind == "at" {
switch schedule.Kind {
case "at":
if schedule.AtMS != nil && *schedule.AtMS > nowMS {
return schedule.AtMS
}
return nil
}
if schedule.Kind == "every" {
case "every":
if schedule.EveryMS == nil || *schedule.EveryMS <= 0 {
return nil
}
next := nowMS + *schedule.EveryMS
return &next
}
if schedule.Kind == "cron" {
case "cron":
if schedule.Expr == "" {
return nil
}
@@ -294,9 +329,19 @@ func (cs *CronService) computeNextRun(schedule *CronSchedule, nowMS int64) *int6
nextMS := nextTime.UnixMilli()
return &nextMS
default:
log.Printf("[cron] unknown schedule kind '%s'", schedule.Kind)
return nil
}
}
return nil
// notify nudges runLoop so it re-evaluates its next wake time immediately
// (e.g. after jobs are added, updated, enabled/disabled or removed).
//
// The send is non-blocking: it goes through only if wakeChan can accept the
// value at that moment; otherwise the default branch drops the signal.
// NOTE(review): wakeChan is created with make(chan struct{}) (unbuffered), so
// a signal sent while runLoop is not waiting in its select is lost — confirm
// whether a 1-slot buffer is intended.
func (cs *CronService) notify() {
	select {
	case cs.wakeChan <- struct{}{}:
	default:
		// The channel cannot take the signal right now; skip rather than block the caller.
	}
}
func (cs *CronService) recomputeNextRuns() {
@@ -400,6 +445,8 @@ func (cs *CronService) AddJob(
return nil, err
}
cs.notify()
return &job, nil
}
@@ -411,6 +458,9 @@ func (cs *CronService) UpdateJob(job *CronJob) error {
if cs.store.Jobs[i].ID == job.ID {
cs.store.Jobs[i] = *job
cs.store.Jobs[i].UpdatedAtMS = time.Now().UnixMilli()
cs.notify()
return cs.saveStoreUnsafe()
}
}
@@ -441,6 +491,8 @@ func (cs *CronService) removeJobUnsafe(jobID string) bool {
}
}
cs.notify()
return removed
}
@@ -463,6 +515,9 @@ func (cs *CronService) EnableJob(jobID string, enabled bool) *CronJob {
if err := cs.saveStoreUnsafe(); err != nil {
log.Printf("[cron] failed to save store after enable: %v", err)
}
cs.notify()
return job
}
}
+199
View File
@@ -1,10 +1,13 @@
package cron
import (
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
"testing"
"time"
)
func TestSaveStore_FilePermissions(t *testing.T) {
@@ -36,3 +39,199 @@ func TestSaveStore_FilePermissions(t *testing.T) {
func int64Ptr(v int64) *int64 {
return &v
}
// setupService creates a CronService backed by a uniquely named store file and
// returns the service together with the store path so the caller can remove
// the file when the test finishes.
//
// The file lives under os.TempDir() rather than the package directory, so an
// aborted test run does not leave JSON files in the source tree and the tests
// also work when the source tree is read-only.
func setupService(handler JobHandler) (*CronService, string) {
	storePath := filepath.Join(os.TempDir(), fmt.Sprintf("test_cron_%d.json", time.Now().UnixNano()))
	return NewCronService(storePath, handler), storePath
}
// TestCronService_CRUD walks one job through add, list, update, disable and
// remove on a service that has not been started, checking the store after
// each step.
func TestCronService_CRUD(t *testing.T) {
	cs, path := setupService(nil)
	defer os.Remove(path)

	// Add: a one-shot job scheduled an hour from now.
	runAt := time.Now().Add(time.Hour).UnixMilli()
	job, err := cs.AddJob("Task1", CronSchedule{Kind: "at", AtMS: &runAt}, "msg", true, "ch", "to")
	if err != nil || job.ID == "" {
		t.Fatalf("AddJob failed: %v", err)
	}

	// List: the new job must be the only entry.
	if listed := cs.ListJobs(true); len(listed) != 1 {
		t.Error("ListJobs should return 1 job")
	}

	// Update: a rename must land in the store.
	job.Name = "UpdatedName"
	if err = cs.UpdateJob(job); err != nil || cs.store.Jobs[0].Name != "UpdatedName" {
		t.Error("UpdateJob failed")
	}

	// Disable: the job must be off and have no pending next run.
	cs.EnableJob(job.ID, false)
	stored := cs.store.Jobs[0]
	if stored.Enabled || stored.State.NextRunAtMS != nil {
		t.Error("EnableJob(false) failed to clear state")
	}

	// Remove: the store must be empty afterwards.
	if removed := cs.RemoveJob(job.ID); !removed || len(cs.store.Jobs) != 0 {
		t.Error("RemoveJob failed")
	}
}
// TestCronService_ComputeNextRun is a table test for computeNextRun: for each
// schedule kind it only checks whether a next-run time is produced (non-nil)
// or not, relative to a fixed reference instant.
func TestCronService_ComputeNextRun(t *testing.T) {
	cs, path := setupService(nil)
	defer os.Remove(path)

	// Fixed reference time so "future" and "past" are unambiguous.
	now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC).UnixMilli()

	cases := []struct {
		name     string
		schedule CronSchedule
		wantNil  bool
	}{
		{name: "Valid Cron", schedule: CronSchedule{Kind: "cron", Expr: "0 * * * *"}, wantNil: false},
		{name: "Invalid Cron", schedule: CronSchedule{Kind: "cron", Expr: "invalid"}, wantNil: true},
		{name: "Every MS", schedule: CronSchedule{Kind: "every", EveryMS: int64Ptr(5000)}, wantNil: false},
		{name: "At Future", schedule: CronSchedule{Kind: "at", AtMS: int64Ptr(now + 1000)}, wantNil: false},
		{name: "At Past", schedule: CronSchedule{Kind: "at", AtMS: int64Ptr(now - 1000)}, wantNil: true},
	}

	for idx := range cases {
		tc := &cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			next := cs.computeNextRun(&tc.schedule, now)
			gotNil := next == nil
			if gotNil != tc.wantNil {
				t.Errorf("%s: got %v, wantNil %v", tc.name, next, tc.wantNil)
			}
		})
	}
}
// TestCronService_ExecutionFlow starts the service, schedules a one-shot "at"
// job 100ms in the future and checks that the handler is invoked for it and
// that the job is gone from the store afterwards.
func TestCronService_ExecutionFlow(t *testing.T) {
	var mu sync.Mutex
	executedJobs := make(map[string]bool)

	handler := func(job *CronJob) (string, error) {
		mu.Lock()
		executedJobs[job.ID] = true
		mu.Unlock()
		return "ok", nil
	}

	cs, path := setupService(handler)
	defer os.Remove(path)

	// Start the service.
	if err := cs.Start(); err != nil {
		t.Fatalf("Start failed: %v", err)
	}
	defer cs.Stop()

	// Add a job that runs 100ms from now. The error must be checked: on
	// failure job would be nil and the polling loop below would panic.
	target := time.Now().Add(100 * time.Millisecond).UnixMilli()
	job, err := cs.AddJob("FastJob", CronSchedule{Kind: "at", AtMS: &target}, "", false, "", "")
	if err != nil {
		t.Fatalf("AddJob failed: %v", err)
	}

	// Poll for the handler to have run, for up to ~2s.
	success := false
	for range 20 {
		mu.Lock()
		success = executedJobs[job.ID]
		mu.Unlock()
		if success {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}
	if !success {
		t.Fatal("Job was not executed in time")
	}

	// The job should be removed after it ran. The handler sets its flag
	// before it returns, so the service may still be finishing its post-run
	// bookkeeping when we get here; poll briefly instead of checking once.
	// NOTE(review): assumes the removal happens after the handler returns —
	// confirm against executeJobByID.
	remaining := -1
	for range 20 {
		count, ok := cs.Status()["jobs"].(int)
		if !ok {
			t.Fatalf("Status()[\"jobs\"] is not an int: %v", cs.Status()["jobs"])
		}
		remaining = count
		if remaining == 0 {
			break
		}
		time.Sleep(50 * time.Millisecond)
	}
	if remaining != 0 {
		t.Errorf("Job should be deleted after run, got count: %v", remaining)
	}
}
// TestCronService_PersistenceIntegrity checks that a job added through one
// service instance is written to the store file and read back by a second
// instance, and that loading a file with invalid JSON reports an error.
func TestCronService_PersistenceIntegrity(t *testing.T) {
	// Use a per-test temp directory so nothing is written into the package
	// directory and cleanup is automatic.
	tmpFile := filepath.Join(t.TempDir(), "persist_test.json")

	// Write a job and persist it.
	cs1 := NewCronService(tmpFile, nil)
	at := int64(2000000000000)
	if _, err := cs1.AddJob("PersistMe", CronSchedule{Kind: "at", AtMS: &at}, "payload", true, "ch1", ""); err != nil {
		t.Fatalf("AddJob failed: %v", err)
	}

	// The store file must exist now.
	if _, err := os.Stat(tmpFile); err != nil {
		t.Fatalf("Store file was not created: %v", err)
	}

	// Reload through a fresh instance and check the data survived.
	cs2 := NewCronService(tmpFile, nil)
	if err := cs2.Load(); err != nil {
		t.Fatalf("Failed to load store: %v", err)
	}
	jobs := cs2.ListJobs(true)
	if len(jobs) != 1 || jobs[0].Name != "PersistMe" {
		t.Errorf("Data corruption after reload. Got: %+v", jobs)
	}

	// Overwrite the store with invalid JSON; loading it must fail.
	if err := os.WriteFile(tmpFile, []byte("{invalid json}"), 0o644); err != nil {
		t.Fatalf("WriteFile() error: %v", err)
	}
	cs3 := NewCronService(tmpFile, nil)
	if err := cs3.loadStore(); err == nil {
		t.Error("Should return error when loading invalid JSON")
	}
}
// TestCronService_ConcurrentAccess hammers a running service from several
// goroutines at once: half of them add jobs, the other half list jobs and
// toggle the first one. It is meant to be run with -race; apart from reporting
// Start/AddJob errors it asserts nothing about the final state.
func TestCronService_ConcurrentAccess(t *testing.T) {
	cs, path := setupService(nil)
	defer os.Remove(path)

	if err := cs.Start(); err != nil {
		t.Fatalf("Start failed: %v", err)
	}
	defer cs.Stop()

	var wg sync.WaitGroup
	workers := 10
	iterations := 50
	wg.Add(workers * 2)

	// Add jobs concurrently.
	for i := range workers {
		go func(id int) {
			defer wg.Done()
			for j := range iterations {
				at := time.Now().Add(time.Hour).UnixMilli()
				name := fmt.Sprintf("Job-%d-%d", id, j)
				// t.Errorf is safe to call from other goroutines; t.Fatalf is not.
				if _, err := cs.AddJob(name, CronSchedule{Kind: "at", AtMS: &at}, "", false, "", ""); err != nil {
					t.Errorf("AddJob(%s) failed: %v", name, err)
				}
				time.Sleep(100 * time.Microsecond)
			}
		}(i)
	}

	// Read and update jobs concurrently.
	for range workers {
		go func() {
			defer wg.Done()
			for j := range iterations {
				jobs := cs.ListJobs(true)
				if len(jobs) > 0 {
					cs.EnableJob(jobs[0].ID, j%2 == 0)
				}
				time.Sleep(100 * time.Microsecond)
			}
		}()
	}

	wg.Wait()
}
+17 -13
View File
@@ -51,7 +51,7 @@ func init() {
FormatFieldValue: formatFieldValue,
}
logger = zerolog.New(consoleWriter).With().Timestamp().Logger()
logger = zerolog.New(consoleWriter).With().Timestamp().Caller().Logger()
fileLogger = zerolog.Logger{}
})
}
@@ -94,6 +94,12 @@ func SetLevel(level LogLevel) {
zerolog.SetGlobalLevel(level)
}
// SetConsoleLevel replaces the package-level console logger with the logger
// returned by logger.Level(level). The swap is done while holding mu for
// writing. Only logger is reassigned here; fileLogger is not touched.
func SetConsoleLevel(level LogLevel) {
	mu.Lock()
	defer mu.Unlock()
	logger = logger.Level(level)
}
func GetLevel() LogLevel {
mu.RLock()
defer mu.RUnlock()
@@ -134,9 +140,9 @@ func DisableFileLogging() {
fileLogger = zerolog.Logger{}
}
func getCallerInfo() (string, int, string) {
func getCallerSkip() int {
for i := 2; i < 15; i++ {
pc, file, line, ok := runtime.Caller(i)
pc, file, _, ok := runtime.Caller(i)
if !ok {
continue
}
@@ -158,10 +164,10 @@ func getCallerInfo() (string, int, string) {
continue
}
return filepath.Base(file), line, filepath.Base(funcName)
return i - 1
}
return "???", 0, "???"
return 3
}
//nolint:zerologlint
@@ -187,19 +193,16 @@ func logMessage(level LogLevel, component string, message string, fields map[str
return
}
callerFile, callerLine, callerFunc := getCallerInfo()
skip := getCallerSkip()
event := getEvent(logger, level)
// Build combined field with component and caller
if component != "" {
event.Str("caller", fmt.Sprintf("%-6s %s:%d (%s)", component, callerFile, callerLine, callerFunc))
} else {
event.Str("caller", fmt.Sprintf("<none> %s:%d (%s)", callerFile, callerLine, callerFunc))
event.Str("component", component)
}
appendFields(event, fields)
event.Msg(message)
event.CallerSkipFrame(skip).Msg(message)
// Also log to file if enabled
if fileLogger.GetLevel() != zerolog.NoLevel {
@@ -208,9 +211,10 @@ func logMessage(level LogLevel, component string, message string, fields map[str
if component != "" {
fileEvent.Str("component", component)
}
// fileEvent.Str("caller", fmt.Sprintf("%s:%d (%s)", callerFile, callerLine, callerFunc))
appendFields(event, fields)
fileEvent.Msg(message)
appendFields(fileEvent, fields)
fileEvent.CallerSkipFrame(skip).Msg(message)
}
if level == FATAL {
+3 -1
View File
@@ -5,13 +5,15 @@ import (
"io"
"os"
"path/filepath"
"github.com/sipeed/picoclaw/pkg/config"
)
func ResolveTargetHome(override string) (string, error) {
if override != "" {
return ExpandHome(override), nil
}
if envHome := os.Getenv("PICOCLAW_HOME"); envHome != "" {
if envHome := os.Getenv(config.EnvHome); envHome != "" {
return ExpandHome(envHome), nil
}
home, err := os.UserHomeDir()
+15 -11
View File
@@ -132,11 +132,12 @@ type OpenClawChannels struct {
}
type OpenClawTelegramConfig struct {
BotToken *string `json:"botToken"`
AllowFrom []string `json:"allowFrom"`
GroupPolicy *string `json:"groupPolicy"`
DmPolicy *string `json:"dmPolicy"`
Enabled *bool `json:"enabled"`
BotToken *string `json:"botToken"`
AllowFrom []string `json:"allowFrom"`
GroupPolicy *string `json:"groupPolicy"`
DmPolicy *string `json:"dmPolicy"`
Enabled *bool `json:"enabled"`
UseMarkdownV2 *bool `json:"useMarkdownV2"`
}
type OpenClawDiscordConfig struct {
@@ -645,10 +646,11 @@ type WhatsAppConfig struct {
}
type TelegramConfig struct {
Enabled bool `json:"enabled"`
Token string `json:"token"`
Proxy string `json:"proxy"`
AllowFrom []string `json:"allow_from"`
Enabled bool `json:"enabled"`
Token string `json:"token"`
Proxy string `json:"proxy"`
AllowFrom []string `json:"allow_from"`
UseMarkdownV2 bool `json:"use_markdown_v2"`
}
type FeishuConfig struct {
@@ -777,9 +779,11 @@ func (c *OpenClawConfig) convertChannels(warnings *[]string) ChannelsConfig {
if c.Channels.Telegram != nil {
enabled := c.Channels.Telegram.Enabled == nil || *c.Channels.Telegram.Enabled
useMarkdownV2 := c.Channels.Telegram.UseMarkdownV2 != nil && *c.Channels.Telegram.UseMarkdownV2
channels.Telegram = TelegramConfig{
Enabled: enabled,
AllowFrom: c.Channels.Telegram.AllowFrom,
Enabled: enabled,
AllowFrom: c.Channels.Telegram.AllowFrom,
UseMarkdownV2: useMarkdownV2,
}
if c.Channels.Telegram.BotToken != nil {
channels.Telegram.Token = *c.Channels.Telegram.BotToken
@@ -10,6 +10,11 @@ import (
"github.com/sipeed/picoclaw/pkg/migrate/internal"
)
// OpenclawHomeEnvVar is the environment variable that overrides the source
// openclaw home directory when migrating from openclaw to picoclaw.
// Default: ~/.openclaw
const OpenclawHomeEnvVar = "OPENCLAW_HOME"
var providerMapping = map[string]string{
"anthropic": "anthropic",
"claude": "anthropic",
@@ -112,7 +117,7 @@ func resolveSourceHome(override string) (string, error) {
if override != "" {
return internal.ExpandHome(override), nil
}
if envHome := os.Getenv("OPENCLAW_HOME"); envHome != "" {
if envHome := os.Getenv(OpenclawHomeEnvVar); envHome != "" {
return internal.ExpandHome(envHome), nil
}
home, err := os.UserHomeDir()
+4
View File
@@ -180,6 +180,10 @@ func buildParams(
blocks = append(blocks, anthropic.NewTextBlock(msg.Content))
}
for _, tc := range msg.ToolCalls {
// Skip tool calls with empty names to avoid API errors
if tc.Name == "" {
continue
}
args := tc.Arguments
if args == nil && tc.Function != nil && tc.Function.Arguments != "" {
if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil {
+10 -2
View File
@@ -50,10 +50,18 @@ func (p *ClaudeCliProvider) Chat(
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
if stderrStr := stderr.String(); stderrStr != "" {
stderrStr := strings.TrimSpace(stderr.String())
stdoutStr := strings.TrimSpace(stdout.String())
switch {
case stderrStr != "" && stdoutStr != "":
return nil, fmt.Errorf("claude cli error: %w\nstderr: %s\nstdout: %s", err, stderrStr, stdoutStr)
case stderrStr != "":
return nil, fmt.Errorf("claude cli error: %s", stderrStr)
case stdoutStr != "":
return nil, fmt.Errorf("claude cli error: %w\noutput: %s", err, stdoutStr)
default:
return nil, fmt.Errorf("claude cli error: %w", err)
}
return nil, fmt.Errorf("claude cli error: %w", err)
}
return p.parseClaudeCliResponse(stdout.String())
+6 -1
View File
@@ -8,6 +8,11 @@ import (
"time"
)
// CodexHomeEnvVar is the environment variable that overrides the Codex CLI
// home directory when resolving the codex auth.json credentials file.
// Default: ~/.codex
const CodexHomeEnvVar = "CODEX_HOME"
// CodexCliAuth represents the ~/.codex/auth.json file structure.
type CodexCliAuth struct {
Tokens struct {
@@ -69,7 +74,7 @@ func CreateCodexCliTokenSource() func() (string, string, error) {
}
func resolveCodexAuthPath() (string, error) {
codexHome := os.Getenv("CODEX_HOME")
codexHome := os.Getenv(CodexHomeEnvVar)
if codexHome == "" {
home, err := os.UserHomeDir()
if err != nil {
+8 -1
View File
@@ -95,7 +95,10 @@ func (p *CodexProvider) Chat(
)
}
params := buildCodexParams(messages, tools, resolvedModel, options, p.enableWebSearch)
// Respect tools.web.prefer_native: only inject native search when the agent
// loop requested it (options["native_search"]), so that prefer_native: false
// keeps the built-in web search tool out of the request.
useNativeSearch := p.enableWebSearch && (options["native_search"] == true)
params := buildCodexParams(messages, tools, resolvedModel, options, useNativeSearch)
stream := p.client.Responses.NewStreaming(ctx, params, opts...)
defer stream.Close()
@@ -157,6 +160,10 @@ func (p *CodexProvider) GetDefaultModel() string {
return codexDefaultModel
}
// SupportsNativeSearch reports whether this provider was configured with
// built-in web search enabled; it returns the enableWebSearch field as is.
func (p *CodexProvider) SupportsNativeSearch() bool {
return p.enableWebSearch
}
func resolveCodexModel(model string) (string, string) {
m := strings.ToLower(strings.TrimSpace(model))
if m == "" {
+3 -1
View File
@@ -355,7 +355,9 @@ func TestCodexProvider_ChatRoundTrip(t *testing.T) {
provider.client = createOpenAITestClient(server.URL, "test-token", "acc-123")
messages := []Message{{Role: "user", Content: "Hello"}}
resp, err := provider.Chat(t.Context(), messages, nil, "gpt-4o", map[string]any{"max_tokens": 1024})
// Pass native_search so Codex injects built-in web search (mirrors agent loop when prefer_native is true).
opts := map[string]any{"max_tokens": 1024, "native_search": true}
resp, err := provider.Chat(t.Context(), messages, nil, "gpt-4o", opts)
if err != nil {
t.Fatalf("Chat() error: %v", err)
}
+5 -3
View File
@@ -55,8 +55,8 @@ func ExtractProtocol(model string) (protocol, modelID string) {
// CreateProviderFromConfig creates a provider based on the ModelConfig.
// It uses the protocol prefix in the Model field to determine which provider to create.
// Supported protocols: openai, litellm, anthropic, anthropic-messages, antigravity,
// claude-cli, codex-cli, github-copilot
// Supported protocols: openai, litellm, novita, anthropic, anthropic-messages,
// antigravity, claude-cli, codex-cli, github-copilot
// Returns the provider, the model ID (without protocol prefix), and any error.
func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, error) {
if cfg == nil {
@@ -116,7 +116,7 @@ func CreateProviderFromConfig(cfg *config.ModelConfig) (LLMProvider, string, err
case "litellm", "openrouter", "groq", "zhipu", "gemini", "nvidia",
"ollama", "moonshot", "shengsuanyun", "deepseek", "cerebras",
"vivgrid", "volcengine", "vllm", "qwen", "mistral", "avian",
"minimax", "longcat", "modelscope":
"minimax", "longcat", "modelscope", "novita":
// All other OpenAI-compatible HTTP providers
if cfg.APIKey == "" && cfg.APIBase == "" {
return nil, "", fmt.Errorf("api_key or api_base is required for HTTP-based protocol %q", protocol)
@@ -219,6 +219,8 @@ func getDefaultAPIBase(protocol string) string {
return "https://openrouter.ai/api/v1"
case "litellm":
return "http://localhost:4000/v1"
case "novita":
return "https://api.novita.ai/openai"
case "groq":
return "https://api.groq.com/openai/v1"
case "zhipu":
+29
View File
@@ -112,6 +112,7 @@ func TestCreateProviderFromConfig_DefaultAPIBase(t *testing.T) {
}{
{"openai", "openai"},
{"groq", "groq"},
{"novita", "novita"},
{"openrouter", "openrouter"},
{"cerebras", "cerebras"},
{"vivgrid", "vivgrid"},
@@ -222,6 +223,34 @@ func TestGetDefaultAPIBase_ModelScope(t *testing.T) {
}
}
func TestCreateProviderFromConfig_Novita(t *testing.T) {
cfg := &config.ModelConfig{
ModelName: "test-novita",
Model: "novita/deepseek/deepseek-v3.2",
APIKey: "test-key",
}
provider, modelID, err := CreateProviderFromConfig(cfg)
if err != nil {
t.Fatalf("CreateProviderFromConfig() error = %v", err)
}
if provider == nil {
t.Fatal("CreateProviderFromConfig() returned nil provider")
}
if modelID != "deepseek/deepseek-v3.2" {
t.Errorf("modelID = %q, want %q", modelID, "deepseek/deepseek-v3.2")
}
if _, ok := provider.(*HTTPProvider); !ok {
t.Fatalf("expected *HTTPProvider, got %T", provider)
}
}
// TestGetDefaultAPIBase_Novita pins the default API base URL returned for the
// "novita" protocol.
func TestGetDefaultAPIBase_Novita(t *testing.T) {
	const (
		protocol = "novita"
		want     = "https://api.novita.ai/openai"
	)

	got := getDefaultAPIBase(protocol)
	if got == want {
		return
	}
	t.Fatalf("getDefaultAPIBase(%q) = %q, want %q", protocol, got, want)
}
func TestCreateProviderFromConfig_Anthropic(t *testing.T) {
cfg := &config.ModelConfig{
ModelName: "test-anthropic",
+4
View File
@@ -55,3 +55,7 @@ func (p *HTTPProvider) Chat(
func (p *HTTPProvider) GetDefaultModel() string {
return ""
}
// SupportsNativeSearch forwards the question to the wrapped delegate provider
// and returns its answer unchanged.
func (p *HTTPProvider) SupportsNativeSearch() bool {
return p.delegate.SupportsNativeSearch()
}
+33 -3
View File
@@ -103,8 +103,11 @@ func (p *Provider) Chat(
"messages": common.SerializeMessages(messages),
}
if len(tools) > 0 {
requestBody["tools"] = tools
// When fallback uses a different provider (e.g. DeepSeek), that provider must not inject web_search_preview.
nativeSearch, _ := options["native_search"].(bool)
nativeSearch = nativeSearch && isNativeSearchHost(p.apiBase)
if len(tools) > 0 || nativeSearch {
requestBody["tools"] = buildToolsList(tools, nativeSearch)
requestBody["tool_choice"] = "auto"
}
@@ -188,13 +191,40 @@ func normalizeModel(model, apiBase string) string {
prefix := strings.ToLower(before)
switch prefix {
case "litellm", "moonshot", "nvidia", "groq", "ollama", "deepseek", "google",
"openrouter", "zhipu", "mistral", "vivgrid", "minimax":
"openrouter", "zhipu", "mistral", "vivgrid", "minimax", "novita":
return after
default:
return model
}
}
// buildToolsList assembles the value placed under "tools" in the request body.
//
// Without native search every definition is passed through unchanged. With
// native search enabled, any tool whose function name equals "web_search"
// (case-insensitively) is dropped and a {"type": "web_search_preview"} entry
// is appended after the remaining tools.
func buildToolsList(tools []ToolDefinition, nativeSearch bool) []any {
	// Reserve one extra slot for the optional web_search_preview entry.
	out := make([]any, 0, len(tools)+1)

	if !nativeSearch {
		for _, def := range tools {
			out = append(out, def)
		}
		return out
	}

	for _, def := range tools {
		if strings.EqualFold(def.Function.Name, "web_search") {
			// The provider-side search replaces the client-side tool.
			continue
		}
		out = append(out, def)
	}
	return append(out, map[string]any{"type": "web_search_preview"})
}
// SupportsNativeSearch reports whether the provider's configured apiBase is a
// host accepted by isNativeSearchHost.
func (p *Provider) SupportsNativeSearch() bool {
return isNativeSearchHost(p.apiBase)
}
// isNativeSearchHost reports whether apiBase points at an endpoint that is
// treated as supporting the built-in web_search_preview tool: api.openai.com
// or any host under .openai.azure.com.
//
// The host is lowercased before comparison because DNS names are
// case-insensitive while url.Parse keeps the casing the user typed. An
// unparsable or host-less apiBase (including the empty string) yields false.
func isNativeSearchHost(apiBase string) bool {
	u, err := url.Parse(apiBase)
	if err != nil {
		return false
	}
	host := strings.ToLower(u.Hostname())
	return host == "api.openai.com" || strings.HasSuffix(host, ".openai.azure.com")
}
// supportsPromptCacheKey reports whether the given API base is known to
// support the prompt_cache_key request field. Currently only OpenAI's own
// API and Azure OpenAI support this. All other OpenAI-compatible providers
+269 -22
View File
@@ -432,7 +432,28 @@ func TestProviderChat_StripsMoonshotPrefixAndNormalizesKimiTemperature(t *testin
}
}
func TestProviderChat_StripsGroqOllamaDeepseekVivgridPrefixes(t *testing.T) {
func TestProviderChat_StripsGroqOllamaDeepseekVivgridNovitaPrefixes(t *testing.T) {
var requestBody map[string]any
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
resp := map[string]any{
"choices": []map[string]any{
{
"message": map[string]any{"content": "ok"},
"finish_reason": "stop",
},
},
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(resp)
}))
defer server.Close()
p := NewProvider("key", server.URL, "")
tests := []struct {
name string
input string
@@ -463,31 +484,25 @@ func TestProviderChat_StripsGroqOllamaDeepseekVivgridPrefixes(t *testing.T) {
input: "vivgrid/auto",
wantModel: "auto",
},
{
name: "strips novita prefix deepseek model",
input: "novita/deepseek/deepseek-v3.2",
wantModel: "deepseek/deepseek-v3.2",
},
{
name: "strips novita prefix zai model",
input: "novita/zai-org/glm-5",
wantModel: "zai-org/glm-5",
},
{
name: "strips novita prefix minimax model",
input: "novita/minimax/minimax-m2.5",
wantModel: "minimax/minimax-m2.5",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var requestBody map[string]any
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
resp := map[string]any{
"choices": []map[string]any{
{
"message": map[string]any{"content": "ok"},
"finish_reason": "stop",
},
},
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(resp)
}))
defer server.Close()
p := NewProvider("key", server.URL, "")
_, err := p.Chat(t.Context(), []Message{{Role: "user", Content: "hi"}}, nil, tt.input, nil)
if err != nil {
t.Fatalf("Chat() error = %v", err)
@@ -573,6 +588,12 @@ func TestNormalizeModel_UsesAPIBase(t *testing.T) {
if got := normalizeModel("vivgrid/auto", "https://api.vivgrid.com/v1"); got != "auto" {
t.Fatalf("normalizeModel(vivgrid auto) = %q, want %q", got, "auto")
}
if got := normalizeModel(
"novita/deepseek/deepseek-v3.2",
"https://api.novita.ai/openai",
); got != "deepseek/deepseek-v3.2" {
t.Fatalf("normalizeModel(novita) = %q, want %q", got, "deepseek/deepseek-v3.2")
}
}
func TestProvider_RequestTimeoutDefault(t *testing.T) {
@@ -824,6 +845,232 @@ func TestSupportsPromptCacheKey(t *testing.T) {
}
}
// TestBuildToolsList_NativeSearchAddsWebSearchPreview verifies that enabling
// native search appends a web_search_preview entry after the caller's tools.
func TestBuildToolsList_NativeSearchAddsWebSearchPreview(t *testing.T) {
	input := []ToolDefinition{
		{Type: "function", Function: ToolFunctionDefinition{Name: "read_file", Description: "read"}},
	}

	got := buildToolsList(input, true)
	if n := len(got); n != 2 {
		t.Fatalf("len(result) = %d, want 2", n)
	}

	last := got[1]
	entry, isMap := last.(map[string]any)
	if !isMap {
		t.Fatalf("web search entry is %T, want map[string]any", last)
	}
	if typ := entry["type"]; typ != "web_search_preview" {
		t.Fatalf("type = %v, want web_search_preview", typ)
	}
}
// TestBuildToolsList_NativeSearchFiltersClientWebSearch verifies that the
// client-side web_search tool is removed when native search is enabled, so
// the result holds only read_file plus the web_search_preview entry.
func TestBuildToolsList_NativeSearchFiltersClientWebSearch(t *testing.T) {
	input := []ToolDefinition{
		{Type: "function", Function: ToolFunctionDefinition{Name: "web_search", Description: "search"}},
		{Type: "function", Function: ToolFunctionDefinition{Name: "read_file", Description: "read"}},
	}

	got := buildToolsList(input, true)
	for i := range got {
		def, isDef := got[i].(ToolDefinition)
		if !isDef {
			continue
		}
		if strings.EqualFold(def.Function.Name, "web_search") {
			t.Fatal("client-side web_search should be filtered out when native search is enabled")
		}
	}

	// read_file + web_search_preview
	if n := len(got); n != 2 {
		t.Fatalf("len(result) = %d, want 2 (read_file + web_search_preview)", n)
	}
}
// TestBuildToolsList_NoNativeSearchPassesThrough verifies that with native
// search disabled the tool count is unchanged (web_search is kept and nothing
// is appended).
func TestBuildToolsList_NoNativeSearchPassesThrough(t *testing.T) {
	input := []ToolDefinition{
		{Type: "function", Function: ToolFunctionDefinition{Name: "web_search", Description: "search"}},
		{Type: "function", Function: ToolFunctionDefinition{Name: "read_file", Description: "read"}},
	}

	got := buildToolsList(input, false)
	if n := len(got); n != 2 {
		t.Fatalf("len(result) = %d, want 2", n)
	}
}
// TestIsNativeSearchHost is a table test over API base URLs: only OpenAI and
// Azure OpenAI hosts are expected to be reported as native-search capable.
func TestIsNativeSearchHost(t *testing.T) {
	type hostCase struct {
		apiBase string
		want    bool
	}

	cases := []hostCase{
		{apiBase: "https://api.openai.com/v1", want: true},
		{apiBase: "https://myresource.openai.azure.com/openai/deployments/gpt-4", want: true},
		{apiBase: "https://api.mistral.ai/v1", want: false},
		{apiBase: "https://api.deepseek.com/v1", want: false},
		{apiBase: "https://api.groq.com/openai/v1", want: false},
		{apiBase: "http://localhost:11434/v1", want: false},
		{apiBase: "", want: false},
	}

	for i := range cases {
		c := cases[i]
		got := isNativeSearchHost(c.apiBase)
		if got == c.want {
			continue
		}
		t.Errorf("isNativeSearchHost(%q) = %v, want %v", c.apiBase, got, c.want)
	}
}
// TestSupportsNativeSearch_OpenAI verifies that a provider pointed at
// api.openai.com advertises native search support.
func TestSupportsNativeSearch_OpenAI(t *testing.T) {
	provider := NewProvider("key", "https://api.openai.com/v1", "")
	if supported := provider.SupportsNativeSearch(); !supported {
		t.Fatal("OpenAI provider should support native search")
	}
}
// TestSupportsNativeSearch_NonOpenAI verifies that a provider pointed at a
// non-OpenAI host (DeepSeek here) does not advertise native search support.
func TestSupportsNativeSearch_NonOpenAI(t *testing.T) {
	provider := NewProvider("key", "https://api.deepseek.com/v1", "")
	if supported := provider.SupportsNativeSearch(); supported {
		t.Fatal("DeepSeek provider should not support native search")
	}
}
// TestProviderChat_NativeSearchToolInjected verifies that Chat appends a
// web_search_preview tool to the request when options["native_search"] is
// true and the provider's apiBase is an OpenAI host.
//
// The provider's apiBase is overridden to api.openai.com so the host check
// passes, while a custom RoundTripper rewrites every outgoing request to the
// local httptest server that records the request body.
func TestProviderChat_NativeSearchToolInjected(t *testing.T) {
	var captured map[string]any
	handler := func(w http.ResponseWriter, r *http.Request) {
		if decodeErr := json.NewDecoder(r.Body).Decode(&captured); decodeErr != nil {
			http.Error(w, decodeErr.Error(), http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{
					"message":       map[string]any{"content": "ok"},
					"finish_reason": "stop",
				},
			},
		})
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	p := NewProvider("key", srv.URL, "")
	p.apiBase = "https://api.openai.com/v1"
	// Redirect the request aimed at api.openai.com to the local test server.
	redirect := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		r.URL, _ = url.Parse(srv.URL + r.URL.Path)
		return http.DefaultTransport.RoundTrip(r)
	})
	p.httpClient = &http.Client{Transport: redirect}

	clientTools := []ToolDefinition{
		{Type: "function", Function: ToolFunctionDefinition{Name: "read_file", Description: "read"}},
	}
	chatOpts := map[string]any{"native_search": true}

	_, err := p.Chat(t.Context(), []Message{{Role: "user", Content: "hi"}}, clientTools, "gpt-5.4", chatOpts)
	if err != nil {
		t.Fatalf("Chat() error = %v", err)
	}

	sent, isSlice := captured["tools"].([]any)
	if !isSlice {
		t.Fatalf("tools is %T, want []any", captured["tools"])
	}
	if n := len(sent); n != 2 {
		t.Fatalf("len(tools) = %d, want 2 (read_file + web_search_preview)", n)
	}

	last, isMap := sent[1].(map[string]any)
	if !isMap {
		t.Fatalf("last tool is %T, want map[string]any", sent[1])
	}
	if typ := last["type"]; typ != "web_search_preview" {
		t.Fatalf("last tool type = %v, want web_search_preview", typ)
	}
}
// TestProviderChat_NativeSearchNotInjectedWithoutOption verifies that when
// the native_search option is absent, Chat sends exactly the caller's tools
// (here the client-side web_search) and adds nothing.
func TestProviderChat_NativeSearchNotInjectedWithoutOption(t *testing.T) {
	var captured map[string]any
	handler := func(w http.ResponseWriter, r *http.Request) {
		if decodeErr := json.NewDecoder(r.Body).Decode(&captured); decodeErr != nil {
			http.Error(w, decodeErr.Error(), http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{
					"message":       map[string]any{"content": "ok"},
					"finish_reason": "stop",
				},
			},
		})
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	provider := NewProvider("key", srv.URL, "")
	clientTools := []ToolDefinition{
		{Type: "function", Function: ToolFunctionDefinition{Name: "web_search", Description: "search"}},
	}

	_, err := provider.Chat(
		t.Context(),
		[]Message{{Role: "user", Content: "hi"}},
		clientTools,
		"gpt-5.4",
		map[string]any{},
	)
	if err != nil {
		t.Fatalf("Chat() error = %v", err)
	}

	sent, isSlice := captured["tools"].([]any)
	if !isSlice {
		t.Fatalf("tools is %T, want []any", captured["tools"])
	}
	if n := len(sent); n != 1 {
		t.Fatalf("len(tools) = %d, want 1 (web_search only)", n)
	}
}
// TestProviderChat_NativeSearchIgnoredOnNonOpenAI covers the fallback case:
// options carry native_search=true, but the provider's apiBase is not an
// OpenAI host (as when falling back to DeepSeek). No web_search_preview tool
// may be injected, since it would cause API errors; with no caller tools the
// "tools" key must be absent from the request altogether.
func TestProviderChat_NativeSearchIgnoredOnNonOpenAI(t *testing.T) {
	var captured map[string]any
	handler := func(w http.ResponseWriter, r *http.Request) {
		if decodeErr := json.NewDecoder(r.Body).Decode(&captured); decodeErr != nil {
			http.Error(w, decodeErr.Error(), http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{
					"message":       map[string]any{"content": "ok"},
					"finish_reason": "stop",
				},
			},
		})
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	// The test server's host is not api.openai.com, which stands in for
	// DeepSeek or any other non-OpenAI provider.
	provider := NewProvider("key", srv.URL, "")
	chatOpts := map[string]any{"native_search": true}

	_, err := provider.Chat(t.Context(), []Message{{Role: "user", Content: "hi"}}, nil, "deepseek-chat", chatOpts)
	if err != nil {
		t.Fatalf("Chat() error = %v", err)
	}

	// No tools were passed and web_search_preview must not be added.
	sent, present := captured["tools"]
	if present {
		t.Fatalf("tools should be omitted for non-OpenAI when only native_search was requested, got %v", sent)
	}
}
func TestSerializeMessages_StripsSystemParts(t *testing.T) {
messages := []protocoltypes.Message{
{
+9
View File
@@ -44,6 +44,15 @@ type ThinkingCapable interface {
SupportsThinking() bool
}
// NativeSearchCapable is an optional interface for providers that support
// built-in web search during LLM inference (e.g. OpenAI web_search_preview,
// xAI Grok search). When the active provider implements this interface and
// returns true, the agent loop can hide the client-side web_search tool to
// avoid duplicate search surfaces and use the provider's native search instead.
type NativeSearchCapable interface {
SupportsNativeSearch() bool
}
// FailoverReason classifies why an LLM request failed for fallback decisions.
type FailoverReason string
+6 -3
View File
@@ -226,9 +226,12 @@ func TestCronTool_ExecuteJobPublishesErrorWhenExecDisabled(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
msg, ok := tool.msgBus.SubscribeOutbound(ctx)
if !ok {
t.Fatal("expected outbound message")
var msg bus.OutboundMessage
select {
case msg = <-tool.msgBus.OutboundChan():
// got message
case <-ctx.Done():
t.Fatal("timeout waiting for outbound message")
}
if !strings.Contains(msg.Content, "command execution is disabled") {
t.Fatalf("expected exec disabled message, got: %s", msg.Content)
+174 -25
View File
@@ -7,6 +7,7 @@ import (
"errors"
"fmt"
"io"
"mime"
"net"
"net/http"
"net/url"
@@ -15,6 +16,7 @@ import (
"sync/atomic"
"time"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/utils"
)
@@ -776,22 +778,49 @@ type WebFetchTool struct {
maxChars int
proxy string
client *http.Client
format string
fetchLimitBytes int64
whitelist *privateHostWhitelist
}
func NewWebFetchTool(maxChars int, fetchLimitBytes int64) (*WebFetchTool, error) {
// privateHostWhitelist holds the private or otherwise restricted addresses
// that web fetch is still permitted to reach.
type privateHostWhitelist struct {
// exact is a set keyed by the string form of individually whitelisted IPs.
exact map[string]struct{}
// cidrs lists whitelisted networks parsed from CIDR entries.
cidrs []*net.IPNet
}
func NewWebFetchTool(maxChars int, format string, fetchLimitBytes int64) (*WebFetchTool, error) {
// createHTTPClient cannot fail with an empty proxy string.
return NewWebFetchToolWithProxy(maxChars, "", fetchLimitBytes)
return NewWebFetchToolWithConfig(maxChars, "", format, fetchLimitBytes, nil)
}
// allowPrivateWebFetchHosts controls whether loopback/private hosts are allowed.
// This is false in normal runtime to reduce SSRF exposure, and tests can override it temporarily.
var allowPrivateWebFetchHosts atomic.Bool
func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64) (*WebFetchTool, error) {
// NewWebFetchToolWithProxy builds a WebFetchTool by passing every argument,
// in the same order, straight through to NewWebFetchToolWithConfig and
// returning its result.
func NewWebFetchToolWithProxy(
maxChars int,
proxy string,
format string,
fetchLimitBytes int64,
privateHostWhitelist []string,
) (*WebFetchTool, error) {
return NewWebFetchToolWithConfig(maxChars, proxy, format, fetchLimitBytes, privateHostWhitelist)
}
func NewWebFetchToolWithConfig(
maxChars int,
proxy string,
format string,
fetchLimitBytes int64,
privateHostWhitelist []string,
) (*WebFetchTool, error) {
if maxChars <= 0 {
maxChars = defaultMaxChars
}
whitelist, err := newPrivateHostWhitelist(privateHostWhitelist)
if err != nil {
return nil, fmt.Errorf("failed to parse web fetch private host whitelist: %w", err)
}
client, err := utils.CreateHTTPClient(proxy, fetchTimeout)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP client for web fetch: %w", err)
@@ -801,13 +830,13 @@ func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64)
Timeout: 15 * time.Second,
KeepAlive: 30 * time.Second,
}
transport.DialContext = newSafeDialContext(dialer)
transport.DialContext = newSafeDialContext(dialer, whitelist)
}
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) >= maxRedirects {
return fmt.Errorf("stopped after %d redirects", maxRedirects)
}
if isObviousPrivateHost(req.URL.Hostname()) {
if isObviousPrivateHost(req.URL.Hostname(), whitelist) {
return fmt.Errorf("redirect target is private or local network host")
}
return nil
@@ -819,7 +848,9 @@ func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64)
maxChars: maxChars,
proxy: proxy,
client: client,
format: format,
fetchLimitBytes: fetchLimitBytes,
whitelist: whitelist,
}, nil
}
@@ -871,7 +902,7 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
// Lightweight pre-flight: block obvious localhost/literal-IP without DNS resolution.
// The real SSRF guard is newSafeDialContext at connect time.
hostname := parsedURL.Hostname()
if isObviousPrivateHost(hostname) {
if isObviousPrivateHost(hostname, t.whitelist) {
return ErrorResult("fetching private or local network hosts is not allowed")
}
@@ -906,26 +937,68 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
return ErrorResult(fmt.Sprintf("failed to read response: %v", err))
}
bodyStr := string(body)
contentType := resp.Header.Get("Content-Type")
mediaType, params, err := mime.ParseMediaType(contentType)
if err != nil {
// The most common error here is "mime: no media type" if the header is empty.
logger.WarnCF("tool", "Failed to parse Content-Type", map[string]any{
"raw_header": contentType,
"error": err.Error(),
})
// security fallback
mediaType = "application/octet-stream"
}
charset, hasCharset := params["charset"]
if hasCharset {
// If the charset is not utf-8, we might have to convert the bodyStr
// before passing it to the HTML/Markdown parser
if strings.ToLower(charset) != "utf-8" {
logger.WarnCF("tool", "Note: the content is not in UTF-8", map[string]any{"charset": charset})
}
}
var text, extractor string
if strings.Contains(contentType, "application/json") {
switch {
case mediaType == "application/json":
var jsonData any
if err := json.Unmarshal(body, &jsonData); err == nil {
formatted, _ := json.MarshalIndent(jsonData, "", " ")
text = string(formatted)
extractor = "json"
} else {
text = string(body)
if err := json.Unmarshal(body, &jsonData); err != nil {
text = bodyStr
extractor = "raw"
break
}
} else if strings.Contains(contentType, "text/html") || len(body) > 0 &&
(strings.HasPrefix(string(body), "<!DOCTYPE") || strings.HasPrefix(strings.ToLower(string(body)), "<html")) {
text = t.extractText(string(body))
extractor = "text"
} else {
text = string(body)
formatted, err := json.MarshalIndent(jsonData, "", " ")
if err != nil {
text = bodyStr
extractor = "raw"
break
}
text = string(formatted)
extractor = "json"
case mediaType == "text/html" || looksLikeHTML(bodyStr):
switch strings.ToLower(t.format) {
case "markdown":
var err error
text, err = utils.HtmlToMarkdown(bodyStr)
if err != nil {
return ErrorResult(fmt.Sprintf("failed to HTML to markdown: %v", err))
}
extractor = "markdown"
default:
text = t.extractText(bodyStr)
extractor = "text"
}
default:
text = bodyStr
extractor = "raw"
}
@@ -957,6 +1030,17 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
}
}
// looksLikeHTML sniffs the start of body and reports whether it begins with
// an HTML doctype declaration or an <html tag, ignoring letter case.
//
// Only the first few bytes are lowercased and inspected, so the cost does not
// depend on the size of the document. Leading whitespace is not skipped: the
// marker must be the very first bytes of body. An empty body is never HTML.
func looksLikeHTML(body string) bool {
	const doctype = "<!doctype"

	// The longest marker is the doctype; nothing beyond that length matters.
	head := body
	if len(head) > len(doctype) {
		head = head[:len(doctype)]
	}
	head = strings.ToLower(head)

	return strings.HasPrefix(head, doctype) || strings.HasPrefix(head, "<html")
}
func (t *WebFetchTool) extractText(htmlContent string) string {
result := reScript.ReplaceAllLiteralString(htmlContent, "")
result = reStyle.ReplaceAllLiteralString(result, "")
@@ -981,7 +1065,10 @@ func (t *WebFetchTool) extractText(htmlContent string) string {
// newSafeDialContext re-resolves DNS at connect time to mitigate DNS rebinding (TOCTOU)
// where a hostname resolves to a public IP during pre-flight but a private IP at connect time.
func newSafeDialContext(dialer *net.Dialer) func(context.Context, string, string) (net.Conn, error) {
func newSafeDialContext(
dialer *net.Dialer,
whitelist *privateHostWhitelist,
) func(context.Context, string, string) (net.Conn, error) {
return func(ctx context.Context, network, address string) (net.Conn, error) {
if allowPrivateWebFetchHosts.Load() {
return dialer.DialContext(ctx, network, address)
@@ -996,7 +1083,7 @@ func newSafeDialContext(dialer *net.Dialer) func(context.Context, string, string
}
if ip := net.ParseIP(host); ip != nil {
if isPrivateOrRestrictedIP(ip) {
if shouldBlockPrivateIP(ip, whitelist) {
return nil, fmt.Errorf("blocked private or local target: %s", host)
}
return dialer.DialContext(ctx, network, net.JoinHostPort(ip.String(), port))
@@ -1010,7 +1097,7 @@ func newSafeDialContext(dialer *net.Dialer) func(context.Context, string, string
attempted := 0
var lastErr error
for _, ipAddr := range ipAddrs {
if isPrivateOrRestrictedIP(ipAddr.IP) {
if shouldBlockPrivateIP(ipAddr.IP, whitelist) {
continue
}
attempted++
@@ -1022,7 +1109,7 @@ func newSafeDialContext(dialer *net.Dialer) func(context.Context, string, string
}
if attempted == 0 {
return nil, fmt.Errorf("all resolved addresses for %s are private or restricted", host)
return nil, fmt.Errorf("all resolved addresses for %s are private, restricted, or not whitelisted", host)
}
if lastErr != nil {
return nil, fmt.Errorf("failed connecting to public addresses for %s: %w", host, lastErr)
@@ -1031,10 +1118,72 @@ func newSafeDialContext(dialer *net.Dialer) func(context.Context, string, string
}
}
// newPrivateHostWhitelist parses entries into a whitelist of single IPs and
// CIDR networks.
//
// Each entry is trimmed; blank entries are ignored. An entry that parses as
// an IP is stored in normalized form in the exact set, otherwise it must be a
// valid CIDR. Any other entry produces an error. When entries is empty or
// yields nothing usable, the function returns (nil, nil); a nil whitelist is
// valid and contains no addresses.
func newPrivateHostWhitelist(entries []string) (*privateHostWhitelist, error) {
	if len(entries) == 0 {
		return nil, nil
	}

	exactIPs := make(map[string]struct{})
	networks := make([]*net.IPNet, 0, len(entries))

	for _, raw := range entries {
		item := strings.TrimSpace(raw)
		if item == "" {
			continue
		}

		// A bare IP takes precedence over CIDR parsing.
		if addr := net.ParseIP(item); addr != nil {
			exactIPs[normalizeWhitelistIP(addr).String()] = struct{}{}
			continue
		}

		_, ipNet, err := net.ParseCIDR(item)
		if err != nil {
			return nil, fmt.Errorf("invalid entry %q: expected IP or CIDR", item)
		}
		networks = append(networks, ipNet)
	}

	if len(exactIPs)+len(networks) == 0 {
		return nil, nil
	}
	return &privateHostWhitelist{exact: exactIPs, cidrs: networks}, nil
}
func (w *privateHostWhitelist) Contains(ip net.IP) bool {
if w == nil || ip == nil {
return false
}
normalized := normalizeWhitelistIP(ip)
if _, ok := w.exact[normalized.String()]; ok {
return true
}
for _, network := range w.cidrs {
if network.Contains(normalized) {
return true
}
}
return false
}
// normalizeWhitelistIP returns the canonical form of ip used for whitelist
// lookups: the 4-byte representation when ip is an IPv4 address (including
// IPv4-mapped IPv6), otherwise ip unchanged. A nil ip yields nil.
func normalizeWhitelistIP(ip net.IP) net.IP {
	switch v4 := ip.To4(); {
	case ip == nil:
		return nil
	case v4 != nil:
		return v4
	default:
		return ip
	}
}
// shouldBlockPrivateIP reports whether a connection to ip must be refused:
// isPrivateOrRestrictedIP classifies it as private/restricted and the
// whitelist does not contain it. A nil whitelist contains nothing, so every
// private/restricted address is blocked in that case.
func shouldBlockPrivateIP(ip net.IP, whitelist *privateHostWhitelist) bool {
return isPrivateOrRestrictedIP(ip) && !whitelist.Contains(ip)
}
// isObviousPrivateHost performs a lightweight, no-DNS check for obviously private hosts.
// It catches localhost, literal private IPs, and empty hosts. It does NOT resolve DNS —
// the real SSRF guard is newSafeDialContext which checks IPs at connect time.
func isObviousPrivateHost(host string) bool {
func isObviousPrivateHost(host string, whitelist *privateHostWhitelist) bool {
if allowPrivateWebFetchHosts.Load() {
return false
}
@@ -1050,7 +1199,7 @@ func isObviousPrivateHost(host string) bool {
}
if ip := net.ParseIP(h); ip != nil {
return isPrivateOrRestrictedIP(ip)
return shouldBlockPrivateIP(ip, whitelist)
}
return false
+170 -20
View File
@@ -10,11 +10,15 @@ import (
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/sipeed/picoclaw/pkg/logger"
)
const testFetchLimit = int64(10 * 1024 * 1024)
// Shared arguments for the web fetch tool constructors used throughout these tests.
const (
// testFetchLimit is 10 MiB, passed as the limit argument of the constructors.
// NOTE(review): presumably a byte cap on the fetched body — confirm against NewWebFetchTool.
testFetchLimit = int64(10 * 1024 * 1024)
// format is the value passed as the constructors' format argument.
// NOTE(review): presumably selects plain-text output — confirm against NewWebFetchTool.
format = "plaintext"
)
// TestWebTool_WebFetch_Success verifies successful URL fetching
func TestWebTool_WebFetch_Success(t *testing.T) {
@@ -27,7 +31,7 @@ func TestWebTool_WebFetch_Success(t *testing.T) {
}))
defer server.Close()
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -69,7 +73,7 @@ func TestWebTool_WebFetch_JSON(t *testing.T) {
}))
defer server.Close()
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -94,7 +98,7 @@ func TestWebTool_WebFetch_JSON(t *testing.T) {
// TestWebTool_WebFetch_InvalidURL verifies error handling for invalid URL
func TestWebTool_WebFetch_InvalidURL(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -119,7 +123,7 @@ func TestWebTool_WebFetch_InvalidURL(t *testing.T) {
// TestWebTool_WebFetch_UnsupportedScheme verifies error handling for non-http URLs
func TestWebTool_WebFetch_UnsupportedScheme(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -144,7 +148,7 @@ func TestWebTool_WebFetch_UnsupportedScheme(t *testing.T) {
// TestWebTool_WebFetch_MissingURL verifies error handling for missing URL
func TestWebTool_WebFetch_MissingURL(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -178,7 +182,7 @@ func TestWebTool_WebFetch_Truncation(t *testing.T) {
}))
defer server.Close()
tool, err := NewWebFetchTool(1000, testFetchLimit) // Limit to 1000 chars
tool, err := NewWebFetchTool(1000, format, testFetchLimit) // Limit to 1000 chars
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -228,7 +232,7 @@ func TestWebFetchTool_PayloadTooLarge(t *testing.T) {
defer ts.Close()
// Initialize the tool
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -311,7 +315,7 @@ func TestWebTool_WebFetch_HTMLExtraction(t *testing.T) {
}))
defer server.Close()
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -423,8 +427,31 @@ func withPrivateWebFetchHostsAllowed(t *testing.T) {
})
}
// serverHostAndPort strips a leading "http://" and then a leading "https://"
// from rawURL and splits the remainder into host and port. The test is
// failed immediately if the remainder is not a valid host:port pair.
func serverHostAndPort(t *testing.T, rawURL string) (string, string) {
	t.Helper()

	addr := rawURL
	for _, scheme := range []string{"http://", "https://"} {
		addr = strings.TrimPrefix(addr, scheme)
	}

	host, port, splitErr := net.SplitHostPort(addr)
	if splitErr != nil {
		t.Fatalf("failed to split host/port from %q: %v", rawURL, splitErr)
	}
	return host, port
}
// singleHostCIDR turns a literal IP address into the CIDR block that covers
// exactly that one address: a /32 suffix for IPv4 and /128 for IPv6. The test
// is failed immediately if host is not a literal IP.
func singleHostCIDR(t *testing.T, host string) string {
	t.Helper()

	parsed := net.ParseIP(host)
	if parsed == nil {
		t.Fatalf("failed to parse IP %q", host)
	}

	prefixLen := "/128"
	if parsed.To4() != nil {
		prefixLen = "/32"
	}
	return parsed.String() + prefixLen
}
func TestWebTool_WebFetch_PrivateHostBlocked(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -441,6 +468,56 @@ func TestWebTool_WebFetch_PrivateHostBlocked(t *testing.T) {
}
}
// TestWebTool_WebFetch_PrivateHostAllowedByExactWhitelist verifies that a
// fetch against the local httptest server succeeds when the server's own host
// address is listed verbatim in the private-host whitelist.
func TestWebTool_WebFetch_PrivateHostAllowedByExactWhitelist(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("exact whitelist ok"))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	// Whitelist exactly the address the test server is listening on.
	privateHost, _ := serverHostAndPort(t, srv.URL)
	allowed := []string{privateHost}

	fetcher, err := NewWebFetchToolWithConfig(50000, "", format, testFetchLimit, allowed)
	if err != nil {
		t.Fatalf("Failed to create web fetch tool: %v", err)
	}

	args := map[string]any{"url": srv.URL}
	res := fetcher.Execute(context.Background(), args)

	switch {
	case res.IsError:
		t.Fatalf("expected success for exact whitelisted private IP, got %q", res.ForLLM)
	case !strings.Contains(res.ForLLM, "exact whitelist ok"):
		t.Fatalf("expected fetched content, got %q", res.ForLLM)
	}
}
// TestWebTool_WebFetch_PrivateHostAllowedByCIDRWhitelist verifies that a
// fetch against the local httptest server succeeds when the whitelist holds a
// single-address CIDR block covering the server's host address.
func TestWebTool_WebFetch_PrivateHostAllowedByCIDRWhitelist(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("cidr whitelist ok"))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	// Build a /32 (or /128) block that contains only the server's address.
	privateHost, _ := serverHostAndPort(t, srv.URL)
	allowed := []string{singleHostCIDR(t, privateHost)}

	fetcher, err := NewWebFetchToolWithConfig(50000, "", format, testFetchLimit, allowed)
	if err != nil {
		t.Fatalf("Failed to create web fetch tool: %v", err)
	}

	args := map[string]any{"url": srv.URL}
	res := fetcher.Execute(context.Background(), args)

	switch {
	case res.IsError:
		t.Fatalf("expected success for CIDR-whitelisted private IP, got %q", res.ForLLM)
	case !strings.Contains(res.ForLLM, "cidr whitelist ok"):
		t.Fatalf("expected fetched content, got %q", res.ForLLM)
	}
}
func TestWebTool_WebFetch_PrivateHostAllowedForTests(t *testing.T) {
withPrivateWebFetchHostsAllowed(t)
@@ -451,7 +528,7 @@ func TestWebTool_WebFetch_PrivateHostAllowedForTests(t *testing.T) {
}))
defer server.Close()
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -466,7 +543,7 @@ func TestWebTool_WebFetch_PrivateHostAllowedForTests(t *testing.T) {
// TestWebFetch_BlocksIPv4MappedIPv6Loopback verifies ::ffff:127.0.0.1 is blocked
func TestWebFetch_BlocksIPv4MappedIPv6Loopback(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -481,7 +558,7 @@ func TestWebFetch_BlocksIPv4MappedIPv6Loopback(t *testing.T) {
// TestWebFetch_BlocksMetadataIP verifies 169.254.169.254 is blocked
func TestWebFetch_BlocksMetadataIP(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -496,7 +573,7 @@ func TestWebFetch_BlocksMetadataIP(t *testing.T) {
// TestWebFetch_BlocksIPv6UniqueLocal verifies fc00::/7 addresses are blocked
func TestWebFetch_BlocksIPv6UniqueLocal(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -511,7 +588,7 @@ func TestWebFetch_BlocksIPv6UniqueLocal(t *testing.T) {
// TestWebFetch_Blocks6to4WithPrivateEmbed verifies 6to4 with private embedded IPv4 is blocked
func TestWebFetch_Blocks6to4WithPrivateEmbed(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -527,7 +604,7 @@ func TestWebFetch_Blocks6to4WithPrivateEmbed(t *testing.T) {
// TestWebFetch_Allows6to4WithPublicEmbed verifies 6to4 with public embedded IPv4 is NOT blocked
func TestWebFetch_Allows6to4WithPublicEmbed(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -557,7 +634,7 @@ func TestWebFetch_RedirectToPrivateBlocked(t *testing.T) {
allowPrivateWebFetchHosts.Store(false)
defer allowPrivateWebFetchHosts.Store(true)
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
t.Fatalf("Failed to create web fetch tool: %v", err)
}
@@ -570,6 +647,69 @@ func TestWebFetch_RedirectToPrivateBlocked(t *testing.T) {
}
}
// TestNewSafeDialContext_BlocksPrivateDNSResolutionWithoutWhitelist verifies
// that dialing the name "localhost" through the safe dial context fails when
// no whitelist is supplied, and that the error mentions either "private" or
// "whitelisted".
func TestNewSafeDialContext_BlocksPrivateDNSResolutionWithoutWhitelist(t *testing.T) {
	// A real loopback listener provides a port the dial could reach if it
	// were not blocked.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("failed to listen on loopback: %v", err)
	}
	defer ln.Close()

	_, port, err := net.SplitHostPort(ln.Addr().String())
	if err != nil {
		t.Fatalf("failed to split listener address: %v", err)
	}

	dial := newSafeDialContext(&net.Dialer{Timeout: time.Second}, nil)
	target := net.JoinHostPort("localhost", port)
	if _, err = dial(context.Background(), "tcp", target); err == nil {
		t.Fatal("expected localhost DNS resolution to be blocked without whitelist")
	}

	msg := err.Error()
	if !(strings.Contains(msg, "private") || strings.Contains(msg, "whitelisted")) {
		t.Fatalf("unexpected error: %v", err)
	}
}
// TestNewSafeDialContext_AllowsWhitelistedPrivateDNSResolution verifies that
// dialing the name "localhost" through the safe dial context succeeds when
// 127.0.0.0/8 is whitelisted, and that the loopback listener actually
// receives the connection.
func TestNewSafeDialContext_AllowsWhitelistedPrivateDNSResolution(t *testing.T) {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("failed to listen on loopback: %v", err)
	}
	defer ln.Close()

	// Buffered with capacity one so the accept goroutine can signal and exit
	// without waiting for the test to reach the receive below.
	gotConn := make(chan struct{}, 1)
	go func() {
		peer, acceptErr := ln.Accept()
		if acceptErr != nil {
			// Listener was closed (or failed) before any connection arrived.
			return
		}
		peer.Close()
		gotConn <- struct{}{}
	}()

	_, port, err := net.SplitHostPort(ln.Addr().String())
	if err != nil {
		t.Fatalf("failed to split listener address: %v", err)
	}

	allowLoopback, err := newPrivateHostWhitelist([]string{"127.0.0.0/8"})
	if err != nil {
		t.Fatalf("failed to parse whitelist: %v", err)
	}

	dial := newSafeDialContext(&net.Dialer{Timeout: time.Second}, allowLoopback)
	target := net.JoinHostPort("localhost", port)
	client, err := dial(context.Background(), "tcp", target)
	if err != nil {
		t.Fatalf("expected localhost DNS resolution to succeed with whitelist, got %v", err)
	}
	client.Close()

	// The dial succeeding is not enough: confirm the listener saw the peer.
	select {
	case <-gotConn:
	case <-time.After(time.Second):
		t.Fatal("expected localhost listener to accept a connection")
	}
}
// TestIsPrivateOrRestrictedIP_Table tests IP classification logic
func TestIsPrivateOrRestrictedIP_Table(t *testing.T) {
tests := []struct {
@@ -615,7 +755,7 @@ func TestIsPrivateOrRestrictedIP_Table(t *testing.T) {
// TestWebTool_WebFetch_MissingDomain verifies error handling for URL without domain
func TestWebTool_WebFetch_MissingDomain(t *testing.T) {
tool, err := NewWebFetchTool(50000, testFetchLimit)
tool, err := NewWebFetchTool(50000, format, testFetchLimit)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -639,7 +779,7 @@ func TestWebTool_WebFetch_MissingDomain(t *testing.T) {
}
func TestNewWebFetchToolWithProxy(t *testing.T) {
tool, err := NewWebFetchToolWithProxy(1024, "http://127.0.0.1:7890", testFetchLimit)
tool, err := NewWebFetchToolWithProxy(1024, "http://127.0.0.1:7890", format, testFetchLimit, nil)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
} else if tool.maxChars != 1024 {
@@ -650,7 +790,7 @@ func TestNewWebFetchToolWithProxy(t *testing.T) {
t.Fatalf("proxy = %q, want %q", tool.proxy, "http://127.0.0.1:7890")
}
tool, err = NewWebFetchToolWithProxy(0, "http://127.0.0.1:7890", testFetchLimit)
tool, err = NewWebFetchToolWithProxy(0, "http://127.0.0.1:7890", format, testFetchLimit, nil)
if err != nil {
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
}
@@ -660,6 +800,16 @@ func TestNewWebFetchToolWithProxy(t *testing.T) {
}
}
// TestNewWebFetchToolWithConfig_InvalidPrivateHostWhitelist verifies that the
// constructor rejects a whitelist entry that is neither an IP nor a CIDR
// block, and that the returned error contains "invalid entry".
func TestNewWebFetchToolWithConfig_InvalidPrivateHostWhitelist(t *testing.T) {
	badEntries := []string{"not-an-ip-or-cidr"}

	_, err := NewWebFetchToolWithConfig(1024, "", format, testFetchLimit, badEntries)
	switch {
	case err == nil:
		t.Fatal("expected invalid whitelist entry to fail")
	case !strings.Contains(err.Error(), "invalid entry"):
		t.Fatalf("unexpected error: %v", err)
	}
}
func TestNewWebSearchTool_PropagatesProxy(t *testing.T) {
t.Run("perplexity", func(t *testing.T) {
tool, err := NewWebSearchTool(WebSearchToolOptions{

Some files were not shown because too many files have changed in this diff Show More