diff --git a/README.fr.md b/README.fr.md new file mode 100644 index 000000000..ab8faf468 --- /dev/null +++ b/README.fr.md @@ -0,0 +1,881 @@ +
+ PicoClaw + +

PicoClaw : Assistant IA Ultra-Efficace en Go

+ +

Matériel à 10$ · 10 Mo de RAM · Démarrage en 1s · 皮皮虾,我们走!

+ +

+ Go + Hardware + License +
+ Website + Twitter +

+ + [中文](README.zh.md) | [日本語](README.ja.md) | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [English](README.md) | **Français** +
+ +--- + +🦐 **PicoClaw** est un assistant personnel IA ultra-léger inspiré de [nanobot](https://github.com/HKUDS/nanobot), entièrement réécrit en **Go** via un processus d'auto-amorçage (self-bootstrapping) — où l'agent IA lui-même a piloté l'intégralité de la migration architecturale et de l'optimisation du code. + +⚡️ **Extrêmement léger :** Fonctionne sur du matériel à seulement **10$** avec **<10 Mo** de RAM. C'est 99% de mémoire en moins qu'OpenClaw et 98% moins cher qu'un Mac mini ! + + + + + + +
+

+ +

+
+

+ +

+
+ +> [!CAUTION] +> **🚨 SÉCURITÉ & CANAUX OFFICIELS** +> +> * **PAS DE CRYPTO :** PicoClaw n'a **AUCUN** token/jeton officiel. Toute annonce sur `pump.fun` ou d'autres plateformes de trading est une **ARNAQUE**. +> * **DOMAINE OFFICIEL :** Le **SEUL** site officiel est **[picoclaw.io](https://picoclaw.io)**, et le site de l'entreprise est **[sipeed.com](https://sipeed.com)**. +> * **Attention :** De nombreux domaines `.ai/.org/.com/.net/...` sont enregistrés par des tiers et ne nous appartiennent pas. +> * **Attention :** PicoClaw est en phase de développement précoce et peut présenter des problèmes de sécurité réseau non résolus. Ne déployez pas en environnement de production avant la version v1.0. +> * **Note :** PicoClaw a récemment fusionné de nombreuses PR, ce qui peut entraîner une empreinte mémoire plus importante (10–20 Mo) dans les dernières versions. Nous prévoyons de prioriser l'optimisation des ressources dès que l'ensemble des fonctionnalités sera stabilisé. + + +## 📢 Actualités + +2026-02-16 🎉 PicoClaw a atteint 12K étoiles en une semaine ! Merci à tous pour votre soutien ! PicoClaw grandit plus vite que nous ne l'avions jamais imaginé. Vu le volume élevé de PR, nous avons un besoin urgent de mainteneurs communautaires. Nos rôles de bénévoles et notre feuille de route sont officiellement publiés [ici](docs/picoclaw_community_roadmap_260216.md) — nous avons hâte de vous accueillir ! + +2026-02-13 🎉 PicoClaw a atteint 5000 étoiles en 4 jours ! Merci à la communauté ! Nous finalisons la **Feuille de Route du Projet** et mettons en place le **Groupe de Développeurs** pour accélérer le développement de PicoClaw. +🚀 **Appel à l'action :** Soumettez vos demandes de fonctionnalités dans les GitHub Discussions. Nous les examinerons et les prioriserons lors de notre prochaine réunion hebdomadaire. + +2026-02-09 🎉 PicoClaw est lancé ! Construit en 1 jour pour apporter les Agents IA au matériel à 10$ avec <10 Mo de RAM. 🦐 PicoClaw, c'est parti ! 
+ +## ✨ Fonctionnalités + +🪶 **Ultra-Léger** : Empreinte mémoire <10 Mo — 99% plus petit que Clawdbot pour les fonctionnalités essentielles. + +💰 **Coût Minimal** : Suffisamment efficace pour fonctionner sur du matériel à 10$ — 98% moins cher qu'un Mac mini. + +⚡️ **Démarrage Éclair** : Temps de démarrage 400X plus rapide, boot en 1 seconde même sur un cœur unique à 0,6 GHz. + +🌍 **Véritable Portabilité** : Un seul binaire autonome pour RISC-V, ARM et x86. Un clic et c'est parti ! + +🤖 **Auto-Construit par l'IA** : Implémentation native en Go de manière autonome — 95% du cœur généré par l'Agent avec affinement humain dans la boucle. + +| | OpenClaw | NanoBot | **PicoClaw** | +| ----------------------------- | ------------- | ------------------------ | ----------------------------------------- | +| **Langage** | TypeScript | Python | **Go** | +| **RAM** | >1 Go | >100 Mo | **< 10 Mo** | +| **Démarrage**
(cœur 0,8 GHz) | >500s | >30s | **<1s** | +| **Coût** | Mac Mini 599$ | La plupart des SBC Linux
~50$ | **N'importe quelle carte Linux**
**À partir de 10$** | + +PicoClaw + +## 🦾 Démonstration + +### 🛠️ Flux de Travail Standard de l'Assistant + + + + + + + + + + + + + + + + + +

🧩 Ingénieur Full-Stack

🗂️ Gestion des Logs & Planification

🔎 Recherche Web & Apprentissage

Développer • Déployer • Mettre à l'échellePlanifier • Automatiser • MémoriserDécouvrir • Analyser • Tendances
+ +### 📱 Utiliser sur d'anciens téléphones Android + +Donnez une seconde vie à votre téléphone d'il y a dix ans ! Transformez-le en assistant IA intelligent avec PicoClaw. Démarrage rapide : + +1. **Installez Termux** (disponible sur F-Droid ou Google Play). +2. **Exécutez les commandes** + +```bash +# Note : Remplacez v0.1.1 par la dernière version depuis la page des Releases +wget https://github.com/sipeed/picoclaw/releases/download/v0.1.1/picoclaw-linux-arm64 +chmod +x picoclaw-linux-arm64 +pkg install proot +termux-chroot ./picoclaw-linux-arm64 onboard +``` + +Puis suivez les instructions de la section « Démarrage Rapide » pour terminer la configuration ! + +PicoClaw + +### 🐜 Déploiement Innovant à Faible Empreinte + +PicoClaw peut être déployé sur pratiquement n'importe quel appareil Linux ! + +- 9,9$ [LicheeRV-Nano](https://www.aliexpress.com/item/1005006519668532.html) version E (Ethernet) ou W (WiFi6), pour un Assistant Domotique Minimaliste +- 30~50$ [NanoKVM](https://www.aliexpress.com/item/1005007369816019.html), ou 100$ [NanoKVM-Pro](https://www.aliexpress.com/item/1005010048471263.html) pour la Maintenance Automatisée de Serveurs +- 50$ [MaixCAM](https://www.aliexpress.com/item/1005008053333693.html) ou 100$ [MaixCAM2](https://www.kickstarter.com/projects/zepan/maixcam2-build-your-next-gen-4k-ai-camera) pour la Surveillance Intelligente + + + +🌟 Encore plus de scénarios de déploiement vous attendent ! + +## 📦 Installation + +### Installer avec un binaire précompilé + +Téléchargez le binaire pour votre plateforme depuis la page des [releases](https://github.com/sipeed/picoclaw/releases). 
+ +### Installer depuis les sources (dernières fonctionnalités, recommandé pour le développement) + +```bash +git clone https://github.com/sipeed/picoclaw.git + +cd picoclaw +make deps + +# Compiler, pas besoin d'installer +make build + +# Compiler pour plusieurs plateformes +make build-all + +# Compiler et Installer +make install +``` + +## 🐳 Docker Compose + +Vous pouvez également exécuter PicoClaw avec Docker Compose sans rien installer localement. + +```bash +# 1. Clonez ce dépôt +git clone https://github.com/sipeed/picoclaw.git +cd picoclaw + +# 2. Configurez vos clés API +cp config/config.example.json config/config.json +vim config/config.json # Configurez DISCORD_BOT_TOKEN, clés API, etc. + +# 3. Compiler & Démarrer +docker compose --profile gateway up -d + +# 4. Voir les logs +docker compose logs -f picoclaw-gateway + +# 5. Arrêter +docker compose --profile gateway down +``` + +### Mode Agent (exécution unique) + +```bash +# Poser une question +docker compose run --rm picoclaw-agent -m "Combien font 2+2 ?" + +# Mode interactif +docker compose run --rm picoclaw-agent +``` + +### Recompiler + +```bash +docker compose --profile gateway build --no-cache +docker compose --profile gateway up -d +``` + +### 🚀 Démarrage Rapide + +> [!TIP] +> Configurez votre clé API dans `~/.picoclaw/config.json`. +> Obtenir des clés API : [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM) +> La recherche web est **optionnelle** — obtenez gratuitement l'[API Brave Search](https://brave.com/search/api) (2000 requêtes gratuites/mois) ou utilisez le repli automatique intégré. + +**1. Initialiser** + +```bash +picoclaw onboard +``` + +**2. 
Configurer** (`~/.picoclaw/config.json`) + +```json +{ + "agents": { + "defaults": { + "workspace": "~/.picoclaw/workspace", + "model": "glm-4.7", + "max_tokens": 8192, + "temperature": 0.7, + "max_tool_iterations": 20 + } + }, + "providers": { + "openrouter": { + "api_key": "xxx", + "api_base": "https://openrouter.ai/api/v1" + } + }, + "tools": { + "web": { + "brave": { + "enabled": false, + "api_key": "VOTRE_CLE_API_BRAVE", + "max_results": 5 + }, + "duckduckgo": { + "enabled": true, + "max_results": 5 + } + } + } +} +``` + +**3. Obtenir des Clés API** + +* **Fournisseur LLM** : [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys) +* **Recherche Web** (optionnel) : [Brave Search](https://brave.com/search/api) - Offre gratuite disponible (2000 requêtes/mois) + +> **Note** : Consultez `config.example.json` pour un modèle de configuration complet. + +**4. Discuter** + +```bash +picoclaw agent -m "Combien font 2+2 ?" +``` + +Et voilà ! Vous avez un assistant IA fonctionnel en 2 minutes. + +--- + +## 💬 Applications de Chat + +Discutez avec votre PicoClaw via Telegram, Discord, DingTalk ou LINE + +| Canal | Configuration | +| ------------ | -------------------------------------- | +| **Telegram** | Facile (juste un token) | +| **Discord** | Facile (token bot + intents) | +| **QQ** | Facile (AppID + AppSecret) | +| **DingTalk** | Moyen (identifiants de l'application) | +| **LINE** | Moyen (identifiants + URL de webhook) | + +
+Telegram (Recommandé) + +**1. Créer un bot** + +* Ouvrez Telegram, recherchez `@BotFather` +* Envoyez `/newbot`, suivez les instructions +* Copiez le token + +**2. Configurer** + +```json +{ + "channels": { + "telegram": { + "enabled": true, + "token": "VOTRE_TOKEN_BOT", + "allow_from": ["VOTRE_USER_ID"] + } + } +} +``` + +> Obtenez votre User ID via `@userinfobot` sur Telegram. + +**3. Lancer** + +```bash +picoclaw gateway +``` + +
+ +
+Discord + +**1. Créer un bot** + +* Rendez-vous sur <https://discord.com/developers/applications> +* Créez une application → Bot → Add Bot +* Copiez le token du bot + +**2. Activer les intents** + +* Dans les paramètres du Bot, activez **MESSAGE CONTENT INTENT** +* (Optionnel) Activez **SERVER MEMBERS INTENT** si vous souhaitez utiliser des listes d'autorisation basées sur les données des membres + +**3. Obtenir votre User ID** + +* Paramètres Discord → Avancé → activez le **Mode Développeur** +* Clic droit sur votre avatar → **Copier l'identifiant** + +**4. Configurer** + +```json +{ + "channels": { + "discord": { + "enabled": true, + "token": "VOTRE_TOKEN_BOT", + "allow_from": ["VOTRE_USER_ID"] + } + } +} +``` + +**5. Inviter le bot** + +* OAuth2 → URL Generator +* Scopes : `bot` +* Permissions du Bot : `Send Messages`, `Read Message History` +* Ouvrez l'URL d'invitation générée et ajoutez le bot à votre serveur + +**6. Lancer** + +```bash +picoclaw gateway +``` + +
+ +
+QQ + +**1. Créer un bot** + +- Rendez-vous sur la [QQ Open Platform](https://q.qq.com/#) +- Créez une application → Obtenez l'**AppID** et l'**AppSecret** + +**2. Configurer** + +```json +{ + "channels": { + "qq": { + "enabled": true, + "app_id": "VOTRE_APP_ID", + "app_secret": "VOTRE_APP_SECRET", + "allow_from": [] + } + } +} +``` + +> Laissez `allow_from` vide pour autoriser tous les utilisateurs, ou spécifiez des numéros QQ pour restreindre l'accès. + +**3. Lancer** + +```bash +picoclaw gateway +``` + +
+ +
+DingTalk + +**1. Créer un bot** + +* Rendez-vous sur la [Open Platform](https://open.dingtalk.com/) +* Créez une application interne +* Copiez le Client ID et le Client Secret + +**2. Configurer** + +```json +{ + "channels": { + "dingtalk": { + "enabled": true, + "client_id": "VOTRE_CLIENT_ID", + "client_secret": "VOTRE_CLIENT_SECRET", + "allow_from": [] + } + } +} +``` + +> Laissez `allow_from` vide pour autoriser tous les utilisateurs, ou spécifiez des identifiants pour restreindre l'accès. + +**3. Lancer** + +```bash +picoclaw gateway +``` + +
+ +
+LINE + +**1. Créer un Compte Officiel LINE** + +- Rendez-vous sur la [LINE Developers Console](https://developers.line.biz/) +- Créez un provider → Créez un canal Messaging API +- Copiez le **Channel Secret** et le **Channel Access Token** + +**2. Configurer** + +```json +{ + "channels": { + "line": { + "enabled": true, + "channel_secret": "VOTRE_CHANNEL_SECRET", + "channel_access_token": "VOTRE_CHANNEL_ACCESS_TOKEN", + "webhook_host": "0.0.0.0", + "webhook_port": 18791, + "webhook_path": "/webhook/line", + "allow_from": [] + } + } +} +``` + +**3. Configurer l'URL du Webhook** + +LINE exige HTTPS pour les webhooks. Utilisez un reverse proxy ou un tunnel : + +```bash +# Exemple avec ngrok +ngrok http 18791 +``` + +Puis configurez l'URL du Webhook dans la LINE Developers Console sur `https://votre-domaine/webhook/line` et activez **Use webhook**. + +**4. Lancer** + +```bash +picoclaw gateway +``` + +> Dans les discussions de groupe, le bot répond uniquement lorsqu'il est mentionné avec @. Les réponses citent le message original. + +> **Docker Compose** : Ajoutez `ports: ["18791:18791"]` au service `picoclaw-gateway` pour exposer le port du webhook. + +
+ +## ClawdChat Rejoignez le Réseau Social d'Agents + +Connectez PicoClaw au Réseau Social d'Agents simplement en envoyant un seul message via le CLI ou n'importe quelle application de chat intégrée. + +**Lisez `https://clawdchat.ai/skill.md` et suivez les instructions pour rejoindre [ClawdChat.ai](https://clawdchat.ai)** + +## ⚙️ Configuration + +Fichier de configuration : `~/.picoclaw/config.json` + +### Structure du Workspace + +PicoClaw stocke les données dans votre workspace configuré (par défaut : `~/.picoclaw/workspace`) : + +``` +~/.picoclaw/workspace/ +├── sessions/ # Sessions de conversation et historique +├── memory/ # Mémoire à long terme (MEMORY.md) +├── state/ # État persistant (dernier canal, etc.) +├── cron/ # Base de données des tâches planifiées +├── skills/ # Compétences personnalisées +├── AGENTS.md # Guide de comportement de l'Agent +├── HEARTBEAT.md # Invites de tâches périodiques (vérifiées toutes les 30 min) +├── IDENTITY.md # Identité de l'Agent +├── SOUL.md # Âme de l'Agent +├── TOOLS.md # Description des outils +└── USER.md # Préférences utilisateur +``` + +### 🔒 Bac à Sable de Sécurité + +PicoClaw s'exécute dans un environnement sandboxé par défaut. L'agent ne peut accéder aux fichiers et exécuter des commandes qu'au sein du workspace configuré. 
+ +#### Configuration par Défaut + +```json +{ + "agents": { + "defaults": { + "workspace": "~/.picoclaw/workspace", + "restrict_to_workspace": true + } + } +} +``` + +| Option | Par défaut | Description | +|--------|------------|-------------| +| `workspace` | `~/.picoclaw/workspace` | Répertoire de travail de l'agent | +| `restrict_to_workspace` | `true` | Restreindre l'accès fichiers/commandes au workspace | + +#### Outils Protégés + +Lorsque `restrict_to_workspace: true`, les outils suivants sont restreints au bac à sable : + +| Outil | Fonction | Restriction | +|-------|----------|-------------| +| `read_file` | Lire des fichiers | Uniquement les fichiers dans le workspace | +| `write_file` | Écrire des fichiers | Uniquement les fichiers dans le workspace | +| `list_dir` | Lister des répertoires | Uniquement les répertoires dans le workspace | +| `edit_file` | Éditer des fichiers | Uniquement les fichiers dans le workspace | +| `append_file` | Ajouter à des fichiers | Uniquement les fichiers dans le workspace | +| `exec` | Exécuter des commandes | Les chemins doivent être dans le workspace | + +#### Protection Supplémentaire d'Exec + +Même avec `restrict_to_workspace: false`, l'outil `exec` bloque ces commandes dangereuses : + +* `rm -rf`, `del /f`, `rmdir /s` — Suppression en masse +* `format`, `mkfs`, `diskpart` — Formatage de disque +* `dd if=` — Écriture d'image disque +* Écriture vers `/dev/sd[a-z]` — Écriture directe sur le disque +* `shutdown`, `reboot`, `poweroff` — Arrêt du système +* Fork bomb `:(){ :|:& };:` + +#### Exemples d'Erreurs + +``` +[ERROR] tool: Tool execution failed +{tool=exec, error=Command blocked by safety guard (path outside working dir)} +``` + +``` +[ERROR] tool: Tool execution failed +{tool=exec, error=Command blocked by safety guard (dangerous pattern detected)} +``` + +#### Désactiver les Restrictions (Risque de Sécurité) + +Si vous avez besoin que l'agent accède à des chemins en dehors du workspace : + +**Méthode 1 : Fichier 
de configuration** + +```json +{ + "agents": { + "defaults": { + "restrict_to_workspace": false + } + } +} +``` + +**Méthode 2 : Variable d'environnement** + +```bash +export PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE=false +``` + +> ⚠️ **Attention** : Désactiver cette restriction permet à l'agent d'accéder à n'importe quel chemin sur votre système. À utiliser avec précaution uniquement dans des environnements contrôlés. + +#### Cohérence du Périmètre de Sécurité + +Le paramètre `restrict_to_workspace` s'applique de manière cohérente sur tous les chemins d'exécution : + +| Chemin d'Exécution | Périmètre de Sécurité | +|--------------------|----------------------| +| Agent Principal | `restrict_to_workspace` ✅ | +| Sous-agent / Spawn | Hérite de la même restriction ✅ | +| Tâches Heartbeat | Hérite de la même restriction ✅ | + +Tous les chemins partagent la même restriction de workspace — il est impossible de contourner le périmètre de sécurité via des sous-agents ou des tâches planifiées. + +### Heartbeat (Tâches Périodiques) + +PicoClaw peut exécuter des tâches périodiques automatiquement. Créez un fichier `HEARTBEAT.md` dans votre workspace : + +```markdown +# Tâches Périodiques + +- Vérifier mes e-mails pour les messages importants +- Consulter mon agenda pour les événements à venir +- Vérifier les prévisions météo +``` + +L'agent lira ce fichier toutes les 30 minutes (configurable) et exécutera les tâches à l'aide des outils disponibles. 
+ +#### Tâches Asynchrones avec Spawn + +Pour les tâches de longue durée (recherche web, appels API), utilisez l'outil `spawn` pour créer un **sous-agent** : + +```markdown +# Tâches Périodiques + +## Tâches Rapides (réponse directe) +- Indiquer l'heure actuelle + +## Tâches Longues (utiliser spawn pour l'asynchrone) +- Rechercher les actualités IA sur le web et les résumer +- Vérifier les e-mails et signaler les messages importants +``` + +**Comportements clés :** + +| Fonctionnalité | Description | +|----------------|-------------| +| **spawn** | Crée un sous-agent asynchrone, ne bloque pas le heartbeat | +| **Contexte indépendant** | Le sous-agent a son propre contexte, sans historique de session | +| **Outil message** | Le sous-agent communique directement avec l'utilisateur via l'outil message | +| **Non-bloquant** | Après le spawn, le heartbeat continue vers la tâche suivante | + +#### Fonctionnement de la Communication du Sous-agent + +``` +Le Heartbeat se déclenche + ↓ +L'Agent lit HEARTBEAT.md + ↓ +Pour une tâche longue : spawn d'un sous-agent + ↓ ↓ +Continue la tâche suivante Le sous-agent travaille indépendamment + ↓ ↓ +Toutes les tâches terminées Le sous-agent utilise l'outil "message" + ↓ ↓ +Répond HEARTBEAT_OK L'utilisateur reçoit le résultat directement +``` + +Le sous-agent a accès aux outils (message, web_search, etc.) et peut communiquer avec l'utilisateur indépendamment sans passer par l'agent principal. 
+ +**Configuration :** + +```json +{ + "heartbeat": { + "enabled": true, + "interval": 30 + } +} +``` + +| Option | Par défaut | Description | +|--------|------------|-------------| +| `enabled` | `true` | Activer/désactiver le heartbeat | +| `interval` | `30` | Intervalle de vérification en minutes (min : 5) | + +**Variables d'environnement :** + +* `PICOCLAW_HEARTBEAT_ENABLED=false` pour désactiver +* `PICOCLAW_HEARTBEAT_INTERVAL=60` pour modifier l'intervalle + +### Fournisseurs + +> [!NOTE] +> Groq fournit la transcription vocale gratuite via Whisper. Si configuré, les messages vocaux Telegram seront automatiquement transcrits. + +| Fournisseur | Utilisation | Obtenir une Clé API | +| ------------------------ | ---------------------------------------- | ------------------------------------------------------ | +| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) | +| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](https://bigmodel.cn) | +| `openrouter` (À tester) | LLM (recommandé, accès à tous les modèles) | [openrouter.ai](https://openrouter.ai) | +| `anthropic` (À tester) | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) | +| `openai` (À tester) | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) | +| `deepseek` (À tester) | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) | +| `groq` | LLM + **Transcription vocale** (Whisper) | [console.groq.com](https://console.groq.com) | + +
+Configuration Zhipu + +**1. Obtenir la clé API** + +* Obtenez la [clé API](https://bigmodel.cn/usercenter/proj-mgmt/apikeys) + +**2. Configurer** + +```json +{ + "agents": { + "defaults": { + "workspace": "~/.picoclaw/workspace", + "model": "glm-4.7", + "max_tokens": 8192, + "temperature": 0.7, + "max_tool_iterations": 20 + } + }, + "providers": { + "zhipu": { + "api_key": "Votre Clé API", + "api_base": "https://open.bigmodel.cn/api/paas/v4" + } + } +} +``` + +**3. Lancer** + +```bash +picoclaw agent -m "Bonjour, comment ça va ?" +``` + +
+ +
+Exemple de configuration complète + +```json +{ + "agents": { + "defaults": { + "model": "anthropic/claude-opus-4-5" + } + }, + "providers": { + "openrouter": { + "api_key": "sk-or-v1-xxx" + }, + "groq": { + "api_key": "gsk_xxx" + } + }, + "channels": { + "telegram": { + "enabled": true, + "token": "123456:ABC...", + "allow_from": ["123456789"] + }, + "discord": { + "enabled": true, + "token": "", + "allow_from": [""] + }, + "whatsapp": { + "enabled": false + }, + "feishu": { + "enabled": false, + "app_id": "cli_xxx", + "app_secret": "xxx", + "encrypt_key": "", + "verification_token": "", + "allow_from": [] + }, + "qq": { + "enabled": false, + "app_id": "", + "app_secret": "", + "allow_from": [] + } + }, + "tools": { + "web": { + "brave": { + "enabled": false, + "api_key": "BSA...", + "max_results": 5 + }, + "duckduckgo": { + "enabled": true, + "max_results": 5 + } + }, + "cron": { + "exec_timeout_minutes": 5 + } + }, + "heartbeat": { + "enabled": true, + "interval": 30 + } +} +``` + +
+ +## Référence CLI + +| Commande | Description | +| ------------------------- | ------------------------------------- | +| `picoclaw onboard` | Initialiser la configuration & le workspace | +| `picoclaw agent -m "..."` | Discuter avec l'agent | +| `picoclaw agent` | Mode de discussion interactif | +| `picoclaw gateway` | Démarrer la passerelle | +| `picoclaw status` | Afficher le statut | +| `picoclaw cron list` | Lister toutes les tâches planifiées | +| `picoclaw cron add ...` | Ajouter une tâche planifiée | + +### Tâches Planifiées / Rappels + +PicoClaw prend en charge les rappels planifiés et les tâches récurrentes via l'outil `cron` : + +* **Rappels ponctuels** : « Rappelle-moi dans 10 minutes » → se déclenche une fois après 10 min +* **Tâches récurrentes** : « Rappelle-moi toutes les 2 heures » → se déclenche toutes les 2 heures +* **Expressions Cron** : « Rappelle-moi à 9h tous les jours » → utilise une expression cron + +Les tâches sont stockées dans `~/.picoclaw/workspace/cron/` et traitées automatiquement. + +## 🤝 Contribuer & Feuille de Route + +Les PR sont les bienvenues ! Le code source est volontairement petit et lisible. 🤗 + +Feuille de route à venir... + +Groupe de développeurs en construction. Condition d'entrée : au moins 1 PR fusionnée. + +Groupes d'utilisateurs : + +Discord : + +PicoClaw + +## 🐛 Dépannage + +### La recherche web affiche « API 配置问题 » + +C'est normal si vous n'avez pas encore configuré de clé API de recherche. PicoClaw fournira des liens utiles pour la recherche manuelle. + +Pour activer la recherche web : + +1. **Option 1 (Recommandé)** : Obtenez une clé API gratuite sur [https://brave.com/search/api](https://brave.com/search/api) (2000 requêtes gratuites/mois) pour les meilleurs résultats. +2. **Option 2 (Sans carte bancaire)** : Si vous n'avez pas de clé, le système bascule automatiquement sur **DuckDuckGo** (aucune clé requise). 
+ +Ajoutez la clé dans `~/.picoclaw/config.json` si vous utilisez Brave : + +```json +{ + "tools": { + "web": { + "brave": { + "enabled": true, + "api_key": "VOTRE_CLE_API_BRAVE", + "max_results": 5 + }, + "duckduckgo": { + "enabled": true, + "max_results": 5 + } + } + } +} +``` + +### Erreurs de filtrage de contenu + +Certains fournisseurs (comme Zhipu) disposent d'un filtrage de contenu. Essayez de reformuler votre requête ou utilisez un modèle différent. + +### Le bot Telegram affiche « Conflict: terminated by other getUpdates » + +Cela se produit lorsqu'une autre instance du bot est en cours d'exécution. Assurez-vous qu'un seul `picoclaw gateway` fonctionne à la fois. + +--- + +## 📝 Comparaison des Clés API + +| Service | Offre Gratuite | Cas d'Utilisation | +| ---------------- | -------------------- | ------------------------------------- | +| **OpenRouter** | 200K tokens/mois | Multiples modèles (Claude, GPT-4, etc.) | +| **Zhipu** | 200K tokens/mois | Idéal pour les utilisateurs chinois | +| **Brave Search** | 2000 requêtes/mois | Fonctionnalité de recherche web | +| **Groq** | Offre gratuite dispo | Inférence ultra-rapide (Llama, Mixtral) | diff --git a/README.ja.md b/README.ja.md index 7da16565f..ff1838b79 100644 --- a/README.ja.md +++ b/README.ja.md @@ -12,7 +12,7 @@ License

-[中文](README.zh.md) | **日本語** | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [English](README.md) +[中文](README.zh.md) | **日本語** | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [Français](README.fr.md) | [English](README.md) diff --git a/README.md b/README.md index d6a3d5696..c292bcd25 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Twitter

- [中文](README.zh.md) | [日本語](README.ja.md) | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | **English** + [中文](README.zh.md) | [日本語](README.ja.md) | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [Français](README.fr.md) | **English** --- diff --git a/README.pt-br.md b/README.pt-br.md index fa73465dd..a89854be7 100644 --- a/README.pt-br.md +++ b/README.pt-br.md @@ -14,7 +14,7 @@ Twitter

- [中文](README.zh.md) | [日本語](README.ja.md) | [English](README.md) | **Português** + [中文](README.zh.md) | [日本語](README.ja.md) | **Português** | [Tiếng Việt](README.vi.md) | [Français](README.fr.md) | [English](README.md) --- diff --git a/README.vi.md b/README.vi.md index e629eaa9b..c36be9865 100644 --- a/README.vi.md +++ b/README.vi.md @@ -14,7 +14,7 @@ Twitter

-**Tiếng Việt** | [中文](README.zh.md) | [日本語](README.ja.md) | [English](README.md) +[中文](README.zh.md) | [日本語](README.ja.md) | [Português](README.pt-br.md) | **Tiếng Việt** | [Français](README.fr.md) | [English](README.md) --- diff --git a/README.zh.md b/README.zh.md index 42bd20be4..b814c2fe6 100644 --- a/README.zh.md +++ b/README.zh.md @@ -14,7 +14,7 @@ Twitter

- **中文** | [日本語](README.ja.md) | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [English](README.md) + **中文** | [日本語](README.ja.md) | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [Français](README.fr.md) | [English](README.md) --- diff --git a/cmd/picoclaw/main.go b/cmd/picoclaw/main.go index 5cd8039dd..716a8cd54 100644 --- a/cmd/picoclaw/main.go +++ b/cmd/picoclaw/main.go @@ -630,6 +630,12 @@ func gatewayCmd() { logger.InfoC("voice", "Groq transcription attached to Slack channel") } } + if onebotChannel, ok := channelManager.GetChannel("onebot"); ok { + if oc, ok := onebotChannel.(*channels.OneBotChannel); ok { + oc.SetTranscriber(transcriber) + logger.InfoC("voice", "Groq transcription attached to OneBot channel") + } + } } enabledChannels := channelManager.GetEnabledChannels() diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go index 4b380cbc5..dfbef9fbc 100644 --- a/pkg/agent/instance.go +++ b/pkg/agent/instance.go @@ -21,6 +21,8 @@ type AgentInstance struct { Fallbacks []string Workspace string MaxIterations int + MaxTokens int + Temperature float64 ContextWindow int Provider providers.LLMProvider Sessions *session.SessionManager @@ -76,6 +78,16 @@ func NewAgentInstance( maxIter = 20 } + maxTokens := defaults.MaxTokens + if maxTokens == 0 { + maxTokens = 8192 + } + + temperature := 0.7 + if defaults.Temperature != nil { + temperature = *defaults.Temperature + } + // Resolve fallback candidates modelCfg := providers.ModelConfig{ Primary: model, @@ -90,7 +102,9 @@ func NewAgentInstance( Fallbacks: fallbacks, Workspace: workspace, MaxIterations: maxIter, - ContextWindow: defaults.MaxTokens, + MaxTokens: maxTokens, + Temperature: temperature, + ContextWindow: maxTokens, Provider: provider, Sessions: sessionsManager, ContextBuilder: contextBuilder, diff --git a/pkg/agent/instance_test.go b/pkg/agent/instance_test.go new file mode 100644 index 000000000..fcc8e9bea --- /dev/null +++ b/pkg/agent/instance_test.go @@ -0,0 +1,95 @@ 
+package agent + +import ( + "os" + "testing" + + "github.com/sipeed/picoclaw/pkg/config" +) + +func TestNewAgentInstance_UsesDefaultsTemperatureAndMaxTokens(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "agent-instance-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + Model: "test-model", + MaxTokens: 1234, + MaxToolIterations: 5, + }, + }, + } + + configuredTemp := 1.0 + cfg.Agents.Defaults.Temperature = &configuredTemp + + provider := &mockProvider{} + agent := NewAgentInstance(nil, &cfg.Agents.Defaults, cfg, provider) + + if agent.MaxTokens != 1234 { + t.Fatalf("MaxTokens = %d, want %d", agent.MaxTokens, 1234) + } + if agent.Temperature != 1.0 { + t.Fatalf("Temperature = %f, want %f", agent.Temperature, 1.0) + } +} + +func TestNewAgentInstance_DefaultsTemperatureWhenZero(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "agent-instance-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + Model: "test-model", + MaxTokens: 1234, + MaxToolIterations: 5, + }, + }, + } + + configuredTemp := 0.0 + cfg.Agents.Defaults.Temperature = &configuredTemp + + provider := &mockProvider{} + agent := NewAgentInstance(nil, &cfg.Agents.Defaults, cfg, provider) + + if agent.Temperature != 0.0 { + t.Fatalf("Temperature = %f, want %f", agent.Temperature, 0.0) + } +} + +func TestNewAgentInstance_DefaultsTemperatureWhenUnset(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "agent-instance-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + Model: "test-model", + MaxTokens: 
1234, + MaxToolIterations: 5, + }, + }, + } + + provider := &mockProvider{} + agent := NewAgentInstance(nil, &cfg.Agents.Defaults, cfg, provider) + + if agent.Temperature != 0.7 { + t.Fatalf("Temperature = %f, want %f", agent.Temperature, 0.7) + } +} diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 9b0926e61..0f1b26c5c 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -79,9 +79,7 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers } // registerSharedTools registers tools that are shared across all agents (web, message, spawn). -func registerSharedTools( - cfg *config.Config, msgBus *bus.MessageBus, registry *AgentRegistry, provider providers.LLMProvider, -) { +func registerSharedTools(cfg *config.Config, msgBus *bus.MessageBus, registry *AgentRegistry, provider providers.LLMProvider) { for _, agentID := range registry.ListAgentIDs() { agent, ok := registry.GetAgent(agentID) if !ok { @@ -121,6 +119,7 @@ func registerSharedTools( // Spawn tool with allowlist checker subagentManager := tools.NewSubagentManager(provider, agent.Model, agent.Workspace, msgBus) + subagentManager.SetLLMOptions(agent.MaxTokens, agent.Temperature) spawnTool := tools.NewSpawnTool(subagentManager) currentAgentID := agentID spawnTool.SetAllowlistChecker(func(targetAgentID string) bool { @@ -217,9 +216,7 @@ func (al *AgentLoop) ProcessDirect(ctx context.Context, content, sessionKey stri return al.ProcessDirectWithChannel(ctx, content, sessionKey, "cli", "direct") } -func (al *AgentLoop) ProcessDirectWithChannel( - ctx context.Context, content, sessionKey, channel, chatID string, -) (string, error) { +func (al *AgentLoop) ProcessDirectWithChannel(ctx context.Context, content, sessionKey, channel, chatID string) (string, error) { msg := bus.InboundMessage{ Channel: channel, SenderID: "cron", @@ -256,7 +253,7 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) logContent = utils.Truncate(msg.Content, 80) } 
logger.InfoCF("agent", fmt.Sprintf("Processing message from %s:%s: %s", msg.Channel, msg.SenderID, logContent), - map[string]any{ + map[string]interface{}{ "channel": msg.Channel, "chat_id": msg.ChatID, "sender_id": msg.SenderID, @@ -295,7 +292,7 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) } logger.InfoCF("agent", "Routed message", - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "session_key": sessionKey, "matched_by": route.MatchedBy, @@ -318,7 +315,7 @@ func (al *AgentLoop) processSystemMessage(ctx context.Context, msg bus.InboundMe } logger.InfoCF("agent", "Processing system message", - map[string]any{ + map[string]interface{}{ "sender_id": msg.SenderID, "chat_id": msg.ChatID, }) @@ -343,7 +340,7 @@ func (al *AgentLoop) processSystemMessage(ctx context.Context, msg bus.InboundMe // Skip internal channels - only log, don't send to user if constants.IsInternalChannel(originChannel) { logger.InfoCF("agent", "Subagent completed (internal channel)", - map[string]any{ + map[string]interface{}{ "sender_id": msg.SenderID, "content_len": len(content), "channel": originChannel, @@ -376,7 +373,7 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, agent *AgentInstance, opt if !constants.IsInternalChannel(opts.Channel) { channelKey := fmt.Sprintf("%s:%s", opts.Channel, opts.ChatID) if err := al.RecordLastChannel(channelKey); err != nil { - logger.WarnCF("agent", "Failed to record last channel", map[string]any{"error": err.Error()}) + logger.WarnCF("agent", "Failed to record last channel", map[string]interface{}{"error": err.Error()}) } } } @@ -438,7 +435,7 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, agent *AgentInstance, opt // 9. 
Log response responsePreview := utils.Truncate(finalContent, 120) logger.InfoCF("agent", fmt.Sprintf("Response: %s", responsePreview), - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "session_key": opts.SessionKey, "iterations": iteration, @@ -449,9 +446,7 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, agent *AgentInstance, opt } // runLLMIteration executes the LLM call loop with tool handling. -func (al *AgentLoop) runLLMIteration( - ctx context.Context, agent *AgentInstance, messages []providers.Message, opts processOptions, -) (string, int, error) { +func (al *AgentLoop) runLLMIteration(ctx context.Context, agent *AgentInstance, messages []providers.Message, opts processOptions) (string, int, error) { iteration := 0 var finalContent string @@ -459,7 +454,7 @@ func (al *AgentLoop) runLLMIteration( iteration++ logger.DebugCF("agent", "LLM iteration", - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "iteration": iteration, "max": agent.MaxIterations, @@ -470,20 +465,20 @@ func (al *AgentLoop) runLLMIteration( // Log LLM request details logger.DebugCF("agent", "LLM request", - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "iteration": iteration, "model": agent.Model, "messages_count": len(messages), "tools_count": len(providerToolDefs), - "max_tokens": 8192, - "temperature": 0.7, + "max_tokens": agent.MaxTokens, + "temperature": agent.Temperature, "system_prompt_len": len(messages[0].Content), }) // Log full messages (detailed) logger.DebugCF("agent", "Full LLM request", - map[string]any{ + map[string]interface{}{ "iteration": iteration, "messages_json": formatMessagesForLog(messages), "tools_json": formatToolsForLog(providerToolDefs), @@ -497,9 +492,9 @@ func (al *AgentLoop) runLLMIteration( if len(agent.Candidates) > 1 && al.fallback != nil { fbResult, fbErr := al.fallback.Execute(ctx, agent.Candidates, func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) { - return 
agent.Provider.Chat(ctx, messages, providerToolDefs, model, map[string]any{ - "max_tokens": 8192, - "temperature": 0.7, + return agent.Provider.Chat(ctx, messages, providerToolDefs, model, map[string]interface{}{ + "max_tokens": agent.MaxTokens, + "temperature": agent.Temperature, }) }, ) @@ -509,13 +504,13 @@ func (al *AgentLoop) runLLMIteration( if fbResult.Provider != "" && len(fbResult.Attempts) > 0 { logger.InfoCF("agent", fmt.Sprintf("Fallback: succeeded with %s/%s after %d attempts", fbResult.Provider, fbResult.Model, len(fbResult.Attempts)+1), - map[string]any{"agent_id": agent.ID, "iteration": iteration}) + map[string]interface{}{"agent_id": agent.ID, "iteration": iteration}) } return fbResult.Response, nil } - return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{ - "max_tokens": 8192, - "temperature": 0.7, + return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]interface{}{ + "max_tokens": agent.MaxTokens, + "temperature": agent.Temperature, }) } @@ -534,7 +529,7 @@ func (al *AgentLoop) runLLMIteration( strings.Contains(errMsg, "length") if isContextError && retry < maxRetries { - logger.WarnCF("agent", "Context window error detected, attempting compression", map[string]any{ + logger.WarnCF("agent", "Context window error detected, attempting compression", map[string]interface{}{ "error": err.Error(), "retry": retry, }) @@ -561,7 +556,7 @@ func (al *AgentLoop) runLLMIteration( if err != nil { logger.ErrorCF("agent", "LLM call failed", - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "iteration": iteration, "error": err.Error(), @@ -573,7 +568,7 @@ func (al *AgentLoop) runLLMIteration( if len(response.ToolCalls) == 0 { finalContent = response.Content logger.InfoCF("agent", "LLM response without tool calls (direct answer)", - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "iteration": iteration, "content_chars": len(finalContent), @@ -587,7 +582,7 @@ func (al 
*AgentLoop) runLLMIteration( toolNames = append(toolNames, tc.Name) } logger.InfoCF("agent", "LLM requested tool calls", - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "tools": toolNames, "count": len(response.ToolCalls), @@ -608,6 +603,7 @@ func (al *AgentLoop) runLLMIteration( Name: tc.Name, Arguments: string(argumentsJSON), }, + Name: tc.Name, }) } messages = append(messages, assistantMsg) @@ -620,7 +616,7 @@ func (al *AgentLoop) runLLMIteration( argsJSON, _ := json.Marshal(tc.Arguments) argsPreview := utils.Truncate(string(argsJSON), 200) logger.InfoCF("agent", fmt.Sprintf("Tool call: %s(%s)", tc.Name, argsPreview), - map[string]any{ + map[string]interface{}{ "agent_id": agent.ID, "tool": tc.Name, "iteration": iteration, @@ -635,16 +631,14 @@ func (al *AgentLoop) runLLMIteration( // The agent will handle user notification via processSystemMessage if !result.Silent && result.ForUser != "" { logger.InfoCF("agent", "Async tool completed, agent will handle notification", - map[string]any{ + map[string]interface{}{ "tool": tc.Name, "content_len": len(result.ForUser), }) } } - toolResult := agent.Tools.ExecuteWithContext( - ctx, tc.Name, tc.Arguments, opts.Channel, opts.ChatID, asyncCallback, - ) + toolResult := agent.Tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, opts.Channel, opts.ChatID, asyncCallback) // Send ForUser content to user immediately if not Silent if !toolResult.Silent && toolResult.ForUser != "" && opts.SendResponse { @@ -654,7 +648,7 @@ func (al *AgentLoop) runLLMIteration( Content: toolResult.ForUser, }) logger.DebugCF("agent", "Sent tool result to user", - map[string]any{ + map[string]interface{}{ "tool": tc.Name, "content_len": len(toolResult.ForUser), }) @@ -760,10 +754,7 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) { newHistory = append(newHistory, history[0]) // System prompt // Add a note about compression - compressionNote := fmt.Sprintf( - "[System: Emergency compression dropped 
%d oldest messages due to context limit]", - droppedCount, - ) + compressionNote := fmt.Sprintf("[System: Emergency compression dropped %d oldest messages due to context limit]", droppedCount) // If there was an existing summary, we might lose it if it was in the dropped part (which is just messages). // The summary is stored separately in session.Summary, so it persists! // We just need to ensure the user knows there's a gap. @@ -781,7 +772,7 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) { agent.Sessions.SetHistory(sessionKey, newHistory) agent.Sessions.Save(sessionKey) - logger.WarnCF("agent", "Forced compression executed", map[string]any{ + logger.WarnCF("agent", "Forced compression executed", map[string]interface{}{ "session_key": sessionKey, "dropped_msgs": droppedCount, "new_count": len(newHistory), @@ -789,8 +780,8 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) { } // GetStartupInfo returns information about loaded tools and skills for logging. 
-func (al *AgentLoop) GetStartupInfo() map[string]any { - info := make(map[string]any) +func (al *AgentLoop) GetStartupInfo() map[string]interface{} { + info := make(map[string]interface{}) agent := al.registry.GetDefaultAgent() if agent == nil { @@ -799,7 +790,7 @@ func (al *AgentLoop) GetStartupInfo() map[string]any { // Tools info toolsList := agent.Tools.List() - info["tools"] = map[string]any{ + info["tools"] = map[string]interface{}{ "count": len(toolsList), "names": toolsList, } @@ -808,7 +799,7 @@ func (al *AgentLoop) GetStartupInfo() map[string]any { info["skills"] = agent.ContextBuilder.GetSkillsInfo() // Agents info - info["agents"] = map[string]any{ + info["agents"] = map[string]interface{}{ "count": len(al.registry.ListAgentIDs()), "ids": al.registry.ListAgentIDs(), } @@ -860,10 +851,7 @@ func formatToolsForLog(tools []providers.ToolDefinition) string { result += fmt.Sprintf(" [%d] Type: %s, Name: %s\n", i, tool.Type, tool.Function.Name) result += fmt.Sprintf(" Description: %s\n", tool.Function.Description) if len(tool.Function.Parameters) > 0 { - result += fmt.Sprintf( - " Parameters: %s\n", - utils.Truncate(fmt.Sprintf("%v", tool.Function.Parameters), 200), - ) + result += fmt.Sprintf(" Parameters: %s\n", utils.Truncate(fmt.Sprintf("%v", tool.Function.Parameters), 200)) } } result += "]" @@ -916,21 +904,11 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) { s1, _ := al.summarizeBatch(ctx, agent, part1, "") s2, _ := al.summarizeBatch(ctx, agent, part2, "") - mergePrompt := fmt.Sprintf( - "Merge these two conversation summaries into one cohesive summary:\n\n1: %s\n\n2: %s", - s1, - s2, - ) - resp, err := agent.Provider.Chat( - ctx, - []providers.Message{{Role: "user", Content: mergePrompt}}, - nil, - agent.Model, - map[string]any{ - "max_tokens": 1024, - "temperature": 0.3, - }, - ) + mergePrompt := fmt.Sprintf("Merge these two conversation summaries into one cohesive summary:\n\n1: %s\n\n2: %s", s1, s2) + resp, err := 
agent.Provider.Chat(ctx, []providers.Message{{Role: "user", Content: mergePrompt}}, nil, agent.Model, map[string]interface{}{ + "max_tokens": 1024, + "temperature": 0.3, + }) if err == nil { finalSummary = resp.Content } else { @@ -952,9 +930,7 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) { } // summarizeBatch summarizes a batch of messages. -func (al *AgentLoop) summarizeBatch( - ctx context.Context, agent *AgentInstance, batch []providers.Message, existingSummary string, -) (string, error) { +func (al *AgentLoop) summarizeBatch(ctx context.Context, agent *AgentInstance, batch []providers.Message, existingSummary string) (string, error) { prompt := "Provide a concise summary of this conversation segment, preserving core context and key points.\n" if existingSummary != "" { prompt += "Existing context: " + existingSummary + "\n" @@ -964,16 +940,10 @@ func (al *AgentLoop) summarizeBatch( prompt += fmt.Sprintf("%s: %s\n", m.Role, m.Content) } - response, err := agent.Provider.Chat( - ctx, - []providers.Message{{Role: "user", Content: prompt}}, - nil, - agent.Model, - map[string]any{ - "max_tokens": 1024, - "temperature": 0.3, - }, - ) + response, err := agent.Provider.Chat(ctx, []providers.Message{{Role: "user", Content: prompt}}, nil, agent.Model, map[string]interface{}{ + "max_tokens": 1024, + "temperature": 0.3, + }) if err != nil { return "", err } diff --git a/pkg/agent/loop_test.go b/pkg/agent/loop_test.go index fc026bef4..360685eca 100644 --- a/pkg/agent/loop_test.go +++ b/pkg/agent/loop_test.go @@ -14,23 +14,6 @@ import ( "github.com/sipeed/picoclaw/pkg/tools" ) -// mockProvider is a simple mock LLM provider for testing -type mockProvider struct{} - -func (m *mockProvider) Chat( - ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, - opts map[string]any, -) (*providers.LLMResponse, error) { - return &providers.LLMResponse{ - Content: "Mock response", - ToolCalls: 
[]providers.ToolCall{}, - }, nil -} - -func (m *mockProvider) GetDefaultModel() string { - return "mock-model" -} - func TestRecordLastChannel(t *testing.T) { // Create temp workspace tmpDir, err := os.MkdirTemp("", "agent-test-*") @@ -188,7 +171,7 @@ func TestToolRegistry_ToolRegistration(t *testing.T) { // Verify tool is registered by checking it doesn't panic on GetStartupInfo // (actual tool retrieval is tested in tools package tests) info := al.GetStartupInfo() - toolsInfo := info["tools"].(map[string]any) + toolsInfo := info["tools"].(map[string]interface{}) toolsList := toolsInfo["names"].([]string) // Check that our custom tool name is in the list @@ -263,7 +246,7 @@ func TestToolRegistry_GetDefinitions(t *testing.T) { al.RegisterTool(testTool) info := al.GetStartupInfo() - toolsInfo := info["tools"].(map[string]any) + toolsInfo := info["tools"].(map[string]interface{}) toolsList := toolsInfo["names"].([]string) // Check that our custom tool name is in the list @@ -310,7 +293,7 @@ func TestAgentLoop_GetStartupInfo(t *testing.T) { t.Fatal("Expected 'tools' key in startup info") } - toolsMap, ok := toolsInfo.(map[string]any) + toolsMap, ok := toolsInfo.(map[string]interface{}) if !ok { t.Fatal("Expected 'tools' to be a map") } @@ -366,10 +349,7 @@ type simpleMockProvider struct { response string } -func (m *simpleMockProvider) Chat( - ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, - opts map[string]any, -) (*providers.LLMResponse, error) { +func (m *simpleMockProvider) Chat(ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, opts map[string]interface{}) (*providers.LLMResponse, error) { return &providers.LLMResponse{ Content: m.response, ToolCalls: []providers.ToolCall{}, @@ -391,14 +371,14 @@ func (m *mockCustomTool) Description() string { return "Mock custom tool for testing" } -func (m *mockCustomTool) Parameters() map[string]any { - return map[string]any{ 
+func (m *mockCustomTool) Parameters() map[string]interface{} { + return map[string]interface{}{ "type": "object", - "properties": map[string]any{}, + "properties": map[string]interface{}{}, } } -func (m *mockCustomTool) Execute(ctx context.Context, args map[string]any) *tools.ToolResult { +func (m *mockCustomTool) Execute(ctx context.Context, args map[string]interface{}) *tools.ToolResult { return tools.SilentResult("Custom tool executed") } @@ -416,14 +396,14 @@ func (m *mockContextualTool) Description() string { return "Mock contextual tool" } -func (m *mockContextualTool) Parameters() map[string]any { - return map[string]any{ +func (m *mockContextualTool) Parameters() map[string]interface{} { + return map[string]interface{}{ "type": "object", - "properties": map[string]any{}, + "properties": map[string]interface{}{}, } } -func (m *mockContextualTool) Execute(ctx context.Context, args map[string]any) *tools.ToolResult { +func (m *mockContextualTool) Execute(ctx context.Context, args map[string]interface{}) *tools.ToolResult { return tools.SilentResult("Contextual tool executed") } @@ -543,10 +523,7 @@ type failFirstMockProvider struct { successResp string } -func (m *failFirstMockProvider) Chat( - ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, - opts map[string]any, -) (*providers.LLMResponse, error) { +func (m *failFirstMockProvider) Chat(ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, opts map[string]interface{}) (*providers.LLMResponse, error) { m.currentCall++ if m.currentCall <= m.failures { return nil, m.failError @@ -611,13 +588,7 @@ func TestAgentLoop_ContextExhaustionRetry(t *testing.T) { // Call ProcessDirectWithChannel // Note: ProcessDirectWithChannel calls processMessage which will execute runLLMIteration - response, err := al.ProcessDirectWithChannel( - context.Background(), - "Trigger message", - sessionKey, - "test", - "test-chat", - ) + 
response, err := al.ProcessDirectWithChannel(context.Background(), "Trigger message", sessionKey, "test", "test-chat") if err != nil { t.Fatalf("Expected success after retry, got error: %v", err) } diff --git a/pkg/agent/mock_provider_test.go b/pkg/agent/mock_provider_test.go new file mode 100644 index 000000000..ccbecbafe --- /dev/null +++ b/pkg/agent/mock_provider_test.go @@ -0,0 +1,20 @@ +package agent + +import ( + "context" + + "github.com/sipeed/picoclaw/pkg/providers" +) + +type mockProvider struct{} + +func (m *mockProvider) Chat(ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, opts map[string]interface{}) (*providers.LLMResponse, error) { + return &providers.LLMResponse{ + Content: "Mock response", + ToolCalls: []providers.ToolCall{}, + }, nil +} + +func (m *mockProvider) GetDefaultModel() string { + return "mock-model" +} diff --git a/pkg/channels/discord.go b/pkg/channels/discord.go index 74ae44412..9ddec662c 100644 --- a/pkg/channels/discord.go +++ b/pkg/channels/discord.go @@ -4,11 +4,10 @@ import ( "context" "fmt" "os" - "strings" + "sync" "time" "github.com/bwmarrin/discordgo" - "github.com/sipeed/picoclaw/pkg/bus" "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/logger" @@ -27,6 +26,8 @@ type DiscordChannel struct { config config.DiscordConfig transcriber *voice.GroqTranscriber ctx context.Context + typingMu sync.Mutex + typingStop map[string]chan struct{} // chatID → stop signal } func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordChannel, error) { @@ -43,6 +44,7 @@ func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordC config: cfg, transcriber: nil, ctx: context.Background(), + typingStop: make(map[string]chan struct{}), }, nil } @@ -85,6 +87,14 @@ func (c *DiscordChannel) Stop(ctx context.Context) error { logger.InfoC("discord", "Stopping Discord bot") c.setRunning(false) + // Stop all typing goroutines before closing 
session + c.typingMu.Lock() + for chatID, stop := range c.typingStop { + close(stop) + delete(c.typingStop, chatID) + } + c.typingMu.Unlock() + if err := c.session.Close(); err != nil { return fmt.Errorf("failed to close discord session: %w", err) } @@ -93,6 +103,8 @@ func (c *DiscordChannel) Stop(ctx context.Context) error { } func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) error { + c.stopTyping(msg.ChatID) + if !c.IsRunning() { return fmt.Errorf("discord bot not running") } @@ -107,9 +119,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } - chunks := splitMessage( - msg.Content, 1500, - ) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks + chunks := utils.SplitMessage(msg.Content, 2000) // Split messages into chunks, Discord length limit: 2000 chars for _, chunk := range chunks { if err := c.sendChunk(ctx, channelID, chunk); err != nil { @@ -120,132 +130,6 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } -// splitMessage splits long messages into chunks, preserving code block integrity -// Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks -func splitMessage(content string, limit int) []string { - var messages []string - - for len(content) > 0 { - if len(content) <= limit { - messages = append(messages, content) - break - } - - msgEnd := limit - - // Find natural split point within the limit - msgEnd = findLastNewline(content[:limit], 200) - if msgEnd <= 0 { - msgEnd = findLastSpace(content[:limit], 100) - } - if msgEnd <= 0 { - msgEnd = limit - } - - // Check if this would end with an incomplete code block - candidate := content[:msgEnd] - unclosedIdx := findLastUnclosedCodeBlock(candidate) - - if unclosedIdx >= 0 { - // Message would end with incomplete code block - // Try to extend to include the closing ``` (with some buffer) - 
extendedLimit := limit + 500 // Allow 500 char buffer for code blocks - if len(content) > extendedLimit { - closingIdx := findNextClosingCodeBlock(content, msgEnd) - if closingIdx > 0 && closingIdx <= extendedLimit { - // Extend to include the closing ``` - msgEnd = closingIdx - } else { - // Can't find closing, split before the code block - msgEnd = findLastNewline(content[:unclosedIdx], 200) - if msgEnd <= 0 { - msgEnd = findLastSpace(content[:unclosedIdx], 100) - } - if msgEnd <= 0 { - msgEnd = unclosedIdx - } - } - } else { - // Remaining content fits within extended limit - msgEnd = len(content) - } - } - - if msgEnd <= 0 { - msgEnd = limit - } - - messages = append(messages, content[:msgEnd]) - content = strings.TrimSpace(content[msgEnd:]) - } - - return messages -} - -// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` -// Returns the position of the opening ``` or -1 if all code blocks are complete -func findLastUnclosedCodeBlock(text string) int { - count := 0 - lastOpenIdx := -1 - - for i := 0; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - if count == 0 { - lastOpenIdx = i - } - count++ - i += 2 - } - } - - // If odd number of ``` markers, last one is unclosed - if count%2 == 1 { - return lastOpenIdx - } - return -1 -} - -// findNextClosingCodeBlock finds the next closing ``` starting from a position -// Returns the position after the closing ``` or -1 if not found -func findNextClosingCodeBlock(text string, startIdx int) int { - for i := startIdx; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - return i + 3 - } - } - return -1 -} - -// findLastNewline finds the last newline character within the last N characters -// Returns the position of the newline or -1 if not found -func findLastNewline(s string, searchWindow int) int { - searchStart := len(s) - searchWindow - if searchStart < 0 { - searchStart = 0 - } 
- for i := len(s) - 1; i >= searchStart; i-- { - if s[i] == '\n' { - return i - } - } - return -1 -} - -// findLastSpace finds the last space character within the last N characters -// Returns the position of the space or -1 if not found -func findLastSpace(s string, searchWindow int) int { - searchStart := len(s) - searchWindow - if searchStart < 0 { - searchStart = 0 - } - for i := len(s) - 1; i >= searchStart; i-- { - if s[i] == ' ' || s[i] == '\t' { - return i - } - } - return -1 -} - func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error { // 使用传入的 ctx 进行超时控制 sendCtx, cancel := context.WithTimeout(ctx, sendTimeout) @@ -285,12 +169,6 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag return } - if err := c.session.ChannelTyping(m.ChannelID); err != nil { - logger.ErrorCF("discord", "Failed to send typing indicator", map[string]any{ - "error": err.Error(), - }) - } - // 检查白名单,避免为被拒绝的用户下载附件和转录 if !c.IsAllowed(m.Author.ID) { logger.DebugCF("discord", "Message rejected by allowlist", map[string]any{ @@ -373,6 +251,9 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag content = "[media only]" } + // Start typing after all early returns — guaranteed to have a matching Send() + c.startTyping(m.ChannelID) + logger.DebugCF("discord", "Received message", map[string]any{ "sender_name": senderName, "sender_id": senderID, @@ -401,6 +282,52 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag c.HandleMessage(senderID, m.ChannelID, content, mediaPaths, metadata) } +// startTyping starts a continuous typing indicator loop for the given chatID. +// It stops any existing typing loop for that chatID before starting a new one. 
+func (c *DiscordChannel) startTyping(chatID string) { + c.typingMu.Lock() + // Stop existing loop for this chatID if any + if stop, ok := c.typingStop[chatID]; ok { + close(stop) + } + stop := make(chan struct{}) + c.typingStop[chatID] = stop + c.typingMu.Unlock() + + go func() { + if err := c.session.ChannelTyping(chatID); err != nil { + logger.DebugCF("discord", "ChannelTyping error", map[string]interface{}{"chatID": chatID, "err": err}) + } + ticker := time.NewTicker(8 * time.Second) + defer ticker.Stop() + timeout := time.After(5 * time.Minute) + for { + select { + case <-stop: + return + case <-timeout: + return + case <-c.ctx.Done(): + return + case <-ticker.C: + if err := c.session.ChannelTyping(chatID); err != nil { + logger.DebugCF("discord", "ChannelTyping error", map[string]interface{}{"chatID": chatID, "err": err}) + } + } + } + }() +} + +// stopTyping stops the typing indicator loop for the given chatID. +func (c *DiscordChannel) stopTyping(chatID string) { + c.typingMu.Lock() + defer c.typingMu.Unlock() + if stop, ok := c.typingStop[chatID]; ok { + close(stop) + delete(c.typingStop, chatID) + } +} + func (c *DiscordChannel) downloadAttachment(url, filename string) string { return utils.DownloadFile(url, filename, utils.DownloadOptions{ LoggerPrefix: "discord", diff --git a/pkg/channels/onebot.go b/pkg/channels/onebot.go index 607aaed2a..53e82b44d 100644 --- a/pkg/channels/onebot.go +++ b/pkg/channels/onebot.go @@ -4,9 +4,11 @@ import ( "context" "encoding/json" "fmt" + "os" "strconv" "strings" "sync" + "sync/atomic" "time" "github.com/gorilla/websocket" @@ -14,20 +16,28 @@ import ( "github.com/sipeed/picoclaw/pkg/bus" "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/utils" + "github.com/sipeed/picoclaw/pkg/voice" ) type OneBotChannel struct { *BaseChannel - config config.OneBotConfig - conn *websocket.Conn - ctx context.Context - cancel context.CancelFunc - dedup map[string]struct{} - 
dedupRing []string - dedupIdx int - mu sync.Mutex - writeMu sync.Mutex - echoCounter int64 + config config.OneBotConfig + conn *websocket.Conn + ctx context.Context + cancel context.CancelFunc + dedup map[string]struct{} + dedupRing []string + dedupIdx int + mu sync.Mutex + writeMu sync.Mutex + echoCounter int64 + selfID int64 + pending map[string]chan json.RawMessage + pendingMu sync.Mutex + transcriber *voice.GroqTranscriber + lastMessageID sync.Map + pendingEmojiMsg sync.Map } type oneBotRawEvent struct { @@ -43,9 +53,11 @@ type oneBotRawEvent struct { SelfID json.RawMessage `json:"self_id"` Time json.RawMessage `json:"time"` MetaEventType string `json:"meta_event_type"` + NoticeType string `json:"notice_type"` Echo string `json:"echo"` RetCode json.RawMessage `json:"retcode"` - Status BotStatus `json:"status"` + Status json.RawMessage `json:"status"` + Data json.RawMessage `json:"data"` } type BotStatus struct { @@ -53,42 +65,36 @@ type BotStatus struct { Good bool `json:"good"` } +func isAPIResponse(raw json.RawMessage) bool { + if len(raw) == 0 { + return false + } + var s string + if json.Unmarshal(raw, &s) == nil { + return s == "ok" || s == "failed" + } + var bs BotStatus + if json.Unmarshal(raw, &bs) == nil { + return bs.Online || bs.Good + } + return false +} + type oneBotSender struct { UserID json.RawMessage `json:"user_id"` Nickname string `json:"nickname"` Card string `json:"card"` } -type oneBotEvent struct { - PostType string - MessageType string - SubType string - MessageID string - UserID int64 - GroupID int64 - Content string - RawContent string - IsBotMentioned bool - Sender oneBotSender - SelfID int64 - Time int64 - MetaEventType string -} - type oneBotAPIRequest struct { - Action string `json:"action"` - Params any `json:"params"` - Echo string `json:"echo,omitempty"` + Action string `json:"action"` + Params interface{} `json:"params"` + Echo string `json:"echo,omitempty"` } -type oneBotSendPrivateMsgParams struct { - UserID int64 
`json:"user_id"` - Message string `json:"message"` -} - -type oneBotSendGroupMsgParams struct { - GroupID int64 `json:"group_id"` - Message string `json:"message"` +type oneBotMessageSegment struct { + Type string `json:"type"` + Data map[string]interface{} `json:"data"` } func NewOneBotChannel(cfg config.OneBotConfig, messageBus *bus.MessageBus) (*OneBotChannel, error) { @@ -101,32 +107,53 @@ func NewOneBotChannel(cfg config.OneBotConfig, messageBus *bus.MessageBus) (*One dedup: make(map[string]struct{}, dedupSize), dedupRing: make([]string, dedupSize), dedupIdx: 0, + pending: make(map[string]chan json.RawMessage), }, nil } +func (c *OneBotChannel) SetTranscriber(transcriber *voice.GroqTranscriber) { + c.transcriber = transcriber +} + +func (c *OneBotChannel) setMsgEmojiLike(messageID string, emojiID int, set bool) { + go func() { + _, err := c.sendAPIRequest("set_msg_emoji_like", map[string]interface{}{ + "message_id": messageID, + "emoji_id": emojiID, + "set": set, + }, 5*time.Second) + if err != nil { + logger.DebugCF("onebot", "Failed to set emoji like", map[string]interface{}{ + "message_id": messageID, + "error": err.Error(), + }) + } + }() +} + func (c *OneBotChannel) Start(ctx context.Context) error { if c.config.WSUrl == "" { return fmt.Errorf("OneBot ws_url not configured") } - logger.InfoCF("onebot", "Starting OneBot channel", map[string]any{ + logger.InfoCF("onebot", "Starting OneBot channel", map[string]interface{}{ "ws_url": c.config.WSUrl, }) c.ctx, c.cancel = context.WithCancel(ctx) if err := c.connect(); err != nil { - logger.WarnCF("onebot", "Initial connection failed, will retry in background", map[string]any{ + logger.WarnCF("onebot", "Initial connection failed, will retry in background", map[string]interface{}{ "error": err.Error(), }) } else { go c.listen() + c.fetchSelfID() } if c.config.ReconnectInterval > 0 { go c.reconnectLoop() } else { - // If reconnect is disabled but initial connection failed, we cannot recover if c.conn == nil { 
return fmt.Errorf("failed to connect to OneBot and reconnect is disabled") } @@ -152,14 +179,141 @@ func (c *OneBotChannel) connect() error { return err } + conn.SetPongHandler(func(appData string) error { + _ = conn.SetReadDeadline(time.Now().Add(60 * time.Second)) + return nil + }) + _ = conn.SetReadDeadline(time.Now().Add(60 * time.Second)) + c.mu.Lock() c.conn = conn c.mu.Unlock() + go c.pinger(conn) + logger.InfoC("onebot", "WebSocket connected") return nil } +func (c *OneBotChannel) pinger(conn *websocket.Conn) { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-c.ctx.Done(): + return + case <-ticker.C: + c.writeMu.Lock() + err := conn.WriteMessage(websocket.PingMessage, nil) + c.writeMu.Unlock() + if err != nil { + logger.DebugCF("onebot", "Ping write failed, stopping pinger", map[string]interface{}{ + "error": err.Error(), + }) + return + } + } + } +} + +func (c *OneBotChannel) fetchSelfID() { + resp, err := c.sendAPIRequest("get_login_info", nil, 5*time.Second) + if err != nil { + logger.WarnCF("onebot", "Failed to get_login_info", map[string]interface{}{ + "error": err.Error(), + }) + return + } + + type loginInfo struct { + UserID json.RawMessage `json:"user_id"` + Nickname string `json:"nickname"` + } + for _, extract := range []func() (*loginInfo, error){ + func() (*loginInfo, error) { + var w struct { + Data loginInfo `json:"data"` + } + err := json.Unmarshal(resp, &w) + return &w.Data, err + }, + func() (*loginInfo, error) { + var f loginInfo + err := json.Unmarshal(resp, &f) + return &f, err + }, + } { + info, err := extract() + if err != nil || len(info.UserID) == 0 { + continue + } + if uid, err := parseJSONInt64(info.UserID); err == nil && uid > 0 { + atomic.StoreInt64(&c.selfID, uid) + logger.InfoCF("onebot", "Bot self ID retrieved", map[string]interface{}{ + "self_id": uid, + "nickname": info.Nickname, + }) + return + } + } + + logger.WarnCF("onebot", "Could not parse self ID from get_login_info 
response", map[string]interface{}{ + "response": string(resp), + }) +} + +func (c *OneBotChannel) sendAPIRequest(action string, params interface{}, timeout time.Duration) (json.RawMessage, error) { + c.mu.Lock() + conn := c.conn + c.mu.Unlock() + + if conn == nil { + return nil, fmt.Errorf("WebSocket not connected") + } + + echo := fmt.Sprintf("api_%d_%d", time.Now().UnixNano(), atomic.AddInt64(&c.echoCounter, 1)) + + ch := make(chan json.RawMessage, 1) + c.pendingMu.Lock() + c.pending[echo] = ch + c.pendingMu.Unlock() + + defer func() { + c.pendingMu.Lock() + delete(c.pending, echo) + c.pendingMu.Unlock() + }() + + req := oneBotAPIRequest{ + Action: action, + Params: params, + Echo: echo, + } + + data, err := json.Marshal(req) + if err != nil { + return nil, fmt.Errorf("failed to marshal API request: %w", err) + } + + c.writeMu.Lock() + err = conn.WriteMessage(websocket.TextMessage, data) + c.writeMu.Unlock() + + if err != nil { + return nil, fmt.Errorf("failed to write API request: %w", err) + } + + select { + case resp := <-ch: + return resp, nil + case <-time.After(timeout): + return nil, fmt.Errorf("API request %s timed out after %v", action, timeout) + case <-c.ctx.Done(): + return nil, fmt.Errorf("context cancelled") + } +} + func (c *OneBotChannel) reconnectLoop() { interval := time.Duration(c.config.ReconnectInterval) * time.Second if interval < 5*time.Second { @@ -178,11 +332,12 @@ func (c *OneBotChannel) reconnectLoop() { if conn == nil { logger.InfoC("onebot", "Attempting to reconnect...") if err := c.connect(); err != nil { - logger.ErrorCF("onebot", "Reconnect failed", map[string]any{ + logger.ErrorCF("onebot", "Reconnect failed", map[string]interface{}{ "error": err.Error(), }) } else { go c.listen() + c.fetchSelfID() } } } @@ -197,6 +352,13 @@ func (c *OneBotChannel) Stop(ctx context.Context) error { c.cancel() } + c.pendingMu.Lock() + for echo, ch := range c.pending { + close(ch) + delete(c.pending, echo) + } + c.pendingMu.Unlock() + c.mu.Lock() if 
c.conn != nil { c.conn.Close() @@ -225,10 +387,7 @@ func (c *OneBotChannel) Send(ctx context.Context, msg bus.OutboundMessage) error return err } - c.writeMu.Lock() - c.echoCounter++ - echo := fmt.Sprintf("send_%d", c.echoCounter) - c.writeMu.Unlock() + echo := fmt.Sprintf("send_%d", atomic.AddInt64(&c.echoCounter, 1)) req := oneBotAPIRequest{ Action: action, @@ -246,73 +405,84 @@ func (c *OneBotChannel) Send(ctx context.Context, msg bus.OutboundMessage) error c.writeMu.Unlock() if err != nil { - logger.ErrorCF("onebot", "Failed to send message", map[string]any{ + logger.ErrorCF("onebot", "Failed to send message", map[string]interface{}{ "error": err.Error(), }) return err } + if msgID, ok := c.pendingEmojiMsg.LoadAndDelete(msg.ChatID); ok { + if mid, ok := msgID.(string); ok && mid != "" { + c.setMsgEmojiLike(mid, 289, false) + } + } + return nil } -func (c *OneBotChannel) buildSendRequest(msg bus.OutboundMessage) (string, any, error) { +func (c *OneBotChannel) buildMessageSegments(chatID, content string) []oneBotMessageSegment { + var segments []oneBotMessageSegment + + if lastMsgID, ok := c.lastMessageID.Load(chatID); ok { + if msgID, ok := lastMsgID.(string); ok && msgID != "" { + segments = append(segments, oneBotMessageSegment{ + Type: "reply", + Data: map[string]interface{}{"id": msgID}, + }) + } + } + + segments = append(segments, oneBotMessageSegment{ + Type: "text", + Data: map[string]interface{}{"text": content}, + }) + + return segments +} + +func (c *OneBotChannel) buildSendRequest(msg bus.OutboundMessage) (string, interface{}, error) { chatID := msg.ChatID + segments := c.buildMessageSegments(chatID, msg.Content) - if len(chatID) > 6 && chatID[:6] == "group:" { - groupID, err := strconv.ParseInt(chatID[6:], 10, 64) - if err != nil { - return "", nil, fmt.Errorf("invalid group ID in chatID: %s", chatID) - } - return "send_group_msg", oneBotSendGroupMsgParams{ - GroupID: groupID, - Message: msg.Content, - }, nil + var action, idKey string + var rawID 
string + if rest, ok := strings.CutPrefix(chatID, "group:"); ok { + action, idKey, rawID = "send_group_msg", "group_id", rest + } else if rest, ok := strings.CutPrefix(chatID, "private:"); ok { + action, idKey, rawID = "send_private_msg", "user_id", rest + } else { + action, idKey, rawID = "send_private_msg", "user_id", chatID } - if len(chatID) > 8 && chatID[:8] == "private:" { - userID, err := strconv.ParseInt(chatID[8:], 10, 64) - if err != nil { - return "", nil, fmt.Errorf("invalid user ID in chatID: %s", chatID) - } - return "send_private_msg", oneBotSendPrivateMsgParams{ - UserID: userID, - Message: msg.Content, - }, nil - } - - userID, err := strconv.ParseInt(chatID, 10, 64) + id, err := strconv.ParseInt(rawID, 10, 64) if err != nil { - return "", nil, fmt.Errorf("invalid chatID for OneBot: %s", chatID) + return "", nil, fmt.Errorf("invalid %s in chatID: %s", idKey, chatID) } - - return "send_private_msg", oneBotSendPrivateMsgParams{ - UserID: userID, - Message: msg.Content, - }, nil + return action, map[string]interface{}{idKey: id, "message": segments}, nil } func (c *OneBotChannel) listen() { + c.mu.Lock() + conn := c.conn + c.mu.Unlock() + + if conn == nil { + logger.WarnC("onebot", "WebSocket connection is nil, listener exiting") + return + } + for { select { case <-c.ctx.Done(): return default: - c.mu.Lock() - conn := c.conn - c.mu.Unlock() - - if conn == nil { - logger.WarnC("onebot", "WebSocket connection is nil, listener exiting") - return - } - _, message, err := conn.ReadMessage() if err != nil { - logger.ErrorCF("onebot", "WebSocket read error", map[string]any{ + logger.ErrorCF("onebot", "WebSocket read error", map[string]interface{}{ "error": err.Error(), }) c.mu.Lock() - if c.conn != nil { + if c.conn == conn { c.conn.Close() c.conn = nil } @@ -320,34 +490,48 @@ func (c *OneBotChannel) listen() { return } - logger.DebugCF("onebot", "Raw WebSocket message received", map[string]any{ - "length": len(message), - "payload": string(message), - }) + 
_ = conn.SetReadDeadline(time.Now().Add(60 * time.Second)) var raw oneBotRawEvent if err := json.Unmarshal(message, &raw); err != nil { - logger.WarnCF("onebot", "Failed to unmarshal raw event", map[string]any{ + logger.WarnCF("onebot", "Failed to unmarshal raw event", map[string]interface{}{ "error": err.Error(), "payload": string(message), }) continue } - if raw.Echo != "" || raw.Status.Online || raw.Status.Good { - logger.DebugCF("onebot", "Received API response, skipping", map[string]any{ - "echo": raw.Echo, - "status": raw.Status, - }) + logger.DebugCF("onebot", "WebSocket event", map[string]interface{}{ + "length": len(message), + "post_type": raw.PostType, + "sub_type": raw.SubType, + }) + + if raw.Echo != "" { + c.pendingMu.Lock() + ch, ok := c.pending[raw.Echo] + c.pendingMu.Unlock() + + if ok { + select { + case ch <- message: + default: + } + } else { + logger.DebugCF("onebot", "Received API response (no waiter)", map[string]interface{}{ + "echo": raw.Echo, + "status": string(raw.Status), + }) + } continue } - logger.DebugCF("onebot", "Parsed raw event", map[string]any{ - "post_type": raw.PostType, - "message_type": raw.MessageType, - "sub_type": raw.SubType, - "meta_event_type": raw.MetaEventType, - }) + if isAPIResponse(raw.Status) { + logger.DebugCF("onebot", "Received API response without echo, skipping", map[string]interface{}{ + "status": string(raw.Status), + }) + continue + } c.handleRawEvent(&raw) } @@ -386,9 +570,12 @@ func parseJSONString(raw json.RawMessage) string { type parseMessageResult struct { Text string IsBotMentioned bool + Media []string + LocalFiles []string + ReplyTo string } -func parseMessageContentEx(raw json.RawMessage, selfID int64) parseMessageResult { +func (c *OneBotChannel) parseMessageSegments(raw json.RawMessage, selfID int64) parseMessageResult { if len(raw) == 0 { return parseMessageResult{} } @@ -407,80 +594,208 @@ func parseMessageContentEx(raw json.RawMessage, selfID int64) parseMessageResult return 
parseMessageResult{Text: s, IsBotMentioned: mentioned} } - var segments []map[string]any - if err := json.Unmarshal(raw, &segments); err == nil { - var text string - mentioned := false - selfIDStr := strconv.FormatInt(selfID, 10) - for _, seg := range segments { - segType, _ := seg["type"].(string) - data, _ := seg["data"].(map[string]any) - switch segType { - case "text": - if data != nil { - if t, ok := data["text"].(string); ok { - text += t - } + var segments []map[string]interface{} + if err := json.Unmarshal(raw, &segments); err != nil { + return parseMessageResult{} + } + + var textParts []string + mentioned := false + selfIDStr := strconv.FormatInt(selfID, 10) + var media []string + var localFiles []string + var replyTo string + + for _, seg := range segments { + segType, _ := seg["type"].(string) + data, _ := seg["data"].(map[string]interface{}) + + switch segType { + case "text": + if data != nil { + if t, ok := data["text"].(string); ok { + textParts = append(textParts, t) } - case "at": - if data != nil && selfID > 0 { - qqVal := fmt.Sprintf("%v", data["qq"]) - if qqVal == selfIDStr || qqVal == "all" { - mentioned = true + } + + case "at": + if data != nil && selfID > 0 { + qqVal := fmt.Sprintf("%v", data["qq"]) + if qqVal == selfIDStr || qqVal == "all" { + mentioned = true + } + } + + case "image", "video", "file": + if data != nil { + url, _ := data["url"].(string) + if url != "" { + defaults := map[string]string{"image": "image.jpg", "video": "video.mp4", "file": "file"} + filename := defaults[segType] + if f, ok := data["file"].(string); ok && f != "" { + filename = f + } else if n, ok := data["name"].(string); ok && n != "" { + filename = n + } + localPath := utils.DownloadFile(url, filename, utils.DownloadOptions{ + LoggerPrefix: "onebot", + }) + if localPath != "" { + media = append(media, localPath) + localFiles = append(localFiles, localPath) + textParts = append(textParts, fmt.Sprintf("[%s]", segType)) } } } + + case "record": + if data != nil 
{ + url, _ := data["url"].(string) + if url != "" { + localPath := utils.DownloadFile(url, "voice.amr", utils.DownloadOptions{ + LoggerPrefix: "onebot", + }) + if localPath != "" { + localFiles = append(localFiles, localPath) + if c.transcriber != nil && c.transcriber.IsAvailable() { + tctx, tcancel := context.WithTimeout(c.ctx, 30*time.Second) + result, err := c.transcriber.Transcribe(tctx, localPath) + tcancel() + if err != nil { + logger.WarnCF("onebot", "Voice transcription failed", map[string]interface{}{ + "error": err.Error(), + }) + textParts = append(textParts, "[voice (transcription failed)]") + media = append(media, localPath) + } else { + textParts = append(textParts, fmt.Sprintf("[voice transcription: %s]", result.Text)) + } + } else { + textParts = append(textParts, "[voice]") + media = append(media, localPath) + } + } + } + } + + case "reply": + if data != nil { + if id, ok := data["id"]; ok { + replyTo = fmt.Sprintf("%v", id) + } + } + + case "face": + if data != nil { + faceID, _ := data["id"] + textParts = append(textParts, fmt.Sprintf("[face:%v]", faceID)) + } + + case "forward": + textParts = append(textParts, "[forward message]") + + default: + } - return parseMessageResult{Text: strings.TrimSpace(text), IsBotMentioned: mentioned} } - return parseMessageResult{} + + return parseMessageResult{ + Text: strings.TrimSpace(strings.Join(textParts, "")), + IsBotMentioned: mentioned, + Media: media, + LocalFiles: localFiles, + ReplyTo: replyTo, + } } func (c *OneBotChannel) handleRawEvent(raw *oneBotRawEvent) { switch raw.PostType { case "message": - evt, err := c.normalizeMessageEvent(raw) - if err != nil { - logger.WarnCF("onebot", "Failed to normalize message event", map[string]any{ - "error": err.Error(), - }) - return + if userID, err := parseJSONInt64(raw.UserID); err == nil && userID > 0 { + if !c.IsAllowed(strconv.FormatInt(userID, 10)) { + logger.DebugCF("onebot", "Message rejected by allowlist", map[string]interface{}{ + "user_id": userID, + 
}) + return + } } - c.handleMessage(evt) + c.handleMessage(raw) + + case "message_sent": + logger.DebugCF("onebot", "Bot sent message event", map[string]interface{}{ + "message_type": raw.MessageType, + "message_id": parseJSONString(raw.MessageID), + }) + case "meta_event": c.handleMetaEvent(raw) + case "notice": - logger.DebugCF("onebot", "Notice event received", map[string]any{ - "sub_type": raw.SubType, - }) + c.handleNoticeEvent(raw) + case "request": - logger.DebugCF("onebot", "Request event received", map[string]any{ + logger.DebugCF("onebot", "Request event received", map[string]interface{}{ "sub_type": raw.SubType, }) + case "": - logger.DebugCF("onebot", "Event with empty post_type (possibly API response)", map[string]any{ + logger.DebugCF("onebot", "Event with empty post_type (possibly API response)", map[string]interface{}{ "echo": raw.Echo, "status": raw.Status, }) + default: - logger.DebugCF("onebot", "Unknown post_type", map[string]any{ + logger.DebugCF("onebot", "Unknown post_type", map[string]interface{}{ "post_type": raw.PostType, }) } } -func (c *OneBotChannel) normalizeMessageEvent(raw *oneBotRawEvent) (*oneBotEvent, error) { +func (c *OneBotChannel) handleMetaEvent(raw *oneBotRawEvent) { + if raw.MetaEventType == "lifecycle" { + logger.InfoCF("onebot", "Lifecycle event", map[string]interface{}{"sub_type": raw.SubType}) + } else if raw.MetaEventType != "heartbeat" { + logger.DebugCF("onebot", "Meta event: "+raw.MetaEventType, nil) + } +} + +func (c *OneBotChannel) handleNoticeEvent(raw *oneBotRawEvent) { + fields := map[string]interface{}{ + "notice_type": raw.NoticeType, + "sub_type": raw.SubType, + "group_id": parseJSONString(raw.GroupID), + "user_id": parseJSONString(raw.UserID), + "message_id": parseJSONString(raw.MessageID), + } + switch raw.NoticeType { + case "group_recall", "group_increase", "group_decrease", + "friend_add", "group_admin", "group_ban": + logger.InfoCF("onebot", "Notice: "+raw.NoticeType, fields) + default: + 
logger.DebugCF("onebot", "Notice: "+raw.NoticeType, fields) + } +} + +func (c *OneBotChannel) handleMessage(raw *oneBotRawEvent) { + // Parse fields from raw event userID, err := parseJSONInt64(raw.UserID) if err != nil { - return nil, fmt.Errorf("parse user_id: %w (raw: %s)", err, string(raw.UserID)) + logger.WarnCF("onebot", "Failed to parse user_id", map[string]interface{}{ + "error": err.Error(), + "raw": string(raw.UserID), + }) + return } groupID, _ := parseJSONInt64(raw.GroupID) selfID, _ := parseJSONInt64(raw.SelfID) - ts, _ := parseJSONInt64(raw.Time) messageID := parseJSONString(raw.MessageID) - parsed := parseMessageContentEx(raw.Message, selfID) + if selfID == 0 { + selfID = atomic.LoadInt64(&c.selfID) + } + + parsed := c.parseMessageSegments(raw.Message, selfID) isBotMentioned := parsed.IsBotMentioned content := raw.RawMessage @@ -495,147 +810,121 @@ func (c *OneBotChannel) normalizeMessageEvent(raw *oneBotRawEvent) (*oneBotEvent } } + if parsed.Text != "" && content != parsed.Text && (len(parsed.Media) > 0 || parsed.ReplyTo != "") { + content = parsed.Text + } + var sender oneBotSender if len(raw.Sender) > 0 { if err := json.Unmarshal(raw.Sender, &sender); err != nil { - logger.WarnCF("onebot", "Failed to parse sender", map[string]any{ + logger.WarnCF("onebot", "Failed to parse sender", map[string]interface{}{ "error": err.Error(), "sender": string(raw.Sender), }) } } - logger.DebugCF("onebot", "Normalized message event", map[string]any{ - "message_type": raw.MessageType, - "user_id": userID, - "group_id": groupID, - "message_id": messageID, - "content_len": len(content), - "nickname": sender.Nickname, - }) - - return &oneBotEvent{ - PostType: raw.PostType, - MessageType: raw.MessageType, - SubType: raw.SubType, - MessageID: messageID, - UserID: userID, - GroupID: groupID, - Content: content, - RawContent: raw.RawMessage, - IsBotMentioned: isBotMentioned, - Sender: sender, - SelfID: selfID, - Time: ts, - MetaEventType: raw.MetaEventType, - }, nil -} - 
-func (c *OneBotChannel) handleMetaEvent(raw *oneBotRawEvent) { - switch raw.MetaEventType { - case "lifecycle": - logger.InfoCF("onebot", "Lifecycle event", map[string]any{ - "sub_type": raw.SubType, - }) - case "heartbeat": - logger.DebugC("onebot", "Heartbeat received") - default: - logger.DebugCF("onebot", "Unknown meta_event_type", map[string]any{ - "meta_event_type": raw.MetaEventType, - }) + // Clean up temp files when done + if len(parsed.LocalFiles) > 0 { + defer func() { + for _, f := range parsed.LocalFiles { + if err := os.Remove(f); err != nil { + logger.DebugCF("onebot", "Failed to remove temp file", map[string]interface{}{ + "path": f, + "error": err.Error(), + }) + } + } + }() } -} -func (c *OneBotChannel) handleMessage(evt *oneBotEvent) { - if c.isDuplicate(evt.MessageID) { - logger.DebugCF("onebot", "Duplicate message, skipping", map[string]any{ - "message_id": evt.MessageID, + if c.isDuplicate(messageID) { + logger.DebugCF("onebot", "Duplicate message, skipping", map[string]interface{}{ + "message_id": messageID, }) return } - content := evt.Content if content == "" { - logger.DebugCF("onebot", "Received empty message, ignoring", map[string]any{ - "message_id": evt.MessageID, + logger.DebugCF("onebot", "Received empty message, ignoring", map[string]interface{}{ + "message_id": messageID, }) return } - senderID := strconv.FormatInt(evt.UserID, 10) + senderID := strconv.FormatInt(userID, 10) var chatID string metadata := map[string]string{ - "message_id": evt.MessageID, + "message_id": messageID, } - switch evt.MessageType { + if parsed.ReplyTo != "" { + metadata["reply_to_message_id"] = parsed.ReplyTo + } + + switch raw.MessageType { case "private": chatID = "private:" + senderID - logger.InfoCF("onebot", "Received private message", map[string]any{ - "sender": senderID, - "message_id": evt.MessageID, - "length": len(content), - "content": truncate(content, 100), - }) case "group": - groupIDStr := strconv.FormatInt(evt.GroupID, 10) + groupIDStr := 
strconv.FormatInt(groupID, 10) chatID = "group:" + groupIDStr metadata["group_id"] = groupIDStr - senderUserID, _ := parseJSONInt64(evt.Sender.UserID) + senderUserID, _ := parseJSONInt64(sender.UserID) if senderUserID > 0 { metadata["sender_user_id"] = strconv.FormatInt(senderUserID, 10) } - if evt.Sender.Card != "" { - metadata["sender_name"] = evt.Sender.Card - } else if evt.Sender.Nickname != "" { - metadata["sender_name"] = evt.Sender.Nickname + if sender.Card != "" { + metadata["sender_name"] = sender.Card + } else if sender.Nickname != "" { + metadata["sender_name"] = sender.Nickname } - triggered, strippedContent := c.checkGroupTrigger(content, evt.IsBotMentioned) + triggered, strippedContent := c.checkGroupTrigger(content, isBotMentioned) if !triggered { - logger.DebugCF("onebot", "Group message ignored (no trigger)", map[string]any{ + logger.DebugCF("onebot", "Group message ignored (no trigger)", map[string]interface{}{ "sender": senderID, "group": groupIDStr, - "is_mentioned": evt.IsBotMentioned, + "is_mentioned": isBotMentioned, "content": truncate(content, 100), }) return } content = strippedContent - logger.InfoCF("onebot", "Received group message", map[string]any{ - "sender": senderID, - "group": groupIDStr, - "message_id": evt.MessageID, - "is_mentioned": evt.IsBotMentioned, - "length": len(content), - "content": truncate(content, 100), - }) - default: - logger.WarnCF("onebot", "Unknown message type, cannot route", map[string]any{ - "type": evt.MessageType, - "message_id": evt.MessageID, - "user_id": evt.UserID, + logger.WarnCF("onebot", "Unknown message type, cannot route", map[string]interface{}{ + "type": raw.MessageType, + "message_id": messageID, + "user_id": userID, }) return } - if evt.Sender.Nickname != "" { - metadata["nickname"] = evt.Sender.Nickname - } - - logger.DebugCF("onebot", "Forwarding message to bus", map[string]any{ - "sender_id": senderID, - "chat_id": chatID, - "content": truncate(content, 100), + logger.InfoCF("onebot", 
"Received "+raw.MessageType+" message", map[string]interface{}{ + "sender": senderID, + "chat_id": chatID, + "message_id": messageID, + "length": len(content), + "content": truncate(content, 100), + "media_count": len(parsed.Media), }) - c.HandleMessage(senderID, chatID, content, []string{}, metadata) + if sender.Nickname != "" { + metadata["nickname"] = sender.Nickname + } + + c.lastMessageID.Store(chatID, messageID) + + if raw.MessageType == "group" && messageID != "" && messageID != "0" { + c.setMsgEmojiLike(messageID, 289, true) + c.pendingEmojiMsg.Store(chatID, messageID) + } + + c.HandleMessage(senderID, chatID, content, parsed.Media, metadata) } func (c *OneBotChannel) isDuplicate(messageID string) bool { @@ -668,10 +957,7 @@ func truncate(s string, n int) string { return string(runes[:n]) + "..." } -func (c *OneBotChannel) checkGroupTrigger( - content string, - isBotMentioned bool, -) (triggered bool, strippedContent string) { +func (c *OneBotChannel) checkGroupTrigger(content string, isBotMentioned bool) (triggered bool, strippedContent string) { if isBotMentioned { return true, strings.TrimSpace(content) } diff --git a/pkg/config/config.go b/pkg/config/config.go index 306fc1f34..3bdb6f030 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -23,7 +23,7 @@ func (f *FlexibleStringSlice) UnmarshalJSON(data []byte) error { } // Try []interface{} to handle mixed types - var raw []any + var raw []interface{} if err := json.Unmarshal(data, &raw); err != nil { return err } @@ -139,16 +139,16 @@ type SessionConfig struct { } type AgentDefaults struct { - Workspace string `json:"workspace" env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"` - RestrictToWorkspace bool `json:"restrict_to_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"` - Provider string `json:"provider" env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"` - Model string `json:"model" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` + Workspace string `json:"workspace" 
env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"` + RestrictToWorkspace bool `json:"restrict_to_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"` + Provider string `json:"provider" env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"` + Model string `json:"model" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` ModelFallbacks []string `json:"model_fallbacks,omitempty"` - ImageModel string `json:"image_model,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"` + ImageModel string `json:"image_model,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"` ImageModelFallbacks []string `json:"image_model_fallbacks,omitempty"` - MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"` - Temperature float64 `json:"temperature" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"` - MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"` + MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"` + Temperature *float64 `json:"temperature,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"` + MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"` } type ChannelsConfig struct { @@ -165,87 +165,87 @@ type ChannelsConfig struct { } type WhatsAppConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_WHATSAPP_ENABLED"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_WHATSAPP_ENABLED"` BridgeURL string `json:"bridge_url" env:"PICOCLAW_CHANNELS_WHATSAPP_BRIDGE_URL"` AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_WHATSAPP_ALLOW_FROM"` } type TelegramConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_TELEGRAM_ENABLED"` - Token string `json:"token" env:"PICOCLAW_CHANNELS_TELEGRAM_TOKEN"` - Proxy string `json:"proxy" env:"PICOCLAW_CHANNELS_TELEGRAM_PROXY"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_TELEGRAM_ENABLED"` + Token string `json:"token" env:"PICOCLAW_CHANNELS_TELEGRAM_TOKEN"` + Proxy string 
`json:"proxy" env:"PICOCLAW_CHANNELS_TELEGRAM_PROXY"` AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_TELEGRAM_ALLOW_FROM"` } type FeishuConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_FEISHU_ENABLED"` - AppID string `json:"app_id" env:"PICOCLAW_CHANNELS_FEISHU_APP_ID"` - AppSecret string `json:"app_secret" env:"PICOCLAW_CHANNELS_FEISHU_APP_SECRET"` - EncryptKey string `json:"encrypt_key" env:"PICOCLAW_CHANNELS_FEISHU_ENCRYPT_KEY"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_FEISHU_ENABLED"` + AppID string `json:"app_id" env:"PICOCLAW_CHANNELS_FEISHU_APP_ID"` + AppSecret string `json:"app_secret" env:"PICOCLAW_CHANNELS_FEISHU_APP_SECRET"` + EncryptKey string `json:"encrypt_key" env:"PICOCLAW_CHANNELS_FEISHU_ENCRYPT_KEY"` VerificationToken string `json:"verification_token" env:"PICOCLAW_CHANNELS_FEISHU_VERIFICATION_TOKEN"` - AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_FEISHU_ALLOW_FROM"` + AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_FEISHU_ALLOW_FROM"` } type DiscordConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_DISCORD_ENABLED"` - Token string `json:"token" env:"PICOCLAW_CHANNELS_DISCORD_TOKEN"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_DISCORD_ENABLED"` + Token string `json:"token" env:"PICOCLAW_CHANNELS_DISCORD_TOKEN"` AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_DISCORD_ALLOW_FROM"` } type MaixCamConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_MAIXCAM_ENABLED"` - Host string `json:"host" env:"PICOCLAW_CHANNELS_MAIXCAM_HOST"` - Port int `json:"port" env:"PICOCLAW_CHANNELS_MAIXCAM_PORT"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_MAIXCAM_ENABLED"` + Host string `json:"host" env:"PICOCLAW_CHANNELS_MAIXCAM_HOST"` + Port int `json:"port" env:"PICOCLAW_CHANNELS_MAIXCAM_PORT"` AllowFrom FlexibleStringSlice `json:"allow_from" 
env:"PICOCLAW_CHANNELS_MAIXCAM_ALLOW_FROM"` } type QQConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_QQ_ENABLED"` - AppID string `json:"app_id" env:"PICOCLAW_CHANNELS_QQ_APP_ID"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_QQ_ENABLED"` + AppID string `json:"app_id" env:"PICOCLAW_CHANNELS_QQ_APP_ID"` AppSecret string `json:"app_secret" env:"PICOCLAW_CHANNELS_QQ_APP_SECRET"` AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_QQ_ALLOW_FROM"` } type DingTalkConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_DINGTALK_ENABLED"` - ClientID string `json:"client_id" env:"PICOCLAW_CHANNELS_DINGTALK_CLIENT_ID"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_DINGTALK_ENABLED"` + ClientID string `json:"client_id" env:"PICOCLAW_CHANNELS_DINGTALK_CLIENT_ID"` ClientSecret string `json:"client_secret" env:"PICOCLAW_CHANNELS_DINGTALK_CLIENT_SECRET"` - AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_DINGTALK_ALLOW_FROM"` + AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_DINGTALK_ALLOW_FROM"` } type SlackConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_SLACK_ENABLED"` - BotToken string `json:"bot_token" env:"PICOCLAW_CHANNELS_SLACK_BOT_TOKEN"` - AppToken string `json:"app_token" env:"PICOCLAW_CHANNELS_SLACK_APP_TOKEN"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_SLACK_ENABLED"` + BotToken string `json:"bot_token" env:"PICOCLAW_CHANNELS_SLACK_BOT_TOKEN"` + AppToken string `json:"app_token" env:"PICOCLAW_CHANNELS_SLACK_APP_TOKEN"` AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_SLACK_ALLOW_FROM"` } type LINEConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_LINE_ENABLED"` - ChannelSecret string `json:"channel_secret" env:"PICOCLAW_CHANNELS_LINE_CHANNEL_SECRET"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_LINE_ENABLED"` + ChannelSecret string `json:"channel_secret" 
env:"PICOCLAW_CHANNELS_LINE_CHANNEL_SECRET"` ChannelAccessToken string `json:"channel_access_token" env:"PICOCLAW_CHANNELS_LINE_CHANNEL_ACCESS_TOKEN"` - WebhookHost string `json:"webhook_host" env:"PICOCLAW_CHANNELS_LINE_WEBHOOK_HOST"` - WebhookPort int `json:"webhook_port" env:"PICOCLAW_CHANNELS_LINE_WEBHOOK_PORT"` - WebhookPath string `json:"webhook_path" env:"PICOCLAW_CHANNELS_LINE_WEBHOOK_PATH"` - AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_LINE_ALLOW_FROM"` + WebhookHost string `json:"webhook_host" env:"PICOCLAW_CHANNELS_LINE_WEBHOOK_HOST"` + WebhookPort int `json:"webhook_port" env:"PICOCLAW_CHANNELS_LINE_WEBHOOK_PORT"` + WebhookPath string `json:"webhook_path" env:"PICOCLAW_CHANNELS_LINE_WEBHOOK_PATH"` + AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_LINE_ALLOW_FROM"` } type OneBotConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_ONEBOT_ENABLED"` - WSUrl string `json:"ws_url" env:"PICOCLAW_CHANNELS_ONEBOT_WS_URL"` - AccessToken string `json:"access_token" env:"PICOCLAW_CHANNELS_ONEBOT_ACCESS_TOKEN"` - ReconnectInterval int `json:"reconnect_interval" env:"PICOCLAW_CHANNELS_ONEBOT_RECONNECT_INTERVAL"` + Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_ONEBOT_ENABLED"` + WSUrl string `json:"ws_url" env:"PICOCLAW_CHANNELS_ONEBOT_WS_URL"` + AccessToken string `json:"access_token" env:"PICOCLAW_CHANNELS_ONEBOT_ACCESS_TOKEN"` + ReconnectInterval int `json:"reconnect_interval" env:"PICOCLAW_CHANNELS_ONEBOT_RECONNECT_INTERVAL"` GroupTriggerPrefix []string `json:"group_trigger_prefix" env:"PICOCLAW_CHANNELS_ONEBOT_GROUP_TRIGGER_PREFIX"` - AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_ONEBOT_ALLOW_FROM"` + AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_ONEBOT_ALLOW_FROM"` } type HeartbeatConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_HEARTBEAT_ENABLED"` + Enabled bool `json:"enabled" env:"PICOCLAW_HEARTBEAT_ENABLED"` Interval int 
`json:"interval" env:"PICOCLAW_HEARTBEAT_INTERVAL"` // minutes, min 5 } type DevicesConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_DEVICES_ENABLED"` + Enabled bool `json:"enabled" env:"PICOCLAW_DEVICES_ENABLED"` MonitorUSB bool `json:"monitor_usb" env:"PICOCLAW_DEVICES_MONITOR_USB"` } @@ -266,11 +266,11 @@ type ProvidersConfig struct { } type ProviderConfig struct { - APIKey string `json:"api_key" env:"PICOCLAW_PROVIDERS_{{.Name}}_API_KEY"` - APIBase string `json:"api_base" env:"PICOCLAW_PROVIDERS_{{.Name}}_API_BASE"` - Proxy string `json:"proxy,omitempty" env:"PICOCLAW_PROVIDERS_{{.Name}}_PROXY"` - AuthMethod string `json:"auth_method,omitempty" env:"PICOCLAW_PROVIDERS_{{.Name}}_AUTH_METHOD"` - ConnectMode string `json:"connect_mode,omitempty" env:"PICOCLAW_PROVIDERS_{{.Name}}_CONNECT_MODE"` // only for Github Copilot, `stdio` or `grpc` + APIKey string `json:"api_key" env:"PICOCLAW_PROVIDERS_{{.Name}}_API_KEY"` + APIBase string `json:"api_base" env:"PICOCLAW_PROVIDERS_{{.Name}}_API_BASE"` + Proxy string `json:"proxy,omitempty" env:"PICOCLAW_PROVIDERS_{{.Name}}_PROXY"` + AuthMethod string `json:"auth_method,omitempty" env:"PICOCLAW_PROVIDERS_{{.Name}}_AUTH_METHOD"` + ConnectMode string `json:"connect_mode,omitempty" env:"PICOCLAW_PROVIDERS_{{.Name}}_CONNECT_MODE"` //only for Github Copilot, `stdio` or `grpc` } type OpenAIProviderConfig struct { @@ -284,19 +284,19 @@ type GatewayConfig struct { } type BraveConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_BRAVE_ENABLED"` - APIKey string `json:"api_key" env:"PICOCLAW_TOOLS_WEB_BRAVE_API_KEY"` + Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_BRAVE_ENABLED"` + APIKey string `json:"api_key" env:"PICOCLAW_TOOLS_WEB_BRAVE_API_KEY"` MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_BRAVE_MAX_RESULTS"` } type DuckDuckGoConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_DUCKDUCKGO_ENABLED"` + Enabled bool `json:"enabled" 
env:"PICOCLAW_TOOLS_WEB_DUCKDUCKGO_ENABLED"` MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_DUCKDUCKGO_MAX_RESULTS"` } type PerplexityConfig struct { - Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_ENABLED"` - APIKey string `json:"api_key" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_API_KEY"` + Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_ENABLED"` + APIKey string `json:"api_key" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_API_KEY"` MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_MAX_RESULTS"` } @@ -330,7 +330,6 @@ func DefaultConfig() *Config { Provider: "", Model: "glm-4.7", MaxTokens: 8192, - Temperature: 0.7, MaxToolIterations: 20, }, }, @@ -483,11 +482,11 @@ func SaveConfig(path string, cfg *Config) error { } dir := filepath.Dir(path) - if err := os.MkdirAll(dir, 0o755); err != nil { + if err := os.MkdirAll(dir, 0755); err != nil { return err } - return os.WriteFile(path, data, 0o600) + return os.WriteFile(path, data, 0600) } func (c *Config) WorkspacePath() string { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 8da0d214f..0898217d6 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -237,8 +237,8 @@ func TestDefaultConfig_MaxToolIterations(t *testing.T) { func TestDefaultConfig_Temperature(t *testing.T) { cfg := DefaultConfig() - if cfg.Agents.Defaults.Temperature == 0 { - t.Error("Temperature should not be zero") + if cfg.Agents.Defaults.Temperature != nil { + t.Error("Temperature should be nil when not provided") } } @@ -334,8 +334,8 @@ func TestConfig_Complete(t *testing.T) { if cfg.Agents.Defaults.Model == "" { t.Error("Model should not be empty") } - if cfg.Agents.Defaults.Temperature == 0 { - t.Error("Temperature should have default value") + if cfg.Agents.Defaults.Temperature != nil { + t.Error("Temperature should be nil when not provided") } if cfg.Agents.Defaults.MaxTokens == 0 { t.Error("MaxTokens should not be zero") diff --git 
a/pkg/constants/channels.go b/pkg/constants/channels.go index 3e3df3839..0a46e6cd9 100644 --- a/pkg/constants/channels.go +++ b/pkg/constants/channels.go @@ -1,15 +1,16 @@ // Package constants provides shared constants across the codebase. package constants -// InternalChannels defines channels that are used for internal communication +// internalChannels defines channels that are used for internal communication // and should not be exposed to external users or recorded as last active channel. -var InternalChannels = map[string]bool{ - "cli": true, - "system": true, - "subagent": true, +var internalChannels = map[string]struct{}{ + "cli": {}, + "system": {}, + "subagent": {}, } // IsInternalChannel returns true if the channel is an internal channel. func IsInternalChannel(channel string) bool { - return InternalChannels[channel] + _, found := internalChannels[channel] + return found } diff --git a/pkg/migrate/config.go b/pkg/migrate/config.go index c7b1acb58..9b257bd0f 100644 --- a/pkg/migrate/config.go +++ b/pkg/migrate/config.go @@ -76,7 +76,7 @@ func ConvertConfig(data map[string]any) (*config.Config, []string, error) { cfg.Agents.Defaults.MaxTokens = int(v) } if v, ok := getFloat(defaults, "temperature"); ok { - cfg.Agents.Defaults.Temperature = v + cfg.Agents.Defaults.Temperature = &v } if v, ok := getFloat(defaults, "max_tool_iterations"); ok { cfg.Agents.Defaults.MaxToolIterations = int(v) diff --git a/pkg/migrate/migrate_test.go b/pkg/migrate/migrate_test.go index a7c4b5337..247561db5 100644 --- a/pkg/migrate/migrate_test.go +++ b/pkg/migrate/migrate_test.go @@ -275,8 +275,11 @@ func TestConvertConfig(t *testing.T) { if cfg.Agents.Defaults.MaxTokens != 4096 { t.Errorf("MaxTokens = %d, want %d", cfg.Agents.Defaults.MaxTokens, 4096) } - if cfg.Agents.Defaults.Temperature != 0.5 { - t.Errorf("Temperature = %f, want %f", cfg.Agents.Defaults.Temperature, 0.5) + if cfg.Agents.Defaults.Temperature == nil { + t.Fatalf("Temperature is nil, want %f", 0.5) + } + if 
*cfg.Agents.Defaults.Temperature != 0.5 { + t.Errorf("Temperature = %f, want %f", *cfg.Agents.Defaults.Temperature, 0.5) } if cfg.Agents.Defaults.Workspace != "~/.picoclaw/workspace" { t.Errorf("Workspace = %q, want %q", cfg.Agents.Defaults.Workspace, "~/.picoclaw/workspace") diff --git a/pkg/providers/openai_compat/provider.go b/pkg/providers/openai_compat/provider.go index 3a7fe4f39..9cfec44fe 100644 --- a/pkg/providers/openai_compat/provider.go +++ b/pkg/providers/openai_compat/provider.go @@ -79,7 +79,7 @@ func (p *Provider) Chat( if maxTokens, ok := asInt(options["max_tokens"]); ok { lowerModel := strings.ToLower(model) - if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") { + if strings.Contains(lowerModel, "glm") || strings.Contains(lowerModel, "o1") || strings.Contains(lowerModel, "gpt-5") { requestBody["max_completion_tokens"] = maxTokens } else { requestBody["max_tokens"] = maxTokens diff --git a/pkg/skills/installer.go b/pkg/skills/installer.go index 5742a8f03..3210509df 100644 --- a/pkg/skills/installer.go +++ b/pkg/skills/installer.go @@ -8,7 +8,6 @@ import ( "net/http" "os" "path/filepath" - "strings" "time" ) @@ -24,12 +23,6 @@ type AvailableSkill struct { Tags []string `json:"tags"` } -type BuiltinSkill struct { - Name string `json:"name"` - Path string `json:"path"` - Enabled bool `json:"enabled"` -} - func NewSkillInstaller(workspace string) *SkillInstaller { return &SkillInstaller{ workspace: workspace, @@ -123,49 +116,3 @@ func (si *SkillInstaller) ListAvailableSkills(ctx context.Context) ([]AvailableS return skills, nil } - -func (si *SkillInstaller) ListBuiltinSkills() []BuiltinSkill { - builtinSkillsDir := filepath.Join(filepath.Dir(si.workspace), "picoclaw", "skills") - - entries, err := os.ReadDir(builtinSkillsDir) - if err != nil { - return nil - } - - var skills []BuiltinSkill - for _, entry := range entries { - if entry.IsDir() { - _ = entry - skillName := entry.Name() - skillFile := 
filepath.Join(builtinSkillsDir, skillName, "SKILL.md") - - data, err := os.ReadFile(skillFile) - description := "" - if err == nil { - content := string(data) - if idx := strings.Index(content, "\n"); idx > 0 { - firstLine := content[:idx] - if strings.Contains(firstLine, "description:") { - descLine := strings.Index(content[idx:], "\n") - if descLine > 0 { - description = strings.TrimSpace(content[idx+descLine : idx+descLine]) - } - } - } - } - - // skill := BuiltinSkill{ - // Name: skillName, - // Path: description, - // Enabled: true, - // } - - status := "✓" - fmt.Printf(" %s %s\n", status, entry.Name()) - if description != "" { - fmt.Printf(" %s\n", description) - } - } - } - return skills -} diff --git a/pkg/tools/subagent.go b/pkg/tools/subagent.go index 222137c89..91ebff636 100644 --- a/pkg/tools/subagent.go +++ b/pkg/tools/subagent.go @@ -23,15 +23,19 @@ type SubagentTask struct { } type SubagentManager struct { - tasks map[string]*SubagentTask - mu sync.RWMutex - provider providers.LLMProvider - defaultModel string - bus *bus.MessageBus - workspace string - tools *ToolRegistry - maxIterations int - nextID int + tasks map[string]*SubagentTask + mu sync.RWMutex + provider providers.LLMProvider + defaultModel string + bus *bus.MessageBus + workspace string + tools *ToolRegistry + maxIterations int + maxTokens int + temperature float64 + hasMaxTokens bool + hasTemperature bool + nextID int } func NewSubagentManager( @@ -51,6 +55,16 @@ func NewSubagentManager( } } +// SetLLMOptions sets max tokens and temperature for subagent LLM calls. +func (sm *SubagentManager) SetLLMOptions(maxTokens int, temperature float64) { + sm.mu.Lock() + defer sm.mu.Unlock() + sm.maxTokens = maxTokens + sm.hasMaxTokens = true + sm.temperature = temperature + sm.hasTemperature = true +} + // SetTools sets the tool registry for subagent execution. // If not set, subagent will have access to the provided tools. 
func (sm *SubagentManager) SetTools(tools *ToolRegistry) { @@ -133,17 +147,29 @@ After completing the task, provide a clear summary of what was done.` sm.mu.RLock() tools := sm.tools maxIter := sm.maxIterations + maxTokens := sm.maxTokens + temperature := sm.temperature + hasMaxTokens := sm.hasMaxTokens + hasTemperature := sm.hasTemperature sm.mu.RUnlock() + var llmOptions map[string]any + if hasMaxTokens || hasTemperature { + llmOptions = map[string]any{} + if hasMaxTokens { + llmOptions["max_tokens"] = maxTokens + } + if hasTemperature { + llmOptions["temperature"] = temperature + } + } + loopResult, err := RunToolLoop(ctx, ToolLoopConfig{ Provider: sm.provider, Model: sm.defaultModel, Tools: tools, MaxIterations: maxIter, - LLMOptions: map[string]any{ - "max_tokens": 4096, - "temperature": 0.7, - }, + LLMOptions: llmOptions, }, messages, task.OriginChannel, task.OriginChatID) sm.mu.Lock() @@ -296,17 +322,29 @@ func (t *SubagentTool) Execute(ctx context.Context, args map[string]any) *ToolRe sm.mu.RLock() tools := sm.tools maxIter := sm.maxIterations + maxTokens := sm.maxTokens + temperature := sm.temperature + hasMaxTokens := sm.hasMaxTokens + hasTemperature := sm.hasTemperature sm.mu.RUnlock() + var llmOptions map[string]any + if hasMaxTokens || hasTemperature { + llmOptions = map[string]any{} + if hasMaxTokens { + llmOptions["max_tokens"] = maxTokens + } + if hasTemperature { + llmOptions["temperature"] = temperature + } + } + loopResult, err := RunToolLoop(ctx, ToolLoopConfig{ Provider: sm.provider, Model: sm.defaultModel, Tools: tools, MaxIterations: maxIter, - LLMOptions: map[string]any{ - "max_tokens": 4096, - "temperature": 0.7, - }, + LLMOptions: llmOptions, }, messages, t.originChannel, t.originChatID) if err != nil { return ErrorResult(fmt.Sprintf("Subagent execution failed: %v", err)).WithError(err) diff --git a/pkg/tools/subagent_tool_test.go b/pkg/tools/subagent_tool_test.go index 8e4dc3953..f960a7fda 100644 --- a/pkg/tools/subagent_tool_test.go +++ 
b/pkg/tools/subagent_tool_test.go @@ -10,15 +10,12 @@ import ( ) // MockLLMProvider is a test implementation of LLMProvider -type MockLLMProvider struct{} +type MockLLMProvider struct { + lastOptions map[string]interface{} +} -func (m *MockLLMProvider) Chat( - ctx context.Context, - messages []providers.Message, - tools []providers.ToolDefinition, - model string, - options map[string]any, -) (*providers.LLMResponse, error) { +func (m *MockLLMProvider) Chat(ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, options map[string]interface{}) (*providers.LLMResponse, error) { + m.lastOptions = options // Find the last user message to generate a response for i := len(messages) - 1; i >= 0; i-- { if messages[i].Role == "user" { @@ -42,6 +39,32 @@ func (m *MockLLMProvider) GetContextWindow() int { return 4096 } +func TestSubagentManager_SetLLMOptions_AppliesToRunToolLoop(t *testing.T) { + provider := &MockLLMProvider{} + manager := NewSubagentManager(provider, "test-model", "/tmp/test", nil) + manager.SetLLMOptions(2048, 0.6) + tool := NewSubagentTool(manager) + tool.SetContext("cli", "direct") + + ctx := context.Background() + args := map[string]interface{}{"task": "Do something"} + result := tool.Execute(ctx, args) + + if result == nil || result.IsError { + t.Fatalf("Expected successful result, got: %+v", result) + } + + if provider.lastOptions == nil { + t.Fatal("Expected LLM options to be passed, got nil") + } + if provider.lastOptions["max_tokens"] != 2048 { + t.Fatalf("max_tokens = %v, want %d", provider.lastOptions["max_tokens"], 2048) + } + if provider.lastOptions["temperature"] != 0.6 { + t.Fatalf("temperature = %v, want %v", provider.lastOptions["temperature"], 0.6) + } +} + // TestSubagentTool_Name verifies tool name func TestSubagentTool_Name(t *testing.T) { provider := &MockLLMProvider{} @@ -85,13 +108,13 @@ func TestSubagentTool_Parameters(t *testing.T) { } // Check properties - props, ok := 
params["properties"].(map[string]any) + props, ok := params["properties"].(map[string]interface{}) if !ok { t.Fatal("Properties should be a map") } // Verify task parameter - task, ok := props["task"].(map[string]any) + task, ok := props["task"].(map[string]interface{}) if !ok { t.Fatal("Task parameter should exist") } @@ -100,7 +123,7 @@ func TestSubagentTool_Parameters(t *testing.T) { } // Verify label parameter - label, ok := props["label"].(map[string]any) + label, ok := props["label"].(map[string]interface{}) if !ok { t.Fatal("Label parameter should exist") } @@ -140,7 +163,7 @@ func TestSubagentTool_Execute_Success(t *testing.T) { tool.SetContext("telegram", "chat-123") ctx := context.Background() - args := map[string]any{ + args := map[string]interface{}{ "task": "Write a haiku about coding", "label": "haiku-task", } @@ -195,7 +218,7 @@ func TestSubagentTool_Execute_NoLabel(t *testing.T) { tool := NewSubagentTool(manager) ctx := context.Background() - args := map[string]any{ + args := map[string]interface{}{ "task": "Test task without label", } @@ -218,7 +241,7 @@ func TestSubagentTool_Execute_MissingTask(t *testing.T) { tool := NewSubagentTool(manager) ctx := context.Background() - args := map[string]any{ + args := map[string]interface{}{ "label": "test", } @@ -245,7 +268,7 @@ func TestSubagentTool_Execute_NilManager(t *testing.T) { tool := NewSubagentTool(nil) ctx := context.Background() - args := map[string]any{ + args := map[string]interface{}{ "task": "test task", } @@ -274,7 +297,7 @@ func TestSubagentTool_Execute_ContextPassing(t *testing.T) { tool.SetContext(channel, chatID) ctx := context.Background() - args := map[string]any{ + args := map[string]interface{}{ "task": "Test context passing", } @@ -301,7 +324,7 @@ func TestSubagentTool_ForUserTruncation(t *testing.T) { // Create a task that will generate long response longTask := strings.Repeat("This is a very long task description. 
", 100) - args := map[string]any{ + args := map[string]interface{}{ "task": longTask, "label": "long-test", } diff --git a/pkg/tools/toolloop.go b/pkg/tools/toolloop.go index f0653e1f2..cf72b01ff 100644 --- a/pkg/tools/toolloop.go +++ b/pkg/tools/toolloop.go @@ -60,12 +60,8 @@ func RunToolLoop( // 2. Set default LLM options llmOpts := config.LLMOptions if llmOpts == nil { - llmOpts = map[string]any{ - "max_tokens": 4096, - "temperature": 0.7, - } + llmOpts = map[string]any{} } - // 3. Call LLM response, err := config.Provider.Chat(ctx, messages, providerToolDefs, config.Model, llmOpts) if err != nil { @@ -114,6 +110,7 @@ func RunToolLoop( Name: tc.Name, Arguments: string(argumentsJSON), }, + Name: tc.Name, }) } messages = append(messages, assistantMsg) diff --git a/pkg/tools/web.go b/pkg/tools/web.go index de8296816..301e00daf 100644 --- a/pkg/tools/web.go +++ b/pkg/tools/web.go @@ -504,8 +504,10 @@ func (t *WebFetchTool) extractText(htmlContent string) string { result = strings.TrimSpace(result) - re = regexp.MustCompile(`\s+`) - result = re.ReplaceAllLiteralString(result, " ") + re = regexp.MustCompile(`[^\S\n]+`) + result = re.ReplaceAllString(result, " ") + re = regexp.MustCompile(`\n{3,}`) + result = re.ReplaceAllString(result, "\n\n") lines := strings.Split(result, "\n") var cleanLines []string diff --git a/pkg/tools/web_test.go b/pkg/tools/web_test.go index edb914f66..222a38972 100644 --- a/pkg/tools/web_test.go +++ b/pkg/tools/web_test.go @@ -238,6 +238,80 @@ func TestWebTool_WebFetch_HTMLExtraction(t *testing.T) { } } +// TestWebFetchTool_extractText verifies text extraction preserves newlines +func TestWebFetchTool_extractText(t *testing.T) { + tool := &WebFetchTool{} + + tests := []struct { + name string + input string + wantFunc func(t *testing.T, got string) + }{ + { + name: "preserves newlines between block elements", + input: "

Title

\n

Paragraph 1

\n

Paragraph 2

", + wantFunc: func(t *testing.T, got string) { + lines := strings.Split(got, "\n") + if len(lines) < 2 { + t.Errorf("Expected multiple lines, got %d: %q", len(lines), got) + } + if !strings.Contains(got, "Title") || !strings.Contains(got, "Paragraph 1") || !strings.Contains(got, "Paragraph 2") { + t.Errorf("Missing expected text: %q", got) + } + }, + }, + { + name: "removes script and style tags", + input: "

Keep this

", + wantFunc: func(t *testing.T, got string) { + if strings.Contains(got, "alert") || strings.Contains(got, "body{}") { + t.Errorf("Expected script/style content removed, got: %q", got) + } + if !strings.Contains(got, "Keep this") { + t.Errorf("Expected 'Keep this' to remain, got: %q", got) + } + }, + }, + { + name: "collapses excessive blank lines", + input: "

A

\n\n\n\n\n

B

", + wantFunc: func(t *testing.T, got string) { + if strings.Contains(got, "\n\n\n") { + t.Errorf("Expected excessive blank lines collapsed, got: %q", got) + } + }, + }, + { + name: "collapses horizontal whitespace", + input: "

hello world

", + wantFunc: func(t *testing.T, got string) { + if strings.Contains(got, " ") { + t.Errorf("Expected spaces collapsed, got: %q", got) + } + if !strings.Contains(got, "hello world") { + t.Errorf("Expected 'hello world', got: %q", got) + } + }, + }, + { + name: "empty input", + input: "", + wantFunc: func(t *testing.T, got string) { + if got != "" { + t.Errorf("Expected empty string, got: %q", got) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tool.extractText(tt.input) + tt.wantFunc(t, got) + }) + } +} + // TestWebTool_WebFetch_MissingDomain verifies error handling for URL without domain func TestWebTool_WebFetch_MissingDomain(t *testing.T) { tool := NewWebFetchTool(50000) diff --git a/pkg/utils/message.go b/pkg/utils/message.go new file mode 100644 index 000000000..1d05950d9 --- /dev/null +++ b/pkg/utils/message.go @@ -0,0 +1,179 @@ +package utils + +import ( + "strings" +) + +// SplitMessage splits long messages into chunks, preserving code block integrity. +// The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks, +// but may extend to maxLen when needed. +// Call SplitMessage with the full text content and the maximum allowed length of a single message; +// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. 
+func SplitMessage(content string, maxLen int) []string { + var messages []string + + // Dynamic buffer: 10% of maxLen, but at least 50 chars if possible + codeBlockBuffer := maxLen / 10 + if codeBlockBuffer < 50 { + codeBlockBuffer = 50 + } + if codeBlockBuffer > maxLen/2 { + codeBlockBuffer = maxLen / 2 + } + + for len(content) > 0 { + if len(content) <= maxLen { + messages = append(messages, content) + break + } + + // Effective split point: maxLen minus buffer, to leave room for code blocks + effectiveLimit := maxLen - codeBlockBuffer + if effectiveLimit < maxLen/2 { + effectiveLimit = maxLen / 2 + } + + // Find natural split point within the effective limit + msgEnd := findLastNewline(content[:effectiveLimit], 200) + if msgEnd <= 0 { + msgEnd = findLastSpace(content[:effectiveLimit], 100) + } + if msgEnd <= 0 { + msgEnd = effectiveLimit + } + + // Check if this would end with an incomplete code block + candidate := content[:msgEnd] + unclosedIdx := findLastUnclosedCodeBlock(candidate) + + if unclosedIdx >= 0 { + // Message would end with incomplete code block + // Try to extend up to maxLen to include the closing ``` + if len(content) > msgEnd { + closingIdx := findNextClosingCodeBlock(content, msgEnd) + if closingIdx > 0 && closingIdx <= maxLen { + // Extend to include the closing ``` + msgEnd = closingIdx + } else { + // Code block is too long to fit in one chunk or missing closing fence. + // Try to split inside by injecting closing and reopening fences. 
+ headerEnd := strings.Index(content[unclosedIdx:], "\n") + if headerEnd == -1 { + headerEnd = unclosedIdx + 3 + } else { + headerEnd += unclosedIdx + } + header := strings.TrimSpace(content[unclosedIdx:headerEnd]) + + // If we have a reasonable amount of content after the header, split inside + if msgEnd > headerEnd+20 { + // Find a better split point closer to maxLen + innerLimit := maxLen - 5 // Leave room for "\n```" + betterEnd := findLastNewline(content[:innerLimit], 200) + if betterEnd > headerEnd { + msgEnd = betterEnd + } else { + msgEnd = innerLimit + } + messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") + content = strings.TrimSpace(header + "\n" + content[msgEnd:]) + continue + } + + // Otherwise, try to split before the code block starts + newEnd := findLastNewline(content[:unclosedIdx], 200) + if newEnd <= 0 { + newEnd = findLastSpace(content[:unclosedIdx], 100) + } + if newEnd > 0 { + msgEnd = newEnd + } else { + // If we can't split before, we MUST split inside (last resort) + if unclosedIdx > 20 { + msgEnd = unclosedIdx + } else { + msgEnd = maxLen - 5 + messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") + content = strings.TrimSpace(header + "\n" + content[msgEnd:]) + continue + } + } + } + } + } + + if msgEnd <= 0 { + msgEnd = effectiveLimit + } + + messages = append(messages, content[:msgEnd]) + content = strings.TrimSpace(content[msgEnd:]) + } + + return messages +} + +// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` +// Returns the position of the opening ``` or -1 if all code blocks are complete +func findLastUnclosedCodeBlock(text string) int { + inCodeBlock := false + lastOpenIdx := -1 + + for i := 0; i < len(text); i++ { + if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { + // Toggle code block state on each fence + if !inCodeBlock { + // Entering a code block: record this opening fence + lastOpenIdx = i + 
} + inCodeBlock = !inCodeBlock + i += 2 + } + } + + if inCodeBlock { + return lastOpenIdx + } + return -1 +} + +// findNextClosingCodeBlock finds the next closing ``` starting from a position +// Returns the position after the closing ``` or -1 if not found +func findNextClosingCodeBlock(text string, startIdx int) int { + for i := startIdx; i < len(text); i++ { + if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { + return i + 3 + } + } + return -1 +} + +// findLastNewline finds the last newline character within the last N characters +// Returns the position of the newline or -1 if not found +func findLastNewline(s string, searchWindow int) int { + searchStart := len(s) - searchWindow + if searchStart < 0 { + searchStart = 0 + } + for i := len(s) - 1; i >= searchStart; i-- { + if s[i] == '\n' { + return i + } + } + return -1 +} + +// findLastSpace finds the last space character within the last N characters +// Returns the position of the space or -1 if not found +func findLastSpace(s string, searchWindow int) int { + searchStart := len(s) - searchWindow + if searchStart < 0 { + searchStart = 0 + } + for i := len(s) - 1; i >= searchStart; i-- { + if s[i] == ' ' || s[i] == '\t' { + return i + } + } + return -1 +} diff --git a/pkg/utils/message_test.go b/pkg/utils/message_test.go new file mode 100644 index 000000000..338509437 --- /dev/null +++ b/pkg/utils/message_test.go @@ -0,0 +1,151 @@ +package utils + +import ( + "strings" + "testing" +) + +func TestSplitMessage(t *testing.T) { + longText := strings.Repeat("a", 2500) + longCode := "```go\n" + strings.Repeat("fmt.Println(\"hello\")\n", 100) + "```" // ~2100 chars + + tests := []struct { + name string + content string + maxLen int + expectChunks int // Check number of chunks + checkContent func(t *testing.T, chunks []string) // Custom validation + }{ + { + name: "Empty message", + content: "", + maxLen: 2000, + expectChunks: 0, + }, + { + name: "Short message fits in one chunk", + 
content: "Hello world", + maxLen: 2000, + expectChunks: 1, + }, + { + name: "Simple split regular text", + content: longText, + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + if len(chunks[0]) > 2000 { + t.Errorf("Chunk 0 too large: %d", len(chunks[0])) + } + if len(chunks[0])+len(chunks[1]) != len(longText) { + t.Errorf("Total length mismatch. Got %d, want %d", len(chunks[0])+len(chunks[1]), len(longText)) + } + }, + }, + { + name: "Split at newline", + // 1750 chars then newline, then more chars. + // Dynamic buffer: 2000 / 10 = 200. + // Effective limit: 2000 - 200 = 1800. + // Split should happen at newline because it's at 1750 (< 1800). + // Total length must > 2000 to trigger split. 1750 + 1 + 300 = 2051. + content: strings.Repeat("a", 1750) + "\n" + strings.Repeat("b", 300), + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + if len(chunks[0]) != 1750 { + t.Errorf("Expected chunk 0 to be 1750 length (split at newline), got %d", len(chunks[0])) + } + if chunks[1] != strings.Repeat("b", 300) { + t.Errorf("Chunk 1 content mismatch. Len: %d", len(chunks[1])) + } + }, + }, + { + name: "Long code block split", + content: "Prefix\n" + longCode, + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + // Check that first chunk ends with closing fence + if !strings.HasSuffix(chunks[0], "\n```") { + t.Error("First chunk should end with injected closing fence") + } + // Check that second chunk starts with execution header + if !strings.HasPrefix(chunks[1], "```go") { + t.Error("Second chunk should start with injected code block header") + } + }, + }, + { + name: "Preserve Unicode characters", + content: strings.Repeat("\u4e16", 1000), // 3000 bytes + maxLen: 2000, + expectChunks: 2, + checkContent: func(t *testing.T, chunks []string) { + // Just verify we didn't panic and got valid strings. 
+ // Go strings are UTF-8, if we split mid-rune it would be bad, + // but standard slicing might do that. + // Let's assume standard behavior is acceptable or check if it produces invalid rune? + if !strings.Contains(chunks[0], "\u4e16") { + t.Error("Chunk should contain unicode characters") + } + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := SplitMessage(tc.content, tc.maxLen) + + if tc.expectChunks == 0 { + if len(got) != 0 { + t.Errorf("Expected 0 chunks, got %d", len(got)) + } + return + } + + if len(got) != tc.expectChunks { + t.Errorf("Expected %d chunks, got %d", tc.expectChunks, len(got)) + // Log sizes for debugging + for i, c := range got { + t.Logf("Chunk %d length: %d", i, len(c)) + } + return // Stop further checks if count assumes specific split + } + + if tc.checkContent != nil { + tc.checkContent(t, got) + } + }) + } +} + +func TestSplitMessage_CodeBlockIntegrity(t *testing.T) { + // Focused test for the core requirement: splitting inside a code block preserves syntax highlighting + + // 60 chars total approximately + content := "```go\npackage main\n\nfunc main() {\n\tprintln(\"Hello\")\n}\n```" + maxLen := 40 + + chunks := SplitMessage(content, maxLen) + + if len(chunks) != 2 { + t.Fatalf("Expected 2 chunks, got %d: %q", len(chunks), chunks) + } + + // First chunk must end with "\n```" + if !strings.HasSuffix(chunks[0], "\n```") { + t.Errorf("First chunk should end with closing fence. Got: %q", chunks[0]) + } + + // Second chunk must start with the header "```go" + if !strings.HasPrefix(chunks[1], "```go") { + t.Errorf("Second chunk should start with code block header. Got: %q", chunks[1]) + } + + // First chunk should contain meaningful content + if len(chunks[0]) > 40 { + t.Errorf("First chunk exceeded maxLen: length %d", len(chunks[0])) + } +}