From e22b4e1eeee102202a23b83a8a643c4136f8c6ea Mon Sep 17 00:00:00 2001 From: lxowalle <83055338+lxowalle@users.noreply.github.com> Date: Thu, 16 Apr 2026 10:53:09 +0800 Subject: [PATCH] feat(agent): support btw side questions (#2532) --- docs/chat-apps.md | 3 +- docs/configuration.md | 4 +- docs/fr/chat-apps.md | 10 +- docs/fr/configuration.md | 22 +- docs/ja/chat-apps.md | 2 +- docs/ja/configuration.md | 22 +- docs/my/chat-apps.md | 10 +- docs/my/configuration.md | 22 +- docs/pt-br/chat-apps.md | 10 +- docs/pt-br/configuration.md | 22 +- docs/vi/chat-apps.md | 10 +- docs/vi/configuration.md | 22 +- docs/zh/chat-apps.md | 3 +- docs/zh/configuration.md | 4 +- pkg/agent/hooks_test.go | 89 ++++++ pkg/agent/llm_media.go | 21 ++ pkg/agent/loop.go | 557 ++++++++++++++++++++++++++++++++--- pkg/agent/loop_test.go | 343 +++++++++++++++++++++ pkg/agent/steering_test.go | 496 ++++++++++++++++++++++++++++++- pkg/commands/builtin.go | 1 + pkg/commands/builtin_test.go | 76 +++++ pkg/commands/cmd_btw.go | 51 ++++ pkg/commands/runtime.go | 7 +- 23 files changed, 1737 insertions(+), 70 deletions(-) create mode 100644 pkg/commands/cmd_btw.go diff --git a/docs/chat-apps.md b/docs/chat-apps.md index ae98a7d9f..698633642 100644 --- a/docs/chat-apps.md +++ b/docs/chat-apps.md @@ -62,7 +62,7 @@ picoclaw gateway **4. Telegram command menu (auto-registered at startup)** -PicoClaw now keeps command definitions in one shared registry. On startup, Telegram will automatically register supported bot commands (for example `/start`, `/help`, `/show`, `/list`, `/use`) so command menu and runtime behavior stay in sync. +PicoClaw now keeps command definitions in one shared registry. On startup, Telegram will automatically register supported bot commands (for example `/start`, `/help`, `/show`, `/list`, `/use`, `/btw`) so command menu and runtime behavior stay in sync. Telegram command menu registration remains channel-local discovery UX; generic command execution is handled centrally in the agent loop via the commands executor. If command registration fails (network/API transient errors), the channel still starts and PicoClaw retries registration in the background. @@ -73,6 +73,7 @@ You can also manage installed skills directly from Telegram: - `/use ` - `/use ` and then send the actual request in the next message - `/use clear` +- `/btw ` to ask an immediate side question without changing the active session history; `/btw` is handled as a no-tool query and does not enter the normal tool-execution flow **4. Advanced Formatting** You can set use_markdown_v2: true to enable enhanced formatting options. This allows the bot to utilize the full range of Telegram MarkdownV2 features, including nested styles, spoilers, and custom fixed-width blocks. diff --git a/docs/configuration.md b/docs/configuration.md index e59d6a022..96d5c35a3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -103,12 +103,14 @@ Once skills are installed, you can inspect and force them directly from a chat c - `/use ` forces a specific skill for a single request. - `/use ` arms that skill for your next message in the same chat session. - `/use clear` cancels a pending skill override created by `/use `. +- `/btw ` asks an immediate side question without changing the current session history. `/btw` is handled as a no-tool query and does not enter the normal tool-execution flow. Examples: ```text /list skills /use git explain how to squash the last 3 commits +/btw remind me what we already decided about the deploy plan /use italiapersonalfinance dammi le ultime news ``` @@ -116,7 +118,7 @@ dammi le ultime news ### Unified Command Execution Policy - Generic slash commands are executed through a single path in `pkg/agent/loop.go` via `commands.Executor`. -- Channel adapters no longer consume generic commands locally; they forward inbound text to the bus/agent path. Telegram still auto-registers supported commands at startup. +- Channel adapters no longer consume generic commands locally; they forward inbound text to the bus/agent path. Telegram still auto-registers supported commands such as `/start`, `/help`, `/show`, `/list`, `/use`, and `/btw` at startup. - Unknown slash command (for example `/foo`) passes through to normal LLM processing. - Registered but unsupported command on the current channel (for example `/show` on WhatsApp) returns an explicit user-facing error and stops further processing. diff --git a/docs/fr/chat-apps.md b/docs/fr/chat-apps.md index d6590f9ba..35330ed92 100644 --- a/docs/fr/chat-apps.md +++ b/docs/fr/chat-apps.md @@ -61,11 +61,19 @@ picoclaw gateway **4. Menu de commandes Telegram (enregistré automatiquement au démarrage)** -PicoClaw conserve les définitions de commandes dans un registre partagé unique. Au démarrage, Telegram enregistre automatiquement les commandes bot prises en charge (par exemple `/start`, `/help`, `/show`, `/list`) afin que le menu de commandes et le comportement à l'exécution restent synchronisés. +PicoClaw conserve les définitions de commandes dans un registre partagé unique. Au démarrage, Telegram enregistre automatiquement les commandes bot prises en charge (par exemple `/start`, `/help`, `/show`, `/list`, `/use`, `/btw`) afin que le menu de commandes et le comportement à l'exécution restent synchronisés. L'enregistrement du menu de commandes Telegram reste une découverte UX locale au canal ; l'exécution générique des commandes est gérée de manière centralisée dans la boucle agent via l'exécuteur de commandes. Si l'enregistrement des commandes échoue (erreurs transitoires réseau/API), le canal démarre quand même et PicoClaw réessaie l'enregistrement en arrière-plan. +Vous pouvez aussi gerer les competences installees directement depuis Telegram : + +- `/list skills` +- `/use ` +- `/use ` puis envoyer la vraie requete dans le message suivant +- `/use clear` +- `/btw ` pour poser une question annexe immediate sans modifier l'historique actif de la session ; `/btw` est traite comme une requete directe sans outils et n'entre pas dans le flux normal d'execution des outils + diff --git a/docs/fr/configuration.md b/docs/fr/configuration.md index 7a57cceae..b26b8c4f7 100644 --- a/docs/fr/configuration.md +++ b/docs/fr/configuration.md @@ -80,10 +80,30 @@ Pour les configurations avancées/de test, vous pouvez remplacer la racine des c export PICOCLAW_BUILTIN_SKILLS=/path/to/skills ``` +### Utiliser les Commandes Depuis les Canaux de Chat + +Une fois les compétences installées, vous pouvez aussi les inspecter et les activer directement depuis un canal de chat : + +- `/list skills` affiche les noms des compétences installées visibles pour l'agent courant. +- `/use ` force une compétence pour une seule requête. +- `/use ` prépare cette compétence pour votre prochain message dans la meme conversation. +- `/use clear` annule une surcharge de compétence en attente creee via `/use `. +- `/btw ` pose une question annexe immediate sans modifier l'historique courant de la session. `/btw` est traite comme une requete directe sans outils et n'entre pas dans le flux normal d'execution des outils. + +Exemples : + +```text +/list skills +/use git explique comment squash les 3 derniers commits +/btw rappelle-moi ce qu'on a deja decide pour le plan de deploiement +/use italiapersonalfinance +dammi le ultime news +``` + ### Politique Unifiée d'Exécution des Commandes - Les commandes slash génériques sont exécutées via un chemin unique dans `pkg/agent/loop.go` via `commands.Executor`. -- Les adaptateurs de canaux ne consomment plus les commandes génériques localement ; ils transmettent le texte entrant au chemin bus/agent. Telegram enregistre toujours automatiquement les commandes prises en charge au démarrage. +- Les adaptateurs de canaux ne consomment plus les commandes génériques localement ; ils transmettent le texte entrant au chemin bus/agent. Telegram enregistre toujours automatiquement au démarrage les commandes prises en charge, comme `/start`, `/help`, `/show`, `/list`, `/use` et `/btw`. - Une commande slash inconnue (par exemple `/foo`) passe au traitement LLM normal. - Une commande enregistrée mais non prise en charge sur le canal actuel (par exemple `/show` sur WhatsApp) renvoie une erreur explicite à l'utilisateur et arrête le traitement ultérieur. diff --git a/docs/ja/chat-apps.md b/docs/ja/chat-apps.md index 997748939..b143a5fc6 100644 --- a/docs/ja/chat-apps.md +++ b/docs/ja/chat-apps.md @@ -65,7 +65,7 @@ picoclaw gateway **4. Telegram コマンドメニュー(起動時に自動登録)** -PicoClaw は統一されたコマンド定義を使用します。起動時に Telegram がサポートするコマンド(例: `/start`、`/help`、`/show`、`/list`)を Bot コマンドメニューに自動登録し、メニュー表示と実際の動作を一致させます。 +PicoClaw は統一されたコマンド定義を使用します。起動時に Telegram がサポートするコマンド(例: `/start`、`/help`、`/show`、`/list`、`/use`、`/btw`)を Bot コマンドメニューに自動登録し、メニュー表示と実際の動作を一致させます。 Telegram 側はコマンドメニュー登録機能を保持し、汎用コマンドの実行は Agent Loop 内の commands executor で統一的に処理されます。 ネットワークや API の一時的なエラーで登録に失敗しても、チャネルの起動はブロックされません。システムがバックグラウンドで自動リトライします。 diff --git a/docs/ja/configuration.md b/docs/ja/configuration.md index 6d6290e8a..bf2392585 100644 --- a/docs/ja/configuration.md +++ b/docs/ja/configuration.md @@ -81,10 +81,30 @@ PicoClaw は設定されたワークスペース(デフォルト: `~/.picoclaw export PICOCLAW_BUILTIN_SKILLS=/path/to/skills ``` +### チャットチャネルからスキルとコマンドを使う + +スキルをインストールすると、チャットチャネルから直接確認したり明示的に適用したりできます: + +- `/list skills` は現在の Agent から見えるインストール済みスキル名を表示します。 +- `/use ` は 1 回のリクエストだけそのスキルを強制します。 +- `/use ` は同じチャット内の次のメッセージにそのスキルを予約します。 +- `/use clear` は `/use ` で設定した保留中のスキル上書きを解除します。 +- `/btw ` は現在のセッション履歴を変更せずに即時の横道の質問を送ります。`/btw` はツールなしの直接質問として処理され、通常のツール実行フローには入りません。 + +例: + +```text +/list skills +/use git 直近 3 つのコミットを squash する方法を教えて +/btw さっきのデプロイ方針の結論だけもう一度教えて +/use italiapersonalfinance +dammi le ultime news +``` + ### 統一コマンド実行ポリシー - 汎用スラッシュコマンドは `pkg/agent/loop.go` 内の `commands.Executor` を通じて統一的に実行されます。 -- チャネルアダプターはローカルで汎用コマンドを消費しなくなりました。受信テキストを bus/agent パスに転送するだけです。Telegram は起動時にサポートするコマンドメニューを自動登録します。 +- チャネルアダプターはローカルで汎用コマンドを消費しなくなりました。受信テキストを bus/agent パスに転送するだけです。Telegram は起動時に `/start`、`/help`、`/show`、`/list`、`/use`、`/btw` などのサポート済みコマンドを自動登録します。 - 未登録のスラッシュコマンド(例: `/foo`)は通常の LLM 処理にパススルーされます。 - 登録済みだが現在のチャネルでサポートされていないコマンド(例: WhatsApp での `/show`)は、明示的なユーザー向けエラーを返し、以降の処理を停止します。 diff --git a/docs/my/chat-apps.md b/docs/my/chat-apps.md index c42436139..531c19cbb 100644 --- a/docs/my/chat-apps.md +++ b/docs/my/chat-apps.md @@ -60,11 +60,19 @@ picoclaw gateway **4. Menu arahan Telegram (auto-register semasa startup)** -PicoClaw kini menyimpan definisi arahan dalam satu registry bersama. Semasa startup, Telegram akan mendaftarkan arahan bot yang disokong secara automatik (contohnya `/start`, `/help`, `/show`, `/list`) supaya menu arahan dan tingkah laku runtime sentiasa selari. +PicoClaw kini menyimpan definisi arahan dalam satu registry bersama. Semasa startup, Telegram akan mendaftarkan arahan bot yang disokong secara automatik (contohnya `/start`, `/help`, `/show`, `/list`, `/use`, `/btw`) supaya menu arahan dan tingkah laku runtime sentiasa selari. Pendaftaran menu arahan Telegram kekal sebagai UX penemuan setempat saluran; pelaksanaan arahan generik dikendalikan secara berpusat dalam gelung agen melalui commands executor. Jika pendaftaran arahan gagal (ralat sementara rangkaian/API), saluran tetap akan bermula dan PicoClaw akan mencuba semula pendaftaran di latar belakang. +Anda juga boleh mengurus skill yang dipasang terus dari Telegram: + +- `/list skills` +- `/use ` +- `/use ` kemudian hantar permintaan sebenar dalam mesej seterusnya +- `/use clear` +- `/btw ` untuk bertanya soalan sampingan segera tanpa mengubah sejarah sesi aktif; `/btw` dikendalikan sebagai pertanyaan langsung tanpa tool dan tidak memasuki aliran pelaksanaan tool biasa + **4. Pemformatan Lanjutan** Anda boleh menetapkan `use_markdown_v2: true` untuk mengaktifkan pilihan pemformatan yang lebih maju. Ini membolehkan bot menggunakan keseluruhan set ciri Telegram MarkdownV2, termasuk gaya bersarang, spoiler, dan blok lebar tetap tersuai. diff --git a/docs/my/configuration.md b/docs/my/configuration.md index f798bd9bd..75bdd71a6 100644 --- a/docs/my/configuration.md +++ b/docs/my/configuration.md @@ -63,10 +63,30 @@ Untuk setup lanjutan/ujian, anda boleh menindih root builtin skills dengan: export PICOCLAW_BUILTIN_SKILLS=/path/to/skills ``` +### Menggunakan Skill dan Arahan Dari Saluran Chat + +Selepas skill dipasang, anda boleh menyemak dan memaksanya terus dari saluran chat: + +- `/list skills` memaparkan nama skill dipasang yang kelihatan kepada agen semasa. +- `/use ` memaksa satu skill untuk satu permintaan sahaja. +- `/use ` menyediakan skill itu untuk mesej anda yang seterusnya dalam chat yang sama. +- `/use clear` membatalkan skill override tertunda yang dibuat melalui `/use `. +- `/btw ` bertanya soalan sampingan segera tanpa mengubah sejarah sesi semasa. `/btw` dikendalikan sebagai pertanyaan langsung tanpa tool dan tidak memasuki aliran pelaksanaan tool biasa. + +Contoh: + +```text +/list skills +/use git terangkan cara squash 3 commit terakhir +/btw ingatkan saya semula apa keputusan tadi untuk pelan deploy +/use italiapersonalfinance +dammi le ultime news +``` + ### Polisi Pelaksanaan Arahan Bersepadu - Generic slash command dilaksanakan melalui satu laluan dalam `pkg/agent/loop.go` melalui `commands.Executor`. -- Adapter saluran tidak lagi menggunakan generic command secara setempat; ia memajukan teks masuk ke laluan bus/agent. Telegram masih auto-register arahan yang disokong semasa startup. +- Adapter saluran tidak lagi menggunakan generic command secara setempat; ia memajukan teks masuk ke laluan bus/agent. Telegram masih auto-register arahan yang disokong semasa startup seperti `/start`, `/help`, `/show`, `/list`, `/use`, dan `/btw`. - Slash command yang tidak dikenali (contohnya `/foo`) akan diteruskan ke pemprosesan LLM biasa. - Arahan yang didaftarkan tetapi tidak disokong pada saluran semasa (contohnya `/show` di WhatsApp) akan memulangkan ralat yang jelas kepada pengguna dan menghentikan pemprosesan lanjut. diff --git a/docs/pt-br/chat-apps.md b/docs/pt-br/chat-apps.md index 732cdb1dc..5d7e5990b 100644 --- a/docs/pt-br/chat-apps.md +++ b/docs/pt-br/chat-apps.md @@ -61,11 +61,19 @@ picoclaw gateway **4. Menu de comandos do Telegram (registrado automaticamente na inicialização)** -O PicoClaw agora mantém definições de comandos em um registro compartilhado. Na inicialização, o Telegram registrará automaticamente os comandos de bot suportados (por exemplo `/start`, `/help`, `/show`, `/list`) para que o menu de comandos e o comportamento em tempo de execução permaneçam sincronizados. +O PicoClaw agora mantém definições de comandos em um registro compartilhado. Na inicialização, o Telegram registrará automaticamente os comandos de bot suportados (por exemplo `/start`, `/help`, `/show`, `/list`, `/use`, `/btw`) para que o menu de comandos e o comportamento em tempo de execução permaneçam sincronizados. O registro do menu de comandos do Telegram permanece como descoberta UX local do canal; a execução genérica de comandos é tratada centralmente no loop do agente via commands executor. Se o registro de comandos falhar (erros transitórios de rede/API), o canal ainda inicia e o PicoClaw tenta novamente o registro em segundo plano. +Voce tambem pode gerenciar skills instaladas diretamente pelo Telegram: + +- `/list skills` +- `/use ` +- `/use ` e depois enviar a solicitacao real na proxima mensagem +- `/use clear` +- `/btw ` para fazer uma pergunta lateral imediata sem alterar o historico ativo da sessao; `/btw` e tratado como uma consulta direta sem ferramentas e nao entra no fluxo normal de execucao de ferramentas + diff --git a/docs/pt-br/configuration.md b/docs/pt-br/configuration.md index 27cd6d21f..7bf5f4026 100644 --- a/docs/pt-br/configuration.md +++ b/docs/pt-br/configuration.md @@ -81,10 +81,30 @@ Para configurações avançadas/de teste, você pode substituir o diretório rai export PICOCLAW_BUILTIN_SKILLS=/path/to/skills ``` +### Usando Skills e Comandos em Canais de Chat + +Depois que as skills estiverem instaladas, voce pode inspeciona-las e aplica-las diretamente de um canal de chat: + +- `/list skills` mostra os nomes das skills instaladas visiveis para o agente atual. +- `/use ` força uma skill para uma unica requisicao. +- `/use ` prepara essa skill para a sua proxima mensagem no mesmo chat. +- `/use clear` cancela uma substituicao pendente criada por `/use `. +- `/btw ` faz uma pergunta lateral imediata sem alterar o historico atual da sessao. `/btw` e tratado como uma consulta direta sem ferramentas e nao entra no fluxo normal de execucao de ferramentas. + +Exemplos: + +```text +/list skills +/use git explique como fazer squash dos ultimos 3 commits +/btw me relembre o que ja decidimos sobre o plano de deploy +/use italiapersonalfinance +dammi le ultime news +``` + ### Política Unificada de Execução de Comandos - Comandos slash genéricos são executados através de um único caminho em `pkg/agent/loop.go` via `commands.Executor`. -- Os adaptadores de canal não consomem mais comandos genéricos localmente; eles encaminham o texto de entrada para o caminho bus/agent. O Telegram ainda registra automaticamente os comandos suportados na inicialização. +- Os adaptadores de canal não consomem mais comandos genéricos localmente; eles encaminham o texto de entrada para o caminho bus/agent. O Telegram ainda registra automaticamente na inicialização comandos suportados como `/start`, `/help`, `/show`, `/list`, `/use` e `/btw`. - Comando slash desconhecido (por exemplo `/foo`) passa para o processamento normal do LLM. - Comando registrado mas não suportado no canal atual (por exemplo `/show` no WhatsApp) retorna um erro explícito ao usuário e interrompe o processamento. diff --git a/docs/vi/chat-apps.md b/docs/vi/chat-apps.md index 5eb7c9488..5dc4f8f01 100644 --- a/docs/vi/chat-apps.md +++ b/docs/vi/chat-apps.md @@ -61,11 +61,19 @@ picoclaw gateway **4. Menu lệnh Telegram (tự động đăng ký khi khởi động)** -PicoClaw hiện lưu trữ định nghĩa lệnh trong một registry chung. Khi khởi động, Telegram sẽ tự động đăng ký các lệnh bot được hỗ trợ (ví dụ `/start`, `/help`, `/show`, `/list`) để menu lệnh và hành vi runtime luôn đồng bộ. +PicoClaw hiện lưu trữ định nghĩa lệnh trong một registry chung. Khi khởi động, Telegram sẽ tự động đăng ký các lệnh bot được hỗ trợ (ví dụ `/start`, `/help`, `/show`, `/list`, `/use`, `/btw`) để menu lệnh và hành vi runtime luôn đồng bộ. Đăng ký menu lệnh Telegram vẫn là UX khám phá cục bộ của kênh; thực thi lệnh chung được xử lý tập trung trong vòng lặp agent qua commands executor. Nếu đăng ký lệnh thất bại (lỗi tạm thời mạng/API), kênh vẫn khởi động và PicoClaw thử lại đăng ký trong nền. +Ban cung co the quan ly skill da cai dat truc tiep tu Telegram: + +- `/list skills` +- `/use ` +- `/use ` roi gui yeu cau that o tin nhan tiep theo +- `/use clear` +- `/btw ` de hoi them mot cau ngoai le ngay lap tuc ma khong thay doi lich su phien dang hoat dong; `/btw` duoc xu ly nhu mot truy van truc tiep khong dung cong cu va khong di vao luong thuc thi cong cu thong thuong + diff --git a/docs/vi/configuration.md b/docs/vi/configuration.md index 56eb8f557..ea897bc28 100644 --- a/docs/vi/configuration.md +++ b/docs/vi/configuration.md @@ -81,10 +81,30 @@ Cho thiết lập nâng cao/test, bạn có thể ghi đè thư mục gốc skil export PICOCLAW_BUILTIN_SKILLS=/path/to/skills ``` +### Dung Skill va Lenh Tu Kenh Chat + +Sau khi cai dat skill, ban co the xem va ep dung truc tiep tu kenh chat: + +- `/list skills` hien ten cac skill da cai dat ma agent hien tai co the dung. +- `/use ` ep dung mot skill cho duy nhat mot yeu cau. +- `/use ` dat san skill do cho tin nhan tiep theo trong cung cuoc tro chuyen. +- `/use clear` huy skill override dang cho duoc tao boi `/use `. +- `/btw ` dat cau hoi phu ngay lap tuc ma khong thay doi lich su phien hien tai. `/btw` duoc xu ly nhu mot truy van truc tiep khong dung cong cu va khong di vao luong thuc thi cong cu thong thuong. + +Vi du: + +```text +/list skills +/use git giai thich cach squash 3 commit cuoi +/btw nhac lai giup toi chung ta da chot gi cho ke hoach deploy +/use italiapersonalfinance +dammi le ultime news +``` + ### Chính Sách Thực Thi Lệnh Thống Nhất - Lệnh slash chung được thực thi qua một đường dẫn duy nhất trong `pkg/agent/loop.go` qua `commands.Executor`. -- Adapter kênh không còn xử lý lệnh chung cục bộ; chúng chuyển tiếp văn bản đầu vào đến đường dẫn bus/agent. Telegram vẫn tự động đăng ký lệnh được hỗ trợ khi khởi động. +- Adapter kênh không còn xử lý lệnh chung cục bộ; chúng chuyển tiếp văn bản đầu vào đến đường dẫn bus/agent. Telegram vẫn tự động đăng ký khi khởi động các lệnh được hỗ trợ như `/start`, `/help`, `/show`, `/list`, `/use`, va `/btw`. - Lệnh slash không xác định (ví dụ `/foo`) được chuyển sang xử lý LLM bình thường. - Lệnh đã đăng ký nhưng không được hỗ trợ trên kênh hiện tại (ví dụ `/show` trên WhatsApp) trả về lỗi rõ ràng cho người dùng và dừng xử lý tiếp. diff --git a/docs/zh/chat-apps.md b/docs/zh/chat-apps.md index 4a59d528f..bb71e7c1c 100644 --- a/docs/zh/chat-apps.md +++ b/docs/zh/chat-apps.md @@ -65,7 +65,7 @@ picoclaw gateway **4. Telegram 命令菜单(启动时自动注册)** -PicoClaw 使用统一的命令定义来源。启动时会自动将 Telegram 支持的命令(例如 `/start`、`/help`、`/show`、`/list`、`/use`)注册到 Bot 命令菜单,确保菜单展示与实际行为一致。 +PicoClaw 使用统一的命令定义来源。启动时会自动将 Telegram 支持的命令(例如 `/start`、`/help`、`/show`、`/list`、`/use`、`/btw`)注册到 Bot 命令菜单,确保菜单展示与实际行为一致。 Telegram 侧保留的是命令菜单注册能力;通用命令的实际执行统一走 Agent Loop 中的 commands executor。 如果注册因网络或 API 短暂异常失败,不会阻塞 channel 启动;系统会在后台自动重试。 @@ -76,6 +76,7 @@ Telegram 侧保留的是命令菜单注册能力;通用命令的实际执行 - `/use ` - `/use `,然后在下一条消息里发送真正的请求 - `/use clear` +- `/btw `,用于发起一个不改动当前会话历史的即时旁支提问;`/btw` 会按一次无工具的直接问答处理,不会进入常规的工具执行流程 diff --git a/docs/zh/configuration.md b/docs/zh/configuration.md index a628eaaa2..9a8d39262 100644 --- a/docs/zh/configuration.md +++ b/docs/zh/configuration.md @@ -101,12 +101,14 @@ export PICOCLAW_BUILTIN_SKILLS=/path/to/skills - `/use `:只对当前这一条请求强制使用指定技能。 - `/use `:为同一会话中的下一条消息预先启用该技能。 - `/use clear`:取消通过 `/use ` 设置的待应用技能。 +- `/btw `:发起一个即时的旁支提问,且不改动当前会话历史。`/btw` 会按一次无工具的直接问答处理,不会进入常规的工具执行流程。 示例: ```text /list skills /use git explain how to squash the last 3 commits +/btw 帮我回顾一下刚才关于发布方案的结论 /use italiapersonalfinance dammi le ultime news ``` @@ -114,7 +116,7 @@ dammi le ultime news ### 统一命令执行策略 - 通用斜杠命令通过 `pkg/agent/loop.go` 中的 `commands.Executor` 统一执行。 -- Channel 适配器不再在本地消费通用命令;它们只负责把入站文本转发到 bus/agent 路径。Telegram 仍会在启动时自动注册其支持的命令菜单。 +- Channel 适配器不再在本地消费通用命令;它们只负责把入站文本转发到 bus/agent 路径。Telegram 仍会在启动时自动注册其支持的命令菜单,例如 `/start`、`/help`、`/show`、`/list`、`/use` 和 `/btw`。 - 未注册的斜杠命令(例如 `/foo`)会透传给 LLM 按普通输入处理。 - 已注册但当前 channel 不支持的命令(例如 WhatsApp 上的 `/show`)会返回明确的用户可见错误,并停止后续处理。 diff --git a/pkg/agent/hooks_test.go b/pkg/agent/hooks_test.go index cf0d03c03..eb76c4da8 100644 --- a/pkg/agent/hooks_test.go +++ b/pkg/agent/hooks_test.go @@ -111,6 +111,8 @@ func (p *llmHookTestProvider) GetDefaultModel() string { type llmObserverHook struct { eventCh chan Event lastInbound *bus.InboundContext + lastRoute *routing.ResolvedRoute + lastScope *session.SessionScope } func (h *llmObserverHook) OnEvent(ctx context.Context, evt Event) error { @@ -129,6 +131,8 @@ func (h *llmObserverHook) BeforeLLM( ) (*LLMHookRequest, HookDecision, error) { if req.Context != nil { h.lastInbound = cloneInboundContext(req.Context.Inbound) + h.lastRoute = cloneResolvedRoute(req.Context.Route) + h.lastScope = session.CloneScope(req.Context.Scope) } next := req.Clone() next.Model = "hook-model" @@ -230,6 +234,91 @@ func TestAgentLoop_Hooks_ObserverAndLLMInterceptor(t *testing.T) { } } +func TestAgentLoop_BtwCommand_UsesLLMHooks(t *testing.T) { + provider := &llmHookTestProvider{} + al, agent, cleanup := newHookTestLoop(t, provider) + defer cleanup() + useTestSideQuestionProvider(al, provider) + + hook := &llmObserverHook{eventCh: make(chan Event, 1)} + if err := al.MountHook(NamedHook("llm-observer", hook)); err != nil { + t.Fatalf("MountHook failed: %v", err) + } + + response, handled := al.handleCommand(context.Background(), bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "cli", + ChatID: "direct", + ChatType: "direct", + SenderID: "hook-user", + }, + Content: "/btw hello", + }, agent, &processOptions{ + Dispatch: DispatchRequest{ + SessionKey: "session-1", + InboundContext: &bus.InboundContext{ + Channel: "cli", + ChatID: "direct", + ChatType: "direct", + SenderID: "hook-user", + }, + RouteResult: &routing.ResolvedRoute{ + AgentID: "main", + Channel: "cli", + AccountID: routing.DefaultAccountID, + SessionPolicy: routing.SessionPolicy{ + Dimensions: []string{"sender"}, + }, + MatchedBy: "default", + }, + SessionScope: &session.SessionScope{ + Version: session.ScopeVersionV1, + AgentID: "main", + Channel: "cli", + Account: routing.DefaultAccountID, + Dimensions: []string{"sender"}, + Values: map[string]string{ + "sender": "hook-user", + }, + }, + UserMessage: "/btw hello", + }, + SessionKey: "session-1", + Channel: "cli", + ChatID: "direct", + SenderID: "hook-user", + SenderDisplayName: "Hook User", + }) + if !handled { + t.Fatal("expected /btw command to be handled") + } + if response != "hooked content" { + t.Fatalf("expected hooked content, got %q", response) + } + + provider.mu.Lock() + lastModel := provider.lastModel + provider.mu.Unlock() + if lastModel != "hook-model" { + t.Fatalf("expected model hook-model, got %q", lastModel) + } + if hook.lastInbound == nil { + t.Fatal("expected hook to receive inbound context") + } + if hook.lastInbound.Channel != "cli" || hook.lastInbound.SenderID != "hook-user" { + t.Fatalf("hook inbound context = %+v", hook.lastInbound) + } + if hook.lastInbound.ChatID != "direct" { + t.Fatalf("hook inbound chat ID = %q, want direct", hook.lastInbound.ChatID) + } + if hook.lastRoute == nil || hook.lastRoute.AgentID != "main" { + t.Fatalf("expected hook route context for /btw, got %+v", hook.lastRoute) + } + if hook.lastScope == nil || hook.lastScope.Values["sender"] != "hook-user" { + t.Fatalf("expected hook session scope for /btw, got %+v", hook.lastScope) + } +} + type toolHookProvider struct { mu sync.Mutex calls int diff --git a/pkg/agent/llm_media.go b/pkg/agent/llm_media.go index eb1908777..c1a1cdf53 100644 --- a/pkg/agent/llm_media.go +++ b/pkg/agent/llm_media.go @@ -29,6 +29,27 @@ func stripMessageMedia(messages []providers.Message) []providers.Message { return stripped } +func callLLMWithVisionUnsupportedRetry( + messages []providers.Message, + call func([]providers.Message) (*providers.LLMResponse, error), + beforeRetry func(error), +) (*providers.LLMResponse, []providers.Message, bool, error) { + response, err := call(messages) + if err == nil { + return response, messages, false, nil + } + if !messagesContainMedia(messages) || !isVisionUnsupportedError(err) { + return response, messages, false, err + } + + if beforeRetry != nil { + beforeRetry(err) + } + stripped := stripMessageMedia(messages) + response, err = call(stripped) + return response, stripped, true, err +} + func isVisionUnsupportedError(err error) bool { if err == nil { return false diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 5c75b5ef8..74cdfeb51 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -70,6 +70,8 @@ type AgentLoop struct { activeRequests sync.WaitGroup reloadFunc func() error + + providerFactory func(*config.ModelConfig) (providers.LLMProvider, string, error) } // processOptions configures how a message is processed @@ -159,6 +161,7 @@ func NewAgentLoop( cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()), steering: newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)), } + al.providerFactory = providers.CreateProviderFromConfig al.hooks = NewHookManager(eventBus) configureHookManagerFromConfig(al.hooks, cfg) al.contextManager = al.resolveContextManager() @@ -479,10 +482,12 @@ func (al *AgentLoop) Run(ctx context.Context) error { // running. Only messages that resolve to the active turn scope are // redirected into steering; other inbound messages are requeued. drainCancel := func() {} - if activeScope, activeAgentID, ok := al.resolveSteeringTarget(msg); ok { - drainCtx, cancel := context.WithCancel(ctx) - drainCancel = cancel - go al.drainBusToSteering(drainCtx, activeScope, activeAgentID) + if !isBtwCommand(msg.Content) { + if activeScope, activeAgentID, ok := al.resolveSteeringTarget(msg); ok { + drainCtx, cancel := context.WithCancel(ctx) + drainCancel = cancel + go al.drainBusToSteering(drainCtx, ctx, activeScope, activeAgentID) + } } // Process message @@ -604,7 +609,7 @@ func (al *AgentLoop) Run(ctx context.Context) error { // active scope into the steering queue. Messages from other scopes are requeued // so they can be processed normally after the active turn. It drains all // immediately available messages, blocking for the first one until ctx is done. -func (al *AgentLoop) drainBusToSteering(ctx context.Context, activeScope, activeAgentID string) { +func (al *AgentLoop) drainBusToSteering(ctx, priorityCtx context.Context, activeScope, activeAgentID string) { blocking := true var requeue []bus.InboundMessage defer func() { @@ -656,6 +661,17 @@ func (al *AgentLoop) drainBusToSteering(ctx context.Context, activeScope, active // Transcribe audio if needed before steering, so the agent sees text. msg, _ = al.transcribeAudioInMessage(ctx, msg) + // Handle priority commands (e.g. /btw) outside the steering queue, without + // blocking this drain from enqueueing later messages for the active turn. + if isBtwCommand(msg.Content) { + priorityMsg := msg + go al.handlePriorityCommandAsync(priorityCtx, priorityMsg) + // A priority command is not a steering interrupt. Keep waiting for the + // next inbound message while the active turn is still running. + blocking = true + continue + } + logger.InfoCF("agent", "Redirecting inbound message to steering queue", map[string]any{ "channel": msg.Channel, @@ -1532,6 +1548,359 @@ func (al *AgentLoop) ProcessHeartbeat( }) } +func sideQuestionModelName(agent *AgentInstance, usedLight bool) string { + if agent == nil { + return "" + } + if usedLight && agent.Router != nil { + if lightModel := strings.TrimSpace(agent.Router.LightModel()); lightModel != "" { + return lightModel + } + } + return agent.Model +} + +func modelNameFromIdentityKey(identityKey string) string { + const prefix = "model_name:" + if strings.HasPrefix(identityKey, prefix) { + return strings.TrimSpace(strings.TrimPrefix(identityKey, prefix)) + } + return "" +} + +func closeProviderIfStateful(provider providers.LLMProvider) { + if stateful, ok := provider.(providers.StatefulProvider); ok { + stateful.Close() + } +} + +func cloneLLMOptions(src map[string]any) map[string]any { + dst := make(map[string]any, len(src)+1) + for key, value := range src { + dst[key] = value + } + return dst +} + +func (al *AgentLoop) isolatedSideQuestionProvider( + agent *AgentInstance, + baseModelName string, + candidate providers.FallbackCandidate, +) (providers.LLMProvider, string, func(), error) { + if agent == nil { + return nil, "", func() {}, fmt.Errorf("no agent available for /btw") + } + + modelCfg, err := al.sideQuestionModelConfig(agent, baseModelName, candidate) + if err != nil { + return nil, "", func() {}, err + } + + factory := al.providerFactory + if factory == nil { + factory = providers.CreateProviderFromConfig + } + + provider, modelID, err := factory(modelCfg) + if err != nil { + return nil, "", func() {}, err + } + + cleanup := func() { + closeProviderIfStateful(provider) + } + return provider, modelID, cleanup, nil +} + +func (al *AgentLoop) sideQuestionModelConfig( + agent *AgentInstance, + baseModelName string, + candidate providers.FallbackCandidate, +) (*config.ModelConfig, error) { + if agent == nil { + return nil, fmt.Errorf("no agent available for /btw") + } + + if name := modelNameFromIdentityKey(candidate.IdentityKey); name != "" { + return resolvedModelConfig(al.GetConfig(), name, agent.Workspace) + } + + baseModelName = strings.TrimSpace(baseModelName) + modelCfg, err := resolvedModelConfig(al.GetConfig(), baseModelName, agent.Workspace) + if err != nil { + model := strings.TrimSpace(baseModelName) + if candidate.Model != "" { + model = candidate.Model + } + if candidate.Provider != "" && candidate.Model != "" { + model = providers.NormalizeProvider(candidate.Provider) + "/" + candidate.Model + } else { + model = ensureProtocolModel(model) + } + return &config.ModelConfig{ + ModelName: baseModelName, + Model: model, + Workspace: agent.Workspace, + }, nil + } + + clone := *modelCfg + if candidate.Provider != "" && candidate.Model != "" { + clone.Model = providers.NormalizeProvider(candidate.Provider) + "/" + candidate.Model + } + return &clone, nil +} + +func (al *AgentLoop) askSideQuestion( + ctx context.Context, + agent *AgentInstance, + opts *processOptions, + question string, +) (string, error) { + if agent == nil { + return "", fmt.Errorf("no agent available for /btw") + } + + question = strings.TrimSpace(question) + if question == "" { + return "", fmt.Errorf("Usage: /btw ") + } + + if opts != nil { + normalizeProcessOptionsInPlace(opts) + } + var media []string + var channel, chatID, senderID, senderDisplayName string + if opts != nil { + media = opts.Media + channel = opts.Channel + chatID = opts.ChatID + senderID = opts.SenderID + senderDisplayName = opts.SenderDisplayName + } + + var history []providers.Message + var summary string + if opts != nil { + if !opts.NoHistory { + if resp, err := al.contextManager.Assemble(ctx, &AssembleRequest{ + SessionKey: opts.SessionKey, + Budget: agent.ContextWindow, + MaxTokens: agent.MaxTokens, + }); err == nil && resp != nil { + history = resp.History + summary = resp.Summary + } + } + } + + messages := agent.ContextBuilder.BuildMessages( + history, + summary, + question, + media, + channel, + chatID, + senderID, + senderDisplayName, + ) + + maxMediaSize := al.GetConfig().Agents.Defaults.GetMaxMediaSize() + messages = resolveMediaRefs(messages, al.mediaStore, maxMediaSize) + + activeCandidates, activeModel, usedLight := al.selectCandidates(agent, question, messages) + selectedModelName := sideQuestionModelName(agent, usedLight) + + llmOpts := map[string]any{ + "max_tokens": agent.MaxTokens, + "temperature": agent.Temperature, + "prompt_cache_key": agent.ID + ":btw", + } + + hookModelChanged := false + callProvider := func( + ctx context.Context, + candidate providers.FallbackCandidate, + model string, + forceModel bool, + callMessages []providers.Message, + ) (*providers.LLMResponse, error) { + provider, providerModel, cleanup, err := al.isolatedSideQuestionProvider(agent, selectedModelName, candidate) + if err != nil { + return nil, err + } + defer cleanup() + if !forceModel || strings.TrimSpace(model) == "" { + model = providerModel + } + callOpts := llmOpts + if _, exists := callOpts["thinking_level"]; !exists && agent.ThinkingLevel != ThinkingOff { + if tc, ok := provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() { + callOpts = cloneLLMOptions(llmOpts) + callOpts["thinking_level"] = string(agent.ThinkingLevel) + } + } + return provider.Chat(ctx, callMessages, nil, model, callOpts) + } + + turnCtx := newTurnContext(nil, nil, nil) + if opts != nil { + turnCtx = newTurnContext(opts.Dispatch.InboundContext, opts.Dispatch.RouteResult, opts.Dispatch.SessionScope) + } + llmModel := activeModel + if al.hooks != nil { + llmReq, decision := al.hooks.BeforeLLM(ctx, &LLMHookRequest{ + Meta: EventMeta{ + Source: "askSideQuestion", + TracePath: "turn.llm.request", + turnContext: cloneTurnContext(turnCtx), + }, + Context: cloneTurnContext(turnCtx), + Model: llmModel, + Messages: messages, + Tools: nil, + Options: llmOpts, + GracefulTerminal: false, + }) + switch decision.normalizedAction() { + case HookActionContinue, HookActionModify: + if llmReq != nil { + if strings.TrimSpace(llmReq.Model) != "" && llmReq.Model != llmModel { + hookModelChanged = true + } + llmModel = llmReq.Model + messages = llmReq.Messages + llmOpts = llmReq.Options + } + case HookActionAbortTurn: + reason := decision.Reason + if reason == "" { + reason = "hook requested turn abort" + } + return "", fmt.Errorf("hook aborted turn during before_llm: %s", reason) + case HookActionHardAbort: + reason := decision.Reason + if reason == "" { + reason = "hook requested turn abort" + } + return "", fmt.Errorf("hook aborted turn during before_llm: %s", reason) + } + } + if hookModelChanged { + // Hook-selected models must not continue through the pre-hook fallback + // candidate list, otherwise fallback execution would call the original + // candidate model and silently ignore the hook decision. + activeCandidates = nil + } + + callSideLLM := func(callMessages []providers.Message) (*providers.LLMResponse, error) { + if len(activeCandidates) > 1 && al.fallback != nil { + fbResult, err := al.fallback.Execute( + ctx, + activeCandidates, + func(ctx context.Context, providerName, model string) (*providers.LLMResponse, error) { + candidate := providers.FallbackCandidate{Provider: providerName, Model: model} + for _, activeCandidate := range activeCandidates { + if activeCandidate.Provider == providerName && activeCandidate.Model == model { + candidate = activeCandidate + break + } + } + return callProvider(ctx, candidate, model, false, callMessages) + }, + ) + if err != nil { + return nil, err + } + return fbResult.Response, nil + } + + var candidate providers.FallbackCandidate + if len(activeCandidates) > 0 { + candidate = activeCandidates[0] + } + return callProvider(ctx, candidate, llmModel, hookModelChanged, callMessages) + } + + resp, _, _, err := callLLMWithVisionUnsupportedRetry( + messages, + callSideLLM, + func(originalErr error) { + al.emitEvent( + EventKindLLMRetry, + EventMeta{ + Source: "askSideQuestion", + TracePath: "turn.llm.retry", + turnContext: cloneTurnContext(turnCtx), + }, + LLMRetryPayload{ + Attempt: 1, + MaxRetries: 1, + Reason: "vision_unsupported", + Error: originalErr.Error(), + Backoff: 0, + }, + ) + }, + ) + if err != nil { + return "", err + } + if resp == nil { + return "", nil + } + resp, err = al.applySideQuestionAfterLLM(ctx, turnCtx, llmModel, resp) + if err != nil { + return "", err + } + return sideQuestionResponseContent(resp), nil +} + +func (al *AgentLoop) applySideQuestionAfterLLM( + ctx context.Context, + turnCtx *TurnContext, + model string, + response *providers.LLMResponse, +) (*providers.LLMResponse, error) { + if response == nil || al.hooks == nil { + return response, nil + } + + llmResp, decision := al.hooks.AfterLLM(ctx, &LLMHookResponse{ + Meta: EventMeta{ + Source: "askSideQuestion", + TracePath: "turn.llm.response", + turnContext: cloneTurnContext(turnCtx), + }, + Context: cloneTurnContext(turnCtx), + Model: model, + Response: response, + }) + switch decision.normalizedAction() { + case HookActionContinue, HookActionModify: + if llmResp != nil && llmResp.Response != nil { + response = llmResp.Response + } + case HookActionAbortTurn, HookActionHardAbort: + reason := decision.Reason + if reason == "" { + reason = "hook requested turn abort" + } + return nil, fmt.Errorf("hook aborted turn during after_llm: %s", reason) + } + return response, nil +} + +func sideQuestionResponseContent(response *providers.LLMResponse) string { + if response == nil { + return "" + } + if response.Content != "" { + return response.Content + } + return response.ReasoningContent +} + func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) (string, error) { msg = bus.NormalizeInboundMessage(msg) @@ -2363,10 +2732,42 @@ turnLoop: var response *providers.LLMResponse var err error maxRetries := 2 - callHasMedia := messagesContainMedia(callMessages) - didStripMedia := false for retry := 0; retry <= maxRetries; retry++ { - response, err = callLLM(callMessages, providerToolDefs) + response, callMessages, _, err = callLLMWithVisionUnsupportedRetry( + callMessages, + func(messagesForRetry []providers.Message) (*providers.LLMResponse, error) { + return callLLM(messagesForRetry, providerToolDefs) + }, + func(originalErr error) { + if !ts.opts.NoHistory { + history = ts.agent.Sessions.GetHistory(ts.sessionKey) + ts.agent.Sessions.SetHistory(ts.sessionKey, stripMessageMedia(history)) + + // Keep persistedMessages aligned so abort restore-point trimming remains correct. + ts.mu.Lock() + for i := range ts.persistedMessages { + ts.persistedMessages[i].Media = nil + } + ts.mu.Unlock() + + ts.refreshRestorePointFromSession(ts.agent) + } + + messages = stripMessageMedia(messages) + + al.emitEvent( + EventKindLLMRetry, + ts.eventMeta("runTurn", "turn.llm.retry"), + LLMRetryPayload{ + Attempt: 1, + MaxRetries: 1, + Reason: "vision_unsupported", + Error: originalErr.Error(), + Backoff: 0, + }, + ) + }, + ) if err == nil { break } @@ -2375,45 +2776,6 @@ turnLoop: return al.abortTurn(ts) } - // If the provider/model doesn't support multimodal inputs, retry once with media stripped - // so the session doesn't get "stuck" after a user sends an image. - if callHasMedia && !didStripMedia && isVisionUnsupportedError(err) { - didStripMedia = true - if !ts.opts.NoHistory { - history = ts.agent.Sessions.GetHistory(ts.sessionKey) - ts.agent.Sessions.SetHistory(ts.sessionKey, stripMessageMedia(history)) - - // Keep persistedMessages aligned so abort restore-point trimming remains correct. - ts.mu.Lock() - for i := range ts.persistedMessages { - ts.persistedMessages[i].Media = nil - } - ts.mu.Unlock() - - ts.refreshRestorePointFromSession(ts.agent) - } - - messages = stripMessageMedia(messages) - callMessages = stripMessageMedia(callMessages) - callHasMedia = false - - al.emitEvent( - EventKindLLMRetry, - ts.eventMeta("runTurn", "turn.llm.retry"), - LLMRetryPayload{ - Attempt: 1, - MaxRetries: 1, - Reason: "vision_unsupported", - Error: err.Error(), - Backoff: 0, - }, - ) - response, err = callLLM(callMessages, providerToolDefs) - if err == nil { - break - } - } - errMsg := strings.ToLower(err.Error()) isTimeoutError := errors.Is(err, context.DeadlineExceeded) || strings.Contains(errMsg, "deadline exceeded") || @@ -3748,6 +4110,11 @@ func activeSkillNames(agent *AgentInstance, opts processOptions) []string { return resolved } +func isBtwCommand(content string) bool { + cmdName, ok := commands.CommandName(content) + return ok && cmdName == "btw" +} + func (al *AgentLoop) applyExplicitSkillCommand( raw string, agent *AgentInstance, @@ -3856,6 +4223,9 @@ func (al *AgentLoop) buildCommandsRuntime( if agent.ContextBuilder != nil { rt.ListSkillNames = agent.ContextBuilder.ListSkillNames } + rt.AskSideQuestion = func(ctx context.Context, question string) (string, error) { + return al.askSideQuestion(ctx, agent, opts, question) + } rt.GetModelInfo = func() (string, string) { return agent.Model, resolvedCandidateProvider(agent.Candidates, cfg.Agents.Defaults.Provider) } @@ -3975,6 +4345,99 @@ func mapCommandError(result commands.ExecuteResult) string { return fmt.Sprintf("Failed to execute /%s: %v", result.Command, result.Err) } +func (al *AgentLoop) tryHandlePriorityCommand(ctx context.Context, msg bus.InboundMessage) (bool, bus.OutboundMessage) { + if !isBtwCommand(msg.Content) { + return false, bus.OutboundMessage{} + } + + route, agent, err := al.resolveMessageRoute(msg) + if err != nil || agent == nil { + if err != nil { + logger.ErrorCF("agent", fmt.Sprintf("Error resolving route for /btw: %v", err), nil) + return true, bus.OutboundMessage{ + Channel: msg.Channel, + ChatID: msg.ChatID, + Context: outboundContextFromInbound( + &msg.Context, + msg.Channel, + msg.ChatID, + msg.Context.ReplyToMessageID, + ), + Content: fmt.Sprintf("Error processing message: %v", err), + } + } + logger.WarnCF("agent", "/btw command unavailable: no agent resolved", nil) + return true, bus.OutboundMessage{ + Channel: msg.Channel, + ChatID: msg.ChatID, + Context: outboundContextFromInbound( + &msg.Context, + msg.Channel, + msg.ChatID, + msg.Context.ReplyToMessageID, + ), + Content: "Command unavailable in current context.", + } + } + + allocation := al.allocateRouteSession(route, msg) + sessionKey := resolveScopeKey(allocation.SessionKey, msg.SessionKey) + msg.SessionKey = sessionKey + opts := processOptions{ + Dispatch: DispatchRequest{ + SessionKey: sessionKey, + SessionAliases: buildSessionAliases(sessionKey, append(allocation.SessionAliases, msg.SessionKey)...), + InboundContext: cloneInboundContext(&msg.Context), + RouteResult: cloneResolvedRoute(&route), + SessionScope: session.CloneScope(&allocation.Scope), + UserMessage: msg.Content, + Media: append([]string(nil), msg.Media...), + }, + SessionKey: sessionKey, + SenderID: msg.SenderID, + SenderDisplayName: msg.Sender.DisplayName, + } + + cmdCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) + defer cancel() + + response, handled := al.handleCommand(cmdCtx, msg, agent, &opts) + if !handled { + return false, bus.OutboundMessage{} + } + agentID, outboundSessionKey, scope := outboundTurnMetadata(agent.ID, sessionKey, &allocation.Scope) + return true, bus.OutboundMessage{ + Channel: msg.Channel, + ChatID: msg.ChatID, + Context: outboundContextFromInbound( + &msg.Context, + msg.Channel, + msg.ChatID, + msg.Context.ReplyToMessageID, + ), + AgentID: agentID, + SessionKey: outboundSessionKey, + Scope: scope, + Content: response, + } +} + +func (al *AgentLoop) handlePriorityCommandAsync(ctx context.Context, msg bus.InboundMessage) { + handled, outbound := al.tryHandlePriorityCommand(ctx, msg) + if !handled || outbound.Content == "" { + return + } + + publishCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + if err := al.bus.PublishOutbound(publishCtx, outbound); err != nil { + logger.WarnCF("agent", "Failed to publish priority command response", map[string]any{ + "error": err.Error(), + "channel": outbound.Channel, + }) + } +} + // isNativeSearchProvider reports whether the given LLM provider implements // NativeSearchCapable and returns true for SupportsNativeSearch. func isNativeSearchProvider(p providers.LLMProvider) bool { diff --git a/pkg/agent/loop_test.go b/pkg/agent/loop_test.go index e01f74e46..4faafcef0 100644 --- a/pkg/agent/loop_test.go +++ b/pkg/agent/loop_test.go @@ -9,6 +9,7 @@ import ( "net/http/httptest" "os" "path/filepath" + "reflect" "slices" "strings" "testing" @@ -80,6 +81,7 @@ func newStartedTestChannelManager( type recordingProvider struct { lastMessages []providers.Message + lastModel string } func (r *recordingProvider) Chat( @@ -90,6 +92,7 @@ func (r *recordingProvider) Chat( opts map[string]any, ) (*providers.LLMResponse, error) { r.lastMessages = append([]providers.Message(nil), messages...) + r.lastModel = model return &providers.LLMResponse{ Content: "Mock response", ToolCalls: []providers.ToolCall{}, @@ -100,6 +103,47 @@ func (r *recordingProvider) GetDefaultModel() string { return "mock-model" } +type closeTrackingProvider struct { + recordingProvider + closed bool +} + +func (p *closeTrackingProvider) Close() { + p.closed = true +} + +type modelRewriteHook struct { + model string +} + +func (h modelRewriteHook) BeforeLLM( + ctx context.Context, + req *LLMHookRequest, +) (*LLMHookRequest, HookDecision, error) { + next := req.Clone() + next.Model = h.model + return next, HookDecision{Action: HookActionModify}, nil +} + +func (h modelRewriteHook) AfterLLM( + ctx context.Context, + resp *LLMHookResponse, +) (*LLMHookResponse, HookDecision, error) { + return resp.Clone(), HookDecision{Action: HookActionContinue}, nil +} + +func useTestSideQuestionProvider(al *AgentLoop, provider providers.LLMProvider) { + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + model := provider.GetDefaultModel() + if mc != nil { + if _, modelID := providers.ExtractProtocol(mc.Model); modelID != "" { + model = modelID + } + } + return provider, model, nil + } +} + func newTestAgentLoop( t *testing.T, ) (al *AgentLoop, cfg *config.Config, msgBus *bus.MessageBus, provider *mockProvider, cleanup func()) { @@ -235,6 +279,305 @@ func TestProcessMessage_UseCommandLoadsRequestedSkill(t *testing.T) { } } +func TestProcessMessage_BtwCommandRunsWithoutPersistingHistory(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + msgBus := bus.NewMessageBus() + provider := &recordingProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + defaultAgent := al.GetRegistry().GetDefaultAgent() + if defaultAgent == nil { + t.Fatal("expected default agent") + } + + msg := bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain side effects", + } + route, _, err := al.resolveMessageRoute(msg) + if err != nil { + t.Fatalf("resolveMessageRoute() error = %v", err) + } + allocation := al.allocateRouteSession(route, msg) + sessionKey := resolveScopeKey(allocation.SessionKey, msg.SessionKey) + initialHistory := []providers.Message{ + {Role: "user", Content: "We decided to avoid global state."}, + {Role: "assistant", Content: "Right, keep it request-scoped."}, + } + defaultAgent.Sessions.SetHistory(sessionKey, initialHistory) + defaultAgent.Sessions.SetSummary(sessionKey, "The team decided to keep state request-scoped.") + + response, err := al.processMessage(context.Background(), msg) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if len(provider.lastMessages) == 0 { + t.Fatal("provider did not receive any messages") + } + if len(provider.lastMessages) != 4 { + t.Fatalf("provider messages len = %d, want 4 (system + prior history + user)", len(provider.lastMessages)) + } + + if !reflect.DeepEqual(provider.lastMessages[1:3], initialHistory) { + t.Fatalf("provider history = %#v, want %#v", provider.lastMessages[1:3], initialHistory) + } + + lastMessage := provider.lastMessages[len(provider.lastMessages)-1] + if lastMessage.Role != "user" || lastMessage.Content != "explain side effects" { + t.Fatalf("last provider message = %+v, want stripped /btw question", lastMessage) + } + + history := al.GetRegistry().GetDefaultAgent().Sessions.GetHistory(sessionKey) + if !reflect.DeepEqual(history, initialHistory) { + t.Fatalf("session history = %#v, want %#v", history, initialHistory) + } +} + +func TestProcessMessage_BtwCommandIncludesRequestContextAndMedia(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + msgBus := bus.NewMessageBus() + provider := &recordingProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + + response, err := al.processMessage(context.Background(), testInboundMessage(bus.InboundMessage{ + Channel: "discord", + SenderID: "discord:123", + Sender: bus.SenderInfo{ + DisplayName: "Alice", + }, + ChatID: "group-1", + Content: "/btw describe this image", + Media: []string{"media://image-1"}, + })) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if len(provider.lastMessages) == 0 { + t.Fatal("provider did not receive any messages") + } + + systemPrompt := provider.lastMessages[0].Content + if !strings.Contains(systemPrompt, "## Current Session\nChannel: discord\nChat ID: group-1") { + t.Fatalf("system prompt missing current session context:\n%s", systemPrompt) + } + if !strings.Contains(systemPrompt, "## Current Sender\nCurrent sender: Alice (ID: discord:123)") { + t.Fatalf("system prompt missing current sender context:\n%s", systemPrompt) + } + + lastMessage := provider.lastMessages[len(provider.lastMessages)-1] + if lastMessage.Role != "user" || lastMessage.Content != "describe this image" { + t.Fatalf("last provider message = %+v, want stripped /btw question", lastMessage) + } + if !reflect.DeepEqual(lastMessage.Media, []string{"media://image-1"}) { + t.Fatalf("last provider media = %#v, want media ref", lastMessage.Media) + } +} + +func TestProcessMessage_BtwCommandUsesIsolatedProvider(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + msgBus := bus.NewMessageBus() + mainProvider := &recordingProvider{} + al := NewAgentLoop(cfg, msgBus, mainProvider) + var sideProvider *closeTrackingProvider + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + sideProvider = &closeTrackingProvider{} + return sideProvider, "isolated-model", nil + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain isolation", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if len(mainProvider.lastMessages) != 0 { + t.Fatalf("main provider was used for /btw: %+v", mainProvider.lastMessages) + } + if sideProvider == nil { + t.Fatal("side question provider factory was not called") + } + if !sideProvider.closed { + t.Fatal("isolated stateful /btw provider was not closed") + } + if len(sideProvider.lastMessages) == 0 { + t.Fatal("isolated provider did not receive messages") + } +} + +func TestProcessMessage_BtwCommandRetriesWithoutMediaOnVisionUnsupported(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + msgBus := bus.NewMessageBus() + provider := &visionUnsupportedMediaProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + + response, err := al.processMessage(context.Background(), testInboundMessage(bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw describe this image", + Media: []string{"data:image/png;base64,abc123"}, + })) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "ok" { + t.Fatalf("processMessage() response = %q, want %q", response, "ok") + } + if provider.calls != 2 { + t.Fatalf("calls = %d, want %d (fail with media, then retry without media)", provider.calls, 2) + } + if !slices.Equal(provider.mediaSeen, []bool{true, false}) { + t.Fatalf("mediaSeen = %v, want %v", provider.mediaSeen, []bool{true, false}) + } +} + +func TestProcessMessage_BtwCommandUsesProviderFactoryModel(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "lb-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + ModelList: []*config.ModelConfig{ + {ModelName: "lb-model", Model: "openai/lb-model-a"}, + {ModelName: "lb-model", Model: "openai/lb-model-b"}, + }, + } + + msgBus := bus.NewMessageBus() + provider := &recordingProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + + var wantModel string + al.providerFactory = func(mc *config.ModelConfig) (providers.LLMProvider, string, error) { + if mc == nil { + t.Fatal("expected model config") + } + _, modelID := providers.ExtractProtocol(mc.Model) + wantModel = "factory-" + modelID + return provider, wantModel, nil + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain load balancing", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if provider.lastModel != wantModel { + t.Fatalf("/btw model = %q, want provider factory model %q", provider.lastModel, wantModel) + } +} + +func TestProcessMessage_BtwCommandHookModelBypassesFallbackCandidates(t *testing.T) { + tmpDir := t.TempDir() + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "primary-model", + ModelFallbacks: []string{"fallback-model"}, + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + msgBus := bus.NewMessageBus() + provider := &recordingProvider{} + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + if err := al.MountHook(NamedHook("rewrite-model", modelRewriteHook{model: "hook-model"})); err != nil { + t.Fatalf("MountHook failed: %v", err) + } + + response, err := al.processMessage(context.Background(), bus.InboundMessage{ + Channel: "telegram", + SenderID: "telegram:123", + ChatID: "chat-1", + Content: "/btw explain hook routing", + }) + if err != nil { + t.Fatalf("processMessage() error = %v", err) + } + if response != "Mock response" { + t.Fatalf("processMessage() response = %q, want %q", response, "Mock response") + } + if provider.lastModel != "hook-model" { + t.Fatalf("/btw model = %q, want hook-selected model", provider.lastModel) + } +} + func TestHandleCommand_UseCommandRejectsUnknownSkill(t *testing.T) { tmpDir := t.TempDir() cfg := &config.Config{ diff --git a/pkg/agent/steering_test.go b/pkg/agent/steering_test.go index 8e6063f08..fd8a688eb 100644 --- a/pkg/agent/steering_test.go +++ b/pkg/agent/steering_test.go @@ -405,7 +405,7 @@ func TestDrainBusToSteering_RequeuesDifferentScopeMessage(t *testing.T) { done := make(chan struct{}) go func() { - al.drainBusToSteering(ctx, activeScope, activeAgentID) + al.drainBusToSteering(ctx, ctx, activeScope, activeAgentID) close(done) }() @@ -566,12 +566,14 @@ func (p *lateSteeringProvider) GetDefaultModel() string { } type blockingDirectProvider struct { - mu sync.Mutex - calls int - firstStarted chan struct{} - releaseFirst chan struct{} - firstResp string - finalResp string + mu sync.Mutex + calls int + firstStarted chan struct{} + releaseFirst chan struct{} + secondStarted chan struct{} + releaseSecond chan struct{} + firstResp string + finalResp string } func (p *blockingDirectProvider) Chat( @@ -586,11 +588,15 @@ func (p *blockingDirectProvider) Chat( call := p.calls firstStarted := p.firstStarted releaseFirst := p.releaseFirst + secondStarted := p.secondStarted + releaseSecond := p.releaseSecond firstResp := p.firstResp finalResp := p.finalResp if call == 1 && p.firstStarted != nil { close(p.firstStarted) - p.firstStarted = nil + } + if call == 2 && p.secondStarted != nil { + close(p.secondStarted) } p.mu.Unlock() @@ -604,6 +610,14 @@ func (p *blockingDirectProvider) Chat( } _ = firstStarted + _ = secondStarted + if call == 2 && releaseSecond != nil { + select { + case <-releaseSecond: + case <-ctx.Done(): + return nil, ctx.Err() + } + } return &providers.LLMResponse{Content: finalResp}, nil } @@ -611,6 +625,73 @@ func (p *blockingDirectProvider) GetDefaultModel() string { return "blocking-direct-mock" } +type blockedBtwWithFollowupProvider struct { + mu sync.Mutex + calls int + firstStarted chan struct{} + releaseFirst chan struct{} + secondStarted chan struct{} + releaseSecond chan struct{} + thirdStarted chan struct{} + thirdMessages []providers.Message +} + +func (p *blockedBtwWithFollowupProvider) Chat( + ctx context.Context, + messages []providers.Message, + tools []providers.ToolDefinition, + model string, + opts map[string]any, +) (*providers.LLMResponse, error) { + p.mu.Lock() + p.calls++ + call := p.calls + firstStarted := p.firstStarted + releaseFirst := p.releaseFirst + secondStarted := p.secondStarted + releaseSecond := p.releaseSecond + thirdStarted := p.thirdStarted + if call == 1 && p.firstStarted != nil { + close(p.firstStarted) + } + if call == 2 && p.secondStarted != nil { + close(p.secondStarted) + } + if call == 3 { + p.thirdMessages = append([]providers.Message(nil), messages...) + if p.thirdStarted != nil { + close(p.thirdStarted) + } + } + p.mu.Unlock() + + switch call { + case 1: + _ = firstStarted + select { + case <-releaseFirst: + case <-ctx.Done(): + return nil, ctx.Err() + } + return &providers.LLMResponse{Content: "long turn finished"}, nil + case 2: + _ = secondStarted + select { + case <-releaseSecond: + case <-ctx.Done(): + return nil, ctx.Err() + } + return &providers.LLMResponse{Content: "btw delayed reply"}, nil + default: + _ = thirdStarted + return &providers.LLMResponse{Content: "continued after follow-up"}, nil + } +} + +func (p *blockedBtwWithFollowupProvider) GetDefaultModel() string { + return "blocked-btw-followup-mock" +} + type interruptibleTool struct { name string started chan struct{} @@ -1010,6 +1091,405 @@ func TestAgentLoop_Steering_DirectResponseContinuesWithQueuedMessage(t *testing. } } +func TestAgentLoop_Steering_BtwCommandBypassesQueuedTurn(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "agent-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + provider := &blockingDirectProvider{ + firstStarted: make(chan struct{}), + releaseFirst: make(chan struct{}), + firstResp: "long turn finished", + finalResp: "btw immediate reply", + } + + msgBus := bus.NewMessageBus() + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + + runCtx, cancelRun := context.WithCancel(context.Background()) + defer cancelRun() + runErrCh := make(chan error, 1) + go func() { + runErrCh <- al.Run(runCtx) + }() + + first := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "execute sleep 60, then send OK", + } + btw := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "/btw what is the current progress?", + } + + pubCtx, pubCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer pubCancel() + if err := msgBus.PublishInbound(pubCtx, first); err != nil { + t.Fatalf("publish first inbound: %v", err) + } + + select { + case <-provider.firstStarted: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for first LLM call to start") + } + + messageTool, ok := al.GetRegistry().GetDefaultAgent().Tools.Get("message") + var mt *tools.MessageTool + if !ok { + mt = tools.NewMessageTool() + al.RegisterTool(mt) + } else { + var typeOK bool + mt, typeOK = messageTool.(*tools.MessageTool) + if !typeOK { + t.Fatal("expected message tool type") + } + } + mt.SetSendCallback(func(ctx context.Context, channel, chatID, content, replyToMessageID string) error { + return nil + }) + if result := mt.Execute(context.Background(), map[string]any{ + "channel": "test", + "chat_id": "chat1", + "content": "already sent from busy turn", + }); result == nil || result.IsError { + t.Fatalf("message tool setup result = %+v, want successful send", result) + } + + if err := msgBus.PublishInbound(pubCtx, btw); err != nil { + t.Fatalf("publish /btw inbound: %v", err) + } + + select { + case outbound := <-msgBus.OutboundChan(): + if outbound.Content != "btw immediate reply" { + t.Fatalf("expected /btw reply before long turn completion, got %q", outbound.Content) + } + if outbound.AgentID != routing.DefaultAgentID { + t.Fatalf("expected /btw outbound agent_id %q, got %q", routing.DefaultAgentID, outbound.AgentID) + } + route, _, err := al.resolveMessageRoute(btw) + if err != nil { + t.Fatalf("resolveMessageRoute(/btw) error = %v", err) + } + expectedSessionKey := resolveScopeKey(al.allocateRouteSession(route, btw).SessionKey, btw.SessionKey) + if outbound.SessionKey != expectedSessionKey { + t.Fatalf("expected /btw outbound session_key %q, got %q", expectedSessionKey, outbound.SessionKey) + } + if outbound.Scope == nil || + outbound.Scope.AgentID != routing.DefaultAgentID || + outbound.Scope.Channel != "test" { + t.Fatalf( + "expected /btw outbound scope for agent %q on test channel, got %+v", + routing.DefaultAgentID, + outbound.Scope, + ) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for /btw outbound response") + } + + sessionKey := session.BuildMainSessionKey(routing.DefaultAgentID) + if msgs := al.dequeueSteeringMessagesForScope(sessionKey); len(msgs) != 0 { + t.Fatalf("expected /btw to bypass steering queue, got %v", msgs) + } + + close(provider.releaseFirst) + + select { + case outbound := <-msgBus.OutboundChan(): + t.Fatalf("expected busy turn final response to stay suppressed, got %q", outbound.Content) + case <-time.After(2 * time.Second): + } + + provider.mu.Lock() + callCount := provider.calls + provider.mu.Unlock() + if callCount != 2 { + t.Fatalf("provider call count = %d, want 2", callCount) + } + + cancelRun() + select { + case err := <-runErrCh: + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for Run to stop") + } +} + +func TestAgentLoop_Steering_BtwCommandSurvivesActiveTurnCompletion(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "agent-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + provider := &blockingDirectProvider{ + firstStarted: make(chan struct{}), + releaseFirst: make(chan struct{}), + secondStarted: make(chan struct{}), + releaseSecond: make(chan struct{}), + firstResp: "long turn finished", + finalResp: "btw delayed reply", + } + + msgBus := bus.NewMessageBus() + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + + runCtx, cancelRun := context.WithCancel(context.Background()) + defer cancelRun() + runErrCh := make(chan error, 1) + go func() { + runErrCh <- al.Run(runCtx) + }() + + first := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "execute a long turn", + } + btw := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "/btw can you still answer?", + } + + pubCtx, pubCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer pubCancel() + if err := msgBus.PublishInbound(pubCtx, first); err != nil { + t.Fatalf("publish first inbound: %v", err) + } + + select { + case <-provider.firstStarted: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for first LLM call to start") + } + + if err := msgBus.PublishInbound(pubCtx, btw); err != nil { + t.Fatalf("publish /btw inbound: %v", err) + } + + select { + case <-provider.secondStarted: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for /btw LLM call to start") + } + + close(provider.releaseFirst) + select { + case outbound := <-msgBus.OutboundChan(): + if outbound.Content != "long turn finished" { + t.Fatalf("expected first outbound to be long turn response, got %q", outbound.Content) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for long turn response") + } + + close(provider.releaseSecond) + select { + case outbound := <-msgBus.OutboundChan(): + if outbound.Content != "btw delayed reply" { + t.Fatalf("expected /btw response after drain cancellation, got %q", outbound.Content) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for delayed /btw response") + } + + cancelRun() + select { + case err := <-runErrCh: + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for Run to stop") + } +} + +func TestAgentLoop_Steering_BlockedBtwDoesNotBlockFollowupContinuation(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "agent-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cfg := &config.Config{ + Agents: config.AgentsConfig{ + Defaults: config.AgentDefaults{ + Workspace: tmpDir, + ModelName: "test-model", + MaxTokens: 4096, + MaxToolIterations: 10, + }, + }, + } + + provider := &blockedBtwWithFollowupProvider{ + firstStarted: make(chan struct{}), + releaseFirst: make(chan struct{}), + secondStarted: make(chan struct{}), + releaseSecond: make(chan struct{}), + thirdStarted: make(chan struct{}), + } + + msgBus := bus.NewMessageBus() + al := NewAgentLoop(cfg, msgBus, provider) + useTestSideQuestionProvider(al, provider) + + runCtx, cancelRun := context.WithCancel(context.Background()) + defer cancelRun() + runErrCh := make(chan error, 1) + go func() { + runErrCh <- al.Run(runCtx) + }() + + first := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "execute a long turn", + } + btw := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "/btw this side question blocks", + } + followup := bus.InboundMessage{ + Context: bus.InboundContext{ + Channel: "test", + ChatID: "chat1", + ChatType: "direct", + SenderID: "user1", + }, + Content: "normal follow-up while btw is blocked", + } + + pubCtx, pubCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer pubCancel() + if err := msgBus.PublishInbound(pubCtx, first); err != nil { + t.Fatalf("publish first inbound: %v", err) + } + + select { + case <-provider.firstStarted: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for first LLM call to start") + } + + if err := msgBus.PublishInbound(pubCtx, btw); err != nil { + t.Fatalf("publish /btw inbound: %v", err) + } + select { + case <-provider.secondStarted: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for /btw LLM call to start") + } + + if err := msgBus.PublishInbound(pubCtx, followup); err != nil { + t.Fatalf("publish follow-up inbound: %v", err) + } + close(provider.releaseFirst) + + select { + case outbound := <-msgBus.OutboundChan(): + if outbound.Content != "continued after follow-up" { + t.Fatalf("expected continuation response before /btw release, got %q", outbound.Content) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for follow-up continuation response") + } + + provider.mu.Lock() + thirdMessages := append([]providers.Message(nil), provider.thirdMessages...) + provider.mu.Unlock() + foundFollowup := false + for _, msg := range thirdMessages { + if msg.Role == "user" && msg.Content == followup.Content { + foundFollowup = true + break + } + } + if !foundFollowup { + t.Fatalf("continuation messages did not include follow-up: %+v", thirdMessages) + } + + close(provider.releaseSecond) + select { + case outbound := <-msgBus.OutboundChan(): + if outbound.Content != "btw delayed reply" { + t.Fatalf("expected delayed /btw response, got %q", outbound.Content) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for delayed /btw response") + } + + cancelRun() + select { + case err := <-runErrCh: + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for Run to stop") + } +} + func TestAgentLoop_AgentForSession_UsesStoredScopeMetadata(t *testing.T) { tmpDir, err := os.MkdirTemp("", "agent-test-*") if err != nil { diff --git a/pkg/commands/builtin.go b/pkg/commands/builtin.go index 39e76f752..5cf9425cb 100644 --- a/pkg/commands/builtin.go +++ b/pkg/commands/builtin.go @@ -11,6 +11,7 @@ func BuiltinDefinitions() []Definition { showCommand(), listCommand(), useCommand(), + btwCommand(), switchCommand(), checkCommand(), clearCommand(), diff --git a/pkg/commands/builtin_test.go b/pkg/commands/builtin_test.go index 5fd8dd9bc..79e63d9b7 100644 --- a/pkg/commands/builtin_test.go +++ b/pkg/commands/builtin_test.go @@ -188,3 +188,79 @@ func TestBuiltinUseCommand_PassthroughsToAgentLogic(t *testing.T) { t.Fatalf("/use command=%q, want=%q", res.Command, "use") } } + +func TestBuiltinBtwCommand_UsesSideQuestionRuntime(t *testing.T) { + rt := &Runtime{ + AskSideQuestion: func(ctx context.Context, question string) (string, error) { + if question != "what is 2+2?" { + t.Fatalf("question=%q, want %q", question, "what is 2+2?") + } + return "4", nil + }, + } + defs := BuiltinDefinitions() + ex := NewExecutor(NewRegistry(defs), rt) + + var reply string + res := ex.Execute(context.Background(), Request{ + Text: "/btw what is 2+2?", + Reply: func(text string) error { + reply = text + return nil + }, + }) + if res.Outcome != OutcomeHandled { + t.Fatalf("/btw outcome=%v, want=%v", res.Outcome, OutcomeHandled) + } + if reply != "4" { + t.Fatalf("/btw reply=%q, want=%q", reply, "4") + } +} + +func TestBuiltinBtwCommand_MissingQuestion(t *testing.T) { + defs := BuiltinDefinitions() + ex := NewExecutor(NewRegistry(defs), &Runtime{ + AskSideQuestion: func(context.Context, string) (string, error) { + return "", nil + }, + }) + + var reply string + res := ex.Execute(context.Background(), Request{ + Text: "/btw", + Reply: func(text string) error { + reply = text + return nil + }, + }) + if res.Outcome != OutcomeHandled { + t.Fatalf("/btw outcome=%v, want=%v", res.Outcome, OutcomeHandled) + } + if reply != "Usage: /btw " { + t.Fatalf("/btw reply=%q, want usage message", reply) + } +} + +func TestBuiltinBtwCommand_PreservesQuestionWhitespace(t *testing.T) { + const want = "explain:\n fmt.Println(\"hi\")" + rt := &Runtime{ + AskSideQuestion: func(ctx context.Context, question string) (string, error) { + if question != want { + t.Fatalf("question=%q, want %q", question, want) + } + return "ok", nil + }, + } + defs := BuiltinDefinitions() + ex := NewExecutor(NewRegistry(defs), rt) + + res := ex.Execute(context.Background(), Request{ + Text: "/btw " + want, + Reply: func(text string) error { + return nil + }, + }) + if res.Outcome != OutcomeHandled { + t.Fatalf("/btw outcome=%v, want=%v", res.Outcome, OutcomeHandled) + } +} diff --git a/pkg/commands/cmd_btw.go b/pkg/commands/cmd_btw.go new file mode 100644 index 000000000..509f2a80c --- /dev/null +++ b/pkg/commands/cmd_btw.go @@ -0,0 +1,51 @@ +package commands + +import ( + "context" + "strings" +) + +func btwCommand() Definition { + return Definition{ + Name: "btw", + Description: "Ask a side question without changing session history", + Usage: "/btw ", + Handler: func(ctx context.Context, req Request, rt *Runtime) error { + const emptyAnswerMsg = "The model returned an empty response. This may indicate a provider error or token limit." + + if rt == nil || rt.AskSideQuestion == nil { + return req.Reply(unavailableMsg) + } + + question := sideQuestionText(req.Text) + if question == "" { + return req.Reply("Usage: /btw ") + } + + answer, err := rt.AskSideQuestion(ctx, question) + if err != nil { + return req.Reply(err.Error()) + } + if strings.TrimSpace(answer) == "" { + return req.Reply(emptyAnswerMsg) + } + + return req.Reply(answer) + }, + } +} + +func sideQuestionText(input string) string { + input = strings.TrimSpace(input) + if input == "" { + return "" + } + parts := strings.Fields(input) + if len(parts) < 2 { + return "" + } + if !strings.HasPrefix(input, parts[0]) { + return "" + } + return strings.TrimSpace(input[len(parts[0]):]) +} diff --git a/pkg/commands/runtime.go b/pkg/commands/runtime.go index 5ba6a1bd2..69373f561 100644 --- a/pkg/commands/runtime.go +++ b/pkg/commands/runtime.go @@ -1,6 +1,10 @@ package commands -import "github.com/sipeed/picoclaw/pkg/config" +import ( + "context" + + "github.com/sipeed/picoclaw/pkg/config" +) // Runtime provides runtime dependencies to command handlers. It is constructed // per-request by the agent loop so that per-request state (like session scope) @@ -8,6 +12,7 @@ import "github.com/sipeed/picoclaw/pkg/config" type Runtime struct { Config *config.Config GetModelInfo func() (name, provider string) + AskSideQuestion func(ctx context.Context, question string) (string, error) ListAgentIDs func() []string ListDefinitions func() []Definition ListSkillNames func() []string