Merge pull request #2677 from alexhoshina/feat/runtime-events-plan

Feat/runtime events
This commit is contained in:
Mauro
2026-05-03 23:15:25 +02:00
committed by GitHub
72 changed files with 5484 additions and 998 deletions
+9
View File
@@ -482,6 +482,15 @@
"approval_timeout_ms": 60000
}
},
"events": {
"logging": {
"enabled": true,
"include": ["agent.*"],
"exclude": [],
"min_severity": "info",
"include_payload": false
}
},
"gateway": {
"_comment": "Default log level is set to 'fatal'. Other available options are 'debug', 'info', 'warn' and 'error'.",
"host": "localhost",
+1
View File
@@ -6,6 +6,7 @@ Internal architecture notes for major runtime mechanisms and subsystem design.
- [SubTurn Mechanism](subturn.md): sub-agent coordination, concurrency control, and lifecycle handling.
- [Session System](session-system.md): session scope allocation, JSONL persistence, alias compatibility, and migration. ([ZH](session-system.zh.md))
- [Routing System](routing-system.md): agent dispatch, session policy selection, and light/heavy model routing. ([ZH](routing-system.zh.md))
- [Runtime Events](runtime-events.md): runtime event envelope, centralized event logging, filters, and examples. ([ZH](runtime-events.zh.md))
- [Hook System Guide](hooks/README.md): current hook architecture and protocol details.
- [Agent Refactor](agent-refactor/README.md): notes and checkpoints for the agent refactor work.
+21 -20
View File
@@ -13,7 +13,7 @@ The repository no longer ships standalone example source files. The Go and Pytho
| Type | Interface | Stage | Can modify data |
| --- | --- | --- | --- |
| Observer | `EventObserver` | EventBus broadcast | No |
| Observer | `RuntimeEventObserver` | Runtime event bus broadcast | No |
| LLM interceptor | `LLMInterceptor` | `before_llm` / `after_llm` | Yes |
| Tool interceptor | `ToolInterceptor` | `before_tool` / `after_tool` | Yes |
| Tool approver | `ToolApprover` | `approve_tool` | No, returns allow/deny |
@@ -136,9 +136,9 @@ Example:
"/tmp/review_gate.py"
],
"observe": [
"tool_exec_start",
"tool_exec_end",
"tool_exec_skipped"
"agent.tool.exec_start",
"agent.tool.exec_end",
"agent.tool.exec_skipped"
],
"intercept": [
"before_tool",
@@ -174,7 +174,7 @@ Both examples are intentionally safe: they only log, never rewrite, and never de
The following is a minimal logging hook for in-process use. It implements:
1. `EventObserver`
1. `RuntimeEventObserver`
2. `LLMInterceptor`
3. `ToolInterceptor`
4. `ToolApprover`
@@ -196,6 +196,7 @@ import (
"time"
"github.com/sipeed/picoclaw/pkg/agent"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
)
@@ -217,12 +218,12 @@ func NewExampleLoggerHook(opts ExampleLoggerHookOptions) *ExampleLoggerHook {
}
}
func (h *ExampleLoggerHook) OnEvent(ctx context.Context, evt agent.Event) error {
func (h *ExampleLoggerHook) OnRuntimeEvent(ctx context.Context, evt runtimeevents.Event) error {
_ = ctx
if h == nil || !h.logEvents {
return nil
}
h.record("event", evt.Meta, map[string]any{
h.record("event", evt.Scope, map[string]any{
"event": evt.Kind.String(),
"payload": evt.Payload,
}, nil)
@@ -275,7 +276,7 @@ func (h *ExampleLoggerHook) ApproveTool(
return decision, nil
}
func (h *ExampleLoggerHook) record(stage string, meta agent.EventMeta, payload any, decision any) {
func (h *ExampleLoggerHook) record(stage string, refs any, payload any, decision any) {
logger.InfoCF("hooks", "Example hook observed", map[string]any{
"stage": stage,
})
@@ -286,7 +287,7 @@ func (h *ExampleLoggerHook) record(stage string, meta agent.EventMeta, payload a
entry := map[string]any{
"ts": time.Now().UTC(),
"stage": stage,
"meta": meta,
"refs": refs,
"payload": payload,
"decision": decision,
}
@@ -428,7 +429,7 @@ If you only see `before_llm` and `after_llm`, that usually means the request did
The following script is a minimal process-hook example. It uses only the Python standard library and supports:
1. `hook.hello`
2. `hook.event`
2. `hook.runtime_event`
3. `hook.before_tool`
4. `hook.approve_tool`
@@ -564,8 +565,8 @@ def main() -> int:
})
if not message_id:
if method == "hook.event" and LOG_EVENTS:
log_stderr(f"observed event: {params.get('Kind')}")
if method == "hook.runtime_event" and LOG_EVENTS:
log_stderr(f"observed event: {params.get('kind')}")
continue
try:
@@ -606,9 +607,9 @@ if __name__ == "__main__":
"/abs/path/to/review_gate.py"
],
"observe": [
"tool_exec_start",
"tool_exec_end",
"tool_exec_skipped"
"agent.tool.exec_start",
"agent.tool.exec_end",
"agent.tool.exec_skipped"
],
"intercept": [
"before_tool",
@@ -626,7 +627,7 @@ if __name__ == "__main__":
### Environment Variables
- `PICOCLAW_HOOK_LOG_EVENTS`
Whether to write `hook.event` summaries to `stderr`, enabled by default
Whether to write `hook.runtime_event` summaries to `stderr`, enabled by default
- `PICOCLAW_HOOK_LOG_FILE`
Path to an external log file. When set, the script appends inbound hook requests, notifications, and outbound responses as JSON Lines
@@ -645,7 +646,7 @@ Typical interpretation:
- Only `hook.hello`
The process started and completed the handshake, but no business hook request has arrived yet
- `hook.event`
- `hook.runtime_event`
The `observe` configuration is working
- `hook.before_tool`
The `intercept: ["before_tool", ...]` configuration is working
@@ -664,7 +665,7 @@ A complete sample:
```json
{"ts":"2026-03-21T14:12:00+00:00","direction":"in","id":1,"method":"hook.hello","params":{"name":"py_review_gate","version":1,"modes":["observe","tool","approve"]},"notification":false}
{"ts":"2026-03-21T14:12:00+00:00","direction":"out","id":1,"response":{"ok":true,"name":"python-review-gate"},"error":null}
{"ts":"2026-03-21T14:12:05+00:00","direction":"in","id":0,"method":"hook.event","params":{"Kind":"tool_exec_start"},"notification":true}
{"ts":"2026-03-21T14:12:05+00:00","direction":"in","id":0,"method":"hook.runtime_event","params":{"kind":"agent.tool.exec_start"},"notification":true}
{"ts":"2026-03-21T14:12:05+00:00","direction":"in","id":7,"method":"hook.before_tool","params":{"tool":"echo_text","arguments":{"text":"hello"}},"notification":false}
{"ts":"2026-03-21T14:12:05+00:00","direction":"out","id":7,"response":{"action":"continue"},"error":null}
```
@@ -672,7 +673,7 @@ A complete sample:
Additional notes:
- Timestamps are UTC
- `notification=true` means it was a notification such as `hook.event`, which does not expect a response
- `notification=true` means it was a notification such as `hook.runtime_event`, which does not expect a response
- `id` increases within a single hook process; if the process restarts, the counter starts over
## Process-Hook Protocol
@@ -681,7 +682,7 @@ Current process hooks use `JSON-RPC over stdio`:
- PicoClaw starts the external process
- Requests and responses are exchanged as one JSON message per line
- `hook.event` is a notification and does not need a response
- `hook.runtime_event` is a notification and does not need a response
- `hook.before_llm`, `hook.after_llm`, `hook.before_tool`, `hook.after_tool`, and `hook.approve_tool` are request/response calls
The host does not currently accept new RPCs initiated by the process hook. In practice, that means an external hook can only respond to PicoClaw calls; it cannot call back into the host to send channel messages.
+21 -20
View File
@@ -13,7 +13,7 @@
| 类型 | 接口 | 作用阶段 | 能否改写 |
| --- | --- | --- | --- |
| 观察型 | `EventObserver` | EventBus 广播事件时 | 否 |
| 观察型 | `RuntimeEventObserver` | runtime event bus 广播事件时 | 否 |
| LLM 拦截型 | `LLMInterceptor` | `before_llm` / `after_llm` | 是 |
| Tool 拦截型 | `ToolInterceptor` | `before_tool` / `after_tool` | 是 |
| Tool 审批型 | `ToolApprover` | `approve_tool` | 否,返回批准/拒绝 |
@@ -136,9 +136,9 @@ HookManager 的排序规则是:
"/tmp/review_gate.py"
],
"observe": [
"tool_exec_start",
"tool_exec_end",
"tool_exec_skipped"
"agent.tool.exec_start",
"agent.tool.exec_end",
"agent.tool.exec_skipped"
],
"intercept": [
"before_tool",
@@ -174,7 +174,7 @@ tail -f /tmp/picoclaw-hook-review-gate.log
下面这段代码是一个最小的“记录型” in-process hook。它实现了:
1. `EventObserver`
1. `RuntimeEventObserver`
2. `LLMInterceptor`
3. `ToolInterceptor`
4. `ToolApprover`
@@ -196,6 +196,7 @@ import (
"time"
"github.com/sipeed/picoclaw/pkg/agent"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
)
@@ -217,12 +218,12 @@ func NewExampleLoggerHook(opts ExampleLoggerHookOptions) *ExampleLoggerHook {
}
}
func (h *ExampleLoggerHook) OnEvent(ctx context.Context, evt agent.Event) error {
func (h *ExampleLoggerHook) OnRuntimeEvent(ctx context.Context, evt runtimeevents.Event) error {
_ = ctx
if h == nil || !h.logEvents {
return nil
}
h.record("event", evt.Meta, map[string]any{
h.record("event", evt.Scope, map[string]any{
"event": evt.Kind.String(),
"payload": evt.Payload,
}, nil)
@@ -275,7 +276,7 @@ func (h *ExampleLoggerHook) ApproveTool(
return decision, nil
}
func (h *ExampleLoggerHook) record(stage string, meta agent.EventMeta, payload any, decision any) {
func (h *ExampleLoggerHook) record(stage string, refs any, payload any, decision any) {
logger.InfoCF("hooks", "Example hook observed", map[string]any{
"stage": stage,
})
@@ -286,7 +287,7 @@ func (h *ExampleLoggerHook) record(stage string, meta agent.EventMeta, payload a
entry := map[string]any{
"ts": time.Now().UTC(),
"stage": stage,
"meta": meta,
"refs": refs,
"payload": payload,
"decision": decision,
}
@@ -428,7 +429,7 @@ func init() {
下面这段脚本是一个最小的 `process hook` 示例。它只使用 Python 标准库,支持:
1. `hook.hello`
2. `hook.event`
2. `hook.runtime_event`
3. `hook.before_tool`
4. `hook.approve_tool`
@@ -564,8 +565,8 @@ def main() -> int:
})
if not message_id:
if method == "hook.event" and LOG_EVENTS:
log_stderr(f"observed event: {params.get('Kind')}")
if method == "hook.runtime_event" and LOG_EVENTS:
log_stderr(f"observed event: {params.get('kind')}")
continue
try:
@@ -606,9 +607,9 @@ if __name__ == "__main__":
"/abs/path/to/review_gate.py"
],
"observe": [
"tool_exec_start",
"tool_exec_end",
"tool_exec_skipped"
"agent.tool.exec_start",
"agent.tool.exec_end",
"agent.tool.exec_skipped"
],
"intercept": [
"before_tool",
@@ -626,7 +627,7 @@ if __name__ == "__main__":
### 环境变量
- `PICOCLAW_HOOK_LOG_EVENTS`
是否把 `hook.event` 写到 `stderr`,默认开启
是否把 `hook.runtime_event` 写到 `stderr`,默认开启
- `PICOCLAW_HOOK_LOG_FILE`
外部日志文件路径。设置后,脚本会把收到的 hook 请求、notification 和返回结果按 JSON Lines 追加到该文件
@@ -645,7 +646,7 @@ if __name__ == "__main__":
- 只看到 `hook.hello`
说明进程启动并完成握手了,但还没有新的业务 hook 请求真正打进来
- 看到 `hook.event`
- 看到 `hook.runtime_event`
说明 `observe` 配置生效了
- 看到 `hook.before_tool`
说明 `intercept: ["before_tool", ...]` 生效了
@@ -664,7 +665,7 @@ if __name__ == "__main__":
```json
{"ts":"2026-03-21T14:12:00+00:00","direction":"in","id":1,"method":"hook.hello","params":{"name":"py_review_gate","version":1,"modes":["observe","tool","approve"]},"notification":false}
{"ts":"2026-03-21T14:12:00+00:00","direction":"out","id":1,"response":{"ok":true,"name":"python-review-gate"},"error":null}
{"ts":"2026-03-21T14:12:05+00:00","direction":"in","id":0,"method":"hook.event","params":{"Kind":"tool_exec_start"},"notification":true}
{"ts":"2026-03-21T14:12:05+00:00","direction":"in","id":0,"method":"hook.runtime_event","params":{"kind":"agent.tool.exec_start"},"notification":true}
{"ts":"2026-03-21T14:12:05+00:00","direction":"in","id":7,"method":"hook.before_tool","params":{"tool":"echo_text","arguments":{"text":"hello"}},"notification":false}
{"ts":"2026-03-21T14:12:05+00:00","direction":"out","id":7,"response":{"action":"continue"},"error":null}
```
@@ -672,7 +673,7 @@ if __name__ == "__main__":
补充说明:
- 时间戳是 UTC,不是本地时区
- `notification=true` 表示这是 `hook.event` 这类不需要响应的通知
- `notification=true` 表示这是 `hook.runtime_event` 这类不需要响应的通知
- `id` 会随着当前进程内的请求递增;如果 hook 进程重启,计数会重新开始
## Process Hook 协议约定
@@ -681,7 +682,7 @@ if __name__ == "__main__":
- PicoClaw 启动外部进程
- 请求和响应都按“一行一个 JSON 消息”传输
- `hook.event` 是 notification,不需要响应
- `hook.runtime_event` 是 notification,不需要响应
- `hook.before_llm` / `hook.after_llm` / `hook.before_tool` / `hook.after_tool` / `hook.approve_tool` 是 request/response
当前宿主不会接受 process hook 主动发起的新 RPC。也就是说,外部 hook 现在只能“响应 PicoClaw 的调用”,不能反向调用宿主去发送 channel 消息。
+24 -15
View File
@@ -437,21 +437,28 @@ Approval hook for deciding whether to allow execution of sensitive tools.
---
## 7. `hook.event` (notification)
## 7. `hook.runtime_event` (notification)
Observer event, broadcast only, no response required. `id` is `0` or absent.
Runtime observer event, broadcast only, no response required. `id` is `0` or absent.
```json
{
"jsonrpc": "2.0",
"method": "hook.event",
"method": "hook.runtime_event",
"params": {
"Kind": "tool_exec_start",
"Meta": {
"AgentID": "agent-1",
"TurnID": "turn-1"
"kind": "agent.tool.exec_start",
"source": {
"component": "agent",
"name": "agent-1"
},
"Payload": {
"scope": {
"agent_id": "agent-1",
"session_key": "session-1",
"turn_id": "turn-1",
"channel": "cli",
"chat_id": "chat-1"
},
"payload": {
"Tool": "echo_text",
"Arguments": {"text": "hello"}
}
@@ -460,12 +467,14 @@ Observer event, broadcast only, no response required. `id` is `0` or absent.
```
Common `Kind` values:
- `turn_start` / `turn_end`
- `llm_request` / `llm_response`
- `tool_exec_start` / `tool_exec_end` / `tool_exec_skipped`
- `steering_injected`
- `interrupt_received`
- `error`
- `agent.turn.start` / `agent.turn.end`
- `agent.llm.request` / `agent.llm.response`
- `agent.tool.exec_start` / `agent.tool.exec_end` / `agent.tool.exec_skipped`
- `agent.steering.injected`
- `agent.interrupt.received`
- `agent.error`
Legacy observe configuration names such as `turn_end` and `tool_exec_start` are still accepted and normalized to runtime event names. New process hook notifications use `hook.runtime_event`.
---
@@ -565,4 +574,4 @@ def handle_before_tool(params: dict) -> dict:
return {"action": "continue"}
```
This way, external hooks can fully implement plugin tools without registering any tool implementation inside PicoClaw.
This way, external hooks can fully implement plugin tools without registering any tool implementation inside PicoClaw.
@@ -437,21 +437,28 @@
---
## 7. `hook.event`notification
## 7. `hook.runtime_event`notification
观察型事件,仅广播,无需响应。`id``0` 或不存在。
runtime 观察型事件,仅广播,无需响应。`id``0` 或不存在。
```json
{
"jsonrpc": "2.0",
"method": "hook.event",
"method": "hook.runtime_event",
"params": {
"Kind": "tool_exec_start",
"Meta": {
"AgentID": "agent-1",
"TurnID": "turn-1"
"kind": "agent.tool.exec_start",
"source": {
"component": "agent",
"name": "agent-1"
},
"Payload": {
"scope": {
"agent_id": "agent-1",
"session_key": "session-1",
"turn_id": "turn-1",
"channel": "cli",
"chat_id": "chat-1"
},
"payload": {
"Tool": "echo_text",
"Arguments": {"text": "hello"}
}
@@ -460,12 +467,14 @@
```
常见 `Kind` 值:
- `turn_start` / `turn_end`
- `llm_request` / `llm_response`
- `tool_exec_start` / `tool_exec_end` / `tool_exec_skipped`
- `steering_injected`
- `interrupt_received`
- `error`
- `agent.turn.start` / `agent.turn.end`
- `agent.llm.request` / `agent.llm.response`
- `agent.tool.exec_start` / `agent.tool.exec_end` / `agent.tool.exec_skipped`
- `agent.steering.injected`
- `agent.interrupt.received`
- `agent.error`
旧 observe 配置名如 `turn_end``tool_exec_start` 仍然可用,并会归一化为 runtime event 名称。新的 process hook 通知使用 `hook.runtime_event`
---
@@ -565,4 +574,4 @@ def handle_before_tool(params: dict) -> dict:
return {"action": "continue"}
```
通过这种方式,外部 hook 可以完全实现插件工具,无需在 PicoClaw 内部注册任何工具实现。
通过这种方式,外部 hook 可以完全实现插件工具,无需在 PicoClaw 内部注册任何工具实现。
+216
View File
@@ -0,0 +1,216 @@
# Runtime Events And Event Logging
PicoClaw runtime events are the read-only observation surface for agent, channel, gateway, message bus, and MCP activity. Publishing events and printing logs are separate responsibilities:
- Event publishing: components publish `pkg/events.Event` values to the runtime event bus for hooks, tests, diagnostics, and future UI consumers.
- Event logging: the built-in runtime event logger subscribes to the same bus and prints only the events selected by configuration.
This keeps runtime code focused on publishing events while log policy stays centralized.
## Default Behavior
By default, only `agent.*` events are printed:
```json
{
"events": {
"logging": {
"enabled": true,
"include": ["agent.*"],
"min_severity": "info",
"include_payload": false
}
}
}
```
This preserves the previous behavior: agent turn, LLM, tool, steering, subturn, and error events appear in logs. Channel, gateway, bus, and MCP events are still published to the runtime event bus, but they are not printed unless configured.
## Configuration
The configuration lives under `events.logging` in `config.json`:
| Field | Type | Default | Description |
| ----- | ---- | ------- | ----------- |
| `enabled` | bool | `true` | Enables the built-in event logger subscription |
| `include` | string[] | `["agent.*"]` | Event kinds to print; supports exact matches, `*`, and patterns such as `agent.*` |
| `exclude` | string[] | `[]` | Event kinds to suppress after include matching |
| `min_severity` | string | `info` | Minimum severity: `debug`, `info`, `warn`, or `error` |
| `include_payload` | bool | `false` | Adds raw event payloads to log fields |
`include_payload` is disabled by default. Agent events print safe summary fields such as `user_len`, `args_count`, and `content_len` instead of full user messages or tool arguments. Enable raw payload logging only for short-lived diagnostics in a trusted log environment.
## Matching Rules
`include` and `exclude` match the `Event.Kind` string:
```json
{
"events": {
"logging": {
"include": ["gateway.*", "channel.lifecycle.*", "agent.error"],
"exclude": ["gateway.ready"],
"min_severity": "info"
}
}
}
```
Common patterns:
- `["agent.*"]`: print agent events only.
- `["*"]`: print all runtime events.
- `["gateway.*", "channel.*"]`: print gateway and channel events only.
- `exclude: ["agent.llm.delta"]`: suppress high-volume streaming delta events.
- `min_severity: "warn"`: print warn and error events only.
## Environment Variables
The same settings can be overridden with environment variables:
```bash
PICOCLAW_EVENTS_LOGGING_ENABLED=true
PICOCLAW_EVENTS_LOGGING_INCLUDE="gateway.*,channel.lifecycle.*"
PICOCLAW_EVENTS_LOGGING_EXCLUDE="gateway.ready"
PICOCLAW_EVENTS_LOGGING_MIN_SEVERITY=info
PICOCLAW_EVENTS_LOGGING_INCLUDE_PAYLOAD=false
```
`include` and `exclude` use comma-separated values.
## Event Names And Triggers
The table below lists the current runtime event kinds, when they are emitted, and the most useful event details. `Source`, `Scope`, and `Correlation` are shared envelope fields that may appear on every event. The "Details" column refers to useful payload fields or log summary fields.
### Agent
| Event | Trigger | Details |
| ----- | ------- | ------- |
| `agent.turn.start` | An agent starts processing one user or system input after the turn scope has been created. | `user_len`, `media_count`; scope usually includes `agent_id`, `session_key`, `turn_id`, `channel`, `chat_id`, `message_id` |
| `agent.turn.end` | A turn exits, whether it completed, errored, or was hard-aborted. | `status` (`completed`/`error`/`aborted`), `iterations_total`, `duration_ms`, `final_len` |
| `agent.llm.request` | Before each LLM provider request. | `model`, `messages`, `tools`, `max_tokens` |
| `agent.llm.delta` | Reserved for streaming LLM deltas; the kind is defined, but the current implementation has no natural emit site. | `content_delta_len`, `reasoning_delta_len` |
| `agent.llm.response` | After the LLM provider returns a complete response. | `content_len`, `tool_calls`, `has_reasoning` |
| `agent.llm.retry` | Before retrying an LLM request after context, rate-limit, transient provider, or fallback handling. | `attempt`, `max_retries`, `reason`, `error`, `backoff_ms` |
| `agent.context.compress` | Agent context history is compressed, for example during proactive budget checks or LLM retry handling. | `reason`, `dropped_messages`, `remaining_messages` |
| `agent.session.summarize` | Async session history summarization completes. | `summarized_messages`, `kept_messages`, `summary_len`, `omitted_oversized` |
| `agent.tool.exec_start` | Before the agent executes a tool call. | `tool`, `args_count`; full arguments are not logged by default |
| `agent.tool.exec_end` | After a tool call completes, including successful results, tool errors, and async results. | `tool`, `duration_ms`, `for_llm_len`, `for_user_len`, `is_error`, `async` |
| `agent.tool.exec_skipped` | A tool call is skipped because the tool is unavailable, arguments are invalid, or turn control logic requires skipping it. | `tool`, `reason` |
| `agent.steering.injected` | Queued steering messages are injected into the next LLM context. | `count`, `total_content_len` |
| `agent.follow_up.queued` | An async tool result is queued back into the inbound/follow-up flow. | `source_tool`, `content_len` |
| `agent.interrupt.received` | A turn accepts steering, graceful interrupt, or hard-abort input. | `interrupt_kind`, `role`, `content_len`, `queue_depth`, `hint_len` |
| `agent.subturn.spawn` | A parent turn creates a child turn/subagent. | `child_agent_id`, `label`, `parent_turn_id` |
| `agent.subturn.end` | A child turn ends. | `child_agent_id`, `status` |
| `agent.subturn.result_delivered` | A child turn result is delivered to the target channel/chat. | `target_channel`, `target_chat_id`, `content_len` |
| `agent.subturn.orphan` | A child turn result cannot be delivered or cannot be associated back to its parent turn. | `parent_turn_id`, `child_turn_id`, `reason` |
| `agent.error` | Agent execution reports an error. | `stage`, `error` |
### Channel
| Event | Trigger | Details |
| ----- | ------- | ------- |
| `channel.lifecycle.initialized` | The channel manager creates and registers a channel instance from config. | `type`; scope includes `channel` |
| `channel.lifecycle.started` | Channel `Start()` succeeds and worker goroutines have been started; added channels during hot reload also emit it. | `type` |
| `channel.lifecycle.start_failed` | Channel `Start()` fails. | `type`, `error`; severity is `error` |
| `channel.lifecycle.stopped` | Channel `Stop()` succeeds. | `type` |
| `channel.webhook.registered` | A channel webhook handler is registered on the shared HTTP mux. | `type`; scope includes `channel` |
| `channel.webhook.unregistered` | A channel webhook handler is removed from the shared HTTP mux. | `type`; scope includes `channel` |
| `channel.message.outbound_queued` | An outbound text or media message is queued into its channel worker. | `media`, `content_len`, `reply_to_message_id`; scope comes from the original inbound context |
| `channel.message.outbound_sent` | An outbound text or media message is sent successfully, or a placeholder edit handled the response. | `media`, `content_len`, `message_ids`, `reply_to_message_id` |
| `channel.message.outbound_failed` | An outbound text or media message exhausts retries or hits a permanent failure. | `media`, `content_len`, `retries`, `error`, `reply_to_message_id`; severity is `error` |
| `channel.rate_limited` | A channel worker is waiting for a rate-limit token and the context is canceled, interrupting this delivery. | `media`, `content_len`, `error`, `reply_to_message_id`; severity is `warn` |
### Message Bus
| Event | Trigger | Details |
| ----- | ------- | ------- |
| `bus.publish.failed` | Publishing inbound, outbound, media, audio, or voice-control data fails, or required context is missing. | `stream`, `error`; scope is derived from message context when possible |
| `bus.close.started` | Message bus shutdown begins. | `drained` is usually `0` |
| `bus.close.drained` | Shutdown waits for buffered messages to drain and at least one buffered message was drained. | `drained` |
| `bus.close.completed` | Message bus shutdown completes. | `drained` |
### Gateway
| Event | Trigger | Details |
| ----- | ------- | ------- |
| `gateway.start` | Gateway startup reaches the agent/runtime event bus/bootstrap binding point. | `duration_ms` |
| `gateway.ready` | Gateway services, channel manager, HTTP server, and other core services are ready. | `duration_ms` |
| `gateway.shutdown` | Gateway shutdown begins. | No fixed payload; envelope fields may be the only fields |
| `gateway.reload.started` | Hot reload execution starts. | `duration_ms` |
| `gateway.reload.completed` | Hot reload completes successfully. | `duration_ms` |
| `gateway.reload.failed` | Hot reload fails. | `duration_ms`, `error`; severity is `error` |
### MCP
| Event | Trigger | Details |
| ----- | ------- | ------- |
| `mcp.server.connecting` | The MCP manager is about to connect to a server. | `server`, `type`, `url`, `command` |
| `mcp.server.connected` | An MCP server connects and its tool list has been initialized. | `server`, `type`, `url`, `command`, `tool_count` |
| `mcp.server.failed` | An MCP server connection fails, or the manager is closed before connecting. | `server`, `type`, `url`, `command`, `error`; severity is `error` |
| `mcp.tool.discovered` | A tool from an MCP server is discovered and registered. | `server`, `type`, `url`, `command`, `tool` |
| `mcp.tool.call.start` | The MCP tool wrapper starts a remote tool call. | `server`, `tool`; when emitted inside an agent turn, scope includes turn/chat information |
| `mcp.tool.call.end` | The MCP tool wrapper finishes a remote tool call, including failures. | `server`, `tool`, `duration_ms`, `is_error`, `error` |
## Log Fields
Runtime event logs include stable envelope fields when available:
- `event_id`
- `event_kind`
- `severity`
- `event_time`
- `source_component`
- `source_name`
- `agent_id`
- `session_key`
- `turn_id`
- `channel`
- `account`
- `chat_id`
- `topic_id`
- `space_id`
- `space_type`
- `chat_type`
- `sender_id`
- `message_id`
- `trace_id`
- `parent_turn_id`
- `request_id`
- `reply_to_id`
Agent events add safe payload summaries:
| Event | Summary fields |
| ----- | -------------- |
| `agent.turn.start` | `user_len`, `media_count` |
| `agent.turn.end` | `status`, `iterations_total`, `duration_ms`, `final_len` |
| `agent.llm.request` | `model`, `messages`, `tools`, `max_tokens` |
| `agent.llm.delta` | `content_delta_len`, `reasoning_delta_len` |
| `agent.llm.response` | `content_len`, `tool_calls`, `has_reasoning` |
| `agent.llm.retry` | `attempt`, `max_retries`, `reason`, `error`, `backoff_ms` |
| `agent.context.compress` | `reason`, `dropped_messages`, `remaining_messages` |
| `agent.session.summarize` | `summarized_messages`, `kept_messages`, `summary_len`, `omitted_oversized` |
| `agent.tool.exec_start` | `tool`, `args_count` |
| `agent.tool.exec_end` | `tool`, `duration_ms`, `for_llm_len`, `for_user_len`, `is_error`, `async` |
| `agent.tool.exec_skipped` | `tool`, `reason` |
| `agent.steering.injected` | `count`, `total_content_len` |
| `agent.follow_up.queued` | `source_tool`, `content_len` |
| `agent.interrupt.received` | `interrupt_kind`, `role`, `content_len`, `queue_depth`, `hint_len` |
| `agent.subturn.spawn` | `child_agent_id`, `label` |
| `agent.subturn.end` | `child_agent_id`, `status` |
| `agent.subturn.result_delivered` | `target_channel`, `target_chat_id`, `content_len` |
| `agent.subturn.orphan` | `parent_turn_id`, `child_turn_id`, `reason` |
| `agent.error` | `stage`, `error` |
## Event Domains
Runtime event kinds are defined in `pkg/events/kind.go`. Event logging can select these domains:
- `agent.*`: agent turn, LLM, tool, context, steering, interrupt, subturn, and error events.
- `channel.*`: channel lifecycle, webhook registration, outbound queued/sent/failed, and rate limiting.
- `bus.*`: publish failures and close lifecycle.
- `gateway.*`: start, ready, shutdown, and reload lifecycle.
- `mcp.*`: MCP server connection, tool discovery, and tool call events.
See [`../../config/config.example.json`](../../config/config.example.json) for the default event logging example.
+216
View File
@@ -0,0 +1,216 @@
# Runtime Events 与事件日志
PicoClaw 的 runtime event 是运行时观察面,用来描述 agent、channel、gateway、message bus、MCP 等组件发生了什么。事件发布和日志打印是两件事:
- 事件发布:组件把 `pkg/events.Event` 发布到 runtime event bus,供 hook、测试、调试工具或后续 UI 消费。
- 事件日志:内置 runtime event logger 订阅同一个 bus,并按配置把匹配的事件打印到日志。
这样可以让业务流程继续只负责发布事件,日志策略统一收口到一个地方。
## 默认行为
默认配置只打印 `agent.*` 事件:
```json
{
"events": {
"logging": {
"enabled": true,
"include": ["agent.*"],
"min_severity": "info",
"include_payload": false
}
}
}
```
这个默认值保持了旧行为:agent turn、LLM、tool、steering、subturn、error 等事件会出现在日志中;channel、gateway、bus、MCP 事件仍会发布到 runtime event bus,但默认不打印,避免网关启动和消息投递日志过于嘈杂。
## 配置项
配置位于 `config.json``events.logging`
| 字段 | 类型 | 默认值 | 说明 |
| ---- | ---- | ------ | ---- |
| `enabled` | bool | `true` | 是否启用内置事件日志订阅器 |
| `include` | string[] | `["agent.*"]` | 允许打印的事件 kind,支持精确匹配、`*``agent.*` 这类 glob/prefix |
| `exclude` | string[] | `[]` | 在 include 命中后排除的事件 kind,匹配规则同 include |
| `min_severity` | string | `info` | 最低打印级别:`debug``info``warn``error` |
| `include_payload` | bool | `false` | 是否把原始 payload 放进日志字段 |
`include_payload` 默认关闭。agent 事件日志会输出安全摘要字段,例如 `user_len``args_count``content_len`,不会默认输出完整用户消息或工具参数。只有在排查问题、并且确认日志存储环境可信时,才建议临时打开 `include_payload`
## 匹配规则
`include``exclude` 都匹配 `Event.Kind` 字符串:
```json
{
"events": {
"logging": {
"include": ["gateway.*", "channel.lifecycle.*", "agent.error"],
"exclude": ["gateway.ready"],
"min_severity": "info"
}
}
}
```
常用写法:
- `["agent.*"]`:只打印 agent 事件。
- `["*"]`:打印所有 runtime events。
- `["gateway.*", "channel.*"]`:只打印 gateway 和 channel 事件。
- `exclude: ["agent.llm.delta"]`:排除高频流式 delta 事件。
- `min_severity: "warn"`:只打印 warn/error 事件。
## 环境变量
同一组配置也可以通过环境变量覆盖,适合临时调试:
```bash
PICOCLAW_EVENTS_LOGGING_ENABLED=true
PICOCLAW_EVENTS_LOGGING_INCLUDE="gateway.*,channel.lifecycle.*"
PICOCLAW_EVENTS_LOGGING_EXCLUDE="gateway.ready"
PICOCLAW_EVENTS_LOGGING_MIN_SEVERITY=info
PICOCLAW_EVENTS_LOGGING_INCLUDE_PAYLOAD=false
```
`include``exclude` 的环境变量使用逗号分隔。
## 事件名称与触发时机
下面列出当前 runtime event kind、触发时机和主要事件详情。`Source``Scope``Correlation` 是所有事件都可能携带的 envelope 字段;表里的“主要详情”指 payload 或日志摘要中最有用的字段。
### Agent
| 事件名 | 触发时机 | 主要详情 |
| ------ | -------- | -------- |
| `agent.turn.start` | agent 开始处理一次用户输入或系统输入,turn scope 已创建时 | `user_len`, `media_count`; scope 通常包含 `agent_id`, `session_key`, `turn_id`, `channel`, `chat_id`, `message_id` |
| `agent.turn.end` | 一次 turn 退出时,无论完成、报错还是 hard abort | `status` (`completed`/`error`/`aborted`), `iterations_total`, `duration_ms`, `final_len` |
| `agent.llm.request` | 每次调用 LLM provider 前 | `model`, `messages`, `tools`, `max_tokens` |
| `agent.llm.delta` | 预留给流式 LLM delta;当前实现已定义但没有自然发送点 | `content_delta_len`, `reasoning_delta_len` |
| `agent.llm.response` | LLM provider 返回完整响应后 | `content_len`, `tool_calls`, `has_reasoning` |
| `agent.llm.retry` | LLM 请求因上下文、限流、临时错误等原因准备重试前 | `attempt`, `max_retries`, `reason`, `error`, `backoff_ms` |
| `agent.context.compress` | 上下文历史被压缩时,例如主动预算检查或 LLM retry 处理 | `reason`, `dropped_messages`, `remaining_messages` |
| `agent.session.summarize` | 会话历史异步摘要完成时 | `summarized_messages`, `kept_messages`, `summary_len`, `omitted_oversized` |
| `agent.tool.exec_start` | agent 准备执行一个工具调用前 | `tool`, `args_count`; 默认不打印完整参数 |
| `agent.tool.exec_end` | 工具调用完成后,包括成功、工具错误和 async 结果 | `tool`, `duration_ms`, `for_llm_len`, `for_user_len`, `is_error`, `async` |
| `agent.tool.exec_skipped` | 工具调用被跳过时,例如工具不可用、参数无效或 turn 控制逻辑要求跳过 | `tool`, `reason` |
| `agent.steering.injected` | queued steering message 被注入下一轮 LLM 上下文时 | `count`, `total_content_len` |
| `agent.follow_up.queued` | async 工具结果被重新排入 inbound/follow-up 流程时 | `source_tool`, `content_len` |
| `agent.interrupt.received` | turn 接受 steering、graceful interrupt 或 hard abort 指令时 | `interrupt_kind`, `role`, `content_len`, `queue_depth`, `hint_len` |
| `agent.subturn.spawn` | 父 turn 创建子 turn/subagent 时 | `child_agent_id`, `label`, `parent_turn_id` |
| `agent.subturn.end` | 子 turn 结束时 | `child_agent_id`, `status` |
| `agent.subturn.result_delivered` | 子 turn 结果成功投递到目标 channel/chat 时 | `target_channel`, `target_chat_id`, `content_len` |
| `agent.subturn.orphan` | 子 turn 结果无法投递或无法关联回父 turn 时 | `parent_turn_id`, `child_turn_id`, `reason` |
| `agent.error` | agent 执行流程报告错误时 | `stage`, `error` |
### Channel
| 事件名 | 触发时机 | 主要详情 |
| ------ | -------- | -------- |
| `channel.lifecycle.initialized` | channel manager 根据配置创建并注册 channel 实例后 | `type`; scope 包含 `channel` |
| `channel.lifecycle.started` | channel `Start()` 成功,worker 已启动时;热重载新增 channel 也会触发 | `type` |
| `channel.lifecycle.start_failed` | channel `Start()` 失败时 | `type`, `error`; severity 为 `error` |
| `channel.lifecycle.stopped` | channel `Stop()` 成功后 | `type` |
| `channel.webhook.registered` | channel 的 webhook handler 被注册到共享 HTTP mux 时 | `type`; scope 包含 `channel` |
| `channel.webhook.unregistered` | channel 的 webhook handler 从共享 HTTP mux 移除时 | `type`; scope 包含 `channel` |
| `channel.message.outbound_queued` | outbound 文本或媒体消息被放入对应 channel worker 队列时 | `media`, `content_len`, `reply_to_message_id`; scope 来自原 inbound context |
| `channel.message.outbound_sent` | outbound 文本或媒体消息成功发送,或 placeholder edit 已处理响应时 | `media`, `content_len`, `message_ids`, `reply_to_message_id` |
| `channel.message.outbound_failed` | outbound 文本或媒体消息重试耗尽或遇到永久失败时 | `media`, `content_len`, `retries`, `error`, `reply_to_message_id`; severity 为 `error` |
| `channel.rate_limited` | channel worker 等待 rate limiter token 时被 context 取消,导致本次发送被限流/中断 | `media`, `content_len`, `error`, `reply_to_message_id`; severity 为 `warn` |
### Message Bus
| 事件名 | 触发时机 | 主要详情 |
| ------ | -------- | -------- |
| `bus.publish.failed` | inbound、outbound、media、audio 或 voice control 发布失败,或缺少必要 context 时 | `stream`, `error`; scope 尽量来自消息 context |
| `bus.close.started` | message bus 开始关闭时 | `drained` 通常为 `0` |
| `bus.close.drained` | close 期间等待队列 drain,并且 drain 到至少一条 buffered message 时 | `drained` |
| `bus.close.completed` | message bus 完成关闭时 | `drained` |
### Gateway
| 事件名 | 触发时机 | 主要详情 |
| ------ | -------- | -------- |
| `gateway.start` | gateway 完成 agent/runtime event bus/bootstrap 绑定后 | `duration_ms` |
| `gateway.ready` | gateway 服务、channel manager、HTTP 等关键服务启动完成后 | `duration_ms` |
| `gateway.shutdown` | gateway 开始关闭流程时 | 无固定 payload,可能只有 envelope 字段 |
| `gateway.reload.started` | 热重载开始执行时 | `duration_ms` |
| `gateway.reload.completed` | 热重载成功完成时 | `duration_ms` |
| `gateway.reload.failed` | 热重载失败时 | `duration_ms`, `error`; severity 为 `error` |
### MCP
| 事件名 | 触发时机 | 主要详情 |
| ------ | -------- | -------- |
| `mcp.server.connecting` | MCP manager 准备连接某个 server 前 | `server`, `type`, `url`, `command` |
| `mcp.server.connected` | MCP server 连接成功并完成工具列表初始化后 | `server`, `type`, `url`, `command`, `tool_count` |
| `mcp.server.failed` | MCP server 连接失败,或 manager 已关闭导致无法连接时 | `server`, `type`, `url`, `command`, `error`; severity 为 `error` |
| `mcp.tool.discovered` | MCP server 的某个工具被发现并注册时 | `server`, `type`, `url`, `command`, `tool` |
| `mcp.tool.call.start` | MCP tool wrapper 开始执行一次远端工具调用前 | `server`, `tool`; 如果在 agent turn 内触发,scope 会带上对应 turn/chat 信息 |
| `mcp.tool.call.end` | MCP tool wrapper 完成一次远端工具调用后,包括失败结果 | `server`, `tool`, `duration_ms`, `is_error`, `error` |
## 日志字段
所有事件日志都会尽量包含稳定 envelope 字段:
- `event_id`
- `event_kind`
- `severity`
- `event_time`
- `source_component`
- `source_name`
- `agent_id`
- `session_key`
- `turn_id`
- `channel`
- `account`
- `chat_id`
- `topic_id`
- `space_id`
- `space_type`
- `chat_type`
- `sender_id`
- `message_id`
- `trace_id`
- `parent_turn_id`
- `request_id`
- `reply_to_id`
agent 事件还会追加 payload 摘要字段:
| 事件 | 摘要字段 |
| ---- | -------- |
| `agent.turn.start` | `user_len`, `media_count` |
| `agent.turn.end` | `status`, `iterations_total`, `duration_ms`, `final_len` |
| `agent.llm.request` | `model`, `messages`, `tools`, `max_tokens` |
| `agent.llm.delta` | `content_delta_len`, `reasoning_delta_len` |
| `agent.llm.response` | `content_len`, `tool_calls`, `has_reasoning` |
| `agent.llm.retry` | `attempt`, `max_retries`, `reason`, `error`, `backoff_ms` |
| `agent.context.compress` | `reason`, `dropped_messages`, `remaining_messages` |
| `agent.session.summarize` | `summarized_messages`, `kept_messages`, `summary_len`, `omitted_oversized` |
| `agent.tool.exec_start` | `tool`, `args_count` |
| `agent.tool.exec_end` | `tool`, `duration_ms`, `for_llm_len`, `for_user_len`, `is_error`, `async` |
| `agent.tool.exec_skipped` | `tool`, `reason` |
| `agent.steering.injected` | `count`, `total_content_len` |
| `agent.follow_up.queued` | `source_tool`, `content_len` |
| `agent.interrupt.received` | `interrupt_kind`, `role`, `content_len`, `queue_depth`, `hint_len` |
| `agent.subturn.spawn` | `child_agent_id`, `label` |
| `agent.subturn.end` | `child_agent_id`, `status` |
| `agent.subturn.result_delivered` | `target_channel`, `target_chat_id`, `content_len` |
| `agent.subturn.orphan` | `parent_turn_id`, `child_turn_id`, `reason` |
| `agent.error` | `stage`, `error` |
## 可打印的事件域
当前 runtime event kind 定义在 `pkg/events/kind.go`。事件日志配置可以选择这些域:
- `agent.*`agent turn、LLM、tool、context、steering、interrupt、subturn、error。
- `channel.*`channel lifecycle、webhook 注册、outbound queued/sent/failed、rate limited。
- `bus.*`publish failed、close started/drained/completed。
- `gateway.*`start、ready、shutdown、reload started/completed/failed。
- `mcp.*`server connecting/connected/failed、tool discovered、tool call start/end。
默认事件日志示例见 [`../../config/config.example.json`](../../config/config.example.json)。
+8 -8
View File
@@ -135,16 +135,16 @@ The agent loop polls for async SubTurn results at two points per iteration:
All active turns are registered in `AgentLoop.activeTurnStates` (`sync.Map`, keyed by session key). A reservation sentinel is stored atomically via `LoadOrStore` before the worker starts, then replaced with the real `*turnState` when `runTurn` registers. This prevents a TOCTOU race where multiple messages for the same session could spawn concurrent workers. The sentinel is cleaned up by the worker's deferred cleanup. This allows `HardAbort` and `/subagents` observability commands to find and operate on active turns.
## Event Bus Integration
## Runtime Event Integration
SubTurns emit specific events to the PicoClaw `EventBus` for observability and debugging:
SubTurns emit runtime events through `pkg/events` for observability and debugging:
| Event Kind | When Emitted | Payload |
|:------|:-------------|:--------|
| `subturn_spawn` | Sub-turn successfully initialized | `SubTurnSpawnPayload{AgentID, Label, ParentTurnID}` |
| `subturn_end` | Sub-turn finishes (success or error) | `SubTurnEndPayload{AgentID, Status}` |
| `subturn_result_delivered` | Async result successfully delivered to parent | `SubTurnResultDeliveredPayload{TargetChannel, TargetChatID, ContentLen}` |
| `subturn_orphan` | Result cannot be delivered (parent finished or channel full) | `SubTurnOrphanPayload{ParentTurnID, ChildTurnID, Reason}` |
| `agent.subturn.spawn` | Sub-turn successfully initialized | `SubTurnSpawnPayload{AgentID, Label, ParentTurnID}` |
| `agent.subturn.end` | Sub-turn finishes (success or error) | `SubTurnEndPayload{AgentID, Status}` |
| `agent.subturn.result_delivered` | Async result successfully delivered to parent | `SubTurnResultDeliveredPayload{TargetChannel, TargetChatID, ContentLen}` |
| `agent.subturn.orphan` | Result cannot be delivered (parent finished or channel full) | `SubTurnOrphanPayload{ParentTurnID, ChildTurnID, Reason}` |
## API Reference
@@ -240,13 +240,13 @@ An orphan result occurs when:
2. The `pendingResults` channel is full (buffer size: 16)
When a result becomes orphan:
- `SubTurnOrphanResultEvent` is emitted to EventBus
- `agent.subturn.orphan` is emitted to the runtime event bus
- The result is **NOT** delivered to the LLM context
- External systems can listen to this event for custom handling
### Preventing Orphan Results
- Use `Critical: true` for important SubTurns that must complete
- Monitor `SubTurnOrphanResultEvent` for observability
- Monitor `agent.subturn.orphan` for observability
- Consider the 16-buffer limit when spawning many async SubTurns
## Tool Inheritance
+37 -35
View File
@@ -1,11 +1,15 @@
# PicoClaw Hook 系统设计(基于 `refactor/agent`
> 当前状态:本文是 hook 系统的早期设计记录。事件系统升级后,观察型 hook 的主路径已经切到
> `pkg/events.Event`、`RuntimeEventObserver` 和进程 hook 的 `hook.runtime_event`。
> 旧 `agent.Event`、`EventKind`、`hook.event` 兼容层已经删除。
## 背景
本设计围绕两个议题展开:
- `#1316`:把 agent loop 重构为事件驱动、可中断、可追加、可观测
- `#1796`:在 EventBus 稳定后,把 hooks 设计为 EventBus 的 consumer,而不是重新发明一套事件模型
- `#1796`:在 runtime event bus 稳定后,把 hooks 设计为事件 consumer,而不是重新发明一套事件模型
当前分支已经完成了第一步里的“事件系统基础”,但还没有真正的 hook 挂载层。因此这里的目标不是重新设计 event,而是在已有实现上补出一层可扩展、可拦截、可外挂的 HookManager。
@@ -52,20 +56,18 @@ pi-mono 的核心思路更接近当前分支:
当前分支已经具备 hook 系统的地基:
- `pkg/agent/events.go` 定义了稳定的 `EventKind``EventMeta` 和 payload
- `pkg/agent/eventbus.go` 提供了非阻塞 fan-out 的 `EventBus`
- `pkg/events` 定义 runtime event envelope、kind、scope、source、severity 和 fan-out bus
- `pkg/agent/event_payloads.go` 保留 agent domain payload
- agent domain payload 保留在 `pkg/agent/event_payloads.go`
- `pkg/agent/loop.go` 中的 `runTurn()` 已在 turn、llm、tool、interrupt、follow-up、summary 等节点发射事件
- `pkg/agent/steering.go` 已支持 steering、graceful interrupt、hard abort
- `pkg/agent/turn.go` 已维护 turn phase、恢复点、active turn、abort 状态
### 现有缺口
当前分支还缺四件事:
- 没有 HookManager,只有 EventBus
- 没有 Before/After LLM、Before/After Tool 这种同步拦截点
- 没有审批型 hook
- 子 agent 仍走 `pkg/tools/SubagentManager + RunToolLoop`,没有接入 `pkg/agent` 的 turn tree 和事件流
早期设计时的缺口包括 HookManager、Before/After LLM、Before/After Tool、审批型 hook
以及 sub-turn 接入。当前实现已经覆盖主 turn 的 HookManager、LLM/Tool 拦截和审批;
sub-turn 事件已接入 runtime event bus
### 一个关键现实
@@ -73,19 +75,19 @@ pi-mono 的核心思路更接近当前分支:
## 设计原则
- Hook 必须建立在 `pkg/agent` 的 EventBus 和 turn 上下文之上
- EventBus 负责广播,HookManager 负责拦截,两者职责分离
- Hook 必须建立在 `pkg/events` runtime event bus 和 turn 上下文之上
- runtime event bus 负责广播,HookManager 负责拦截,两者职责分离
- 项目内挂载要简单,项目外挂载必须走 IPC
- 观察型 hook 不能阻塞 loop;拦截型 hook 必须有超时
- 先覆盖主 turn,不把 sub-turn 一次做满
- 不新增第二套用户事件命名系统,优先复用 `EventKind.String()`
- 不新增第二套用户事件命名系统,新观察点统一使用 `pkg/events.Kind`
## 总体架构
分成三层:
1. `EventBus`
负责广播只读事件,现有实现直接复用
1. `pkg/events` runtime event bus
负责广播只读事件,覆盖 agent、channel、gateway、bus、MCP 等运行时组件
2. `HookManager`
负责管理 hook、排序、超时、错误隔离,并在 `runTurn()` 的明确检查点执行同步拦截
@@ -97,7 +99,7 @@ pi-mono 的核心思路更接近当前分支:
换句话说:
- EventBus 是“发生了什么”
- runtime event bus 是“发生了什么”
- HookManager 是“谁能介入”
- HookMount 是“这些 hook 从哪里来”
@@ -113,11 +115,11 @@ pi-mono 的核心思路更接近当前分支:
```go
type EventObserver interface {
OnEvent(ctx context.Context, evt agent.Event) error
OnRuntimeEvent(ctx context.Context, evt events.Event) error
}
```
这类 hook 直接订阅 EventBus 即可。
这类 hook 直接订阅 runtime event bus 即可。
适用场景:
@@ -156,7 +158,7 @@ type ToolApprover interface {
## 对外暴露的最小 hook 面
V1 不需要把所有 EventKind 都变成可拦截点。
V1 不需要把所有 runtime event kind 都变成可拦截点。
建议只开放这些同步 hook
@@ -168,19 +170,19 @@ V1 不需要把所有 EventKind 都变成可拦截点。
其余节点继续作为只读事件暴露:
- `turn_start`
- `turn_end`
- `llm_request`
- `llm_response`
- `tool_exec_start`
- `tool_exec_end`
- `tool_exec_skipped`
- `steering_injected`
- `follow_up_queued`
- `interrupt_received`
- `context_compress`
- `session_summarize`
- `error`
- `agent.turn.start`
- `agent.turn.end`
- `agent.llm.request`
- `agent.llm.response`
- `agent.tool.exec_start`
- `agent.tool.exec_end`
- `agent.tool.exec_skipped`
- `agent.steering.injected`
- `agent.follow_up.queued`
- `agent.interrupt.received`
- `agent.context.compress`
- `agent.session.summarize`
- `agent.error`
`subturn_*` 在 V1 中保留名字,但不承诺一定触发,直到子 turn 迁移完成。
@@ -369,7 +371,7 @@ PicoClaw 启动外部进程,并在其 stdin/stdout 上跑协议。
### 观察链路
```text
runTurn() -> emitEvent() -> EventBus -> observers
runTurn() -> emitEvent() -> runtime event bus -> observers
```
### 拦截链路
@@ -453,7 +455,7 @@ V1 不做复杂自动发现。
### Phase 3
-`SubagentManager` 迁移到 `runTurn/sub-turn`
- 接通 `subturn_spawn` / `subturn_end` / `subturn_result_delivered`
- 接通 `agent.subturn.spawn` / `agent.subturn.end` / `agent.subturn.result_delivered`
### Phase 4
@@ -464,13 +466,13 @@ V1 不做复杂自动发现。
最适合 PicoClaw 当前分支的方案,不是直接复制 OpenClaw 的 hooks,也不是完整照搬 pi-mono 的 extension system,而是:
-现有 `EventBus` 为只读观察面
-`pkg/events` runtime event bus 为只读观察面
- 以新增 `HookManager` 为同步拦截面
- 项目内通过 Go 对象直接挂载
- 项目外通过 `stdio JSON-RPC` 进程通信挂载
这样做有三个好处:
-`#1796` 一致,hooks 只是 EventBus 之上的消费层
-`#1796` 一致,hooks 只是 runtime event bus 之上的消费层
- 和当前 `refactor/agent` 实现一致,不需要推翻已有事件系统
- 同时满足“仓内简单挂载”和“仓外进程通信挂载”两个硬需求
+37
View File
@@ -753,6 +753,42 @@ PicoClaw 按协议族路由提供商:
</details>
### 事件日志
PicoClaw 的 runtime events 会覆盖 agent、channel、gateway、message bus 和 MCP 等运行时组件。默认只打印 `agent.*` 事件,其他事件仍会发布到 runtime event bus,但不会进入日志。
```json
{
"events": {
"logging": {
"enabled": true,
"include": ["agent.*"],
"exclude": [],
"min_severity": "info",
"include_payload": false
}
}
}
```
常用配置:
```json
{
"events": {
"logging": {
"include": ["*"],
"exclude": ["agent.llm.delta"],
"min_severity": "warn"
}
}
}
```
`include` / `exclude` 支持精确事件名和 `gateway.*``channel.lifecycle.*` 这类模式。`include_payload` 默认关闭,避免把完整用户消息或工具参数写入日志;agent 事件会默认输出长度、计数、状态等摘要字段。
更多字段说明和示例见 [Runtime Events 与事件日志](../architecture/runtime-events.zh.md)。
### 定时任务 / 提醒
PicoClaw 通过 `cron` 工具支持 cron 风格的定时任务。Agent 可以设置、列出和取消在指定时间触发的提醒或周期性任务。
@@ -775,6 +811,7 @@ PicoClaw 通过 `cron` 工具支持 cron 风格的定时任务。Agent 可以设
| 主题 | 说明 |
| ---- | ---- |
| [敏感数据过滤](../security/sensitive_data_filtering.zh.md) | 在发送给 LLM 前,从工具结果中过滤 API 密钥和令牌 |
| [Runtime Events 与事件日志](../architecture/runtime-events.zh.md) | 统一运行时事件、日志过滤和调试配置 |
| [Hook 系统](../architecture/hooks/README.zh.md) | 事件驱动 Hook:观察者、拦截器、审批 Hook |
| [Steering](../architecture/steering.md) | 在工具调用间向运行中的 Agent 注入消息 |
| [SubTurn](../architecture/subturn.md) | 子 Agent 协调、并发控制、生命周期管理 |
+17 -11
View File
@@ -21,6 +21,7 @@ import (
"github.com/sipeed/picoclaw/pkg/commands"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/constants"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/media"
"github.com/sipeed/picoclaw/pkg/providers"
@@ -37,9 +38,13 @@ type AgentLoop struct {
registry *AgentRegistry
state *state.Manager
// Event system (from Incoming)
eventBus *EventBus
hooks *HookManager
// Runtime event system
runtimeEvents runtimeevents.Bus
ownsRuntimeEvents bool
runtimeEventLogMu sync.RWMutex
runtimeEventLogger *runtimeEventLogger
runtimeEventLogSub runtimeevents.Subscription
hooks *HookManager
// Runtime state
running atomic.Bool
@@ -285,8 +290,14 @@ func (al *AgentLoop) Close() {
if al.hooks != nil {
al.hooks.Close()
}
if al.eventBus != nil {
al.eventBus.Close()
al.closeRuntimeEventLogger()
if al.runtimeEvents != nil && al.ownsRuntimeEvents {
if err := al.runtimeEvents.Close(); err != nil {
logger.ErrorCF("agent", "Failed to close runtime event bus",
map[string]any{
"error": err.Error(),
})
}
}
}
@@ -294,12 +305,6 @@ func (al *AgentLoop) Close() {
// UnmountHook removes a previously registered in-process hook.
// SubscribeEvents registers a subscriber for agent-loop events.
// UnsubscribeEvents removes a previously registered event subscriber.
// EventDrops returns the number of dropped events for the given kind.
type turnEventScope struct {
agentID string
sessionKey string
@@ -384,6 +389,7 @@ func (al *AgentLoop) ReloadProviderAndConfig(
al.fallback = providers.NewFallbackChain(providers.NewCooldownTracker(), newRL)
al.mu.Unlock()
al.refreshRuntimeEventLogger(cfg)
oldMCPManager := al.mcp.reset()
al.hookRuntime.reset(al)
+31 -128
View File
@@ -5,7 +5,7 @@ package agent
import (
"fmt"
"github.com/sipeed/picoclaw/pkg/logger"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func (al *AgentLoop) newTurnEventScope(agentID, sessionKey string, turnCtx *TurnContext) turnEventScope {
@@ -18,8 +18,8 @@ func (al *AgentLoop) newTurnEventScope(agentID, sessionKey string, turnCtx *Turn
}
}
func (ts turnEventScope) meta(iteration int, source, tracePath string) EventMeta {
return EventMeta{
func (ts turnEventScope) meta(iteration int, source, tracePath string) HookMeta {
return HookMeta{
AgentID: ts.agentID,
TurnID: ts.turnID,
SessionKey: ts.sessionKey,
@@ -30,119 +30,24 @@ func (ts turnEventScope) meta(iteration int, source, tracePath string) EventMeta
}
}
func (al *AgentLoop) emitEvent(kind EventKind, meta EventMeta, payload any) {
clonedMeta := cloneEventMeta(meta)
evt := Event{
Kind: kind,
Meta: clonedMeta,
Context: cloneTurnContext(clonedMeta.turnContext),
Payload: payload,
func (al *AgentLoop) emitEvent(kind runtimeevents.Kind, meta HookMeta, payload any) {
clonedMeta := cloneHookMeta(meta)
eventCtx := cloneTurnContext(clonedMeta.turnContext)
evt := runtimeevents.Event{
Kind: kind,
Source: runtimeevents.Source{Component: "agent", Name: clonedMeta.AgentID},
Scope: runtimeScopeFromHookMeta(clonedMeta, eventCtx),
Correlation: runtimeCorrelationFromHookMeta(clonedMeta),
Severity: runtimeSeverityForAgentEvent(kind, payload),
Payload: payload,
Attrs: runtimeAttrsFromHookMeta(clonedMeta),
}
if al == nil || al.eventBus == nil {
if al == nil {
return
}
al.logEvent(evt)
al.eventBus.Emit(evt)
}
func (al *AgentLoop) logEvent(evt Event) {
fields := map[string]any{
"event_kind": evt.Kind.String(),
"agent_id": evt.Meta.AgentID,
"turn_id": evt.Meta.TurnID,
"session_key": evt.Meta.SessionKey,
"iteration": evt.Meta.Iteration,
}
if evt.Meta.TracePath != "" {
fields["trace"] = evt.Meta.TracePath
}
if evt.Meta.Source != "" {
fields["source"] = evt.Meta.Source
}
appendEventContextFields(fields, evt.Context)
switch payload := evt.Payload.(type) {
case TurnStartPayload:
fields["user_len"] = len(payload.UserMessage)
fields["media_count"] = payload.MediaCount
case TurnEndPayload:
fields["status"] = payload.Status
fields["iterations_total"] = payload.Iterations
fields["duration_ms"] = payload.Duration.Milliseconds()
fields["final_len"] = payload.FinalContentLen
case LLMRequestPayload:
fields["model"] = payload.Model
fields["messages"] = payload.MessagesCount
fields["tools"] = payload.ToolsCount
fields["max_tokens"] = payload.MaxTokens
case LLMDeltaPayload:
fields["content_delta_len"] = payload.ContentDeltaLen
fields["reasoning_delta_len"] = payload.ReasoningDeltaLen
case LLMResponsePayload:
fields["content_len"] = payload.ContentLen
fields["tool_calls"] = payload.ToolCalls
fields["has_reasoning"] = payload.HasReasoning
case LLMRetryPayload:
fields["attempt"] = payload.Attempt
fields["max_retries"] = payload.MaxRetries
fields["reason"] = payload.Reason
fields["error"] = payload.Error
fields["backoff_ms"] = payload.Backoff.Milliseconds()
case ContextCompressPayload:
fields["reason"] = payload.Reason
fields["dropped_messages"] = payload.DroppedMessages
fields["remaining_messages"] = payload.RemainingMessages
case SessionSummarizePayload:
fields["summarized_messages"] = payload.SummarizedMessages
fields["kept_messages"] = payload.KeptMessages
fields["summary_len"] = payload.SummaryLen
fields["omitted_oversized"] = payload.OmittedOversized
case ToolExecStartPayload:
fields["tool"] = payload.Tool
fields["args_count"] = len(payload.Arguments)
case ToolExecEndPayload:
fields["tool"] = payload.Tool
fields["duration_ms"] = payload.Duration.Milliseconds()
fields["for_llm_len"] = payload.ForLLMLen
fields["for_user_len"] = payload.ForUserLen
fields["is_error"] = payload.IsError
fields["async"] = payload.Async
case ToolExecSkippedPayload:
fields["tool"] = payload.Tool
fields["reason"] = payload.Reason
case SteeringInjectedPayload:
fields["count"] = payload.Count
fields["total_content_len"] = payload.TotalContentLen
case FollowUpQueuedPayload:
fields["source_tool"] = payload.SourceTool
fields["content_len"] = payload.ContentLen
case InterruptReceivedPayload:
fields["interrupt_kind"] = payload.Kind
fields["role"] = payload.Role
fields["content_len"] = payload.ContentLen
fields["queue_depth"] = payload.QueueDepth
fields["hint_len"] = payload.HintLen
case SubTurnSpawnPayload:
fields["child_agent_id"] = payload.AgentID
fields["label"] = payload.Label
case SubTurnEndPayload:
fields["child_agent_id"] = payload.AgentID
fields["status"] = payload.Status
case SubTurnResultDeliveredPayload:
fields["target_channel"] = payload.TargetChannel
fields["target_chat_id"] = payload.TargetChatID
fields["content_len"] = payload.ContentLen
case ErrorPayload:
fields["stage"] = payload.Stage
fields["error"] = payload.Message
}
logger.InfoCF("eventbus", fmt.Sprintf("Agent event: %s", evt.Kind.String()), fields)
al.publishRuntimeEvent(evt)
}
// MountHook registers an in-process hook on the agent loop.
@@ -161,28 +66,26 @@ func (al *AgentLoop) UnmountHook(name string) {
al.hooks.Unmount(name)
}
// SubscribeEvents registers a subscriber for agent-loop events.
func (al *AgentLoop) SubscribeEvents(buffer int) EventSubscription {
if al == nil || al.eventBus == nil {
ch := make(chan Event)
close(ch)
return EventSubscription{C: ch}
// RuntimeEvents returns the root runtime event channel.
func (al *AgentLoop) RuntimeEvents() runtimeevents.EventChannel {
if al == nil || al.runtimeEvents == nil {
return nil
}
return al.eventBus.Subscribe(buffer)
return al.runtimeEvents.Channel()
}
// UnsubscribeEvents removes a previously registered event subscriber.
func (al *AgentLoop) UnsubscribeEvents(id uint64) {
if al == nil || al.eventBus == nil {
return
// RuntimeEventStats returns runtime event bus counters.
func (al *AgentLoop) RuntimeEventStats() runtimeevents.Stats {
if al == nil || al.runtimeEvents == nil {
return runtimeevents.Stats{Closed: true}
}
al.eventBus.Unsubscribe(id)
return al.runtimeEvents.Stats()
}
// EventDrops returns the number of dropped events for the given kind.
func (al *AgentLoop) EventDrops(kind EventKind) int64 {
if al == nil || al.eventBus == nil {
return 0
// RuntimeEventBus returns the runtime event bus used by the agent loop.
func (al *AgentLoop) RuntimeEventBus() runtimeevents.Bus {
if al == nil {
return nil
}
return al.eventBus.Dropped(kind)
return al.runtimeEvents
}
+22 -12
View File
@@ -13,6 +13,7 @@ import (
"github.com/sipeed/picoclaw/pkg/channels"
"github.com/sipeed/picoclaw/pkg/commands"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/skills"
@@ -24,6 +25,7 @@ func NewAgentLoop(
cfg *config.Config,
msgBus *bus.MessageBus,
provider providers.LLMProvider,
opts ...AgentLoopOption,
) *AgentLoop {
registry := NewAgentRegistry(cfg, provider)
@@ -47,8 +49,6 @@ func NewAgentLoop(
stateManager = state.NewManager(defaultAgent.Workspace)
}
eventBus := NewEventBus()
// Determine worker pool size from config (default: 1 = sequential)
workerPoolSize := cfg.Agents.Defaults.MaxParallelTurns
if workerPoolSize <= 0 {
@@ -56,18 +56,28 @@ func NewAgentLoop(
}
al := &AgentLoop{
bus: msgBus,
cfg: cfg,
registry: registry,
state: stateManager,
eventBus: eventBus,
fallback: fallbackChain,
cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()),
steering: newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)),
workerSem: make(chan struct{}, workerPoolSize),
bus: msgBus,
cfg: cfg,
registry: registry,
state: stateManager,
fallback: fallbackChain,
cmdRegistry: commands.NewRegistry(commands.BuiltinDefinitions()),
steering: newSteeringQueue(parseSteeringMode(cfg.Agents.Defaults.SteeringMode)),
workerSem: make(chan struct{}, workerPoolSize),
ownsRuntimeEvents: true,
}
for _, opt := range opts {
if opt != nil {
opt(al)
}
}
if al.runtimeEvents == nil {
al.runtimeEvents = runtimeevents.NewBus()
al.ownsRuntimeEvents = true
}
al.refreshRuntimeEventLogger(cfg)
al.providerFactory = providers.CreateProviderFromConfig
al.hooks = NewHookManager(eventBus)
al.hooks = NewHookManager(al.runtimeEvents.Channel())
configureHookManagerFromConfig(al.hooks, cfg)
al.contextManager = al.resolveContextManager()
+2 -1
View File
@@ -97,7 +97,7 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
}
al.mcp.initOnce.Do(func() {
mcpManager := mcp.NewManager()
mcpManager := mcp.NewManager(mcp.WithRuntimeEvents(al.runtimeEvents))
defaultAgent := al.registry.GetDefaultAgent()
workspacePath := al.cfg.WorkspacePath()
@@ -164,6 +164,7 @@ func (al *AgentLoop) ensureMCPInitialized(ctx context.Context) error {
mcpTool := tools.NewMCPTool(mcpManager, serverName, tool)
mcpTool.SetWorkspace(agent.Workspace)
mcpTool.SetMaxInlineTextRunes(al.cfg.Tools.MCP.GetMaxInlineTextChars())
mcpTool.SetEventPublisher(al.runtimeEvents)
if registerAsHidden {
agent.Tools.RegisterHidden(mcpTool)
+20
View File
@@ -0,0 +1,20 @@
package agent
import runtimeevents "github.com/sipeed/picoclaw/pkg/events"
// AgentLoopOption configures an AgentLoop at construction time.
type AgentLoopOption func(*AgentLoop)
// WithRuntimeEvents injects the runtime event bus used for new observation APIs.
//
// The injected bus is treated as externally owned and will not be closed by
// AgentLoop.Close. Passing nil leaves the default owned runtime bus enabled.
func WithRuntimeEvents(bus runtimeevents.Bus) AgentLoopOption {
return func(al *AgentLoop) {
if bus == nil {
return
}
al.runtimeEvents = bus
al.ownsRuntimeEvents = false
}
}
+15 -5
View File
@@ -19,6 +19,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/channels"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/media"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/routing"
@@ -5456,6 +5457,7 @@ func TestParallelMessageProcessing_SameSessionProcessedSequentially(t *testing.T
var mu sync.Mutex
turnIDs := make(map[string]bool)
var wg sync.WaitGroup
var firstResponse sync.Once
wg.Add(1) // Only 1 turn should be created for same session
cfg := &config.Config{
@@ -5478,19 +5480,27 @@ func TestParallelMessageProcessing_SameSessionProcessedSequentially(t *testing.T
al := NewAgentLoop(cfg, msgBus, &concurrentMockProvider{
responseFunc: func(callID int) string {
wg.Done()
firstResponse.Do(func() {
wg.Done()
})
return "ok"
},
})
defer al.Close()
sub := al.SubscribeEvents(64)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
64,
runtimeevents.KindAgentTurnStart,
)
defer closeRuntimeEvents()
go func() {
for evt := range sub.C {
if evt.Kind == EventKindTurnStart {
for evt := range runtimeCh {
if evt.Kind == runtimeevents.KindAgentTurnStart {
mu.Lock()
turnIDs[evt.Meta.TurnID] = true
turnIDs[evt.Scope.TurnID] = true
mu.Unlock()
}
}
+3 -2
View File
@@ -7,6 +7,7 @@ import (
"sync"
"time"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
)
@@ -41,7 +42,7 @@ func (m *legacyContextManager) Compact(_ context.Context, req *CompactRequest) e
// Sync emergency compression — budget exceeded.
if result, ok := m.forceCompression(req.SessionKey); ok {
m.al.emitEvent(
EventKindContextCompress,
runtimeevents.KindAgentContextCompress,
m.al.newTurnEventScope("", req.SessionKey, nil).meta(0, "forceCompression", "turn.context.compress"),
ContextCompressPayload{
Reason: req.Reason,
@@ -246,7 +247,7 @@ func (m *legacyContextManager) summarizeSession(agent *AgentInstance, sessionKey
agent.Sessions.TruncateHistory(sessionKey, keepCount)
agent.Sessions.Save(sessionKey)
m.al.emitEvent(
EventKindSessionSummarize,
runtimeevents.KindAgentSessionSummarize,
m.al.newTurnEventScope(agent.ID, sessionKey, nil).meta(0, "summarizeSession", "turn.session.summarize"),
SessionSummarizePayload{
SummarizedMessages: len(validMessages),
+29 -14
View File
@@ -12,6 +12,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/providers"
)
@@ -305,8 +306,13 @@ func TestLegacyCompact_Overflow(t *testing.T) {
}
defaultAgent.Sessions.SetHistory("session-overflow", history)
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentContextCompress,
)
defer closeRuntimeEvents()
err := al.contextManager.Compact(context.Background(), &CompactRequest{
SessionKey: "session-overflow",
@@ -329,8 +335,8 @@ func TestLegacyCompact_Overflow(t *testing.T) {
}
// Event should carry the proactive reason
events := collectEventStream(sub.C)
compressEvt, ok := findEvent(events, EventKindContextCompress)
events := collectRuntimeEventStream(runtimeCh)
compressEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentContextCompress)
if !ok {
t.Fatal("expected context compress event")
}
@@ -361,8 +367,13 @@ func TestLegacyCompact_Overflow_ProactiveReason(t *testing.T) {
}
defaultAgent.Sessions.SetHistory("session-proactive", history)
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentContextCompress,
)
defer closeRuntimeEvents()
err := al.contextManager.Compact(context.Background(), &CompactRequest{
SessionKey: "session-proactive",
@@ -372,8 +383,8 @@ func TestLegacyCompact_Overflow_ProactiveReason(t *testing.T) {
t.Fatalf("unexpected error: %v", err)
}
events := collectEventStream(sub.C)
compressEvt, ok := findEvent(events, EventKindContextCompress)
events := collectRuntimeEventStream(runtimeCh)
compressEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentContextCompress)
if !ok {
t.Fatal("expected context compress event")
}
@@ -483,6 +494,14 @@ func TestLegacyCompact_PostTurn_ExceedsMessageThreshold(t *testing.T) {
}
defaultAgent.Sessions.SetHistory("session-threshold", history)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentSessionSummarize,
)
defer closeRuntimeEvents()
err := al.contextManager.Compact(context.Background(), &CompactRequest{
SessionKey: "session-threshold",
Reason: ContextCompressReasonSummarize,
@@ -491,12 +510,8 @@ func TestLegacyCompact_PostTurn_ExceedsMessageThreshold(t *testing.T) {
t.Fatalf("unexpected error: %v", err)
}
// Wait for async summarization to complete via event
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
waitForEvent(t, sub.C, 5*time.Second, func(evt Event) bool {
return evt.Kind == EventKindSessionSummarize
waitForRuntimeEvent(t, runtimeCh, 5*time.Second, func(evt runtimeevents.Event) bool {
return evt.Kind == runtimeevents.KindAgentSessionSummarize
})
newHistory := defaultAgent.Sessions.GetHistory("session-threshold")
+171
View File
@@ -0,0 +1,171 @@
package agent
import "time"
// TurnEndStatus describes the terminal state of a turn.
type TurnEndStatus string
const (
// TurnEndStatusCompleted indicates the turn finished normally.
TurnEndStatusCompleted TurnEndStatus = "completed"
// TurnEndStatusError indicates the turn ended because of an error.
TurnEndStatusError TurnEndStatus = "error"
// TurnEndStatusAborted indicates the turn was hard-aborted and rolled back.
TurnEndStatusAborted TurnEndStatus = "aborted"
)
// TurnStartPayload describes the start of a turn.
type TurnStartPayload struct {
UserMessage string
MediaCount int
}
// TurnEndPayload describes the completion of a turn.
type TurnEndPayload struct {
Status TurnEndStatus
Iterations int
Duration time.Duration
FinalContentLen int
}
// LLMRequestPayload describes an outbound LLM request.
type LLMRequestPayload struct {
Model string
MessagesCount int
ToolsCount int
MaxTokens int
Temperature float64
}
// LLMResponsePayload describes an inbound LLM response.
type LLMResponsePayload struct {
ContentLen int
ToolCalls int
HasReasoning bool
}
// LLMDeltaPayload describes a streamed LLM delta.
type LLMDeltaPayload struct {
ContentDeltaLen int
ReasoningDeltaLen int
}
// LLMRetryPayload describes a retry of an LLM request.
type LLMRetryPayload struct {
Attempt int
MaxRetries int
Reason string
Error string
Backoff time.Duration
}
// ContextCompressReason identifies why emergency compression ran.
type ContextCompressReason string
const (
// ContextCompressReasonProactive indicates compression before the first LLM call.
ContextCompressReasonProactive ContextCompressReason = "proactive_budget"
// ContextCompressReasonRetry indicates compression during context-error retry handling.
ContextCompressReasonRetry ContextCompressReason = "llm_retry"
// ContextCompressReasonSummarize indicates post-turn async summarization.
ContextCompressReasonSummarize ContextCompressReason = "summarize"
)
// ContextCompressPayload describes a forced history compression.
type ContextCompressPayload struct {
Reason ContextCompressReason
DroppedMessages int
RemainingMessages int
}
// SessionSummarizePayload describes a completed async session summarization.
type SessionSummarizePayload struct {
SummarizedMessages int
KeptMessages int
SummaryLen int
OmittedOversized bool
}
// ToolExecStartPayload describes a tool execution request.
type ToolExecStartPayload struct {
Tool string
Arguments map[string]any
}
// ToolExecEndPayload describes the outcome of a tool execution.
type ToolExecEndPayload struct {
Tool string
Duration time.Duration
ForLLMLen int
ForUserLen int
IsError bool
Async bool
}
// ToolExecSkippedPayload describes a skipped tool call.
type ToolExecSkippedPayload struct {
Tool string
Reason string
}
// SteeringInjectedPayload describes steering messages appended before the next LLM call.
type SteeringInjectedPayload struct {
Count int
TotalContentLen int
}
// FollowUpQueuedPayload describes an async follow-up queued back into the inbound bus.
type FollowUpQueuedPayload struct {
SourceTool string
ContentLen int
}
type InterruptKind string
const (
InterruptKindSteering InterruptKind = "steering"
InterruptKindGraceful InterruptKind = "graceful"
InterruptKindHard InterruptKind = "hard_abort"
)
// InterruptReceivedPayload describes accepted turn-control input.
type InterruptReceivedPayload struct {
Kind InterruptKind
Role string
ContentLen int
QueueDepth int
HintLen int
}
// SubTurnSpawnPayload describes the creation of a child turn.
type SubTurnSpawnPayload struct {
AgentID string
Label string
ParentTurnID string
}
// SubTurnEndPayload describes the completion of a child turn.
type SubTurnEndPayload struct {
AgentID string
Status string
}
// SubTurnResultDeliveredPayload describes delivery of a sub-turn result.
type SubTurnResultDeliveredPayload struct {
TargetChannel string
TargetChatID string
ContentLen int
}
// SubTurnOrphanPayload describes a sub-turn result that could not be delivered.
type SubTurnOrphanPayload struct {
ParentTurnID string
ChildTurnID string
Reason string
}
// ErrorPayload describes an execution error inside the agent loop.
type ErrorPayload struct {
Stage string
Message string
}
-121
View File
@@ -1,121 +0,0 @@
package agent
import (
"sync"
"sync/atomic"
"time"
)
const defaultEventSubscriberBuffer = 16
// EventSubscription identifies a subscriber channel returned by EventBus.Subscribe.
type EventSubscription struct {
ID uint64
C <-chan Event
}
type eventSubscriber struct {
ch chan Event
}
// EventBus is a lightweight multi-subscriber broadcaster for agent-loop events.
type EventBus struct {
mu sync.RWMutex
subs map[uint64]eventSubscriber
nextID uint64
closed bool
dropped [eventKindCount]atomic.Int64
}
// NewEventBus creates a new in-process event broadcaster.
func NewEventBus() *EventBus {
return &EventBus{
subs: make(map[uint64]eventSubscriber),
}
}
// Subscribe registers a new subscriber with the requested channel buffer size.
// A non-positive buffer uses the default size.
func (b *EventBus) Subscribe(buffer int) EventSubscription {
if buffer <= 0 {
buffer = defaultEventSubscriberBuffer
}
b.mu.Lock()
defer b.mu.Unlock()
if b.closed {
ch := make(chan Event)
close(ch)
return EventSubscription{C: ch}
}
b.nextID++
id := b.nextID
ch := make(chan Event, buffer)
b.subs[id] = eventSubscriber{ch: ch}
return EventSubscription{ID: id, C: ch}
}
// Unsubscribe removes a subscriber and closes its channel.
func (b *EventBus) Unsubscribe(id uint64) {
b.mu.Lock()
defer b.mu.Unlock()
sub, ok := b.subs[id]
if !ok {
return
}
delete(b.subs, id)
close(sub.ch)
}
// Emit broadcasts an event to all current subscribers without blocking.
// When a subscriber channel is full, the event is dropped for that subscriber.
func (b *EventBus) Emit(evt Event) {
if evt.Time.IsZero() {
evt.Time = time.Now()
}
b.mu.RLock()
defer b.mu.RUnlock()
if b.closed {
return
}
for _, sub := range b.subs {
select {
case sub.ch <- evt:
default:
if evt.Kind < eventKindCount {
b.dropped[evt.Kind].Add(1)
}
}
}
}
// Dropped returns the number of dropped events for a given kind.
func (b *EventBus) Dropped(kind EventKind) int64 {
if kind >= eventKindCount {
return 0
}
return b.dropped[kind].Load()
}
// Close closes all subscriber channels and stops future broadcasts.
func (b *EventBus) Close() {
b.mu.Lock()
defer b.mu.Unlock()
if b.closed {
return
}
b.closed = true
for id, sub := range b.subs {
close(sub.ch)
delete(b.subs, id)
}
}
+155 -132
View File
@@ -9,61 +9,94 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/routing"
"github.com/sipeed/picoclaw/pkg/session"
"github.com/sipeed/picoclaw/pkg/tools"
)
func TestEventBus_SubscribeEmitUnsubscribeClose(t *testing.T) {
eventBus := NewEventBus()
sub := eventBus.Subscribe(1)
eventBus.Emit(Event{
Kind: EventKindTurnStart,
Meta: EventMeta{TurnID: "turn-1"},
})
select {
case evt := <-sub.C:
if evt.Kind != EventKindTurnStart {
t.Fatalf("expected %v, got %v", EventKindTurnStart, evt.Kind)
func TestAgentLoop_PublishesRuntimeEvents(t *testing.T) {
runtimeBus := runtimeevents.NewBus()
al := &AgentLoop{
runtimeEvents: runtimeBus,
}
defer func() {
if err := runtimeBus.Close(); err != nil {
t.Errorf("runtime bus close failed: %v", err)
}
if evt.Meta.TurnID != "turn-1" {
t.Fatalf("expected turn id turn-1, got %q", evt.Meta.TurnID)
}()
runtimeSub, runtimeCh, err := al.RuntimeEvents().OfKind(runtimeevents.KindAgentToolExecStart).SubscribeChan(
context.Background(),
runtimeevents.SubscribeOptions{Name: "runtime", Buffer: 1},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
defer func() {
if err := runtimeSub.Close(); err != nil {
t.Errorf("runtime subscription close failed: %v", err)
}
case <-time.After(time.Second):
t.Fatal("timed out waiting for event")
}()
al.emitEvent(
runtimeevents.KindAgentToolExecStart,
HookMeta{
AgentID: "main",
TurnID: "turn-1",
ParentTurnID: "parent-turn",
SessionKey: "session-1",
Iteration: 2,
TracePath: "trace/root",
Source: "pipeline_execute",
turnContext: &TurnContext{
Inbound: &bus.InboundContext{
Channel: "cli",
Account: "default",
ChatID: "direct",
ChatType: "direct",
SenderID: "tester",
MessageID: "msg-1",
TopicID: "topic-1",
},
},
},
ToolExecStartPayload{Tool: "mock_custom", Arguments: map[string]any{"task": "ping"}},
)
runtimeEvt := receiveRuntimeEvent(t, runtimeCh)
if runtimeEvt.Kind != runtimeevents.KindAgentToolExecStart {
t.Fatalf("runtime kind = %q, want %q", runtimeEvt.Kind, runtimeevents.KindAgentToolExecStart)
}
eventBus.Unsubscribe(sub.ID)
if _, ok := <-sub.C; ok {
t.Fatal("expected subscriber channel to be closed after unsubscribe")
if runtimeEvt.Source != (runtimeevents.Source{Component: "agent", Name: "main"}) {
t.Fatalf("runtime source = %+v", runtimeEvt.Source)
}
eventBus.Close()
closedSub := eventBus.Subscribe(1)
if _, ok := <-closedSub.C; ok {
t.Fatal("expected closed bus to return a closed subscriber channel")
if runtimeEvt.Scope.AgentID != "main" ||
runtimeEvt.Scope.SessionKey != "session-1" ||
runtimeEvt.Scope.TurnID != "turn-1" ||
runtimeEvt.Scope.Channel != "cli" ||
runtimeEvt.Scope.Account != "default" ||
runtimeEvt.Scope.ChatID != "direct" ||
runtimeEvt.Scope.TopicID != "topic-1" ||
runtimeEvt.Scope.ChatType != "direct" ||
runtimeEvt.Scope.SenderID != "tester" ||
runtimeEvt.Scope.MessageID != "msg-1" {
t.Fatalf("runtime scope = %+v", runtimeEvt.Scope)
}
}
func TestEventBus_DropsWhenSubscriberIsFull(t *testing.T) {
eventBus := NewEventBus()
sub := eventBus.Subscribe(1)
defer eventBus.Unsubscribe(sub.ID)
start := time.Now()
for i := 0; i < 1000; i++ {
eventBus.Emit(Event{Kind: EventKindLLMRequest})
if runtimeEvt.Correlation.TraceID != "trace/root" ||
runtimeEvt.Correlation.ParentTurnID != "parent-turn" {
t.Fatalf("runtime correlation = %+v", runtimeEvt.Correlation)
}
if elapsed := time.Since(start); elapsed > 100*time.Millisecond {
t.Fatalf("Emit took too long with a blocked subscriber: %s", elapsed)
if runtimeEvt.Attrs["agent_source"] != "pipeline_execute" || runtimeEvt.Attrs["iteration"] != 2 {
t.Fatalf("runtime attrs = %+v", runtimeEvt.Attrs)
}
if got := eventBus.Dropped(EventKindLLMRequest); got != 999 {
t.Fatalf("expected 999 dropped events, got %d", got)
payload, ok := runtimeEvt.Payload.(ToolExecStartPayload)
if !ok {
t.Fatalf("runtime payload = %T, want ToolExecStartPayload", runtimeEvt.Payload)
}
if payload.Tool != "mock_custom" {
t.Fatalf("runtime payload tool = %q, want mock_custom", payload.Tool)
}
}
@@ -127,8 +160,18 @@ func TestAgentLoop_EmitsMinimalTurnEvents(t *testing.T) {
t.Fatal("expected default agent")
}
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
expectedKinds := []runtimeevents.Kind{
runtimeevents.KindAgentTurnStart,
runtimeevents.KindAgentLLMRequest,
runtimeevents.KindAgentLLMResponse,
runtimeevents.KindAgentToolExecStart,
runtimeevents.KindAgentToolExecEnd,
runtimeevents.KindAgentLLMRequest,
runtimeevents.KindAgentLLMResponse,
runtimeevents.KindAgentTurnEnd,
}
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(t, al, 16, expectedKinds...)
defer closeRuntimeEvents()
response, err := al.runAgentLoop(context.Background(), defaultAgent, processOptions{
SessionKey: "session-1",
@@ -171,49 +214,36 @@ func TestAgentLoop_EmitsMinimalTurnEvents(t *testing.T) {
t.Fatalf("expected final response 'done', got %q", response)
}
events := collectEventStream(sub.C)
events := collectRuntimeEventStream(runtimeCh)
if len(events) != 8 {
t.Fatalf("expected 8 events, got %d", len(events))
}
kinds := make([]EventKind, 0, len(events))
kinds := make([]runtimeevents.Kind, 0, len(events))
for _, evt := range events {
kinds = append(kinds, evt.Kind)
}
expectedKinds := []EventKind{
EventKindTurnStart,
EventKindLLMRequest,
EventKindLLMResponse,
EventKindToolExecStart,
EventKindToolExecEnd,
EventKindLLMRequest,
EventKindLLMResponse,
EventKindTurnEnd,
}
if !slices.Equal(kinds, expectedKinds) {
t.Fatalf("unexpected event sequence: got %v want %v", kinds, expectedKinds)
}
turnID := events[0].Meta.TurnID
turnID := events[0].Scope.TurnID
if turnID == "" {
t.Fatal("expected runtime events to include turn id")
}
for i, evt := range events {
if evt.Meta.TurnID != turnID {
t.Fatalf("event %d has mismatched turn id %q, want %q", i, evt.Meta.TurnID, turnID)
if evt.Scope.TurnID != turnID {
t.Fatalf("event %d has mismatched turn id %q, want %q", i, evt.Scope.TurnID, turnID)
}
if evt.Meta.SessionKey != "session-1" {
t.Fatalf("event %d has session key %q, want session-1", i, evt.Meta.SessionKey)
if evt.Scope.SessionKey != "session-1" {
t.Fatalf("event %d has session key %q, want session-1", i, evt.Scope.SessionKey)
}
if evt.Context == nil || evt.Context.Inbound == nil {
t.Fatalf("event %d missing inbound turn context", i)
if evt.Scope.Channel != "cli" || evt.Scope.ChatID != "direct" || evt.Scope.SenderID != "tester" {
t.Fatalf("event %d scope = %+v", i, evt.Scope)
}
if evt.Context.Inbound.Channel != "cli" || evt.Context.Inbound.SenderID != "tester" {
t.Fatalf("event %d inbound context = %+v", i, evt.Context.Inbound)
}
if evt.Context.Route == nil || evt.Context.Route.AgentID != "main" {
t.Fatalf("event %d missing route context: %+v", i, evt.Context.Route)
}
if evt.Context.Scope == nil || evt.Context.Scope.Values["sender"] != "tester" {
t.Fatalf("event %d missing session scope: %+v", i, evt.Context.Scope)
if evt.Scope.AgentID != "main" {
t.Fatalf("event %d has agent id %q, want main", i, evt.Scope.AgentID)
}
}
@@ -309,8 +339,15 @@ func TestAgentLoop_EmitsSteeringAndSkippedToolEvents(t *testing.T) {
al.RegisterTool(tool1)
al.RegisterTool(tool2)
sub := al.SubscribeEvents(32)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
32,
runtimeevents.KindAgentSteeringInjected,
runtimeevents.KindAgentToolExecSkipped,
runtimeevents.KindAgentInterruptReceived,
)
defer closeRuntimeEvents()
resultCh := make(chan string, 1)
go func() {
@@ -337,8 +374,8 @@ func TestAgentLoop_EmitsSteeringAndSkippedToolEvents(t *testing.T) {
t.Fatal("timeout waiting for steered response")
}
events := collectEventStream(sub.C)
steeringEvt, ok := findEvent(events, EventKindSteeringInjected)
events := collectRuntimeEventStream(runtimeCh)
steeringEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentSteeringInjected)
if !ok {
t.Fatal("expected steering injected event")
}
@@ -350,7 +387,7 @@ func TestAgentLoop_EmitsSteeringAndSkippedToolEvents(t *testing.T) {
t.Fatalf("expected 1 steering message, got %d", steeringPayload.Count)
}
skippedEvt, ok := findEvent(events, EventKindToolExecSkipped)
skippedEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentToolExecSkipped)
if !ok {
t.Fatal("expected skipped tool event")
}
@@ -362,7 +399,7 @@ func TestAgentLoop_EmitsSteeringAndSkippedToolEvents(t *testing.T) {
t.Fatalf("expected skipped tool_two, got %q", skippedPayload.Tool)
}
interruptEvt, ok := findEvent(events, EventKindInterruptReceived)
interruptEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentInterruptReceived)
if !ok {
t.Fatal("expected interrupt received event")
}
@@ -420,8 +457,14 @@ func TestAgentLoop_EmitsContextCompressEventOnRetry(t *testing.T) {
{Role: "user", Content: "Trigger message"},
})
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentLLMRetry,
runtimeevents.KindAgentContextCompress,
)
defer closeRuntimeEvents()
resp, err := al.runAgentLoop(context.Background(), defaultAgent, processOptions{
SessionKey: "session-1",
@@ -439,8 +482,8 @@ func TestAgentLoop_EmitsContextCompressEventOnRetry(t *testing.T) {
t.Fatalf("expected retry success, got %q", resp)
}
events := collectEventStream(sub.C)
retryEvt, ok := findEvent(events, EventKindLLMRetry)
events := collectRuntimeEventStream(runtimeCh)
retryEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentLLMRetry)
if !ok {
t.Fatal("expected llm retry event")
}
@@ -455,7 +498,7 @@ func TestAgentLoop_EmitsContextCompressEventOnRetry(t *testing.T) {
t.Fatalf("expected retry attempt 1, got %d", retryPayload.Attempt)
}
compressEvt, ok := findEvent(events, EventKindContextCompress)
compressEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentContextCompress)
if !ok {
t.Fatal("expected context compress event")
}
@@ -508,14 +551,19 @@ func TestAgentLoop_EmitsSessionSummarizeEvent(t *testing.T) {
{Role: "assistant", Content: "Answer three"},
})
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentSessionSummarize,
)
defer closeRuntimeEvents()
lcm := &legacyContextManager{al: al}
lcm.summarizeSession(defaultAgent, "session-1")
events := collectEventStream(sub.C)
summaryEvt, ok := findEvent(events, EventKindSessionSummarize)
events := collectRuntimeEventStream(runtimeCh)
summaryEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentSessionSummarize)
if !ok {
t.Fatal("expected session summarize event")
}
@@ -575,8 +623,13 @@ func TestAgentLoop_EmitsFollowUpQueuedEvent(t *testing.T) {
t.Fatal("expected default agent")
}
sub := al.SubscribeEvents(32)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
32,
runtimeevents.KindAgentFollowUpQueued,
)
defer closeRuntimeEvents()
resp, err := al.runAgentLoop(context.Background(), defaultAgent, processOptions{
SessionKey: "session-1",
@@ -600,8 +653,8 @@ func TestAgentLoop_EmitsFollowUpQueuedEvent(t *testing.T) {
t.Fatal("timeout waiting for async tool completion")
}
followUpEvt := waitForEvent(t, sub.C, 2*time.Second, func(evt Event) bool {
return evt.Kind == EventKindFollowUpQueued
followUpEvt := waitForRuntimeEvent(t, runtimeCh, 2*time.Second, func(evt runtimeevents.Event) bool {
return evt.Kind == runtimeevents.KindAgentFollowUpQueued
})
payload, ok := followUpEvt.Payload.(FollowUpQueuedPayload)
if !ok {
@@ -613,59 +666,29 @@ func TestAgentLoop_EmitsFollowUpQueuedEvent(t *testing.T) {
if payload.ContentLen != len("background result") {
t.Fatalf("expected content len %d, got %d", len("background result"), payload.ContentLen)
}
if followUpEvt.Meta.SessionKey != "session-1" {
t.Fatalf("expected session key session-1, got %q", followUpEvt.Meta.SessionKey)
if followUpEvt.Scope.SessionKey != "session-1" {
t.Fatalf("expected session key session-1, got %q", followUpEvt.Scope.SessionKey)
}
if followUpEvt.Meta.TurnID == "" {
if followUpEvt.Scope.TurnID == "" {
t.Fatal("expected follow-up event to include turn id")
}
}
func collectEventStream(ch <-chan Event) []Event {
var events []Event
for {
select {
case evt, ok := <-ch:
if !ok {
return events
}
events = append(events, evt)
default:
return events
}
}
}
func waitForEvent(t *testing.T, ch <-chan Event, timeout time.Duration, match func(Event) bool) Event {
func receiveRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
t.Helper()
timer := time.NewTimer(timeout)
defer timer.Stop()
for {
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("event stream closed before expected event arrived")
}
if match(evt) {
return evt
}
case <-timer.C:
t.Fatal("timed out waiting for expected event")
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("runtime event stream closed before expected event arrived")
}
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for runtime event")
return runtimeevents.Event{}
}
}
func findEvent(events []Event, kind EventKind) (Event, bool) {
for _, evt := range events {
if evt.Kind == kind {
return evt, true
}
}
return Event{}, false
}
type stringError string
func (e stringError) Error() string {
+3 -260
View File
@@ -1,97 +1,8 @@
package agent
import (
"fmt"
"time"
)
// EventKind identifies a structured agent-loop event.
type EventKind uint8
const (
// EventKindTurnStart is emitted when a turn begins processing.
EventKindTurnStart EventKind = iota
// EventKindTurnEnd is emitted when a turn finishes, successfully or with an error.
EventKindTurnEnd
// EventKindLLMRequest is emitted before a provider chat request is made.
EventKindLLMRequest
// EventKindLLMDelta is emitted when a streaming provider yields a partial delta.
EventKindLLMDelta
// EventKindLLMResponse is emitted after a provider chat response is received.
EventKindLLMResponse
// EventKindLLMRetry is emitted when an LLM request is retried.
EventKindLLMRetry
// EventKindContextCompress is emitted when session history is forcibly compressed.
EventKindContextCompress
// EventKindSessionSummarize is emitted when asynchronous summarization completes.
EventKindSessionSummarize
// EventKindToolExecStart is emitted immediately before a tool executes.
EventKindToolExecStart
// EventKindToolExecEnd is emitted immediately after a tool finishes executing.
EventKindToolExecEnd
// EventKindToolExecSkipped is emitted when a queued tool call is skipped.
EventKindToolExecSkipped
// EventKindSteeringInjected is emitted when queued steering is injected into context.
EventKindSteeringInjected
// EventKindFollowUpQueued is emitted when an async tool queues a follow-up system message.
EventKindFollowUpQueued
// EventKindInterruptReceived is emitted when a soft interrupt message is accepted.
EventKindInterruptReceived
// EventKindSubTurnSpawn is emitted when a sub-turn is spawned.
EventKindSubTurnSpawn
// EventKindSubTurnEnd is emitted when a sub-turn finishes.
EventKindSubTurnEnd
// EventKindSubTurnResultDelivered is emitted when a sub-turn result is delivered.
EventKindSubTurnResultDelivered
// EventKindSubTurnOrphan is emitted when a sub-turn result cannot be delivered.
EventKindSubTurnOrphan
// EventKindError is emitted when a turn encounters an execution error.
EventKindError
eventKindCount
)
var eventKindNames = [...]string{
"turn_start",
"turn_end",
"llm_request",
"llm_delta",
"llm_response",
"llm_retry",
"context_compress",
"session_summarize",
"tool_exec_start",
"tool_exec_end",
"tool_exec_skipped",
"steering_injected",
"follow_up_queued",
"interrupt_received",
"subturn_spawn",
"subturn_end",
"subturn_result_delivered",
"subturn_orphan",
"error",
}
// String returns the stable string form of an EventKind.
func (k EventKind) String() string {
if k >= eventKindCount {
return fmt.Sprintf("event_kind(%d)", k)
}
return eventKindNames[k]
}
// Event is the structured envelope broadcast by the agent EventBus.
type Event struct {
Kind EventKind
Time time.Time
Meta EventMeta
Context *TurnContext
Payload any
}
// EventMeta contains correlation fields shared by all agent-loop events.
type EventMeta struct {
// HookMeta contains correlation fields shared by agent hook requests and
// runtime events emitted from turn processing.
type HookMeta struct {
AgentID string
TurnID string
ParentTurnID string
@@ -101,171 +12,3 @@ type EventMeta struct {
Source string
turnContext *TurnContext
}
// TurnEndStatus describes the terminal state of a turn.
type TurnEndStatus string
const (
// TurnEndStatusCompleted indicates the turn finished normally.
TurnEndStatusCompleted TurnEndStatus = "completed"
// TurnEndStatusError indicates the turn ended because of an error.
TurnEndStatusError TurnEndStatus = "error"
// TurnEndStatusAborted indicates the turn was hard-aborted and rolled back.
TurnEndStatusAborted TurnEndStatus = "aborted"
)
// TurnStartPayload describes the start of a turn.
type TurnStartPayload struct {
UserMessage string
MediaCount int
}
// TurnEndPayload describes the completion of a turn.
type TurnEndPayload struct {
Status TurnEndStatus
Iterations int
Duration time.Duration
FinalContentLen int
}
// LLMRequestPayload describes an outbound LLM request.
type LLMRequestPayload struct {
Model string
MessagesCount int
ToolsCount int
MaxTokens int
Temperature float64
}
// LLMResponsePayload describes an inbound LLM response.
type LLMResponsePayload struct {
ContentLen int
ToolCalls int
HasReasoning bool
}
// LLMDeltaPayload describes a streamed LLM delta.
type LLMDeltaPayload struct {
ContentDeltaLen int
ReasoningDeltaLen int
}
// LLMRetryPayload describes a retry of an LLM request.
type LLMRetryPayload struct {
Attempt int
MaxRetries int
Reason string
Error string
Backoff time.Duration
}
// ContextCompressReason identifies why emergency compression ran.
type ContextCompressReason string
const (
// ContextCompressReasonProactive indicates compression before the first LLM call.
ContextCompressReasonProactive ContextCompressReason = "proactive_budget"
// ContextCompressReasonRetry indicates compression during context-error retry handling.
ContextCompressReasonRetry ContextCompressReason = "llm_retry"
// ContextCompressReasonSummarize indicates post-turn async summarization.
ContextCompressReasonSummarize ContextCompressReason = "summarize"
)
// ContextCompressPayload describes a forced history compression.
type ContextCompressPayload struct {
Reason ContextCompressReason
DroppedMessages int
RemainingMessages int
}
// SessionSummarizePayload describes a completed async session summarization.
type SessionSummarizePayload struct {
SummarizedMessages int
KeptMessages int
SummaryLen int
OmittedOversized bool
}
// ToolExecStartPayload describes a tool execution request.
type ToolExecStartPayload struct {
Tool string
Arguments map[string]any
}
// ToolExecEndPayload describes the outcome of a tool execution.
type ToolExecEndPayload struct {
Tool string
Duration time.Duration
ForLLMLen int
ForUserLen int
IsError bool
Async bool
}
// ToolExecSkippedPayload describes a skipped tool call.
type ToolExecSkippedPayload struct {
Tool string
Reason string
}
// SteeringInjectedPayload describes steering messages appended before the next LLM call.
type SteeringInjectedPayload struct {
Count int
TotalContentLen int
}
// FollowUpQueuedPayload describes an async follow-up queued back into the inbound bus.
type FollowUpQueuedPayload struct {
SourceTool string
ContentLen int
}
type InterruptKind string
const (
InterruptKindSteering InterruptKind = "steering"
InterruptKindGraceful InterruptKind = "graceful"
InterruptKindHard InterruptKind = "hard_abort"
)
// InterruptReceivedPayload describes accepted turn-control input.
type InterruptReceivedPayload struct {
Kind InterruptKind
Role string
ContentLen int
QueueDepth int
HintLen int
}
// SubTurnSpawnPayload describes the creation of a child turn.
type SubTurnSpawnPayload struct {
AgentID string
Label string
ParentTurnID string
}
// SubTurnEndPayload describes the completion of a child turn.
type SubTurnEndPayload struct {
AgentID string
Status string
}
// SubTurnResultDeliveredPayload describes delivery of a sub-turn result.
type SubTurnResultDeliveredPayload struct {
TargetChannel string
TargetChatID string
ContentLen int
}
// SubTurnOrphanPayload describes a sub-turn result that could not be delivered.
type SubTurnOrphanPayload struct {
ParentTurnID string
ChildTurnID string
Reason string
}
// ErrorPayload describes an execution error inside the agent loop.
type ErrorPayload struct {
Stage string
Message string
}
+88
View File
@@ -0,0 +1,88 @@
package agent
import runtimeevents "github.com/sipeed/picoclaw/pkg/events"
func (al *AgentLoop) publishRuntimeEvent(evt runtimeevents.Event) {
if al == nil || al.runtimeEvents == nil {
return
}
al.runtimeEvents.PublishNonBlocking(evt)
}
func runtimeScopeFromHookMeta(meta HookMeta, eventCtx *TurnContext) runtimeevents.Scope {
scope := runtimeevents.Scope{
AgentID: meta.AgentID,
SessionKey: meta.SessionKey,
TurnID: meta.TurnID,
}
if eventCtx == nil || eventCtx.Inbound == nil {
return scope
}
inbound := eventCtx.Inbound
scope.Channel = inbound.Channel
scope.Account = inbound.Account
scope.ChatID = inbound.ChatID
scope.TopicID = inbound.TopicID
scope.SpaceID = inbound.SpaceID
scope.SpaceType = inbound.SpaceType
scope.ChatType = inbound.ChatType
scope.SenderID = inbound.SenderID
scope.MessageID = inbound.MessageID
return scope
}
func runtimeCorrelationFromHookMeta(meta HookMeta) runtimeevents.Correlation {
return runtimeevents.Correlation{
TraceID: meta.TracePath,
ParentTurnID: meta.ParentTurnID,
}
}
func runtimeSeverityForAgentEvent(kind runtimeevents.Kind, payload any) runtimeevents.Severity {
switch kind {
case runtimeevents.KindAgentError, runtimeevents.KindAgentSubTurnOrphan:
return runtimeevents.SeverityError
case runtimeevents.KindAgentLLMRetry,
runtimeevents.KindAgentContextCompress,
runtimeevents.KindAgentToolExecSkipped:
return runtimeevents.SeverityWarn
case runtimeevents.KindAgentTurnEnd:
payload, ok := payload.(TurnEndPayload)
if !ok {
return runtimeevents.SeverityInfo
}
switch payload.Status {
case TurnEndStatusError:
return runtimeevents.SeverityError
case TurnEndStatusAborted:
return runtimeevents.SeverityWarn
default:
return runtimeevents.SeverityInfo
}
case runtimeevents.KindAgentToolExecEnd:
payload, ok := payload.(ToolExecEndPayload)
if ok && payload.IsError {
return runtimeevents.SeverityWarn
}
return runtimeevents.SeverityInfo
default:
return runtimeevents.SeverityInfo
}
}
func runtimeAttrsFromHookMeta(meta HookMeta) map[string]any {
attrs := make(map[string]any, 2)
if meta.Source != "" {
attrs["agent_source"] = meta.Source
}
if meta.Iteration != 0 {
attrs["iteration"] = meta.Iteration
}
if len(attrs) == 0 {
return nil
}
return attrs
}
+28 -6
View File
@@ -8,6 +8,7 @@ import (
"time"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
type hookRuntime struct {
@@ -295,10 +296,11 @@ func processHookObserveKindsFromConfig(observe []string) ([]string, bool, error)
case "", "*", "all":
return nil, true, nil
default:
if _, ok := validKinds[kind]; !ok {
normalizedKind, ok := validKinds[kind]
if !ok {
return nil, false, fmt.Errorf("unsupported observe event %q", kind)
}
normalized = append(normalized, kind)
normalized = append(normalized, normalizedKind)
}
}
@@ -308,10 +310,30 @@ func processHookObserveKindsFromConfig(observe []string) ([]string, bool, error)
return normalized, true, nil
}
func validHookEventKinds() map[string]struct{} {
kinds := make(map[string]struct{}, int(eventKindCount))
for kind := EventKind(0); kind < eventKindCount; kind++ {
kinds[kind.String()] = struct{}{}
func validHookEventKinds() map[string]string {
runtimeKinds := runtimeevents.KnownKinds()
kinds := make(map[string]string, len(runtimeKinds)*2)
for _, kind := range runtimeKinds {
kinds[kind.String()] = kind.String()
}
kinds["turn_start"] = runtimeevents.KindAgentTurnStart.String()
kinds["turn_end"] = runtimeevents.KindAgentTurnEnd.String()
kinds["llm_request"] = runtimeevents.KindAgentLLMRequest.String()
kinds["llm_delta"] = runtimeevents.KindAgentLLMDelta.String()
kinds["llm_response"] = runtimeevents.KindAgentLLMResponse.String()
kinds["llm_retry"] = runtimeevents.KindAgentLLMRetry.String()
kinds["context_compress"] = runtimeevents.KindAgentContextCompress.String()
kinds["session_summarize"] = runtimeevents.KindAgentSessionSummarize.String()
kinds["tool_exec_start"] = runtimeevents.KindAgentToolExecStart.String()
kinds["tool_exec_end"] = runtimeevents.KindAgentToolExecEnd.String()
kinds["tool_exec_skipped"] = runtimeevents.KindAgentToolExecSkipped.String()
kinds["steering_injected"] = runtimeevents.KindAgentSteeringInjected.String()
kinds["follow_up_queued"] = runtimeevents.KindAgentFollowUpQueued.String()
kinds["interrupt_received"] = runtimeevents.KindAgentInterruptReceived.String()
kinds["subturn_spawn"] = runtimeevents.KindAgentSubTurnSpawn.String()
kinds["subturn_end"] = runtimeevents.KindAgentSubTurnEnd.String()
kinds["subturn_result_delivered"] = runtimeevents.KindAgentSubTurnResultDelivered.String()
kinds["subturn_orphan"] = runtimeevents.KindAgentSubTurnOrphan.String()
kinds["error"] = runtimeevents.KindAgentError.String()
return kinds
}
+22 -1
View File
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"path/filepath"
"slices"
"testing"
"github.com/sipeed/picoclaw/pkg/bus"
@@ -155,7 +156,27 @@ func TestAgentLoop_ProcessDirectWithChannel_AutoMountsProcessHook(t *testing.T)
t.Fatalf("expected process model, got %q", lastModel)
}
waitForFileContains(t, eventLog, "turn_end")
waitForFileContains(t, eventLog, "agent.turn.end")
}
func TestProcessHookObserveKindsFromConfigAcceptsRuntimeNames(t *testing.T) {
kinds, enabled, err := processHookObserveKindsFromConfig([]string{
"tool_exec_start",
"agent.tool.exec_end",
"gateway.ready",
"mcp.server.failed",
})
if err != nil {
t.Fatalf("processHookObserveKindsFromConfig failed: %v", err)
}
if !enabled {
t.Fatal("expected observe to be enabled")
}
want := []string{"agent.tool.exec_start", "agent.tool.exec_end", "gateway.ready", "mcp.server.failed"}
if !slices.Equal(kinds, want) {
t.Fatalf("observe kinds = %v, want %v", kinds, want)
}
}
func TestAgentLoop_ProcessDirectWithChannel_InvalidConfiguredHookFails(t *testing.T) {
+3 -2
View File
@@ -12,6 +12,7 @@ import (
"sync/atomic"
"time"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/isolation"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/tools"
@@ -183,7 +184,7 @@ func (ph *ProcessHook) Close() error {
return ph.closeErr
}
func (ph *ProcessHook) OnEvent(ctx context.Context, evt Event) error {
func (ph *ProcessHook) OnRuntimeEvent(ctx context.Context, evt runtimeevents.Event) error {
if ph == nil || !ph.opts.Observe {
return nil
}
@@ -192,7 +193,7 @@ func (ph *ProcessHook) OnEvent(ctx context.Context, evt Event) error {
return nil
}
}
return ph.notify(ctx, "hook.event", evt)
return ph.notify(ctx, "hook.runtime_event", evt)
}
func (ph *ProcessHook) BeforeLLM(
+14 -9
View File
@@ -13,6 +13,7 @@ import (
"time"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/isolation"
"github.com/sipeed/picoclaw/pkg/providers"
)
@@ -66,7 +67,7 @@ func TestAgentLoop_MountProcessHook_LLMAndObserver(t *testing.T) {
t.Fatalf("expected process model, got %q", lastModel)
}
waitForFileContains(t, eventLog, "turn_end")
waitForFileContains(t, eventLog, "agent.turn.end")
}
func TestAgentLoop_MountProcessHook_ToolRewrite(t *testing.T) {
@@ -146,8 +147,13 @@ func TestAgentLoop_MountProcessHook_ApprovalDeny(t *testing.T) {
t.Fatalf("MountProcessHook failed: %v", err)
}
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentToolExecSkipped,
)
defer closeRuntimeEvents()
resp, err := al.runAgentLoop(context.Background(), agent, processOptions{
SessionKey: "session-1",
@@ -167,8 +173,8 @@ func TestAgentLoop_MountProcessHook_ApprovalDeny(t *testing.T) {
t.Fatalf("expected %q, got %q", expected, resp)
}
events := collectEventStream(sub.C)
skippedEvt, ok := findEvent(events, EventKindToolExecSkipped)
events := collectRuntimeEventStream(runtimeCh)
skippedEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentToolExecSkipped)
if !ok {
t.Fatal("expected tool skipped event")
}
@@ -350,12 +356,11 @@ func runProcessHookHelper() error {
}
if msg.ID == 0 {
if msg.Method == "hook.event" && eventLog != "" {
if msg.Method == "hook.runtime_event" && eventLog != "" {
var evt map[string]any
if err := json.Unmarshal(msg.Params, &evt); err == nil {
if rawKind, ok := evt["Kind"].(float64); ok {
kind := EventKind(rawKind)
_ = os.WriteFile(eventLog, []byte(kind.String()+"\n"), 0o644)
if kind, ok := evt["kind"].(string); ok {
_ = os.WriteFile(eventLog, []byte(kind+"\n"), 0o644)
}
}
}
+56 -36
View File
@@ -9,6 +9,7 @@ import (
"sync"
"time"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/tools"
@@ -71,8 +72,8 @@ func NamedHook(name string, hook any) HookRegistration {
}
}
type EventObserver interface {
OnEvent(ctx context.Context, evt Event) error
type RuntimeEventObserver interface {
OnRuntimeEvent(ctx context.Context, evt runtimeevents.Event) error
}
type LLMInterceptor interface {
@@ -90,7 +91,7 @@ type ToolApprover interface {
}
type LLMHookRequest struct {
Meta EventMeta `json:"meta"`
Meta HookMeta `json:"meta"`
Context *TurnContext `json:"context,omitempty"`
Model string `json:"model"`
Messages []providers.Message `json:"messages,omitempty"`
@@ -104,7 +105,7 @@ func (r *LLMHookRequest) Clone() *LLMHookRequest {
return nil
}
cloned := *r
cloned.Meta = cloneEventMeta(r.Meta)
cloned.Meta = cloneHookMeta(r.Meta)
cloned.Context = cloneTurnContext(r.Context)
cloned.Messages = cloneProviderMessages(r.Messages)
cloned.Tools = cloneToolDefinitions(r.Tools)
@@ -113,7 +114,7 @@ func (r *LLMHookRequest) Clone() *LLMHookRequest {
}
type LLMHookResponse struct {
Meta EventMeta `json:"meta"`
Meta HookMeta `json:"meta"`
Context *TurnContext `json:"context,omitempty"`
Model string `json:"model"`
Response *providers.LLMResponse `json:"response,omitempty"`
@@ -124,14 +125,14 @@ func (r *LLMHookResponse) Clone() *LLMHookResponse {
return nil
}
cloned := *r
cloned.Meta = cloneEventMeta(r.Meta)
cloned.Meta = cloneHookMeta(r.Meta)
cloned.Context = cloneTurnContext(r.Context)
cloned.Response = cloneLLMResponse(r.Response)
return &cloned
}
type ToolCallHookRequest struct {
Meta EventMeta `json:"meta"`
Meta HookMeta `json:"meta"`
Context *TurnContext `json:"context,omitempty"`
Tool string `json:"tool"`
Arguments map[string]any `json:"arguments,omitempty"`
@@ -145,7 +146,7 @@ func (r *ToolCallHookRequest) Clone() *ToolCallHookRequest {
return nil
}
cloned := *r
cloned.Meta = cloneEventMeta(r.Meta)
cloned.Meta = cloneHookMeta(r.Meta)
cloned.Context = cloneTurnContext(r.Context)
cloned.Arguments = cloneStringAnyMap(r.Arguments)
cloned.HookResult = cloneToolResult(r.HookResult)
@@ -153,7 +154,7 @@ func (r *ToolCallHookRequest) Clone() *ToolCallHookRequest {
}
type ToolApprovalRequest struct {
Meta EventMeta `json:"meta"`
Meta HookMeta `json:"meta"`
Context *TurnContext `json:"context,omitempty"`
Tool string `json:"tool"`
Arguments map[string]any `json:"arguments,omitempty"`
@@ -164,14 +165,14 @@ func (r *ToolApprovalRequest) Clone() *ToolApprovalRequest {
return nil
}
cloned := *r
cloned.Meta = cloneEventMeta(r.Meta)
cloned.Meta = cloneHookMeta(r.Meta)
cloned.Context = cloneTurnContext(r.Context)
cloned.Arguments = cloneStringAnyMap(r.Arguments)
return &cloned
}
type ToolResultHookResponse struct {
Meta EventMeta `json:"meta"`
Meta HookMeta `json:"meta"`
Context *TurnContext `json:"context,omitempty"`
Tool string `json:"tool"`
Arguments map[string]any `json:"arguments,omitempty"`
@@ -184,7 +185,7 @@ func (r *ToolResultHookResponse) Clone() *ToolResultHookResponse {
return nil
}
cloned := *r
cloned.Meta = cloneEventMeta(r.Meta)
cloned.Meta = cloneHookMeta(r.Meta)
cloned.Context = cloneTurnContext(r.Context)
cloned.Arguments = cloneStringAnyMap(r.Arguments)
cloned.Result = cloneToolResult(r.Result)
@@ -192,7 +193,7 @@ func (r *ToolResultHookResponse) Clone() *ToolResultHookResponse {
}
type HookManager struct {
eventBus *EventBus
runtimeEvents runtimeevents.EventChannel
observerTimeout time.Duration
interceptorTimeout time.Duration
approvalTimeout time.Duration
@@ -201,28 +202,39 @@ type HookManager struct {
hooks map[string]HookRegistration
ordered []HookRegistration
sub EventSubscription
done chan struct{}
closeOnce sync.Once
runtimeSub runtimeevents.Subscription
runtimeDone chan struct{}
closeOnce sync.Once
}
func NewHookManager(eventBus *EventBus) *HookManager {
func NewHookManager(runtimeEvents runtimeevents.EventChannel) *HookManager {
hm := &HookManager{
eventBus: eventBus,
runtimeEvents: runtimeEvents,
observerTimeout: defaultHookObserverTimeout,
interceptorTimeout: defaultHookInterceptorTimeout,
approvalTimeout: defaultHookApprovalTimeout,
hooks: make(map[string]HookRegistration),
done: make(chan struct{}),
runtimeDone: make(chan struct{}),
}
if eventBus == nil {
close(hm.done)
return hm
if runtimeEvents != nil {
sub, ch, err := runtimeEvents.SubscribeChan(context.Background(), runtimeevents.SubscribeOptions{
Name: "hook-manager-observer",
Buffer: hookObserverBufferSize,
})
if err != nil {
logger.WarnCF("hooks", "Failed to subscribe runtime events for hooks", map[string]any{
"error": err.Error(),
})
close(hm.runtimeDone)
} else {
hm.runtimeSub = sub
go hm.dispatchRuntimeEvents(ch)
}
} else {
close(hm.runtimeDone)
}
hm.sub = eventBus.Subscribe(hookObserverBufferSize)
go hm.dispatchEvents()
return hm
}
@@ -232,10 +244,14 @@ func (hm *HookManager) Close() {
}
hm.closeOnce.Do(func() {
if hm.eventBus != nil {
hm.eventBus.Unsubscribe(hm.sub.ID)
if hm.runtimeSub != nil {
if err := hm.runtimeSub.Close(); err != nil {
logger.WarnCF("hooks", "Failed to close runtime event hook subscription", map[string]any{
"error": err.Error(),
})
}
}
<-hm.done
<-hm.runtimeDone
hm.closeAllHooks()
})
}
@@ -292,16 +308,16 @@ func (hm *HookManager) Unmount(name string) {
hm.rebuildOrdered()
}
func (hm *HookManager) dispatchEvents() {
defer close(hm.done)
func (hm *HookManager) dispatchRuntimeEvents(ch <-chan runtimeevents.Event) {
defer close(hm.runtimeDone)
for evt := range hm.sub.C {
for evt := range ch {
for _, reg := range hm.snapshotHooks() {
observer, ok := reg.Hook.(EventObserver)
observer, ok := reg.Hook.(RuntimeEventObserver)
if !ok {
continue
}
hm.runObserver(reg.Name, observer, evt)
hm.runRuntimeObserver(reg.Name, observer, evt)
}
}
}
@@ -581,26 +597,30 @@ func (hm *HookManager) closeAllHooks() {
hm.ordered = nil
}
func (hm *HookManager) runObserver(name string, observer EventObserver, evt Event) {
func (hm *HookManager) runRuntimeObserver(
name string,
observer RuntimeEventObserver,
evt runtimeevents.Event,
) {
ctx, cancel := context.WithTimeout(context.Background(), hm.observerTimeout)
defer cancel()
done := make(chan error, 1)
go func() {
done <- observer.OnEvent(ctx, evt)
done <- observer.OnRuntimeEvent(ctx, evt)
}()
select {
case err := <-done:
if err != nil {
logger.WarnCF("hooks", "Event observer failed", map[string]any{
logger.WarnCF("hooks", "Runtime event observer failed", map[string]any{
"hook": name,
"event": evt.Kind.String(),
"error": err.Error(),
})
}
case <-ctx.Done():
logger.WarnCF("hooks", "Event observer timed out", map[string]any{
logger.WarnCF("hooks", "Runtime event observer timed out", map[string]any{
"hook": name,
"event": evt.Kind.String(),
"timeout_ms": hm.observerTimeout.Milliseconds(),
+137 -51
View File
@@ -12,6 +12,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/routing"
"github.com/sipeed/picoclaw/pkg/session"
@@ -111,14 +112,14 @@ func (p *llmHookTestProvider) GetDefaultModel() string {
}
type llmObserverHook struct {
eventCh chan Event
eventCh chan runtimeevents.Event
lastInbound *bus.InboundContext
lastRoute *routing.ResolvedRoute
lastScope *session.SessionScope
}
func (h *llmObserverHook) OnEvent(ctx context.Context, evt Event) error {
if evt.Kind == EventKindTurnEnd {
func (h *llmObserverHook) OnRuntimeEvent(ctx context.Context, evt runtimeevents.Event) error {
if evt.Kind == runtimeevents.KindAgentTurnEnd {
select {
case h.eventCh <- evt:
default:
@@ -150,6 +151,20 @@ func (h *llmObserverHook) AfterLLM(
return next, HookDecision{Action: HookActionModify}, nil
}
type dualRuntimeObserverHook struct {
runtimeCh chan runtimeevents.Event
}
func (h *dualRuntimeObserverHook) OnRuntimeEvent(ctx context.Context, evt runtimeevents.Event) error {
if evt.Kind == runtimeevents.KindAgentTurnEnd {
select {
case h.runtimeCh <- evt:
default:
}
}
return nil
}
type llmSystemRewriteHook struct{}
func (h *llmSystemRewriteHook) BeforeLLM(
@@ -417,7 +432,7 @@ func TestAgentLoop_Hooks_ObserverAndLLMInterceptor(t *testing.T) {
al, agent, cleanup := newHookTestLoop(t, provider)
defer cleanup()
hook := &llmObserverHook{eventCh: make(chan Event, 1)}
hook := &llmObserverHook{eventCh: make(chan runtimeevents.Event, 1)}
if err := al.MountHook(NamedHook("llm-observer", hook)); err != nil {
t.Fatalf("MountHook failed: %v", err)
}
@@ -481,30 +496,80 @@ func TestAgentLoop_Hooks_ObserverAndLLMInterceptor(t *testing.T) {
select {
case evt := <-hook.eventCh:
if evt.Kind != EventKindTurnEnd {
if evt.Kind != runtimeevents.KindAgentTurnEnd {
t.Fatalf("expected turn end event, got %v", evt.Kind)
}
if evt.Context == nil || evt.Context.Inbound == nil {
t.Fatal("expected observer event to carry inbound context")
}
if evt.Context.Route == nil || evt.Context.Route.AgentID != "main" {
t.Fatalf("expected observer event to carry route context, got %+v", evt.Context.Route)
}
if evt.Context.Scope == nil || evt.Context.Scope.Values["sender"] != "hook-user" {
t.Fatalf("expected observer event to carry session scope, got %+v", evt.Context.Scope)
if evt.Scope.AgentID != "main" ||
evt.Scope.SessionKey != "session-1" ||
evt.Scope.Channel != "cli" ||
evt.Scope.ChatID != "direct" ||
evt.Scope.SenderID != "hook-user" {
t.Fatalf("runtime observer scope = %+v", evt.Scope)
}
case <-time.After(2 * time.Second):
t.Fatal("timed out waiting for hook observer event")
}
}
func TestAgentLoop_Hooks_RuntimeObserverReceivesEvents(t *testing.T) {
provider := &llmHookTestProvider{}
al, agent, cleanup := newHookTestLoop(t, provider)
defer cleanup()
hook := &dualRuntimeObserverHook{
runtimeCh: make(chan runtimeevents.Event, 1),
}
if err := al.MountHook(NamedHook("runtime-observer", hook)); err != nil {
t.Fatalf("MountHook failed: %v", err)
}
resp, err := al.runAgentLoop(context.Background(), agent, processOptions{
SessionKey: "session-1",
Channel: "cli",
ChatID: "direct",
UserMessage: "hello",
DefaultResponse: defaultResponse,
EnableSummary: false,
SendResponse: false,
InboundContext: &bus.InboundContext{
Channel: "cli",
Account: "default",
ChatID: "direct",
ChatType: "direct",
SenderID: "hook-user",
MessageID: "msg-1",
},
})
if err != nil {
t.Fatalf("runAgentLoop failed: %v", err)
}
if resp != "provider content" {
t.Fatalf("expected provider content, got %q", resp)
}
select {
case evt := <-hook.runtimeCh:
if evt.Kind != runtimeevents.KindAgentTurnEnd {
t.Fatalf("runtime observer kind = %q", evt.Kind)
}
if evt.Scope.SessionKey != "session-1" ||
evt.Scope.Channel != "cli" ||
evt.Scope.ChatID != "direct" ||
evt.Scope.MessageID != "msg-1" {
t.Fatalf("runtime observer scope = %+v", evt.Scope)
}
case <-time.After(2 * time.Second):
t.Fatal("timed out waiting for runtime observer event")
}
}
func TestAgentLoop_BtwCommand_UsesLLMHooks(t *testing.T) {
provider := &llmHookTestProvider{}
al, agent, cleanup := newHookTestLoop(t, provider)
defer cleanup()
useTestSideQuestionProvider(al, provider)
hook := &llmObserverHook{eventCh: make(chan Event, 1)}
hook := &llmObserverHook{eventCh: make(chan runtimeevents.Event, 1)}
if err := al.MountHook(NamedHook("llm-observer", hook)); err != nil {
t.Fatalf("MountHook failed: %v", err)
}
@@ -800,8 +865,13 @@ func TestAgentLoop_Hooks_ToolApproverCanDeny(t *testing.T) {
t.Fatalf("MountHook failed: %v", err)
}
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentToolExecSkipped,
)
defer closeRuntimeEvents()
resp, err := al.runAgentLoop(context.Background(), agent, processOptions{
SessionKey: "session-1",
@@ -820,8 +890,8 @@ func TestAgentLoop_Hooks_ToolApproverCanDeny(t *testing.T) {
t.Fatalf("expected %q, got %q", expected, resp)
}
events := collectEventStream(sub.C)
skippedEvt, ok := findEvent(events, EventKindToolExecSkipped)
events := collectRuntimeEventStream(runtimeCh)
skippedEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentToolExecSkipped)
if !ok {
t.Fatal("expected tool skipped event")
}
@@ -876,8 +946,13 @@ func TestAgentLoop_Hooks_ToolRespondAction(t *testing.T) {
t.Fatalf("MountHook failed: %v", err)
}
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentToolExecEnd,
)
defer closeRuntimeEvents()
resp, err := al.runAgentLoop(context.Background(), agent, processOptions{
SessionKey: "session-1",
@@ -899,8 +974,8 @@ func TestAgentLoop_Hooks_ToolRespondAction(t *testing.T) {
}
// Verify event stream has ToolExecEnd, not actual tool execution
events := collectEventStream(sub.C)
endEvt, ok := findEvent(events, EventKindToolExecEnd)
events := collectRuntimeEventStream(runtimeCh)
endEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentToolExecEnd)
if !ok {
t.Fatal("expected tool exec end event")
}
@@ -1065,8 +1140,13 @@ func TestAgentLoop_HookRespond_MediaError(t *testing.T) {
sendErr: errors.New("channel unavailable"),
})
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentToolExecEnd,
)
defer closeRuntimeEvents()
_, err := al.runAgentLoop(context.Background(), agent, processOptions{
SessionKey: "session-media-err",
@@ -1081,8 +1161,8 @@ func TestAgentLoop_HookRespond_MediaError(t *testing.T) {
t.Fatalf("runAgentLoop failed: %v", err)
}
events := collectEventStream(sub.C)
endEvt, ok := findEvent(events, EventKindToolExecEnd)
events := collectRuntimeEventStream(runtimeCh)
endEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentToolExecEnd)
if !ok {
t.Fatal("expected ToolExecEnd event")
}
@@ -1120,8 +1200,13 @@ func TestAgentLoop_HookRespond_BusFallback(t *testing.T) {
t.Fatalf("MountHook failed: %v", err)
}
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentToolExecEnd,
)
defer closeRuntimeEvents()
resp, err := al.runAgentLoop(context.Background(), agent, processOptions{
SessionKey: "session-bus-fallback",
@@ -1136,8 +1221,8 @@ func TestAgentLoop_HookRespond_BusFallback(t *testing.T) {
t.Fatalf("runAgentLoop failed: %v", err)
}
events := collectEventStream(sub.C)
endEvt, ok := findEvent(events, EventKindToolExecEnd)
events := collectRuntimeEventStream(runtimeCh)
endEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentToolExecEnd)
if !ok {
t.Fatal("expected ToolExecEnd event")
}
@@ -1282,8 +1367,13 @@ func TestAgentLoop_HookRespond_InterruptSkipsRemaining(t *testing.T) {
t.Fatalf("MountHook failed: %v", err)
}
sub := al.SubscribeEvents(32)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
32,
runtimeevents.KindAgentToolExecSkipped,
)
defer closeRuntimeEvents()
sessionKey := session.BuildMainSessionKey(routing.DefaultAgentID)
@@ -1322,9 +1412,9 @@ func TestAgentLoop_HookRespond_InterruptSkipsRemaining(t *testing.T) {
t.Fatal("timeout waiting for result")
}
events := collectEventStream(sub.C)
events := collectRuntimeEventStream(runtimeCh)
skippedEvts := filterEvents(events, EventKindToolExecSkipped)
skippedEvts := filterRuntimeEvents(events, runtimeevents.KindAgentToolExecSkipped)
if len(skippedEvts) < 1 {
t.Fatal("expected at least one ToolExecSkipped event after interrupt")
}
@@ -1362,8 +1452,14 @@ func TestAgentLoop_HookRespond_SteeringSkipsRemaining(t *testing.T) {
t.Fatalf("MountHook failed: %v", err)
}
sub := al.SubscribeEvents(32)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
32,
runtimeevents.KindAgentToolExecEnd,
runtimeevents.KindAgentToolExecSkipped,
)
defer closeRuntimeEvents()
sessionKey := session.BuildMainSessionKey(routing.DefaultAgentID)
@@ -1383,14 +1479,14 @@ func TestAgentLoop_HookRespond_SteeringSkipsRemaining(t *testing.T) {
resultCh <- result{resp: resp, err: err}
}()
collectedEvents := make([]Event, 0, 8)
collectedEvents := make([]runtimeevents.Event, 0, 8)
steered := false
deadline := time.After(3 * time.Second)
for !steered {
select {
case evt := <-sub.C:
case evt := <-runtimeCh:
collectedEvents = append(collectedEvents, evt)
if evt.Kind != EventKindToolExecEnd {
if evt.Kind != runtimeevents.KindAgentToolExecEnd {
continue
}
payload, ok := evt.Payload.(ToolExecEndPayload)
@@ -1413,9 +1509,9 @@ func TestAgentLoop_HookRespond_SteeringSkipsRemaining(t *testing.T) {
t.Fatal("timeout waiting for result")
}
events := append(collectedEvents, collectEventStream(sub.C)...)
events := append(collectedEvents, collectRuntimeEventStream(runtimeCh)...)
skippedEvts := filterEvents(events, EventKindToolExecSkipped)
skippedEvts := filterRuntimeEvents(events, runtimeevents.KindAgentToolExecSkipped)
if len(skippedEvts) < 1 {
t.Fatal("expected at least one ToolExecSkipped event after steering")
}
@@ -1480,13 +1576,3 @@ func TestCloneStringAnyMap_EmptyMapReturnsNonNil(t *testing.T) {
}
})
}
func filterEvents(events []Event, kind EventKind) []Event {
var result []Event
for _, evt := range events {
if evt.Kind == kind {
result = append(result, evt)
}
}
return result
}
+10 -9
View File
@@ -10,6 +10,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/constants"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/tools"
@@ -72,7 +73,7 @@ toolLoop:
})
al.emitEvent(
EventKindToolExecStart,
runtimeevents.KindAgentToolExecStart,
ts.eventMeta("runTurn", "turn.tool.start"),
ToolExecStartPayload{
Tool: toolName,
@@ -191,7 +192,7 @@ toolLoop:
}
al.emitEvent(
EventKindToolExecEnd,
runtimeevents.KindAgentToolExecEnd,
ts.eventMeta("runTurn", "turn.tool.end"),
ToolExecEndPayload{
Tool: toolName,
@@ -237,7 +238,7 @@ toolLoop:
for j := i + 1; j < len(normalizedToolCalls); j++ {
skippedTC := normalizedToolCalls[j]
al.emitEvent(
EventKindToolExecSkipped,
runtimeevents.KindAgentToolExecSkipped,
ts.eventMeta("runTurn", "turn.tool.skipped"),
ToolExecSkippedPayload{
Tool: skippedTC.Name,
@@ -284,7 +285,7 @@ toolLoop:
exec.allResponsesHandled = false
denyContent := hookDeniedToolContent("Tool execution denied by hook", decision.Reason)
al.emitEvent(
EventKindToolExecSkipped,
runtimeevents.KindAgentToolExecSkipped,
ts.eventMeta("runTurn", "turn.tool.skipped"),
ToolExecSkippedPayload{
Tool: toolName,
@@ -323,7 +324,7 @@ toolLoop:
exec.allResponsesHandled = false
denyContent := hookDeniedToolContent("Tool execution denied by approval hook", approval.Reason)
al.emitEvent(
EventKindToolExecSkipped,
runtimeevents.KindAgentToolExecSkipped,
ts.eventMeta("runTurn", "turn.tool.skipped"),
ToolExecSkippedPayload{
Tool: toolName,
@@ -353,7 +354,7 @@ toolLoop:
"iteration": iteration,
})
al.emitEvent(
EventKindToolExecStart,
runtimeevents.KindAgentToolExecStart,
ts.eventMeta("runTurn", "turn.tool.start"),
ToolExecStartPayload{
Tool: toolName,
@@ -401,7 +402,7 @@ toolLoop:
"channel": ts.channel,
})
al.emitEvent(
EventKindFollowUpQueued,
runtimeevents.KindAgentFollowUpQueued,
ts.scope.meta(iteration, "runTurn", "turn.follow_up.queued"),
FollowUpQueuedPayload{
SourceTool: asyncToolName,
@@ -567,7 +568,7 @@ toolLoop:
toolResultMsg.Media = append(toolResultMsg.Media, toolResult.Media...)
}
al.emitEvent(
EventKindToolExecEnd,
runtimeevents.KindAgentToolExecEnd,
ts.eventMeta("runTurn", "turn.tool.end"),
ToolExecEndPayload{
Tool: toolName,
@@ -612,7 +613,7 @@ toolLoop:
for j := i + 1; j < len(normalizedToolCalls); j++ {
skippedTC := normalizedToolCalls[j]
al.emitEvent(
EventKindToolExecSkipped,
runtimeevents.KindAgentToolExecSkipped,
ts.eventMeta("runTurn", "turn.tool.skipped"),
ToolExecSkippedPayload{
Tool: skippedTC.Name,
+2 -1
View File
@@ -6,6 +6,7 @@ import (
"context"
"github.com/sipeed/picoclaw/pkg/bus"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/providers"
)
@@ -50,7 +51,7 @@ func (p *Pipeline) Finalize(
ts.ingestMessage(turnCtx, al, finalMsg)
if err := ts.agent.Sessions.Save(ts.sessionKey); err != nil {
al.emitEvent(
EventKindError,
runtimeevents.KindAgentError,
ts.eventMeta("runTurn", "turn.error"),
ErrorPayload{
Stage: "session_save",
+7 -6
View File
@@ -11,6 +11,7 @@ import (
"time"
"github.com/sipeed/picoclaw/pkg/constants"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
)
@@ -113,7 +114,7 @@ func (p *Pipeline) CallLLM(
}
al.emitEvent(
EventKindLLMRequest,
runtimeevents.KindAgentLLMRequest,
ts.eventMeta("runTurn", "turn.llm.request"),
LLMRequestPayload{
Model: exec.llmModel,
@@ -206,7 +207,7 @@ func (p *Pipeline) CallLLM(
// Retry without media if vision is unsupported
if hasMediaRefs(exec.callMessages) && isVisionUnsupportedError(err) && retry < maxRetries {
al.emitEvent(
EventKindLLMRetry,
runtimeevents.KindAgentLLMRetry,
ts.eventMeta("runTurn", "turn.llm.retry"),
LLMRetryPayload{
Attempt: retry + 1,
@@ -262,7 +263,7 @@ func (p *Pipeline) CallLLM(
if isTimeoutError && retry < maxRetries {
backoff := time.Duration(retry+1) * time.Duration(backoffSecs) * time.Second
al.emitEvent(
EventKindLLMRetry,
runtimeevents.KindAgentLLMRetry,
ts.eventMeta("runTurn", "turn.llm.retry"),
LLMRetryPayload{
Attempt: retry + 1,
@@ -319,7 +320,7 @@ func (p *Pipeline) CallLLM(
if isContextError && retry < maxRetries && !ts.opts.NoHistory {
al.emitEvent(
EventKindLLMRetry,
runtimeevents.KindAgentLLMRetry,
ts.eventMeta("runTurn", "turn.llm.retry"),
LLMRetryPayload{
Attempt: retry + 1,
@@ -378,7 +379,7 @@ func (p *Pipeline) CallLLM(
if err != nil {
al.emitEvent(
EventKindError,
runtimeevents.KindAgentError,
ts.eventMeta("runTurn", "turn.error"),
ErrorPayload{
Stage: "llm",
@@ -442,7 +443,7 @@ func (p *Pipeline) CallLLM(
)
}
al.emitEvent(
EventKindLLMResponse,
runtimeevents.KindAgentLLMResponse,
ts.eventMeta("runTurn", "turn.llm.response"),
LLMResponsePayload{
ContentLen: len(exec.response.Content),
+408
View File
@@ -0,0 +1,408 @@
package agent
import (
"context"
"fmt"
"path"
"strings"
"sync"
"time"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
)
const (
runtimeEventLoggerBuffer = 256
runtimeEventLoggerDrainTimeout = 2 * time.Second
)
type runtimeEventLogger struct {
mu sync.RWMutex
cfg config.EventLoggingConfig
}
func (al *AgentLoop) refreshRuntimeEventLogger(cfg *config.Config) {
if al == nil {
return
}
logCfg := config.EffectiveEventLoggingConfig(cfg)
al.runtimeEventLogMu.Lock()
if !logCfg.Enabled {
oldSub := al.runtimeEventLogSub
al.runtimeEventLogger = nil
al.runtimeEventLogSub = nil
al.runtimeEventLogMu.Unlock()
closeRuntimeEventLoggerSubscription(oldSub)
return
}
if al.runtimeEventLogger != nil && al.runtimeEventLogSub != nil {
al.runtimeEventLogger.updateConfig(logCfg)
al.runtimeEventLogMu.Unlock()
return
}
al.runtimeEventLogMu.Unlock()
eventLogger := newRuntimeEventLoggerFromConfig(logCfg)
sub, err := eventLogger.subscribe(context.Background(), al.runtimeEvents)
if err != nil {
logger.WarnCF("events", "Failed to subscribe runtime event logger", map[string]any{"error": err.Error()})
return
}
al.runtimeEventLogMu.Lock()
oldSub := al.runtimeEventLogSub
al.runtimeEventLogger = eventLogger
al.runtimeEventLogSub = sub
al.runtimeEventLogMu.Unlock()
closeRuntimeEventLoggerSubscription(oldSub)
}
func (al *AgentLoop) closeRuntimeEventLogger() {
if al == nil {
return
}
al.runtimeEventLogMu.Lock()
oldSub := al.runtimeEventLogSub
al.runtimeEventLogger = nil
al.runtimeEventLogSub = nil
al.runtimeEventLogMu.Unlock()
closeRuntimeEventLoggerSubscription(oldSub)
}
func closeRuntimeEventLoggerSubscription(sub runtimeevents.Subscription) {
if sub == nil {
return
}
if err := sub.Close(); err != nil {
logger.WarnCF("events", "Failed to close runtime event logger subscription", map[string]any{
"error": err.Error(),
})
}
timer := time.NewTimer(runtimeEventLoggerDrainTimeout)
defer timer.Stop()
select {
case <-sub.Done():
case <-timer.C:
logger.WarnCF("events", "Timed out waiting for runtime event logger to drain", map[string]any{
"timeout": runtimeEventLoggerDrainTimeout.String(),
})
}
}
func newRuntimeEventLogger(cfg *config.Config) *runtimeEventLogger {
logCfg := config.EffectiveEventLoggingConfig(cfg)
if !logCfg.Enabled {
return nil
}
return newRuntimeEventLoggerFromConfig(logCfg)
}
func newRuntimeEventLoggerFromConfig(logCfg config.EventLoggingConfig) *runtimeEventLogger {
return &runtimeEventLogger{cfg: logCfg}
}
func (l *runtimeEventLogger) updateConfig(cfg config.EventLoggingConfig) {
if l == nil {
return
}
l.mu.Lock()
l.cfg = cfg
l.mu.Unlock()
}
func (l *runtimeEventLogger) configSnapshot() config.EventLoggingConfig {
if l == nil {
return config.EventLoggingConfig{}
}
l.mu.RLock()
defer l.mu.RUnlock()
return l.cfg
}
func (l *runtimeEventLogger) subscribe(
ctx context.Context,
eventBus runtimeevents.Bus,
) (runtimeevents.Subscription, error) {
if l == nil || eventBus == nil {
return nil, nil
}
return eventBus.Channel().Subscribe(ctx, runtimeevents.SubscribeOptions{
Name: "runtime-event-logger",
Buffer: runtimeEventLoggerBuffer,
Concurrency: runtimeevents.Locked,
Backpressure: runtimeevents.DropNewest,
PanicPolicy: runtimeevents.RecoverAndLog,
}, l.handle)
}
func (l *runtimeEventLogger) handle(_ context.Context, evt runtimeevents.Event) error {
if l == nil || !l.shouldLog(evt) {
return nil
}
fields := runtimeEventLogFields(evt)
if l.configSnapshot().IncludePayload && evt.Payload != nil {
fields["payload"] = evt.Payload
}
logRuntimeEvent(evt, fields)
return nil
}
func (l *runtimeEventLogger) shouldLog(evt runtimeevents.Event) bool {
if l == nil {
return false
}
cfg := l.configSnapshot()
if !cfg.Enabled {
return false
}
if runtimeEventSeverityRank(evt.Severity) < runtimeEventSeverityRank(parseRuntimeEventSeverity(cfg.MinSeverity)) {
return false
}
kind := evt.Kind.String()
if !matchAnyRuntimeEventPattern(cfg.Include, kind, true) {
return false
}
return !matchAnyRuntimeEventPattern(cfg.Exclude, kind, false)
}
func logRuntimeEvent(evt runtimeevents.Event, fields map[string]any) {
message := fmt.Sprintf("Runtime event: %s", evt.Kind.String())
switch normalizeRuntimeEventSeverity(evt.Severity) {
case runtimeevents.SeverityDebug:
logger.DebugCF("events", message, fields)
case runtimeevents.SeverityWarn:
logger.WarnCF("events", message, fields)
case runtimeevents.SeverityError:
logger.ErrorCF("events", message, fields)
default:
logger.InfoCF("events", message, fields)
}
}
func runtimeEventLogFields(evt runtimeevents.Event) map[string]any {
fields := map[string]any{
"event_id": evt.ID,
"event_kind": evt.Kind.String(),
"severity": string(normalizeRuntimeEventSeverity(evt.Severity)),
}
if !evt.Time.IsZero() {
fields["event_time"] = evt.Time.Format(time.RFC3339Nano)
}
appendRuntimeEventSourceFields(fields, evt.Source)
appendRuntimeEventScopeFields(fields, evt.Scope)
appendRuntimeEventCorrelationFields(fields, evt.Correlation)
appendRuntimeEventAttrs(fields, evt.Attrs)
appendRuntimeEventPayloadSummary(fields, evt.Payload)
return fields
}
func appendRuntimeEventSourceFields(fields map[string]any, source runtimeevents.Source) {
if source.Component != "" {
fields["source_component"] = source.Component
}
if source.Name != "" {
fields["source_name"] = source.Name
}
}
func appendRuntimeEventScopeFields(fields map[string]any, scope runtimeevents.Scope) {
setStringField(fields, "runtime_id", scope.RuntimeID)
setStringField(fields, "agent_id", scope.AgentID)
setStringField(fields, "session_key", scope.SessionKey)
setStringField(fields, "turn_id", scope.TurnID)
setStringField(fields, "channel", scope.Channel)
setStringField(fields, "account", scope.Account)
setStringField(fields, "chat_id", scope.ChatID)
setStringField(fields, "topic_id", scope.TopicID)
setStringField(fields, "space_id", scope.SpaceID)
setStringField(fields, "space_type", scope.SpaceType)
setStringField(fields, "chat_type", scope.ChatType)
setStringField(fields, "sender_id", scope.SenderID)
setStringField(fields, "message_id", scope.MessageID)
}
func appendRuntimeEventCorrelationFields(fields map[string]any, correlation runtimeevents.Correlation) {
setStringField(fields, "trace_id", correlation.TraceID)
setStringField(fields, "parent_turn_id", correlation.ParentTurnID)
setStringField(fields, "request_id", correlation.RequestID)
setStringField(fields, "reply_to_id", correlation.ReplyToID)
}
func appendRuntimeEventAttrs(fields map[string]any, attrs map[string]any) {
for key, value := range attrs {
if key == "" || value == nil {
continue
}
if _, exists := fields[key]; exists {
fields["attr_"+key] = value
continue
}
fields[key] = value
}
}
func appendRuntimeEventPayloadSummary(fields map[string]any, payload any) {
switch payload := payload.(type) {
case TurnStartPayload:
fields["user_len"] = len(payload.UserMessage)
fields["media_count"] = payload.MediaCount
case TurnEndPayload:
fields["status"] = payload.Status
fields["iterations_total"] = payload.Iterations
fields["duration_ms"] = payload.Duration.Milliseconds()
fields["final_len"] = payload.FinalContentLen
case LLMRequestPayload:
fields["model"] = payload.Model
fields["messages"] = payload.MessagesCount
fields["tools"] = payload.ToolsCount
fields["max_tokens"] = payload.MaxTokens
case LLMDeltaPayload:
fields["content_delta_len"] = payload.ContentDeltaLen
fields["reasoning_delta_len"] = payload.ReasoningDeltaLen
case LLMResponsePayload:
fields["content_len"] = payload.ContentLen
fields["tool_calls"] = payload.ToolCalls
fields["has_reasoning"] = payload.HasReasoning
case LLMRetryPayload:
fields["attempt"] = payload.Attempt
fields["max_retries"] = payload.MaxRetries
fields["reason"] = payload.Reason
fields["error"] = payload.Error
fields["backoff_ms"] = payload.Backoff.Milliseconds()
case ContextCompressPayload:
fields["reason"] = payload.Reason
fields["dropped_messages"] = payload.DroppedMessages
fields["remaining_messages"] = payload.RemainingMessages
case SessionSummarizePayload:
fields["summarized_messages"] = payload.SummarizedMessages
fields["kept_messages"] = payload.KeptMessages
fields["summary_len"] = payload.SummaryLen
fields["omitted_oversized"] = payload.OmittedOversized
case ToolExecStartPayload:
fields["tool"] = payload.Tool
fields["args_count"] = len(payload.Arguments)
case ToolExecEndPayload:
fields["tool"] = payload.Tool
fields["duration_ms"] = payload.Duration.Milliseconds()
fields["for_llm_len"] = payload.ForLLMLen
fields["for_user_len"] = payload.ForUserLen
fields["is_error"] = payload.IsError
fields["async"] = payload.Async
case ToolExecSkippedPayload:
fields["tool"] = payload.Tool
fields["reason"] = payload.Reason
case SteeringInjectedPayload:
fields["count"] = payload.Count
fields["total_content_len"] = payload.TotalContentLen
case FollowUpQueuedPayload:
fields["source_tool"] = payload.SourceTool
fields["content_len"] = payload.ContentLen
case InterruptReceivedPayload:
fields["interrupt_kind"] = payload.Kind
fields["role"] = payload.Role
fields["content_len"] = payload.ContentLen
fields["queue_depth"] = payload.QueueDepth
fields["hint_len"] = payload.HintLen
case SubTurnSpawnPayload:
fields["child_agent_id"] = payload.AgentID
fields["label"] = payload.Label
case SubTurnEndPayload:
fields["child_agent_id"] = payload.AgentID
fields["status"] = payload.Status
case SubTurnResultDeliveredPayload:
fields["target_channel"] = payload.TargetChannel
fields["target_chat_id"] = payload.TargetChatID
fields["content_len"] = payload.ContentLen
case SubTurnOrphanPayload:
fields["parent_turn_id"] = payload.ParentTurnID
fields["child_turn_id"] = payload.ChildTurnID
fields["reason"] = payload.Reason
case ErrorPayload:
fields["stage"] = payload.Stage
fields["error"] = payload.Message
}
}
func setStringField(fields map[string]any, key, value string) {
if value != "" {
fields[key] = value
}
}
func matchAnyRuntimeEventPattern(patterns []string, kind string, emptyMatches bool) bool {
if len(patterns) == 0 {
return emptyMatches
}
for _, pattern := range patterns {
if matchRuntimeEventPattern(pattern, kind) {
return true
}
}
return false
}
func matchRuntimeEventPattern(pattern, kind string) bool {
pattern = strings.TrimSpace(pattern)
if pattern == "" {
return false
}
if pattern == "*" {
return true
}
if strings.HasSuffix(pattern, ".*") {
return strings.HasPrefix(kind, strings.TrimSuffix(pattern, "*"))
}
matched, err := path.Match(pattern, kind)
if err == nil {
return matched
}
return pattern == kind
}
func parseRuntimeEventSeverity(severity string) runtimeevents.Severity {
switch strings.ToLower(strings.TrimSpace(severity)) {
case "debug":
return runtimeevents.SeverityDebug
case "warn", "warning":
return runtimeevents.SeverityWarn
case "error":
return runtimeevents.SeverityError
default:
return runtimeevents.SeverityInfo
}
}
func normalizeRuntimeEventSeverity(severity runtimeevents.Severity) runtimeevents.Severity {
switch severity {
case runtimeevents.SeverityDebug,
runtimeevents.SeverityInfo,
runtimeevents.SeverityWarn,
runtimeevents.SeverityError:
return severity
default:
return runtimeevents.SeverityInfo
}
}
func runtimeEventSeverityRank(severity runtimeevents.Severity) int {
switch normalizeRuntimeEventSeverity(severity) {
case runtimeevents.SeverityDebug:
return 0
case runtimeevents.SeverityInfo:
return 1
case runtimeevents.SeverityWarn:
return 2
case runtimeevents.SeverityError:
return 3
default:
return 1
}
}
+259
View File
@@ -0,0 +1,259 @@
package agent
import (
"context"
"sync/atomic"
"testing"
"time"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func TestRuntimeEventLoggerFiltering(t *testing.T) {
cfg := config.DefaultConfig()
eventLogger := newRuntimeEventLogger(cfg)
if eventLogger == nil {
t.Fatal("default runtime event logger is nil")
}
if !eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindAgentTurnStart,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("default config should log agent events")
}
if eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindChannelLifecycleStarted,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("default config should not log non-agent events")
}
cfg.Events.Logging.Include = []string{"*"}
cfg.Events.Logging.Exclude = []string{"mcp.*"}
eventLogger = newRuntimeEventLogger(cfg)
if !eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindGatewayReady,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("include * should log gateway events")
}
if eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindMCPServerConnected,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("exclude mcp.* should suppress MCP events")
}
cfg.Events.Logging.Exclude = nil
cfg.Events.Logging.MinSeverity = "warn"
eventLogger = newRuntimeEventLogger(cfg)
if eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindGatewayReady,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("min severity warn should suppress info events")
}
if !eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindGatewayReloadFailed,
Severity: runtimeevents.SeverityError,
}) {
t.Fatal("min severity warn should allow error events")
}
cfg.Events.Logging.Enabled = false
if newRuntimeEventLogger(cfg) != nil {
t.Fatal("disabled config should not create runtime event logger")
}
}
func TestRuntimeEventLogFieldsSummarizeAgentPayload(t *testing.T) {
fields := runtimeEventLogFields(runtimeevents.Event{
ID: "evt-test",
Kind: runtimeevents.KindAgentToolExecStart,
Severity: runtimeevents.SeverityInfo,
Source: runtimeevents.Source{
Component: "agent",
Name: "main",
},
Scope: runtimeevents.Scope{
AgentID: "main",
SessionKey: "session-1",
TurnID: "turn-1",
},
Payload: ToolExecStartPayload{
Tool: "exec",
Arguments: map[string]any{
"secret": "should-not-be-logged-by-default",
},
},
})
if fields["event_id"] != "evt-test" || fields["source_component"] != "agent" {
t.Fatalf("missing common event fields: %#v", fields)
}
if fields["tool"] != "exec" || fields["args_count"] != 1 {
t.Fatalf("missing safe agent payload summary fields: %#v", fields)
}
if _, ok := fields["payload"]; ok {
t.Fatalf("raw payload should not be included by runtimeEventLogFields: %#v", fields)
}
}
func TestRuntimeEventLogFieldsIncludeSafeAttrs(t *testing.T) {
fields := runtimeEventLogFields(runtimeevents.Event{
ID: "evt-gateway",
Kind: runtimeevents.KindGatewayReady,
Severity: runtimeevents.SeverityInfo,
Attrs: map[string]any{
"duration_ms": 42,
"error": "startup failed",
"event_kind": "conflict",
},
})
if fields["duration_ms"] != 42 || fields["error"] != "startup failed" {
t.Fatalf("missing safe attrs: %#v", fields)
}
if fields["event_kind"] != runtimeevents.KindGatewayReady.String() {
t.Fatalf("event_kind overwritten by attrs: %#v", fields)
}
if fields["attr_event_kind"] != "conflict" {
t.Fatalf("conflicting attr not preserved with prefix: %#v", fields)
}
if _, ok := fields["payload"]; ok {
t.Fatalf("raw payload should not be included by runtimeEventLogFields: %#v", fields)
}
}
func runtimeEventLoggerStateForTest(
al *AgentLoop,
) (*runtimeEventLogger, runtimeevents.Subscription) {
al.runtimeEventLogMu.RLock()
defer al.runtimeEventLogMu.RUnlock()
return al.runtimeEventLogger, al.runtimeEventLogSub
}
func TestReloadProviderAndConfigRefreshesRuntimeEventLogger(t *testing.T) {
cfg := config.DefaultConfig()
cfg.Agents.Defaults.Workspace = t.TempDir()
cfg.Events.Logging.Include = []string{"agent.*"}
al := NewAgentLoop(cfg, bus.NewMessageBus(), &mockProvider{})
defer al.Close()
eventLogger, logSub := runtimeEventLoggerStateForTest(al)
if eventLogger == nil || logSub == nil {
t.Fatal("expected initial runtime event logger subscription")
}
if eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindGatewayReloadCompleted,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("initial agent-only logging should not log gateway reload events")
}
reloaded := config.DefaultConfig()
reloaded.Agents.Defaults.Workspace = cfg.Agents.Defaults.Workspace
reloaded.Events.Logging.Include = []string{"gateway.*"}
if err := al.ReloadProviderAndConfig(context.Background(), &mockProvider{}, reloaded); err != nil {
t.Fatalf("ReloadProviderAndConfig() error = %v", err)
}
eventLogger, logSub = runtimeEventLoggerStateForTest(al)
if eventLogger == nil || logSub == nil {
t.Fatal("expected runtime event logger subscription after reload")
}
if !eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindGatewayReloadCompleted,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("reloaded gateway logging should log gateway reload events")
}
if eventLogger.shouldLog(runtimeevents.Event{
Kind: runtimeevents.KindAgentTurnStart,
Severity: runtimeevents.SeverityInfo,
}) {
t.Fatal("reloaded gateway-only logging should not log agent events")
}
disabled := config.DefaultConfig()
disabled.Agents.Defaults.Workspace = cfg.Agents.Defaults.Workspace
disabled.Events.Logging.Enabled = false
if err := al.ReloadProviderAndConfig(context.Background(), &mockProvider{}, disabled); err != nil {
t.Fatalf("ReloadProviderAndConfig() with disabled logging error = %v", err)
}
eventLogger, logSub = runtimeEventLoggerStateForTest(al)
if eventLogger != nil || logSub != nil {
t.Fatal("expected runtime event logger to be disabled after reload")
}
}
func TestCloseRuntimeEventLoggerSubscriptionWaitsForDrain(t *testing.T) {
eventBus := runtimeevents.NewBus()
defer func() {
if err := eventBus.Close(); err != nil {
t.Fatalf("Close failed: %v", err)
}
}()
var handled atomic.Uint64
firstStarted := make(chan struct{})
releaseFirst := make(chan struct{})
sub, err := eventBus.Channel().Subscribe(
context.Background(),
runtimeevents.SubscribeOptions{
Name: "runtime-event-logger",
Buffer: 2,
Concurrency: runtimeevents.Locked,
},
func(context.Context, runtimeevents.Event) error {
if handled.Add(1) == 1 {
close(firstStarted)
<-releaseFirst
}
return nil
},
)
if err != nil {
t.Fatalf("Subscribe failed: %v", err)
}
first := eventBus.Publish(context.Background(), runtimeevents.Event{Kind: runtimeevents.Kind("test.first")})
if first.Delivered != 1 {
t.Fatalf("first Publish = %+v, want one delivered event", first)
}
select {
case <-firstStarted:
case <-time.After(time.Second):
t.Fatal("timed out waiting for first handler to start")
}
second := eventBus.Publish(context.Background(), runtimeevents.Event{Kind: runtimeevents.Kind("test.second")})
if second.Delivered != 1 {
t.Fatalf("second Publish = %+v, want one delivered event", second)
}
closeReturned := make(chan struct{})
go func() {
closeRuntimeEventLoggerSubscription(sub)
close(closeReturned)
}()
select {
case <-closeReturned:
t.Fatal("runtime event logger close returned before buffered events drained")
case <-time.After(50 * time.Millisecond):
}
close(releaseFirst)
select {
case <-closeReturned:
case <-time.After(time.Second):
t.Fatal("timed out waiting for runtime event logger close to return")
}
if got := handled.Load(); got != 2 {
t.Fatalf("handled = %d, want 2", got)
}
}
+103
View File
@@ -0,0 +1,103 @@
package agent
import (
"testing"
"time"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func subscribeRuntimeEventsForTest(
t *testing.T,
al *AgentLoop,
buffer int,
kinds ...runtimeevents.Kind,
) (<-chan runtimeevents.Event, func()) {
t.Helper()
if al == nil {
t.Fatal("agent loop is nil")
}
channel := al.RuntimeEvents()
if channel == nil {
t.Fatal("runtime event channel is nil")
}
if len(kinds) > 0 {
channel = channel.OfKind(kinds...)
}
sub, ch, err := channel.SubscribeChan(
t.Context(),
runtimeevents.SubscribeOptions{Name: "agent-runtime-test", Buffer: buffer},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
return ch, func() {
if err := sub.Close(); err != nil {
t.Errorf("runtime subscription close failed: %v", err)
}
}
}
func waitForRuntimeEvent(
t *testing.T,
ch <-chan runtimeevents.Event,
timeout time.Duration,
match func(runtimeevents.Event) bool,
) runtimeevents.Event {
t.Helper()
timer := time.NewTimer(timeout)
defer timer.Stop()
for {
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("runtime event stream closed before expected event arrived")
}
if match(evt) {
return evt
}
case <-timer.C:
t.Fatal("timed out waiting for expected runtime event")
}
}
}
func collectRuntimeEventStream(ch <-chan runtimeevents.Event) []runtimeevents.Event {
var events []runtimeevents.Event
for {
select {
case evt, ok := <-ch:
if !ok {
return events
}
events = append(events, evt)
default:
return events
}
}
}
func findRuntimeEvent(
events []runtimeevents.Event,
kind runtimeevents.Kind,
) (runtimeevents.Event, bool) {
for _, evt := range events {
if evt.Kind == kind {
return evt, true
}
}
return runtimeevents.Event{}, false
}
func filterRuntimeEvents(events []runtimeevents.Event, kind runtimeevents.Kind) []runtimeevents.Event {
var filtered []runtimeevents.Event
for _, evt := range events {
if evt.Kind == kind {
filtered = append(filtered, evt)
}
}
return filtered
}
+5 -4
View File
@@ -8,6 +8,7 @@ import (
"sync"
"github.com/sipeed/picoclaw/pkg/bus"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/session"
@@ -206,7 +207,7 @@ func (al *AgentLoop) enqueueSteeringMessage(scope, agentID string, msg providers
"scope": normalizeSteeringScope(scope),
})
meta := EventMeta{
meta := HookMeta{
Source: "Steer",
TracePath: "turn.interrupt.received",
}
@@ -230,7 +231,7 @@ func (al *AgentLoop) enqueueSteeringMessage(scope, agentID string, msg providers
}
al.emitEvent(
EventKindInterruptReceived,
runtimeevents.KindAgentInterruptReceived,
meta,
InterruptReceivedPayload{
Kind: InterruptKindSteering,
@@ -410,7 +411,7 @@ func (al *AgentLoop) InterruptGraceful(hint string) error {
}
al.emitEvent(
EventKindInterruptReceived,
runtimeevents.KindAgentInterruptReceived,
ts.eventMeta("InterruptGraceful", "turn.interrupt.received"),
InterruptReceivedPayload{
Kind: InterruptKindGraceful,
@@ -438,7 +439,7 @@ func (al *AgentLoop) InterruptHard() error {
}
al.emitEvent(
EventKindInterruptReceived,
runtimeevents.KindAgentInterruptReceived,
ts.eventMeta("InterruptHard", "turn.interrupt.received"),
InterruptReceivedPayload{
Kind: InterruptKindHard,
+23 -10
View File
@@ -14,6 +14,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/media"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/routing"
@@ -1134,8 +1135,14 @@ func TestAgentLoop_InterruptGraceful_UsesTerminalNoToolCall(t *testing.T) {
al.RegisterTool(tool2)
sessionKey := session.BuildMainSessionKey(routing.DefaultAgentID)
sub := al.SubscribeEvents(32)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
32,
runtimeevents.KindAgentInterruptReceived,
runtimeevents.KindAgentTurnEnd,
)
defer closeRuntimeEvents()
type result struct {
resp string
@@ -1222,8 +1229,8 @@ func TestAgentLoop_InterruptGraceful_UsesTerminalNoToolCall(t *testing.T) {
t.Fatal("expected remaining tool to be marked as skipped after graceful interrupt")
}
events := collectEventStream(sub.C)
interruptEvt, ok := findEvent(events, EventKindInterruptReceived)
events := collectRuntimeEventStream(runtimeCh)
interruptEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentInterruptReceived)
if !ok {
t.Fatal("expected interrupt received event")
}
@@ -1235,7 +1242,7 @@ func TestAgentLoop_InterruptGraceful_UsesTerminalNoToolCall(t *testing.T) {
t.Fatalf("expected graceful interrupt payload, got %q", interruptPayload.Kind)
}
turnEndEvt, ok := findEvent(events, EventKindTurnEnd)
turnEndEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentTurnEnd)
if !ok {
t.Fatal("expected turn end event")
}
@@ -1299,8 +1306,14 @@ func TestAgentLoop_InterruptHard_RestoresSession(t *testing.T) {
}
defaultAgent.Sessions.SetHistory(sessionKey, originalHistory)
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentInterruptReceived,
runtimeevents.KindAgentTurnEnd,
)
defer closeRuntimeEvents()
type result struct {
resp string
@@ -1353,8 +1366,8 @@ func TestAgentLoop_InterruptHard_RestoresSession(t *testing.T) {
t.Fatalf("expected history rollback after hard abort, got %#v", finalHistory)
}
events := collectEventStream(sub.C)
interruptEvt, ok := findEvent(events, EventKindInterruptReceived)
events := collectRuntimeEventStream(runtimeCh)
interruptEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentInterruptReceived)
if !ok {
t.Fatal("expected interrupt received event")
}
@@ -1366,7 +1379,7 @@ func TestAgentLoop_InterruptHard_RestoresSession(t *testing.T) {
t.Fatalf("expected hard interrupt payload, got %q", interruptPayload.Kind)
}
turnEndEvt, ok := findEvent(events, EventKindTurnEnd)
turnEndEvt, ok := findRuntimeEvent(events, runtimeevents.KindAgentTurnEnd)
if !ok {
t.Fatal("expected turn end event")
}
+11 -10
View File
@@ -8,6 +8,7 @@ import (
"sync/atomic"
"time"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/providers/messageutil"
@@ -422,7 +423,7 @@ func spawnSubTurn(
parentTS.mu.Unlock()
// 6. Emit Spawn event
al.emitEvent(EventKindSubTurnSpawn,
al.emitEvent(runtimeevents.KindAgentSubTurnSpawn,
childTS.eventMeta("spawnSubTurn", "subturn.spawn"),
SubTurnSpawnPayload{
AgentID: childTS.agentID,
@@ -453,7 +454,7 @@ func spawnSubTurn(
if err != nil {
status = "error"
}
al.emitEvent(EventKindSubTurnEnd,
al.emitEvent(runtimeevents.KindAgentSubTurnEnd,
childTS.eventMeta("spawnSubTurn", "subturn.end"),
SubTurnEndPayload{
AgentID: childTS.agentID,
@@ -504,16 +505,16 @@ func spawnSubTurn(
//
// Delivery behavior:
// - If parent turn is still running: attempts to deliver to pendingResults channel
// - If channel is full: emits SubTurnOrphanResultEvent (result is lost from channel but tracked)
// - If parent turn has finished: emits SubTurnOrphanResultEvent (late arrival)
// - If channel is full: emits agent.subturn.orphan (result is lost from channel but tracked)
// - If parent turn has finished: emits agent.subturn.orphan (late arrival)
//
// Thread safety:
// - Reads parent state under lock, then releases lock before channel send
// - Small race window exists but is acceptable (worst case: result becomes orphan)
//
// Event emissions:
// - SubTurnResultDeliveredEvent: successful delivery to channel
// - SubTurnOrphanResultEvent: delivery failed (parent finished or channel full)
// - agent.subturn.result_delivered: successful delivery to channel
// - agent.subturn.orphan: delivery failed (parent finished or channel full)
func deliverSubTurnResult(al *AgentLoop, parentTS *turnState, childID string, result *tools.ToolResult) {
// Let GC clean up the pendingResults channel; parent Finish will no longer close it.
// We use defer/recover to catch any unlikely channel panics if it were ever closed.
@@ -526,7 +527,7 @@ func deliverSubTurnResult(al *AgentLoop, parentTS *turnState, childID string, re
"recover": r,
})
if result != nil && al != nil {
al.emitEvent(EventKindSubTurnOrphan,
al.emitEvent(runtimeevents.KindAgentSubTurnOrphan,
parentTS.eventMeta("deliverSubTurnResult", "subturn.orphan"),
SubTurnOrphanPayload{ParentTurnID: parentTS.turnID, ChildTurnID: childID, Reason: "panic"},
)
@@ -541,7 +542,7 @@ func deliverSubTurnResult(al *AgentLoop, parentTS *turnState, childID string, re
// If parent turn has already finished, treat this as an orphan result
if isFinished || resultChan == nil {
if result != nil && al != nil {
al.emitEvent(EventKindSubTurnOrphan,
al.emitEvent(runtimeevents.KindAgentSubTurnOrphan,
parentTS.eventMeta("deliverSubTurnResult", "subturn.orphan"),
SubTurnOrphanPayload{ParentTurnID: parentTS.turnID, ChildTurnID: childID, Reason: "parent_finished"},
)
@@ -557,7 +558,7 @@ func deliverSubTurnResult(al *AgentLoop, parentTS *turnState, childID string, re
case resultChan <- result:
// Successfully delivered
if al != nil {
al.emitEvent(EventKindSubTurnResultDelivered,
al.emitEvent(runtimeevents.KindAgentSubTurnResultDelivered,
parentTS.eventMeta("deliverSubTurnResult", "subturn.result_delivered"),
SubTurnResultDeliveredPayload{ContentLen: len(result.ForLLM)},
)
@@ -571,7 +572,7 @@ func deliverSubTurnResult(al *AgentLoop, parentTS *turnState, childID string, re
})
if result != nil && al != nil {
al.emitEvent(
EventKindSubTurnOrphan,
runtimeevents.KindAgentSubTurnOrphan,
parentTS.eventMeta("deliverSubTurnResult", "subturn.orphan"),
SubTurnOrphanPayload{
ParentTurnID: parentTS.turnID,
+52 -27
View File
@@ -10,6 +10,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/providers"
"github.com/sipeed/picoclaw/pkg/tools"
)
@@ -22,30 +23,38 @@ const (
// ====================== Test Helper: Event Collector ======================
type eventCollector struct {
mu sync.Mutex
events []Event
events []runtimeevents.Event
}
func newEventCollector(t *testing.T, al *AgentLoop) (*eventCollector, func()) {
t.Helper()
c := &eventCollector{}
sub := al.SubscribeEvents(16)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentSubTurnSpawn,
runtimeevents.KindAgentSubTurnEnd,
runtimeevents.KindAgentSubTurnResultDelivered,
runtimeevents.KindAgentSubTurnOrphan,
)
done := make(chan struct{})
go func() {
defer close(done)
for evt := range sub.C {
for evt := range runtimeCh {
c.mu.Lock()
c.events = append(c.events, evt)
c.mu.Unlock()
}
}()
cleanup := func() {
al.UnsubscribeEvents(sub.ID)
closeRuntimeEvents()
<-done
}
return c, cleanup
}
func (c *eventCollector) hasEventOfKind(kind EventKind) bool {
func (c *eventCollector) hasEventOfKind(kind runtimeevents.Kind) bool {
c.mu.Lock()
defer c.mu.Unlock()
for _, e := range c.events {
@@ -131,7 +140,7 @@ func TestSpawnSubTurn(t *testing.T) {
agent: al.registry.GetDefaultAgent(),
}
// Subscribe to real EventBus to capture events
// Subscribe to runtime events to capture sub-turn lifecycle.
collector, collectCleanup := newEventCollector(t, al)
defer collectCleanup()
@@ -158,12 +167,12 @@ func TestSpawnSubTurn(t *testing.T) {
// Verify event emission
time.Sleep(10 * time.Millisecond) // let event goroutine flush
if tt.wantSpawn {
if !collector.hasEventOfKind(EventKindSubTurnSpawn) {
if !collector.hasEventOfKind(runtimeevents.KindAgentSubTurnSpawn) {
t.Error("SubTurnSpawnEvent not emitted")
}
}
if tt.wantEnd {
if !collector.hasEventOfKind(EventKindSubTurnEnd) {
if !collector.hasEventOfKind(runtimeevents.KindAgentSubTurnEnd) {
t.Error("SubTurnEndEvent not emitted")
}
}
@@ -316,8 +325,8 @@ func TestSpawnSubTurn_OrphanResultRouting(t *testing.T) {
time.Sleep(10 * time.Millisecond) // let event goroutine flush
// Verify Orphan event is emitted
if !collector.hasEventOfKind(EventKindSubTurnOrphan) {
t.Error("SubTurnOrphanResultEvent not emitted for finished parent")
if !collector.hasEventOfKind(runtimeevents.KindAgentSubTurnOrphan) {
t.Error("agent.subturn.orphan not emitted for finished parent")
}
// Verify history is NOT polluted
@@ -591,12 +600,16 @@ func TestNestedSubTurnHierarchy(t *testing.T) {
var spawnedTurns []turnInfo
var mu sync.Mutex
// Subscribe to real EventBus to capture spawn events
sub := al.SubscribeEvents(16)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
16,
runtimeevents.KindAgentSubTurnSpawn,
)
defer closeRuntimeEvents()
go func() {
for evt := range sub.C {
if evt.Kind == EventKindSubTurnSpawn {
for evt := range runtimeCh {
if evt.Kind == runtimeevents.KindAgentSubTurnSpawn {
p, _ := evt.Payload.(SubTurnSpawnPayload)
mu.Lock()
spawnedTurns = append(spawnedTurns, turnInfo{
@@ -879,7 +892,7 @@ func TestSpawnSubTurn_PanicRecovery(t *testing.T) {
time.Sleep(10 * time.Millisecond) // let event goroutine flush
// SubTurnEndEvent should still be emitted
if !collector.hasEventOfKind(EventKindSubTurnEnd) {
if !collector.hasEventOfKind(runtimeevents.KindAgentSubTurnEnd) {
t.Error("SubTurnEndEvent not emitted after panic")
}
@@ -1229,18 +1242,23 @@ func TestDeliverSubTurnResult_RaceWithFinish(t *testing.T) {
al, _, _, _, cleanup := newTestAgentLoop(t) //nolint:dogsled
defer cleanup()
// Collect events via real EventBus
var mu sync.Mutex
var deliveredCount, orphanCount int
sub := al.SubscribeEvents(64)
defer al.UnsubscribeEvents(sub.ID)
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
64,
runtimeevents.KindAgentSubTurnResultDelivered,
runtimeevents.KindAgentSubTurnOrphan,
)
defer closeRuntimeEvents()
go func() {
for evt := range sub.C {
for evt := range runtimeCh {
mu.Lock()
switch evt.Kind {
case EventKindSubTurnResultDelivered:
case runtimeevents.KindAgentSubTurnResultDelivered:
deliveredCount++
case EventKindSubTurnOrphan:
case runtimeevents.KindAgentSubTurnOrphan:
orphanCount++
}
mu.Unlock()
@@ -1795,13 +1813,20 @@ func TestAsyncSubTurn_ParentFinishesEarly(t *testing.T) {
provider := &slowMockProvider{delay: 5 * time.Second} // SubTurn takes 5 seconds
al := NewAgentLoop(cfg, msgBus, provider)
// Capture events via real EventBus
var mu sync.Mutex
var events []Event
sub := al.SubscribeEvents(32)
defer al.UnsubscribeEvents(sub.ID)
var events []runtimeevents.Event
runtimeCh, closeRuntimeEvents := subscribeRuntimeEventsForTest(
t,
al,
32,
runtimeevents.KindAgentSubTurnSpawn,
runtimeevents.KindAgentSubTurnEnd,
runtimeevents.KindAgentSubTurnResultDelivered,
runtimeevents.KindAgentSubTurnOrphan,
)
defer closeRuntimeEvents()
go func() {
for evt := range sub.C {
for evt := range runtimeCh {
mu.Lock()
events = append(events, evt)
mu.Unlock()
+1 -1
View File
@@ -61,7 +61,7 @@ func cloneStringMap(src map[string]string) map[string]string {
return cloned
}
func cloneEventMeta(meta EventMeta) EventMeta {
func cloneHookMeta(meta HookMeta) HookMeta {
meta.turnContext = cloneTurnContext(meta.turnContext)
return meta
}
+9 -8
View File
@@ -9,6 +9,7 @@ import (
"time"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/providers"
)
@@ -28,7 +29,7 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel
turnStatus := TurnEndStatusCompleted
defer func() {
al.emitEvent(
EventKindTurnEnd,
runtimeevents.KindAgentTurnEnd,
ts.eventMeta("runTurn", "turn.end"),
TurnEndPayload{
Status: turnStatus,
@@ -40,7 +41,7 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel
}()
al.emitEvent(
EventKindTurnStart,
runtimeevents.KindAgentTurnStart,
ts.eventMeta("runTurn", "turn.start"),
TurnStartPayload{
UserMessage: ts.userMessage,
@@ -140,7 +141,7 @@ func (al *AgentLoop) runTurn(ctx context.Context, ts *turnState, pipeline *Pipel
})
}
al.emitEvent(
EventKindSteeringInjected,
runtimeevents.KindAgentSteeringInjected,
ts.eventMeta("runTurn", "turn.steering.injected"),
SteeringInjectedPayload{
Count: len(pendingMessages),
@@ -249,7 +250,7 @@ func (al *AgentLoop) abortTurn(ts *turnState) (turnResult, error) {
if !ts.opts.NoHistory {
if err := ts.restoreSession(ts.agent); err != nil {
al.emitEvent(
EventKindError,
runtimeevents.KindAgentError,
ts.eventMeta("abortTurn", "turn.error"),
ErrorPayload{
Stage: "session_restore",
@@ -414,7 +415,7 @@ func (al *AgentLoop) askSideQuestion(
llmModel := activeModel
if al.hooks != nil {
llmReq, decision := al.hooks.BeforeLLM(ctx, &LLMHookRequest{
Meta: EventMeta{
Meta: HookMeta{
Source: "askSideQuestion",
TracePath: "turn.llm.request",
turnContext: cloneTurnContext(turnCtx),
@@ -494,8 +495,8 @@ func (al *AgentLoop) askSideQuestion(
resp, err = callSideLLM(messages)
if err != nil && hasMediaRefs(messages) && isVisionUnsupportedError(err) {
al.emitEvent(
EventKindLLMRetry,
EventMeta{
runtimeevents.KindAgentLLMRetry,
HookMeta{
Source: "askSideQuestion",
TracePath: "turn.llm.retry",
turnContext: cloneTurnContext(turnCtx),
@@ -521,7 +522,7 @@ func (al *AgentLoop) askSideQuestion(
// Apply after_llm hooks
if al.hooks != nil {
llmResp, decision := al.hooks.AfterLLM(ctx, &LLMHookResponse{
Meta: EventMeta{
Meta: HookMeta{
Source: "askSideQuestion",
TracePath: "turn.llm.response",
turnContext: cloneTurnContext(turnCtx),
+2 -2
View File
@@ -442,9 +442,9 @@ func (ts *turnState) hardAbortRequested() bool {
return ts.hardAbort
}
func (ts *turnState) eventMeta(source, tracePath string) EventMeta {
func (ts *turnState) eventMeta(source, tracePath string) HookMeta {
snap := ts.snapshot()
return EventMeta{
return HookMeta{
AgentID: snap.AgentID,
TurnID: snap.TurnID,
SessionKey: snap.SessionKey,
+44 -5
View File
@@ -6,6 +6,7 @@ import (
"sync"
"sync/atomic"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
)
@@ -48,6 +49,13 @@ type MessageBus struct {
closed atomic.Bool
wg sync.WaitGroup
streamDelegate atomic.Value // stores StreamDelegate
eventPublisher atomic.Value // stores EventPublisher
}
// EventPublisher is the minimal runtime event publisher used by MessageBus.
type EventPublisher interface {
Publish(ctx context.Context, evt runtimeevents.Event) runtimeevents.PublishResult
PublishNonBlocking(evt runtimeevents.Event) runtimeevents.PublishResult
}
func NewMessageBus() *MessageBus {
@@ -92,9 +100,14 @@ func publish[T any](ctx context.Context, mb *MessageBus, ch chan T, msg T) error
func (mb *MessageBus) PublishInbound(ctx context.Context, msg InboundMessage) error {
msg = NormalizeInboundMessage(msg)
if msg.Context.isZero() {
mb.publishFailure("inbound", runtimeScopeFromInboundContext(msg.Context), ErrMissingInboundContext)
return ErrMissingInboundContext
}
return publish(ctx, mb, mb.inbound, msg)
if err := publish(ctx, mb, mb.inbound, msg); err != nil {
mb.publishFailure("inbound", runtimeScopeFromInboundContext(msg.Context), err)
return err
}
return nil
}
func (mb *MessageBus) InboundChan() <-chan InboundMessage {
@@ -104,9 +117,14 @@ func (mb *MessageBus) InboundChan() <-chan InboundMessage {
func (mb *MessageBus) PublishOutbound(ctx context.Context, msg OutboundMessage) error {
msg = NormalizeOutboundMessage(msg)
if msg.Context.isZero() {
mb.publishFailure("outbound", runtimeScopeFromInboundContext(msg.Context), ErrMissingOutboundContext)
return ErrMissingOutboundContext
}
return publish(ctx, mb, mb.outbound, msg)
if err := publish(ctx, mb, mb.outbound, msg); err != nil {
mb.publishFailure("outbound", runtimeScopeFromInboundContext(msg.Context), err)
return err
}
return nil
}
func (mb *MessageBus) OutboundChan() <-chan OutboundMessage {
@@ -116,9 +134,14 @@ func (mb *MessageBus) OutboundChan() <-chan OutboundMessage {
func (mb *MessageBus) PublishOutboundMedia(ctx context.Context, msg OutboundMediaMessage) error {
msg = NormalizeOutboundMediaMessage(msg)
if msg.Context.isZero() {
mb.publishFailure("outbound_media", runtimeScopeFromInboundContext(msg.Context), ErrMissingOutboundMediaContext)
return ErrMissingOutboundMediaContext
}
return publish(ctx, mb, mb.outboundMedia, msg)
if err := publish(ctx, mb, mb.outboundMedia, msg); err != nil {
mb.publishFailure("outbound_media", runtimeScopeFromInboundContext(msg.Context), err)
return err
}
return nil
}
func (mb *MessageBus) OutboundMediaChan() <-chan OutboundMediaMessage {
@@ -126,7 +149,11 @@ func (mb *MessageBus) OutboundMediaChan() <-chan OutboundMediaMessage {
}
func (mb *MessageBus) PublishAudioChunk(ctx context.Context, chunk AudioChunk) error {
return publish(ctx, mb, mb.audioChunks, chunk)
if err := publish(ctx, mb, mb.audioChunks, chunk); err != nil {
mb.publishFailure("audio_chunk", runtimeScopeFromAudioChunk(chunk), err)
return err
}
return nil
}
func (mb *MessageBus) AudioChunksChan() <-chan AudioChunk {
@@ -134,7 +161,11 @@ func (mb *MessageBus) AudioChunksChan() <-chan AudioChunk {
}
func (mb *MessageBus) PublishVoiceControl(ctx context.Context, ctrl VoiceControl) error {
return publish(ctx, mb, mb.voiceControls, ctrl)
if err := publish(ctx, mb, mb.voiceControls, ctrl); err != nil {
mb.publishFailure("voice_control", runtimeScopeFromVoiceControl(ctrl), err)
return err
}
return nil
}
func (mb *MessageBus) VoiceControlsChan() <-chan VoiceControl {
@@ -146,6 +177,11 @@ func (mb *MessageBus) SetStreamDelegate(d StreamDelegate) {
mb.streamDelegate.Store(d)
}
// SetEventPublisher registers a runtime event publisher for bus errors and lifecycle events.
func (mb *MessageBus) SetEventPublisher(p EventPublisher) {
mb.eventPublisher.Store(p)
}
// GetStreamer returns a Streamer for the given channel+chatID via the delegate.
func (mb *MessageBus) GetStreamer(ctx context.Context, channel, chatID string) (Streamer, bool) {
if d, ok := mb.streamDelegate.Load().(StreamDelegate); ok && d != nil {
@@ -156,6 +192,7 @@ func (mb *MessageBus) GetStreamer(ctx context.Context, channel, chatID string) (
func (mb *MessageBus) Close() {
mb.closeOnce.Do(func() {
mb.publishCloseEvent(runtimeevents.KindBusCloseStarted, 0)
// notify all blocked publishers to exit
close(mb.done)
@@ -195,6 +232,8 @@ func (mb *MessageBus) Close() {
logger.DebugCF("bus", "Drained buffered messages during close", map[string]any{
"count": drained,
})
mb.publishCloseEvent(runtimeevents.KindBusCloseDrained, drained)
}
mb.publishCloseEvent(runtimeevents.KindBusCloseCompleted, drained)
})
}
+82
View File
@@ -5,6 +5,8 @@ import (
"sync"
"testing"
"time"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func TestPublishConsume(t *testing.T) {
@@ -171,6 +173,86 @@ func TestPublishInbound_BackfillsContextFromLegacyFields(t *testing.T) {
}
}
func TestMessageBusPublishesRuntimeFailureAndCloseEvents(t *testing.T) {
eventBus := runtimeevents.NewBus()
defer func() {
if err := eventBus.Close(); err != nil {
t.Errorf("event bus close failed: %v", err)
}
}()
_, eventsCh, err := eventBus.Channel().OfKind(
runtimeevents.KindBusPublishFailed,
runtimeevents.KindBusCloseStarted,
runtimeevents.KindBusCloseDrained,
runtimeevents.KindBusCloseCompleted,
).SubscribeChan(t.Context(), runtimeevents.SubscribeOptions{Name: "bus-events", Buffer: 4})
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
mb := NewMessageBus()
mb.SetEventPublisher(eventBus)
if err := mb.PublishInbound(context.Background(), InboundMessage{}); err == nil {
t.Fatal("expected PublishInbound to fail")
}
failed := receiveBusRuntimeEvent(t, eventsCh)
if failed.Kind != runtimeevents.KindBusPublishFailed ||
failed.Source.Name != "inbound" ||
failed.Severity != runtimeevents.SeverityError {
t.Fatalf("publish failed event = %+v", failed)
}
if failed.Attrs["stream"] != "inbound" || failed.Attrs["error"] == "" {
t.Fatalf("publish failed attrs = %#v, want stream and error", failed.Attrs)
}
if err := mb.PublishOutbound(context.Background(), OutboundMessage{
Context: NewOutboundContext("telegram", "chat-1", ""),
Content: "queued",
}); err != nil {
t.Fatalf("PublishOutbound failed: %v", err)
}
mb.Close()
seen := map[runtimeevents.Kind]bool{}
var drainedAttrs map[string]any
for range 3 {
evt := receiveBusRuntimeEvent(t, eventsCh)
seen[evt.Kind] = true
if evt.Kind == runtimeevents.KindBusCloseDrained {
drainedAttrs = evt.Attrs
}
}
for _, kind := range []runtimeevents.Kind{
runtimeevents.KindBusCloseStarted,
runtimeevents.KindBusCloseDrained,
runtimeevents.KindBusCloseCompleted,
} {
if !seen[kind] {
t.Fatalf("missing %s event, seen=%v", kind, seen)
}
}
if drainedAttrs["drained"] != 1 {
t.Fatalf("bus close drained attrs = %#v, want drained count", drainedAttrs)
}
}
func receiveBusRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
t.Helper()
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("runtime event channel closed before expected event")
}
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for runtime event")
return runtimeevents.Event{}
}
}
func TestPublishOutboundSubscribe(t *testing.T) {
mb := NewMessageBus()
defer mb.Close()
+88
View File
@@ -0,0 +1,88 @@
package bus
import (
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
type busPublishFailedPayload struct {
Stream string `json:"stream"`
Error string `json:"error"`
}
type busClosePayload struct {
Drained int `json:"drained,omitempty"`
}
func (mb *MessageBus) publishFailure(stream string, scope runtimeevents.Scope, err error) {
if mb == nil || err == nil {
return
}
publisher, ok := mb.eventPublisher.Load().(EventPublisher)
if !ok || publisher == nil {
return
}
publisher.PublishNonBlocking(runtimeevents.Event{
Kind: runtimeevents.KindBusPublishFailed,
Source: runtimeevents.Source{Component: "bus", Name: stream},
Scope: scope,
Severity: runtimeevents.SeverityError,
Payload: busPublishFailedPayload{
Stream: stream,
Error: err.Error(),
},
Attrs: map[string]any{
"stream": stream,
"error": err.Error(),
},
})
}
func (mb *MessageBus) publishCloseEvent(kind runtimeevents.Kind, drained int) {
if mb == nil {
return
}
publisher, ok := mb.eventPublisher.Load().(EventPublisher)
if !ok || publisher == nil {
return
}
attrs := map[string]any{}
if drained > 0 {
attrs["drained"] = drained
}
publisher.PublishNonBlocking(runtimeevents.Event{
Kind: kind,
Source: runtimeevents.Source{Component: "bus"},
Severity: runtimeevents.SeverityInfo,
Payload: busClosePayload{Drained: drained},
Attrs: attrs,
})
}
func runtimeScopeFromInboundContext(ctx InboundContext) runtimeevents.Scope {
return runtimeevents.Scope{
Channel: ctx.Channel,
Account: ctx.Account,
ChatID: ctx.ChatID,
TopicID: ctx.TopicID,
SpaceID: ctx.SpaceID,
SpaceType: ctx.SpaceType,
ChatType: ctx.ChatType,
SenderID: ctx.SenderID,
MessageID: ctx.MessageID,
}
}
func runtimeScopeFromAudioChunk(chunk AudioChunk) runtimeevents.Scope {
return runtimeevents.Scope{
Channel: chunk.Channel,
ChatID: chunk.ChatID,
}
}
func runtimeScopeFromVoiceControl(ctrl VoiceControl) runtimeevents.Scope {
return runtimeevents.Scope{
ChatID: ctrl.ChatID,
}
}
+197
View File
@@ -0,0 +1,197 @@
package channels
import (
"github.com/sipeed/picoclaw/pkg/bus"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func channelTypeForEvent(m *Manager, channelName string) string {
if m == nil || m.config == nil {
return channelName
}
if bc := m.config.Channels.Get(channelName); bc != nil && bc.Type != "" {
return bc.Type
}
return channelName
}
func (m *Manager) publishChannelEvent(
kind runtimeevents.Kind,
channelName string,
scope runtimeevents.Scope,
severity runtimeevents.Severity,
payload any,
) {
if m == nil || m.runtimeEvents == nil {
return
}
if scope.Channel == "" {
scope.Channel = channelName
}
m.runtimeEvents.PublishNonBlocking(runtimeevents.Event{
Kind: kind,
Source: runtimeevents.Source{Component: "channel", Name: channelName},
Scope: scope,
Severity: severity,
Payload: payload,
Attrs: channelEventAttrs(payload),
})
}
func channelEventAttrs(payload any) map[string]any {
switch payload := payload.(type) {
case ChannelLifecyclePayload:
attrs := map[string]any{}
setAttrString(attrs, "type", payload.Type)
setAttrString(attrs, "error", payload.Error)
return attrs
case ChannelOutboundPayload:
attrs := map[string]any{}
if payload.Media {
attrs["media"] = payload.Media
}
if payload.ContentLen > 0 {
attrs["content_len"] = payload.ContentLen
}
if len(payload.MessageIDs) > 0 {
attrs["message_ids_count"] = len(payload.MessageIDs)
}
setAttrString(attrs, "reply_to_message_id", payload.ReplyToMessageID)
setAttrString(attrs, "error", payload.Error)
if payload.Retries > 0 {
attrs["retries"] = payload.Retries
}
return attrs
default:
return nil
}
}
func setAttrString(attrs map[string]any, key, value string) {
if value != "" {
attrs[key] = value
}
}
func (m *Manager) publishOutboundSent(
channelName string,
msg bus.OutboundMessage,
messageIDs []string,
) {
m.publishChannelEvent(
runtimeevents.KindChannelMessageOutboundSent,
channelName,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityInfo,
ChannelOutboundPayload{
ContentLen: len([]rune(msg.Content)),
MessageIDs: append([]string(nil), messageIDs...),
ReplyToMessageID: msg.ReplyToMessageID,
},
)
}
func (m *Manager) publishOutboundQueued(
channelName string,
msg bus.OutboundMessage,
) {
m.publishChannelEvent(
runtimeevents.KindChannelMessageOutboundQueued,
channelName,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityInfo,
ChannelOutboundPayload{
ContentLen: len([]rune(msg.Content)),
ReplyToMessageID: msg.ReplyToMessageID,
},
)
}
func (m *Manager) publishOutboundFailed(
channelName string,
msg bus.OutboundMessage,
err error,
media bool,
) {
payload := ChannelOutboundPayload{
Media: media,
ContentLen: len([]rune(msg.Content)),
ReplyToMessageID: msg.ReplyToMessageID,
Retries: maxRetries,
}
if err != nil {
payload.Error = err.Error()
}
m.publishChannelEvent(
runtimeevents.KindChannelMessageOutboundFailed,
channelName,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityError,
payload,
)
}
func (m *Manager) publishOutboundMediaSent(
channelName string,
msg bus.OutboundMediaMessage,
messageIDs []string,
) {
m.publishChannelEvent(
runtimeevents.KindChannelMessageOutboundSent,
channelName,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityInfo,
ChannelOutboundPayload{
Media: true,
MessageIDs: append([]string(nil), messageIDs...),
},
)
}
func (m *Manager) publishOutboundMediaQueued(
channelName string,
msg bus.OutboundMediaMessage,
) {
m.publishChannelEvent(
runtimeevents.KindChannelMessageOutboundQueued,
channelName,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityInfo,
ChannelOutboundPayload{Media: true},
)
}
func (m *Manager) publishOutboundMediaFailed(
channelName string,
msg bus.OutboundMediaMessage,
err error,
) {
payload := ChannelOutboundPayload{
Media: true,
Retries: maxRetries,
}
if err != nil {
payload.Error = err.Error()
}
m.publishChannelEvent(
runtimeevents.KindChannelMessageOutboundFailed,
channelName,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityError,
payload,
)
}
func scopeFromOutboundContext(ctx bus.InboundContext) runtimeevents.Scope {
return runtimeevents.Scope{
Channel: ctx.Channel,
Account: ctx.Account,
ChatID: ctx.ChatID,
TopicID: ctx.TopicID,
SpaceID: ctx.SpaceID,
SpaceType: ctx.SpaceType,
ChatType: ctx.ChatType,
SenderID: ctx.SenderID,
MessageID: ctx.MessageID,
}
}
+125 -1
View File
@@ -23,6 +23,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/constants"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/health"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/media"
@@ -84,6 +85,7 @@ type Manager struct {
channels map[string]Channel
workers map[string]*channelWorker
bus *bus.MessageBus
runtimeEvents runtimeevents.Bus
config *config.Config
mediaStore media.MediaStore
dispatchTask *asyncTask
@@ -98,6 +100,32 @@ type Manager struct {
channelHashes map[string]string // channel name → config hash
}
// ManagerOption configures a channel Manager.
type ManagerOption func(*Manager)
// WithRuntimeEvents injects the runtime event bus used for channel observations.
func WithRuntimeEvents(eventBus runtimeevents.Bus) ManagerOption {
return func(m *Manager) {
m.runtimeEvents = eventBus
}
}
// ChannelLifecyclePayload describes channel lifecycle runtime events.
type ChannelLifecyclePayload struct {
Type string `json:"type,omitempty"`
Error string `json:"error,omitempty"`
}
// ChannelOutboundPayload describes channel outbound message runtime events.
type ChannelOutboundPayload struct {
Media bool `json:"media,omitempty"`
ContentLen int `json:"content_len,omitempty"`
MessageIDs []string `json:"message_ids,omitempty"`
ReplyToMessageID string `json:"reply_to_message_id,omitempty"`
Error string `json:"error,omitempty"`
Retries int `json:"retries,omitempty"`
}
type toolFeedbackMessageTracker interface {
RecordToolFeedbackMessage(chatID, messageID, content string)
ClearToolFeedbackMessage(chatID string)
@@ -424,7 +452,12 @@ func (m *Manager) preSendMedia(ctx context.Context, name string, msg bus.Outboun
}
}
func NewManager(cfg *config.Config, messageBus *bus.MessageBus, store media.MediaStore) (*Manager, error) {
func NewManager(
cfg *config.Config,
messageBus *bus.MessageBus,
store media.MediaStore,
opts ...ManagerOption,
) (*Manager, error) {
m := &Manager{
channels: make(map[string]Channel),
workers: make(map[string]*channelWorker),
@@ -433,6 +466,11 @@ func NewManager(cfg *config.Config, messageBus *bus.MessageBus, store media.Medi
mediaStore: store,
channelHashes: make(map[string]string),
}
for _, opt := range opts {
if opt != nil {
opt(m)
}
}
// Register as streaming delegate so the agent loop can obtain streamers
messageBus.SetStreamDelegate(m)
@@ -557,6 +595,13 @@ func (m *Manager) initChannel(typeName, channelName string) {
setter.SetOwner(ch)
}
m.channels[channelName] = ch
m.publishChannelEvent(
runtimeevents.KindChannelLifecycleInitialized,
channelName,
runtimeevents.Scope{Channel: channelName},
runtimeevents.SeverityInfo,
ChannelLifecyclePayload{Type: typeName},
)
logger.InfoCF("channels", "Channel enabled successfully", map[string]any{
"channel": channelName,
"type": typeName,
@@ -702,6 +747,13 @@ func (m *Manager) registerHTTPHandlersLocked() {
func (m *Manager) registerChannelHTTPHandler(name string, ch Channel) {
if wh, ok := ch.(WebhookHandler); ok {
m.mux.Handle(wh.WebhookPath(), wh)
m.publishChannelEvent(
runtimeevents.KindChannelWebhookRegistered,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityInfo,
ChannelLifecyclePayload{Type: channelTypeForEvent(m, name)},
)
logger.InfoCF("channels", "Webhook handler registered", map[string]any{
"channel": name,
"path": wh.WebhookPath(),
@@ -721,6 +773,13 @@ func (m *Manager) registerChannelHTTPHandler(name string, ch Channel) {
func (m *Manager) unregisterChannelHTTPHandler(name string, ch Channel) {
if wh, ok := ch.(WebhookHandler); ok {
m.mux.Unhandle(wh.WebhookPath())
m.publishChannelEvent(
runtimeevents.KindChannelWebhookUnregistered,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityInfo,
ChannelLifecyclePayload{Type: channelTypeForEvent(m, name)},
)
logger.InfoCF("channels", "Webhook handler unregistered", map[string]any{
"channel": name,
"path": wh.WebhookPath(),
@@ -759,6 +818,13 @@ func (m *Manager) StartAll(ctx context.Context) error {
"channel": name,
"error": err.Error(),
})
m.publishChannelEvent(
runtimeevents.KindChannelLifecycleStartFailed,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityError,
ChannelLifecyclePayload{Type: channelTypeForEvent(m, name), Error: err.Error()},
)
failedStarts = append(failedStarts, fmt.Errorf("channel %s: %w", name, err))
failedNames = append(failedNames, name)
continue
@@ -774,6 +840,13 @@ func (m *Manager) StartAll(ctx context.Context) error {
m.workers[name] = w
go m.runWorker(dispatchCtx, name, w)
go m.runMediaWorker(dispatchCtx, name, w)
m.publishChannelEvent(
runtimeevents.KindChannelLifecycleStarted,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityInfo,
ChannelLifecyclePayload{Type: channelType},
)
}
if len(m.channels) > 0 && len(m.workers) == 0 {
@@ -910,7 +983,15 @@ func (m *Manager) StopAll(ctx context.Context) error {
"channel": name,
"error": err.Error(),
})
continue
}
m.publishChannelEvent(
runtimeevents.KindChannelLifecycleStopped,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityInfo,
ChannelLifecyclePayload{Type: channelTypeForEvent(m, name)},
)
}
logger.InfoC("channels", "All channels stopped")
@@ -1020,11 +1101,23 @@ func (m *Manager) sendWithRetry(
// Rate limit: wait for token
if err := w.limiter.Wait(ctx); err != nil {
// ctx canceled, shutting down
m.publishChannelEvent(
runtimeevents.KindChannelRateLimited,
name,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityWarn,
ChannelOutboundPayload{
ContentLen: len([]rune(msg.Content)),
ReplyToMessageID: msg.ReplyToMessageID,
Error: err.Error(),
},
)
return nil, false
}
// Pre-send: stop typing and try to edit placeholder
if msgIDs, handled := m.preSend(ctx, name, msg, w.ch); handled {
m.publishOutboundSent(name, msg, msgIDs)
return msgIDs, true
}
@@ -1033,6 +1126,7 @@ func (m *Manager) sendWithRetry(
for attempt := 0; attempt <= maxRetries; attempt++ {
msgIDs, lastErr = w.ch.Send(ctx, msg)
if lastErr == nil {
m.publishOutboundSent(name, msg, msgIDs)
return msgIDs, true
}
@@ -1072,6 +1166,7 @@ func (m *Manager) sendWithRetry(
"error": lastErr.Error(),
"retries": maxRetries,
})
m.publishOutboundFailed(name, msg, lastErr, false)
return nil, false
}
@@ -1134,6 +1229,7 @@ func (m *Manager) dispatchOutbound(ctx context.Context) {
func(ctx context.Context, w *channelWorker, msg bus.OutboundMessage) bool {
select {
case w.queue <- msg:
m.publishOutboundQueued(outboundMessageChannel(msg), msg)
return true
case <-ctx.Done():
return false
@@ -1154,6 +1250,7 @@ func (m *Manager) dispatchOutboundMedia(ctx context.Context) {
func(ctx context.Context, w *channelWorker, msg bus.OutboundMediaMessage) bool {
select {
case w.mediaQueue <- msg:
m.publishOutboundMediaQueued(outboundMediaChannel(msg), msg)
return true
case <-ctx.Done():
return false
@@ -1203,6 +1300,16 @@ func (m *Manager) sendMediaWithRetry(
// Rate limit: wait for token
if err := w.limiter.Wait(ctx); err != nil {
m.publishChannelEvent(
runtimeevents.KindChannelRateLimited,
name,
scopeFromOutboundContext(msg.Context),
runtimeevents.SeverityWarn,
ChannelOutboundPayload{
Media: true,
Error: err.Error(),
},
)
return nil, err
}
@@ -1214,6 +1321,7 @@ func (m *Manager) sendMediaWithRetry(
for attempt := 0; attempt <= maxRetries; attempt++ {
msgIDs, lastErr = ms.SendMedia(ctx, msg)
if lastErr == nil {
m.publishOutboundMediaSent(name, msg, msgIDs)
return msgIDs, nil
}
@@ -1253,6 +1361,7 @@ func (m *Manager) sendMediaWithRetry(
"error": lastErr.Error(),
"retries": maxRetries,
})
m.publishOutboundMediaFailed(name, msg, lastErr)
return nil, lastErr
}
@@ -1390,6 +1499,13 @@ func (m *Manager) Reload(ctx context.Context, cfg *config.Config) error {
"channel": name,
"error": err.Error(),
})
m.publishChannelEvent(
runtimeevents.KindChannelLifecycleStartFailed,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityError,
ChannelLifecyclePayload{Type: channelTypeForEvent(m, name), Error: err.Error()},
)
continue
}
// Lazily create worker only after channel starts successfully
@@ -1403,6 +1519,13 @@ func (m *Manager) Reload(ctx context.Context, cfg *config.Config) error {
m.workers[name] = w
go m.runWorker(dispatchCtx, name, w)
go m.runMediaWorker(dispatchCtx, name, w)
m.publishChannelEvent(
runtimeevents.KindChannelLifecycleStarted,
name,
runtimeevents.Scope{Channel: name},
runtimeevents.SeverityInfo,
ChannelLifecyclePayload{Type: channelType},
)
deferFuncs = append(deferFuncs, func() {
m.RegisterChannel(name, channel)
})
@@ -1525,6 +1648,7 @@ func (m *Manager) SendToChannel(ctx context.Context, channelName, chatID, conten
if wExists && w != nil {
select {
case w.queue <- msg:
m.publishOutboundQueued(channelName, msg)
return nil
case <-ctx.Done():
return ctx.Err()
+130
View File
@@ -14,6 +14,7 @@ import (
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/utils"
)
@@ -242,6 +243,57 @@ func TestStartAll_PartialFailure_StartsSuccessfulWorkers(t *testing.T) {
}
}
func TestStartAllPublishesLifecycleRuntimeEvents(t *testing.T) {
eventBus := runtimeevents.NewBus()
defer func() {
if err := eventBus.Close(); err != nil {
t.Errorf("event bus close failed: %v", err)
}
}()
_, eventsCh, err := eventBus.Channel().SubscribeChan(
t.Context(),
runtimeevents.SubscribeOptions{Name: "channel-lifecycle", Buffer: 4},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
m := newTestManager()
m.runtimeEvents = eventBus
m.config = &config.Config{Channels: config.ChannelsConfig{}}
m.channels["good"] = &mockChannel{}
m.channels["bad"] = &mockChannel{
startFn: func(_ context.Context) error { return errors.New("bad start") },
}
if err := m.StartAll(t.Context()); err != nil {
t.Fatalf("StartAll() error = %v", err)
}
t.Cleanup(func() {
stopCtx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if err := m.StopAll(stopCtx); err != nil {
t.Errorf("StopAll() error = %v", err)
}
})
events := []runtimeevents.Event{
receiveChannelRuntimeEvent(t, eventsCh),
receiveChannelRuntimeEvent(t, eventsCh),
}
seen := map[runtimeevents.Kind]runtimeevents.Event{}
for _, evt := range events {
seen[evt.Kind] = evt
}
if evt, ok := seen[runtimeevents.KindChannelLifecycleStarted]; !ok || evt.Scope.Channel != "good" {
t.Fatalf("missing started event for good channel: %+v", events)
}
if evt, ok := seen[runtimeevents.KindChannelLifecycleStartFailed]; !ok || evt.Scope.Channel != "bad" {
t.Fatalf("missing failed event for bad channel: %+v", events)
}
}
func testOutboundMessage(msg bus.OutboundMessage) bus.OutboundMessage {
if msg.Context.Channel == "" && msg.Context.ChatID == "" {
msg.Context = bus.NewOutboundContext(msg.Channel, msg.ChatID, msg.ReplyToMessageID)
@@ -256,6 +308,21 @@ func testOutboundMediaMessage(msg bus.OutboundMediaMessage) bus.OutboundMediaMes
return bus.NormalizeOutboundMediaMessage(msg)
}
func receiveChannelRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
t.Helper()
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("runtime event channel closed before expected event")
}
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for runtime event")
return runtimeevents.Event{}
}
}
func TestSendWithRetry_Success(t *testing.T) {
m := newTestManager()
var callCount int
@@ -280,6 +347,69 @@ func TestSendWithRetry_Success(t *testing.T) {
}
}
func TestSendWithRetryPublishesOutboundRuntimeEvents(t *testing.T) {
eventBus := runtimeevents.NewBus()
defer func() {
if err := eventBus.Close(); err != nil {
t.Errorf("event bus close failed: %v", err)
}
}()
_, eventsCh, err := eventBus.Channel().OfKind(
runtimeevents.KindChannelMessageOutboundSent,
runtimeevents.KindChannelMessageOutboundFailed,
).SubscribeChan(t.Context(), runtimeevents.SubscribeOptions{Name: "channel-outbound", Buffer: 2})
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
m := newTestManager()
m.runtimeEvents = eventBus
successWorker := &channelWorker{
ch: &mockChannel{},
limiter: rate.NewLimiter(rate.Inf, 1),
}
m.sendWithRetry(
context.Background(),
"test",
successWorker,
testOutboundMessage(bus.OutboundMessage{Channel: "test", ChatID: "chat-1", Content: "hello"}),
)
sent := receiveChannelRuntimeEvent(t, eventsCh)
if sent.Kind != runtimeevents.KindChannelMessageOutboundSent || sent.Scope.ChatID != "chat-1" {
t.Fatalf("sent event = %+v", sent)
}
if sent.Attrs["content_len"] != 5 {
t.Fatalf("sent attrs = %#v, want content_len", sent.Attrs)
}
failWorker := &channelWorker{
ch: &mockChannel{
sendFn: func(context.Context, bus.OutboundMessage) error {
return fmt.Errorf("send failed: %w", ErrSendFailed)
},
},
limiter: rate.NewLimiter(rate.Inf, 1),
}
m.sendWithRetry(
context.Background(),
"test",
failWorker,
testOutboundMessage(bus.OutboundMessage{Channel: "test", ChatID: "chat-2", Content: "hello"}),
)
failed := receiveChannelRuntimeEvent(t, eventsCh)
if failed.Kind != runtimeevents.KindChannelMessageOutboundFailed || failed.Scope.ChatID != "chat-2" {
t.Fatalf("failed event = %+v", failed)
}
if failed.Severity != runtimeevents.SeverityError {
t.Fatalf("failed severity = %q", failed.Severity)
}
if failed.Attrs["error"] == "" || failed.Attrs["retries"] != maxRetries {
t.Fatalf("failed attrs = %#v, want error and retries", failed.Attrs)
}
}
func TestSendWithRetry_TemporaryThenSuccess(t *testing.T) {
m := newTestManager()
var callCount int
+1
View File
@@ -40,6 +40,7 @@ type Config struct {
Channels ChannelsConfig `json:"channel_list" yaml:"channel_list"`
ModelList SecureModelList `json:"model_list" yaml:"model_list"` // New model-centric provider configuration
Gateway GatewayConfig `json:"gateway" yaml:"-"`
Events EventsConfig `json:"events,omitempty" yaml:"-"`
Hooks HooksConfig `json:"hooks,omitempty" yaml:"-"`
Tools ToolsConfig `json:"tools" yaml:",inline"`
Heartbeat HeartbeatConfig `json:"heartbeat" yaml:"-"`
+3
View File
@@ -296,6 +296,9 @@ func DefaultConfig() *Config {
HotReload: false,
LogLevel: DefaultGatewayLogLevel,
},
Events: EventsConfig{
Logging: defaultEventLoggingConfig(),
},
Tools: ToolsConfig{
FilterSensitiveData: true,
FilterMinLength: 8,
+48
View File
@@ -0,0 +1,48 @@
package config
// EventsConfig groups runtime event configuration.
type EventsConfig struct {
Logging EventLoggingConfig `json:"logging,omitempty" envPrefix:"PICOCLAW_EVENTS_LOGGING_"`
}
// EventLoggingConfig controls centralized runtime event logging.
type EventLoggingConfig struct {
// Enabled controls whether runtime events are printed by the built-in logger.
Enabled bool `json:"enabled" env:"ENABLED"`
// Include contains exact event kinds or glob patterns such as "agent.*" or "*".
Include []string `json:"include,omitempty" env:"INCLUDE"`
// Exclude contains exact event kinds or glob patterns to suppress after Include matches.
Exclude []string `json:"exclude,omitempty" env:"EXCLUDE"`
// MinSeverity filters out events below the configured severity: debug, info, warn, or error.
MinSeverity string `json:"min_severity,omitempty" env:"MIN_SEVERITY"`
// IncludePayload adds the raw payload to logs. Leave disabled unless detailed diagnostics are needed.
IncludePayload bool `json:"include_payload,omitempty" env:"INCLUDE_PAYLOAD"`
}
// DefaultEventLoggingInclude keeps the pre-existing behavior where agent events
// are printed, while non-agent runtime events are published for subscribers only.
var DefaultEventLoggingInclude = []string{"agent.*"}
// EffectiveEventLoggingConfig returns a logging config with stable defaults.
func EffectiveEventLoggingConfig(cfg *Config) EventLoggingConfig {
if cfg == nil {
return defaultEventLoggingConfig()
}
out := cfg.Events.Logging
if out.MinSeverity == "" {
out.MinSeverity = "info"
}
if len(out.Include) == 0 {
out.Include = append([]string(nil), DefaultEventLoggingInclude...)
}
return out
}
func defaultEventLoggingConfig() EventLoggingConfig {
return EventLoggingConfig{
Enabled: true,
Include: append([]string(nil), DefaultEventLoggingInclude...),
MinSeverity: "info",
}
}
+103
View File
@@ -0,0 +1,103 @@
package config
import (
"os"
"path/filepath"
"reflect"
"testing"
)
func TestDefaultEventLoggingConfig(t *testing.T) {
cfg := DefaultConfig()
logCfg := EffectiveEventLoggingConfig(cfg)
if !logCfg.Enabled {
t.Fatal("default event logging should be enabled")
}
if !reflect.DeepEqual(logCfg.Include, []string{"agent.*"}) {
t.Fatalf("default include = %#v, want agent.*", logCfg.Include)
}
if logCfg.MinSeverity != "info" {
t.Fatalf("default min severity = %q, want info", logCfg.MinSeverity)
}
if logCfg.IncludePayload {
t.Fatal("default event logging should not include raw payloads")
}
}
func TestLoadConfigEventLoggingOverrides(t *testing.T) {
path := filepath.Join(t.TempDir(), "config.json")
data := []byte(`{
"version": 3,
"events": {
"logging": {
"enabled": false,
"include": ["gateway.*"],
"exclude": ["gateway.ready"],
"min_severity": "warn",
"include_payload": true
}
}
}`)
if err := os.WriteFile(path, data, 0o600); err != nil {
t.Fatalf("write config: %v", err)
}
cfg, err := LoadConfig(path)
if err != nil {
t.Fatalf("LoadConfig() error: %v", err)
}
logCfg := EffectiveEventLoggingConfig(cfg)
if logCfg.Enabled {
t.Fatal("loaded event logging enabled = true, want false")
}
if !reflect.DeepEqual(logCfg.Include, []string{"gateway.*"}) {
t.Fatalf("loaded include = %#v, want gateway.*", logCfg.Include)
}
if !reflect.DeepEqual(logCfg.Exclude, []string{"gateway.ready"}) {
t.Fatalf("loaded exclude = %#v, want gateway.ready", logCfg.Exclude)
}
if logCfg.MinSeverity != "warn" {
t.Fatalf("loaded min severity = %q, want warn", logCfg.MinSeverity)
}
if !logCfg.IncludePayload {
t.Fatal("loaded include_payload = false, want true")
}
}
func TestLoadConfigEventLoggingEnvOverrides(t *testing.T) {
t.Setenv("PICOCLAW_EVENTS_LOGGING_ENABLED", "false")
t.Setenv("PICOCLAW_EVENTS_LOGGING_INCLUDE", "gateway.*,channel.lifecycle.*")
t.Setenv("PICOCLAW_EVENTS_LOGGING_EXCLUDE", "gateway.ready")
t.Setenv("PICOCLAW_EVENTS_LOGGING_MIN_SEVERITY", "error")
t.Setenv("PICOCLAW_EVENTS_LOGGING_INCLUDE_PAYLOAD", "true")
path := filepath.Join(t.TempDir(), "config.json")
data := []byte(`{"version": 3}`)
if err := os.WriteFile(path, data, 0o600); err != nil {
t.Fatalf("write config: %v", err)
}
cfg, err := LoadConfig(path)
if err != nil {
t.Fatalf("LoadConfig() error: %v", err)
}
logCfg := EffectiveEventLoggingConfig(cfg)
if logCfg.Enabled {
t.Fatal("env enabled override = true, want false")
}
if !reflect.DeepEqual(logCfg.Include, []string{"gateway.*", "channel.lifecycle.*"}) {
t.Fatalf("env include = %#v, want gateway/channel lifecycle", logCfg.Include)
}
if !reflect.DeepEqual(logCfg.Exclude, []string{"gateway.ready"}) {
t.Fatalf("env exclude = %#v, want gateway.ready", logCfg.Exclude)
}
if logCfg.MinSeverity != "error" {
t.Fatalf("env min severity = %q, want error", logCfg.MinSeverity)
}
if !logCfg.IncludePayload {
t.Fatal("env include_payload = false, want true")
}
}
+243
View File
@@ -0,0 +1,243 @@
package events
import (
"context"
"sort"
"strconv"
"sync"
"sync/atomic"
"time"
)
var globalEventSeq atomic.Uint64
// Bus publishes runtime events and creates filtered channels.
type Bus interface {
Publish(ctx context.Context, evt Event) PublishResult
PublishNonBlocking(evt Event) PublishResult
Channel() EventChannel
Close() error
Stats() Stats
}
// PublishResult reports per-publish delivery outcomes.
type PublishResult struct {
Matched int
Delivered int
Dropped int
Blocked int
Closed bool
}
// EventBus is an in-process runtime event broadcaster.
type EventBus struct {
mu sync.RWMutex
subs map[uint64]*eventSubscription
orderedSubs []*eventSubscription
closed bool
nextSubID atomic.Uint64
published atomic.Uint64
matched atomic.Uint64
delivered atomic.Uint64
dropped atomic.Uint64
blocked atomic.Uint64
}
var _ Bus = (*EventBus)(nil)
// NewBus creates an in-process runtime event bus.
func NewBus() *EventBus {
return &EventBus{
subs: make(map[uint64]*eventSubscription),
}
}
// Publish broadcasts evt to subscriptions whose filters match it.
func (b *EventBus) Publish(ctx context.Context, evt Event) PublishResult {
return b.publish(ctx, evt, false)
}
// PublishNonBlocking broadcasts evt without waiting for subscriber queue capacity.
func (b *EventBus) PublishNonBlocking(evt Event) PublishResult {
return b.publish(context.Background(), evt, true)
}
func (b *EventBus) publish(ctx context.Context, evt Event, nonBlocking bool) PublishResult {
if b == nil {
return PublishResult{Closed: true}
}
if ctx == nil {
ctx = context.Background()
}
if evt.Time.IsZero() {
evt.Time = time.Now()
}
if evt.ID == "" {
evt.ID = nextEventID()
}
subs, closed := b.snapshotSubscribers()
if closed {
return PublishResult{Closed: true}
}
b.published.Add(1)
result := PublishResult{}
for _, sub := range subs {
if !matchesFilters(sub.filters, evt) {
continue
}
result.Matched++
b.matched.Add(1)
delivery := sub.enqueue(ctx, evt, nonBlocking)
if delivery.closed {
continue
}
result.Delivered += delivery.delivered
result.Dropped += delivery.dropped
result.Blocked += delivery.blocked
b.delivered.Add(uint64(delivery.delivered))
b.dropped.Add(uint64(delivery.dropped))
b.blocked.Add(uint64(delivery.blocked))
}
return result
}
// Channel returns the root event channel for this bus.
func (b *EventBus) Channel() EventChannel {
return eventChannel{bus: b}
}
// Close closes the bus and all active subscriptions.
func (b *EventBus) Close() error {
if b == nil {
return nil
}
b.mu.Lock()
if b.closed {
b.mu.Unlock()
return nil
}
b.closed = true
subs := b.orderedSubs
b.subs = nil
b.orderedSubs = nil
b.mu.Unlock()
for _, sub := range subs {
sub.closeInput()
}
return nil
}
// Stats returns a snapshot of bus and subscription counters.
func (b *EventBus) Stats() Stats {
if b == nil {
return Stats{Closed: true}
}
b.mu.RLock()
closed := b.closed
subs := b.orderedSubs
b.mu.RUnlock()
stats := Stats{
Published: b.published.Load(),
Matched: b.matched.Load(),
Delivered: b.delivered.Load(),
Dropped: b.dropped.Load(),
Blocked: b.blocked.Load(),
Closed: closed,
Subscribers: len(subs),
SubscriberStats: make([]SubscriberStats, 0, len(subs)),
}
for _, sub := range subs {
stats.SubscriberStats = append(stats.SubscriberStats, sub.Stats())
}
return stats
}
func (b *EventBus) subscribe(
ctx context.Context,
filters []Filter,
opts SubscribeOptions,
handler Handler,
once bool,
) (Subscription, error) {
if b == nil {
return nil, ErrBusClosed
}
id := b.nextSubID.Add(1)
sub := newSubscription(b, id, filters, opts, handler, once)
b.mu.Lock()
if b.closed {
b.mu.Unlock()
sub.closeInput()
return nil, ErrBusClosed
}
b.subs[id] = sub
b.rebuildOrderedSubscribersLocked()
b.mu.Unlock()
if handler != nil {
go sub.run(ctx)
}
sub.watchContext(ctx)
return sub, nil
}
func (b *EventBus) unsubscribe(id uint64) {
b.mu.Lock()
sub, ok := b.subs[id]
if ok {
delete(b.subs, id)
b.rebuildOrderedSubscribersLocked()
}
b.mu.Unlock()
if ok {
sub.closeInput()
}
}
func (b *EventBus) snapshotSubscribers() ([]*eventSubscription, bool) {
b.mu.RLock()
defer b.mu.RUnlock()
if b.closed {
return nil, true
}
return b.orderedSubs, false
}
func (b *EventBus) rebuildOrderedSubscribersLocked() {
subs := make([]*eventSubscription, 0, len(b.subs))
for _, sub := range b.subs {
subs = append(subs, sub)
}
sortSubscriptions(subs)
b.orderedSubs = subs
}
func sortSubscriptions(subs []*eventSubscription) {
sort.Slice(subs, func(i, j int) bool {
if subs[i].opts.Priority == subs[j].opts.Priority {
return subs[i].id < subs[j].id
}
return subs[i].opts.Priority > subs[j].opts.Priority
})
}
func nextEventID() string {
id := globalEventSeq.Add(1)
return "evt-" + strconv.FormatUint(id, 10)
}
+75
View File
@@ -0,0 +1,75 @@
package events
import "context"
// EventChannel is a filtered view over an EventBus.
type EventChannel interface {
Filter(filter Filter) EventChannel
OfKind(kinds ...Kind) EventChannel
KindPrefix(prefix string) EventChannel
Source(component string, names ...string) EventChannel
Scope(scope ScopeFilter) EventChannel
Subscribe(ctx context.Context, opts SubscribeOptions, handler Handler) (Subscription, error)
SubscribeChan(ctx context.Context, opts SubscribeOptions) (Subscription, <-chan Event, error)
SubscribeOnce(ctx context.Context, opts SubscribeOptions, handler Handler) (Subscription, error)
}
type eventChannel struct {
bus *EventBus
filters []Filter
}
// Filter returns a new EventChannel with filter appended.
func (c eventChannel) Filter(filter Filter) EventChannel {
filters := append([]Filter(nil), c.filters...)
if filter != nil {
filters = append(filters, filter)
}
return eventChannel{bus: c.bus, filters: filters}
}
// OfKind returns a new EventChannel matching any of kinds.
func (c eventChannel) OfKind(kinds ...Kind) EventChannel {
return c.Filter(MatchKind(kinds...))
}
// KindPrefix returns a new EventChannel matching events with the kind prefix.
func (c eventChannel) KindPrefix(prefix string) EventChannel {
return c.Filter(MatchKindPrefix(prefix))
}
// Source returns a new EventChannel matching source component and optional names.
func (c eventChannel) Source(component string, names ...string) EventChannel {
return c.Filter(MatchSource(component, names...))
}
// Scope returns a new EventChannel matching non-empty scope fields.
func (c eventChannel) Scope(scope ScopeFilter) EventChannel {
return c.Filter(MatchScope(scope))
}
// Subscribe registers handler for events matching this channel.
func (c eventChannel) Subscribe(ctx context.Context, opts SubscribeOptions, handler Handler) (Subscription, error) {
if handler == nil {
return nil, ErrNilHandler
}
return c.bus.subscribe(ctx, c.filters, opts, handler, false)
}
// SubscribeChan registers a channel subscription for events matching this channel.
func (c eventChannel) SubscribeChan(ctx context.Context, opts SubscribeOptions) (Subscription, <-chan Event, error) {
sub, err := c.bus.subscribe(ctx, c.filters, opts, nil, false)
if err != nil {
return nil, nil, err
}
return sub, sub.(*eventSubscription).ch, nil
}
// SubscribeOnce registers handler and closes the subscription after the first event.
func (c eventChannel) SubscribeOnce(ctx context.Context, opts SubscribeOptions, handler Handler) (Subscription, error) {
if handler == nil {
return nil, ErrNilHandler
}
return c.bus.subscribe(ctx, c.filters, opts, handler, true)
}
+3
View File
@@ -0,0 +1,3 @@
// Package events provides the process-local runtime event bus used to observe
// PicoClaw components without coupling them to agent-specific event envelopes.
package events
+254
View File
@@ -0,0 +1,254 @@
package events
import (
"context"
"testing"
"time"
)
func TestPublishDeliversToMatchingSubscriber(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
_, ch, err := bus.Channel().OfKind(KindAgentTurnStart).SubscribeChan(
context.Background(),
SubscribeOptions{Name: "turn-starts", Buffer: 1},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
unmatched := bus.Publish(context.Background(), Event{Kind: KindAgentTurnEnd})
if unmatched.Matched != 0 || unmatched.Delivered != 0 {
t.Fatalf("unmatched Publish = %+v, want no delivery", unmatched)
}
result := bus.Publish(context.Background(), Event{Kind: KindAgentTurnStart})
if result.Matched != 1 || result.Delivered != 1 || result.Dropped != 0 {
t.Fatalf("Publish = %+v, want one delivered event", result)
}
evt := receiveEvent(t, ch)
if evt.Kind != KindAgentTurnStart {
t.Fatalf("event kind = %q, want %q", evt.Kind, KindAgentTurnStart)
}
if evt.ID == "" {
t.Fatal("event ID is empty")
}
if evt.Time.IsZero() {
t.Fatal("event Time is zero")
}
}
func TestDropNewestIncrementsStats(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
sub, _, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "drop-newest", Buffer: 1, Backpressure: DropNewest},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
first := bus.Publish(context.Background(), Event{Kind: KindAgentTurnStart})
if first.Delivered != 1 || first.Dropped != 0 {
t.Fatalf("first Publish = %+v, want one delivered event", first)
}
second := bus.Publish(context.Background(), Event{Kind: KindAgentTurnEnd})
if second.Delivered != 0 || second.Dropped != 1 {
t.Fatalf("second Publish = %+v, want one dropped event", second)
}
if got := sub.Stats().Dropped; got != 1 {
t.Fatalf("subscription dropped = %d, want 1", got)
}
if got := bus.Stats().Dropped; got != 1 {
t.Fatalf("bus dropped = %d, want 1", got)
}
}
func TestDropOldestKeepsNewestEvent(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
sub, ch, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "drop-oldest", Buffer: 1, Backpressure: DropOldest},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
bus.Publish(context.Background(), Event{Kind: Kind("test.old"), Payload: "old"})
result := bus.Publish(context.Background(), Event{Kind: Kind("test.new"), Payload: "new"})
if result.Delivered != 1 || result.Dropped != 1 {
t.Fatalf("Publish = %+v, want replacement delivery", result)
}
evt := receiveEvent(t, ch)
if evt.Payload != "new" {
t.Fatalf("payload = %v, want new", evt.Payload)
}
if got := sub.Stats().Dropped; got != 1 {
t.Fatalf("subscription dropped = %d, want 1", got)
}
}
func TestBlockRespectsContext(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
_, _, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "block", Buffer: 1, Backpressure: Block},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
first := bus.Publish(context.Background(), Event{Kind: Kind("test.first")})
if first.Delivered != 1 {
t.Fatalf("first Publish = %+v, want one delivered event", first)
}
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond)
defer cancel()
second := bus.Publish(ctx, Event{Kind: Kind("test.second")})
if second.Blocked != 1 || second.Dropped != 1 || second.Delivered != 0 {
t.Fatalf("second Publish = %+v, want one blocked drop", second)
}
}
func TestPublishNonBlockingDropsForFullBlockSubscriber(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
sub, _, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "block", Buffer: 1, Backpressure: Block},
)
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
first := bus.PublishNonBlocking(Event{Kind: Kind("test.first")})
if first.Delivered != 1 {
t.Fatalf("first PublishNonBlocking = %+v, want one delivered event", first)
}
resultCh := make(chan PublishResult, 1)
go func() {
resultCh <- bus.PublishNonBlocking(Event{Kind: Kind("test.second")})
}()
select {
case second := <-resultCh:
if second.Matched != 1 || second.Delivered != 0 || second.Dropped != 1 || second.Blocked != 0 {
t.Fatalf("second PublishNonBlocking = %+v, want non-blocking drop", second)
}
case <-time.After(100 * time.Millisecond):
t.Fatal("PublishNonBlocking blocked on full Block subscriber")
}
if got := sub.Stats().Dropped; got != 1 {
t.Fatalf("subscription dropped = %d, want 1", got)
}
}
func TestStatsSubscribersKeepPriorityOrder(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
low, _, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "low", Priority: -1},
)
if err != nil {
t.Fatalf("SubscribeChan low failed: %v", err)
}
high, _, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "high", Priority: 10},
)
if err != nil {
t.Fatalf("SubscribeChan high failed: %v", err)
}
peer, _, err := bus.Channel().SubscribeChan(
context.Background(),
SubscribeOptions{Name: "peer", Priority: 10},
)
if err != nil {
t.Fatalf("SubscribeChan peer failed: %v", err)
}
stats := bus.Stats()
got := []string{
stats.SubscriberStats[0].Name,
stats.SubscriberStats[1].Name,
stats.SubscriberStats[2].Name,
}
want := []string{"high", "peer", "low"}
if got[0] != want[0] || got[1] != want[1] || got[2] != want[2] {
t.Fatalf("subscriber order = %v, want %v", got, want)
}
if err := high.Close(); err != nil {
t.Fatalf("Close high failed: %v", err)
}
stats = bus.Stats()
got = []string{
stats.SubscriberStats[0].Name,
stats.SubscriberStats[1].Name,
}
want = []string{"peer", "low"}
if got[0] != want[0] || got[1] != want[1] {
t.Fatalf("subscriber order after unsubscribe = %v, want %v", got, want)
}
if err := peer.Close(); err != nil {
t.Fatalf("Close peer failed: %v", err)
}
if err := low.Close(); err != nil {
t.Fatalf("Close low failed: %v", err)
}
}
func receiveEvent(t *testing.T, ch <-chan Event) Event {
t.Helper()
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("event channel closed before receive")
}
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for event")
return Event{}
}
}
func closeBus(t *testing.T, bus *EventBus) {
t.Helper()
if err := bus.Close(); err != nil {
t.Fatalf("Close failed: %v", err)
}
}
+131
View File
@@ -0,0 +1,131 @@
package events
import "strings"
// Filter decides whether an event should pass through an EventChannel.
type Filter func(Event) bool
// ScopeFilter matches selected non-empty fields against Event.Scope.
type ScopeFilter struct {
AgentID string
SessionKey string
TurnID string
Channel string
ChatID string
MessageID string
}
// MatchKind matches events whose kind is in kinds. Empty kinds match all events.
func MatchKind(kinds ...Kind) Filter {
if len(kinds) == 0 {
return matchAll
}
allowed := make(map[Kind]struct{}, len(kinds))
for _, kind := range kinds {
allowed[kind] = struct{}{}
}
return func(evt Event) bool {
_, ok := allowed[evt.Kind]
return ok
}
}
// MatchKindPrefix matches events whose kind starts with prefix.
func MatchKindPrefix(prefix string) Filter {
if prefix == "" {
return matchAll
}
return func(evt Event) bool {
return strings.HasPrefix(evt.Kind.String(), prefix)
}
}
// MatchSource matches events emitted by component and, optionally, one of names.
func MatchSource(component string, names ...string) Filter {
if component == "" && len(names) == 0 {
return matchAll
}
allowedNames := make(map[string]struct{}, len(names))
for _, name := range names {
allowedNames[name] = struct{}{}
}
return func(evt Event) bool {
if component != "" && evt.Source.Component != component {
return false
}
if len(allowedNames) == 0 {
return true
}
_, ok := allowedNames[evt.Source.Name]
return ok
}
}
// MatchScope matches events whose Scope contains all non-empty filter fields.
func MatchScope(scope ScopeFilter) Filter {
if scope == (ScopeFilter{}) {
return matchAll
}
return func(evt Event) bool {
return matchesString(scope.AgentID, evt.Scope.AgentID) &&
matchesString(scope.SessionKey, evt.Scope.SessionKey) &&
matchesString(scope.TurnID, evt.Scope.TurnID) &&
matchesString(scope.Channel, evt.Scope.Channel) &&
matchesString(scope.ChatID, evt.Scope.ChatID) &&
matchesString(scope.MessageID, evt.Scope.MessageID)
}
}
// And combines filters and short-circuits on the first non-match.
func And(filters ...Filter) Filter {
if len(filters) == 0 {
return matchAll
}
return func(evt Event) bool {
for _, filter := range filters {
if filter != nil && !filter(evt) {
return false
}
}
return true
}
}
// Or combines filters and short-circuits on the first match.
func Or(filters ...Filter) Filter {
if len(filters) == 0 {
return matchAll
}
return func(evt Event) bool {
for _, filter := range filters {
if filter == nil || filter(evt) {
return true
}
}
return false
}
}
func matchAll(Event) bool {
return true
}
func matchesString(want, got string) bool {
return want == "" || want == got
}
func matchesFilters(filters []Filter, evt Event) bool {
for _, filter := range filters {
if filter != nil && !filter(evt) {
return false
}
}
return true
}
+96
View File
@@ -0,0 +1,96 @@
package events
import "testing"
func TestFilterKindPrefix(t *testing.T) {
t.Parallel()
tests := []struct {
name string
prefix string
event Event
want bool
}{
{
name: "matches agent prefix",
prefix: "agent.",
event: Event{Kind: KindAgentTurnStart},
want: true,
},
{
name: "rejects different prefix",
prefix: "channel.",
event: Event{Kind: KindAgentTurnStart},
want: false,
},
{
name: "empty prefix matches all",
prefix: "",
event: Event{Kind: KindAgentTurnStart},
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
if got := MatchKindPrefix(tt.prefix)(tt.event); got != tt.want {
t.Fatalf("MatchKindPrefix(%q) = %v, want %v", tt.prefix, got, tt.want)
}
})
}
}
func TestFilterScope(t *testing.T) {
t.Parallel()
evt := Event{
Scope: Scope{
AgentID: "agent-a",
SessionKey: "session-1",
TurnID: "turn-1",
Channel: "telegram",
ChatID: "chat-1",
MessageID: "msg-1",
},
}
tests := []struct {
name string
scope ScopeFilter
want bool
}{
{
name: "empty filter matches",
scope: ScopeFilter{},
want: true,
},
{
name: "matches selected fields",
scope: ScopeFilter{
AgentID: "agent-a",
ChatID: "chat-1",
},
want: true,
},
{
name: "rejects mismatched field",
scope: ScopeFilter{
AgentID: "agent-a",
MessageID: "msg-2",
},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
if got := MatchScope(tt.scope)(evt); got != tt.want {
t.Fatalf("MatchScope(%+v) = %v, want %v", tt.scope, got, tt.want)
}
})
}
}
+156
View File
@@ -0,0 +1,156 @@
package events
const (
// KindAgentTurnStart is emitted when an agent turn starts.
KindAgentTurnStart Kind = "agent.turn.start"
// KindAgentTurnEnd is emitted when an agent turn ends.
KindAgentTurnEnd Kind = "agent.turn.end"
// KindAgentLLMRequest is emitted before an LLM request.
KindAgentLLMRequest Kind = "agent.llm.request"
// KindAgentLLMDelta is emitted for streaming LLM deltas.
KindAgentLLMDelta Kind = "agent.llm.delta"
// KindAgentLLMResponse is emitted after an LLM response.
KindAgentLLMResponse Kind = "agent.llm.response"
// KindAgentLLMRetry is emitted before retrying an LLM request.
KindAgentLLMRetry Kind = "agent.llm.retry"
// KindAgentContextCompress is emitted when agent context is compressed.
KindAgentContextCompress Kind = "agent.context.compress"
// KindAgentSessionSummarize is emitted when session summarization completes.
KindAgentSessionSummarize Kind = "agent.session.summarize"
// KindAgentToolExecStart is emitted before a tool executes.
KindAgentToolExecStart Kind = "agent.tool.exec_start"
// KindAgentToolExecEnd is emitted after a tool finishes.
KindAgentToolExecEnd Kind = "agent.tool.exec_end"
// KindAgentToolExecSkipped is emitted when a tool call is skipped.
KindAgentToolExecSkipped Kind = "agent.tool.exec_skipped"
// KindAgentSteeringInjected is emitted when steering is injected into context.
KindAgentSteeringInjected Kind = "agent.steering.injected"
// KindAgentFollowUpQueued is emitted when async follow-up input is queued.
KindAgentFollowUpQueued Kind = "agent.follow_up.queued"
// KindAgentInterruptReceived is emitted when a turn interrupt is accepted.
KindAgentInterruptReceived Kind = "agent.interrupt.received"
// KindAgentSubTurnSpawn is emitted when a sub-turn is spawned.
KindAgentSubTurnSpawn Kind = "agent.subturn.spawn"
// KindAgentSubTurnEnd is emitted when a sub-turn ends.
KindAgentSubTurnEnd Kind = "agent.subturn.end"
// KindAgentSubTurnResultDelivered is emitted when a sub-turn result is delivered.
KindAgentSubTurnResultDelivered Kind = "agent.subturn.result_delivered"
// KindAgentSubTurnOrphan is emitted when a sub-turn result cannot be delivered.
KindAgentSubTurnOrphan Kind = "agent.subturn.orphan"
// KindAgentError is emitted when agent execution reports an error.
KindAgentError Kind = "agent.error"
// KindChannelLifecycleStarted is emitted when a channel starts.
KindChannelLifecycleStarted Kind = "channel.lifecycle.started"
// KindChannelLifecycleInitialized is emitted when a channel is initialized.
KindChannelLifecycleInitialized Kind = "channel.lifecycle.initialized"
// KindChannelLifecycleStartFailed is emitted when a channel fails to start.
KindChannelLifecycleStartFailed Kind = "channel.lifecycle.start_failed"
// KindChannelLifecycleStopped is emitted when a channel stops.
KindChannelLifecycleStopped Kind = "channel.lifecycle.stopped"
// KindChannelWebhookRegistered is emitted when a channel webhook is registered.
KindChannelWebhookRegistered Kind = "channel.webhook.registered"
// KindChannelWebhookUnregistered is emitted when a channel webhook is unregistered.
KindChannelWebhookUnregistered Kind = "channel.webhook.unregistered"
// KindChannelMessageOutboundQueued is emitted when an outbound message is queued.
KindChannelMessageOutboundQueued Kind = "channel.message.outbound_queued"
// KindChannelMessageOutboundSent is emitted when an outbound channel message is sent.
KindChannelMessageOutboundSent Kind = "channel.message.outbound_sent"
// KindChannelMessageOutboundFailed is emitted when an outbound channel message fails.
KindChannelMessageOutboundFailed Kind = "channel.message.outbound_failed"
// KindChannelRateLimited is emitted when channel rate limiting blocks delivery.
KindChannelRateLimited Kind = "channel.rate_limited"
// KindBusPublishFailed is emitted when message bus publish fails.
KindBusPublishFailed Kind = "bus.publish.failed"
// KindBusCloseStarted is emitted when message bus close starts.
KindBusCloseStarted Kind = "bus.close.started"
// KindBusCloseCompleted is emitted when message bus close completes.
KindBusCloseCompleted Kind = "bus.close.completed"
// KindBusCloseDrained is emitted when message bus close drains buffered messages.
KindBusCloseDrained Kind = "bus.close.drained"
// KindGatewayStart is emitted when gateway startup reaches runtime bootstrap.
KindGatewayStart Kind = "gateway.start"
// KindGatewayReady is emitted when gateway services are started and ready.
KindGatewayReady Kind = "gateway.ready"
// KindGatewayShutdown is emitted when gateway shutdown starts.
KindGatewayShutdown Kind = "gateway.shutdown"
// KindGatewayReloadStarted is emitted when gateway reload starts.
KindGatewayReloadStarted Kind = "gateway.reload.started"
// KindGatewayReloadCompleted is emitted when gateway reload completes.
KindGatewayReloadCompleted Kind = "gateway.reload.completed"
// KindGatewayReloadFailed is emitted when gateway reload fails.
KindGatewayReloadFailed Kind = "gateway.reload.failed"
// KindMCPServerConnected is emitted when an MCP server connects.
KindMCPServerConnected Kind = "mcp.server.connected"
// KindMCPServerConnecting is emitted before connecting to an MCP server.
KindMCPServerConnecting Kind = "mcp.server.connecting"
// KindMCPServerFailed is emitted when an MCP server fails.
KindMCPServerFailed Kind = "mcp.server.failed"
// KindMCPToolDiscovered is emitted when an MCP tool is discovered.
KindMCPToolDiscovered Kind = "mcp.tool.discovered"
// KindMCPToolCallStart is emitted when an MCP tool call starts.
KindMCPToolCallStart Kind = "mcp.tool.call.start"
// KindMCPToolCallEnd is emitted when an MCP tool call ends.
KindMCPToolCallEnd Kind = "mcp.tool.call.end"
)
var knownKinds = []Kind{
KindAgentTurnStart,
KindAgentTurnEnd,
KindAgentLLMRequest,
KindAgentLLMDelta,
KindAgentLLMResponse,
KindAgentLLMRetry,
KindAgentContextCompress,
KindAgentSessionSummarize,
KindAgentToolExecStart,
KindAgentToolExecEnd,
KindAgentToolExecSkipped,
KindAgentSteeringInjected,
KindAgentFollowUpQueued,
KindAgentInterruptReceived,
KindAgentSubTurnSpawn,
KindAgentSubTurnEnd,
KindAgentSubTurnResultDelivered,
KindAgentSubTurnOrphan,
KindAgentError,
KindChannelLifecycleStarted,
KindChannelLifecycleInitialized,
KindChannelLifecycleStartFailed,
KindChannelLifecycleStopped,
KindChannelWebhookRegistered,
KindChannelWebhookUnregistered,
KindChannelMessageOutboundQueued,
KindChannelMessageOutboundSent,
KindChannelMessageOutboundFailed,
KindChannelRateLimited,
KindBusPublishFailed,
KindBusCloseStarted,
KindBusCloseCompleted,
KindBusCloseDrained,
KindGatewayStart,
KindGatewayReady,
KindGatewayShutdown,
KindGatewayReloadStarted,
KindGatewayReloadCompleted,
KindGatewayReloadFailed,
KindMCPServerConnected,
KindMCPServerConnecting,
KindMCPServerFailed,
KindMCPToolDiscovered,
KindMCPToolCallStart,
KindMCPToolCallEnd,
}
// KnownKinds returns the runtime event kinds declared by this package.
func KnownKinds() []Kind {
return append([]Kind(nil), knownKinds...)
}
+26
View File
@@ -0,0 +1,26 @@
package events
// Stats reports aggregate EventBus counters.
type Stats struct {
Published uint64
Matched uint64
Delivered uint64
Dropped uint64
Blocked uint64
Closed bool
Subscribers int
SubscriberStats []SubscriberStats
}
// SubscriberStats reports counters for one subscription.
type SubscriberStats struct {
ID uint64
Name string
Received uint64
Handled uint64
Failed uint64
Dropped uint64
Panicked uint64
TimedOut uint64
}
+459
View File
@@ -0,0 +1,459 @@
package events
import (
"context"
"errors"
"log"
"sync"
"sync/atomic"
"time"
)
const defaultSubscriberBuffer = 16
var (
// ErrBusClosed is returned when subscribing to a closed event bus.
ErrBusClosed = errors.New("events: bus is closed")
// ErrNilHandler is returned when subscribing without a handler.
ErrNilHandler = errors.New("events: handler is nil")
)
// Handler processes a runtime event delivered to a subscription.
type Handler func(context.Context, Event) error
// SubscribeOptions controls how a subscription receives events.
type SubscribeOptions struct {
Name string
Buffer int
Priority int
Concurrency ConcurrencyKind
Backpressure BackpressurePolicy
// Timeout bounds how long the subscription worker waits for one handler call.
// Handlers should still honor ctx cancellation; timed-out calls keep running
// until their handler returns.
Timeout time.Duration
PanicPolicy PanicPolicy
}
// ConcurrencyKind controls how handler subscriptions process queued events.
type ConcurrencyKind string
const (
// Concurrent processes each event in its own goroutine.
Concurrent ConcurrencyKind = "concurrent"
// Locked processes events sequentially in subscription order.
Locked ConcurrencyKind = "locked"
// Keyed is reserved for keyed sequential processing and currently behaves as Locked.
Keyed ConcurrencyKind = "keyed"
)
// BackpressurePolicy controls delivery when a subscription queue is full.
type BackpressurePolicy string
const (
// DropNewest drops the event being published when the queue is full.
DropNewest BackpressurePolicy = "drop_newest"
// DropOldest drops one queued event and enqueues the event being published.
DropOldest BackpressurePolicy = "drop_oldest"
// Block waits for queue capacity until Publish's context is canceled.
Block BackpressurePolicy = "block"
)
// PanicPolicy controls handler panic behavior.
type PanicPolicy string
const (
// RecoverAndLog recovers handler panics and records them in subscription stats.
RecoverAndLog PanicPolicy = "recover_and_log"
// Crash lets handler panics propagate from the worker goroutine.
Crash PanicPolicy = "crash"
)
// Subscription represents an active event subscription.
type Subscription interface {
ID() uint64
Name() string
Close() error
Done() <-chan struct{}
Stats() SubscriberStats
}
type subscriberCounters struct {
received atomic.Uint64
handled atomic.Uint64
failed atomic.Uint64
dropped atomic.Uint64
panicked atomic.Uint64
timedOut atomic.Uint64
}
type eventSubscription struct {
bus *EventBus
id uint64
name string
opts SubscribeOptions
filters []Filter
handler Handler
once bool
ch chan Event
done chan struct{}
closing chan struct{}
closeOnce sync.Once
doneOnce sync.Once
mu sync.RWMutex
closed bool
wg sync.WaitGroup
blockWG sync.WaitGroup
counters subscriberCounters
}
type handlerResult struct {
err error
panicked bool
}
func normalizeSubscribeOptions(opts SubscribeOptions) SubscribeOptions {
if opts.Buffer <= 0 {
opts.Buffer = defaultSubscriberBuffer
}
if opts.Concurrency == "" {
opts.Concurrency = Locked
}
if opts.Backpressure == "" {
opts.Backpressure = DropNewest
}
if opts.PanicPolicy == "" {
opts.PanicPolicy = RecoverAndLog
}
return opts
}
func newSubscription(
bus *EventBus,
id uint64,
filters []Filter,
opts SubscribeOptions,
handler Handler,
once bool,
) *eventSubscription {
opts = normalizeSubscribeOptions(opts)
return &eventSubscription{
bus: bus,
id: id,
name: opts.Name,
opts: opts,
filters: append([]Filter(nil), filters...),
handler: handler,
once: once,
ch: make(chan Event, opts.Buffer),
done: make(chan struct{}),
closing: make(chan struct{}),
}
}
// ID returns the subscription identifier.
func (s *eventSubscription) ID() uint64 {
if s == nil {
return 0
}
return s.id
}
// Name returns the subscription name.
func (s *eventSubscription) Name() string {
if s == nil {
return ""
}
return s.name
}
// Close removes the subscription and closes its delivery channel.
func (s *eventSubscription) Close() error {
if s == nil || s.bus == nil {
return nil
}
s.bus.unsubscribe(s.id)
return nil
}
// Done returns a channel closed after the subscription has stopped processing.
func (s *eventSubscription) Done() <-chan struct{} {
if s == nil {
ch := make(chan struct{})
close(ch)
return ch
}
return s.done
}
// Stats returns a snapshot of the subscription counters.
func (s *eventSubscription) Stats() SubscriberStats {
if s == nil {
return SubscriberStats{}
}
return SubscriberStats{
ID: s.id,
Name: s.name,
Received: s.counters.received.Load(),
Handled: s.counters.handled.Load(),
Failed: s.counters.failed.Load(),
Dropped: s.counters.dropped.Load(),
Panicked: s.counters.panicked.Load(),
TimedOut: s.counters.timedOut.Load(),
}
}
func (s *eventSubscription) run(ctx context.Context) {
defer func() {
s.wg.Wait()
s.closeDone()
}()
for evt := range s.ch {
s.dispatch(ctx, evt)
if s.once {
_ = s.Close()
return
}
}
}
func (s *eventSubscription) dispatch(ctx context.Context, evt Event) {
switch s.opts.Concurrency {
case Concurrent:
s.wg.Add(1)
go func() {
defer s.wg.Done()
s.handle(ctx, evt)
}()
case Keyed:
// TODO: replace this with keyed executors when runtime events need
// per-scope ordering with cross-scope concurrency.
s.handle(ctx, evt)
default:
s.handle(ctx, evt)
}
}
func (s *eventSubscription) handle(ctx context.Context, evt Event) {
if ctx == nil {
ctx = context.Background()
}
if s.opts.Timeout <= 0 {
s.recordHandlerResult(ctx, s.invokeHandler(ctx, evt))
return
}
ctx, cancel := context.WithTimeout(ctx, s.opts.Timeout)
defer cancel()
done := make(chan handlerResult, 1)
go func() {
done <- s.invokeHandler(ctx, evt)
}()
select {
case result := <-done:
s.recordHandlerResult(ctx, result)
case <-ctx.Done():
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
s.counters.timedOut.Add(1)
}
s.counters.failed.Add(1)
}
}
func (s *eventSubscription) invokeHandler(ctx context.Context, evt Event) (result handlerResult) {
if s.opts.PanicPolicy != Crash {
defer func() {
if recovered := recover(); recovered != nil {
s.counters.panicked.Add(1)
result.panicked = true
log.Printf("events: subscriber %q recovered panic: %v", s.name, recovered)
}
}()
}
result.err = s.handler(ctx, evt)
return result
}
func (s *eventSubscription) recordHandlerResult(ctx context.Context, result handlerResult) {
if result.panicked {
return
}
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
s.counters.timedOut.Add(1)
}
if result.err != nil {
s.counters.failed.Add(1)
return
}
s.counters.handled.Add(1)
}
func (s *eventSubscription) watchContext(ctx context.Context) {
if ctx == nil {
return
}
go func() {
select {
case <-ctx.Done():
_ = s.Close()
case <-s.done:
}
}()
}
func (s *eventSubscription) closeInput() {
s.closeOnce.Do(func() {
close(s.closing)
s.mu.Lock()
s.closed = true
s.mu.Unlock()
s.blockWG.Wait()
s.mu.Lock()
close(s.ch)
s.mu.Unlock()
if s.handler == nil {
s.closeDone()
}
})
}
func (s *eventSubscription) closeDone() {
s.doneOnce.Do(func() {
close(s.done)
})
}
type deliveryResult struct {
delivered int
dropped int
blocked int
closed bool
}
func (s *eventSubscription) enqueue(ctx context.Context, evt Event, nonBlocking bool) deliveryResult {
if ctx == nil {
ctx = context.Background()
}
if nonBlocking {
return s.enqueueNonBlocking(evt)
}
if s.opts.Backpressure == Block {
return s.enqueueBlocking(ctx, evt)
}
s.mu.RLock()
defer s.mu.RUnlock()
if s.closed {
return deliveryResult{closed: true}
}
s.counters.received.Add(1)
switch s.opts.Backpressure {
case DropOldest:
return s.enqueueDropOldest(evt)
default:
return s.enqueueDropNewest(evt)
}
}
func (s *eventSubscription) enqueueBlocking(ctx context.Context, evt Event) deliveryResult {
s.mu.Lock()
if s.closed {
s.mu.Unlock()
return deliveryResult{closed: true}
}
s.blockWG.Add(1)
s.counters.received.Add(1)
s.mu.Unlock()
defer s.blockWG.Done()
return s.enqueueBlock(ctx, evt)
}
func (s *eventSubscription) enqueueNonBlocking(evt Event) deliveryResult {
s.mu.RLock()
defer s.mu.RUnlock()
if s.closed {
return deliveryResult{closed: true}
}
s.counters.received.Add(1)
if s.opts.Backpressure == DropOldest {
return s.enqueueDropOldest(evt)
}
return s.enqueueDropNewest(evt)
}
func (s *eventSubscription) enqueueDropNewest(evt Event) deliveryResult {
select {
case <-s.closing:
return deliveryResult{closed: true}
default:
}
select {
case s.ch <- evt:
return deliveryResult{delivered: 1}
default:
s.counters.dropped.Add(1)
return deliveryResult{dropped: 1}
}
}
func (s *eventSubscription) enqueueDropOldest(evt Event) deliveryResult {
select {
case <-s.closing:
return deliveryResult{closed: true}
default:
}
select {
case s.ch <- evt:
return deliveryResult{delivered: 1}
default:
}
dropped := 0
select {
case <-s.ch:
s.counters.dropped.Add(1)
dropped = 1
default:
}
select {
case <-s.closing:
return deliveryResult{dropped: dropped, closed: true}
case s.ch <- evt:
return deliveryResult{delivered: 1, dropped: dropped}
default:
s.counters.dropped.Add(1)
return deliveryResult{dropped: dropped + 1}
}
}
func (s *eventSubscription) enqueueBlock(ctx context.Context, evt Event) deliveryResult {
select {
case <-s.closing:
return deliveryResult{closed: true}
case s.ch <- evt:
return deliveryResult{delivered: 1}
case <-ctx.Done():
s.counters.dropped.Add(1)
return deliveryResult{dropped: 1, blocked: 1}
}
}
+254
View File
@@ -0,0 +1,254 @@
package events
import (
"context"
"sync/atomic"
"testing"
"time"
)
func TestSubscribeOnceClosesAfterFirstEvent(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
var handled atomic.Uint64
sub, err := bus.Channel().SubscribeOnce(
context.Background(),
SubscribeOptions{Name: "once", Buffer: 2},
func(context.Context, Event) error {
handled.Add(1)
return nil
},
)
if err != nil {
t.Fatalf("SubscribeOnce failed: %v", err)
}
bus.Publish(context.Background(), Event{Kind: KindAgentTurnStart})
waitForSubscriptionDone(t, sub)
bus.Publish(context.Background(), Event{Kind: KindAgentTurnEnd})
if got := handled.Load(); got != 1 {
t.Fatalf("handled = %d, want 1", got)
}
if got := sub.Stats().Handled; got != 1 {
t.Fatalf("subscription handled = %d, want 1", got)
}
}
func TestUnsubscribeClosesChannel(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
sub, ch, err := bus.Channel().SubscribeChan(context.Background(), SubscribeOptions{Name: "chan"})
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
if err := sub.Close(); err != nil {
t.Fatalf("Close failed: %v", err)
}
select {
case _, ok := <-ch:
if ok {
t.Fatal("channel is open, want closed")
}
case <-time.After(time.Second):
t.Fatal("timed out waiting for channel close")
}
waitForSubscriptionDone(t, sub)
}
func TestBlockBackpressureCloseUnblocksPublisher(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
sub, _, err := bus.Channel().SubscribeChan(context.Background(), SubscribeOptions{
Name: "block-close",
Buffer: 1,
Backpressure: Block,
})
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
first := bus.Publish(context.Background(), Event{Kind: Kind("test.first")})
if first.Delivered != 1 {
t.Fatalf("first Publish = %+v, want one delivered event", first)
}
publishStarted := make(chan struct{})
publishReturned := make(chan PublishResult, 1)
go func() {
close(publishStarted)
publishReturned <- bus.Publish(context.Background(), Event{Kind: Kind("test.second")})
}()
<-publishStarted
waitForStat(t, func() uint64 {
return sub.Stats().Received
}, 2)
select {
case result := <-publishReturned:
t.Fatalf("blocking Publish returned before close: %+v", result)
default:
}
closeReturned := make(chan error, 1)
go func() {
closeReturned <- sub.Close()
}()
select {
case err := <-closeReturned:
if err != nil {
t.Fatalf("Close failed: %v", err)
}
case <-time.After(time.Second):
t.Fatal("timed out waiting for Close to unblock")
}
select {
case <-publishReturned:
case <-time.After(time.Second):
t.Fatal("timed out waiting for blocking Publish to return after close")
}
waitForSubscriptionDone(t, sub)
}
func TestHandlerPanicRecovered(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
sub, err := bus.Channel().Subscribe(
context.Background(),
SubscribeOptions{Name: "panic", Buffer: 1},
func(context.Context, Event) error {
panic("boom")
},
)
if err != nil {
t.Fatalf("Subscribe failed: %v", err)
}
bus.Publish(context.Background(), Event{Kind: KindAgentError})
waitForStat(t, func() uint64 {
return sub.Stats().Panicked
}, 1)
}
func TestLockedHandlerProcessesSequentially(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
var active atomic.Int64
var maxActive atomic.Int64
sub, err := bus.Channel().Subscribe(
context.Background(),
SubscribeOptions{Name: "locked", Buffer: 8, Concurrency: Locked},
func(context.Context, Event) error {
current := active.Add(1)
for {
currentMax := maxActive.Load()
if current <= currentMax || maxActive.CompareAndSwap(currentMax, current) {
break
}
}
time.Sleep(10 * time.Millisecond)
active.Add(-1)
return nil
},
)
if err != nil {
t.Fatalf("Subscribe failed: %v", err)
}
for i := 0; i < 5; i++ {
bus.Publish(context.Background(), Event{Kind: KindAgentLLMDelta})
}
waitForStat(t, func() uint64 {
return sub.Stats().Handled
}, 5)
if got := maxActive.Load(); got != 1 {
t.Fatalf("max active handlers = %d, want 1", got)
}
}
func TestHandlerTimeoutDoesNotWedgeLockedSubscription(t *testing.T) {
t.Parallel()
bus := NewBus()
defer closeBus(t, bus)
releaseFirst := make(chan struct{})
defer close(releaseFirst)
var calls atomic.Uint64
sub, err := bus.Channel().Subscribe(
context.Background(),
SubscribeOptions{Name: "timeout", Buffer: 2, Concurrency: Locked, Timeout: 20 * time.Millisecond},
func(context.Context, Event) error {
if calls.Add(1) == 1 {
<-releaseFirst
}
return nil
},
)
if err != nil {
t.Fatalf("Subscribe failed: %v", err)
}
bus.Publish(context.Background(), Event{Kind: Kind("test.first")})
waitForStat(t, func() uint64 {
return sub.Stats().TimedOut
}, 1)
bus.Publish(context.Background(), Event{Kind: Kind("test.second")})
waitForStat(t, func() uint64 {
return sub.Stats().Handled
}, 1)
if got := sub.Stats().Failed; got != 1 {
t.Fatalf("subscription failed = %d, want timeout failure", got)
}
}
func waitForSubscriptionDone(t *testing.T, sub Subscription) {
t.Helper()
select {
case <-sub.Done():
case <-time.After(time.Second):
t.Fatal("timed out waiting for subscription to stop")
}
}
func waitForStat(t *testing.T, stat func() uint64, want uint64) {
t.Helper()
deadline := time.After(time.Second)
ticker := time.NewTicker(time.Millisecond)
defer ticker.Stop()
for {
if got := stat(); got >= want {
return
}
select {
case <-ticker.C:
case <-deadline:
t.Fatalf("timed out waiting for stat >= %d", want)
}
}
}
+77
View File
@@ -0,0 +1,77 @@
package events
import "time"
// Kind identifies a runtime event category.
type Kind string
// String returns the string representation of the event kind.
func (k Kind) String() string {
return string(k)
}
// Event is the runtime event envelope shared across PicoClaw components.
type Event struct {
ID string `json:"id"`
Kind Kind `json:"kind"`
Time time.Time `json:"time"`
Source Source `json:"source"`
Scope Scope `json:"scope,omitempty"`
Correlation Correlation `json:"correlation,omitempty"`
Severity Severity `json:"severity,omitempty"`
Payload any `json:"payload,omitempty"`
Attrs map[string]any `json:"attrs,omitempty"`
}
// Source identifies the component that emitted an event.
type Source struct {
Component string `json:"component"`
Name string `json:"name,omitempty"`
}
// Scope identifies the runtime ownership of an event.
//
// Scope is intentionally limited to agent, session, turn, channel, chat,
// message, and sender identity. Tool, provider, model, and MCP details belong
// in Source, Payload, or Attrs.
type Scope struct {
RuntimeID string `json:"runtime_id,omitempty"`
AgentID string `json:"agent_id,omitempty"`
SessionKey string `json:"session_key,omitempty"`
TurnID string `json:"turn_id,omitempty"`
Channel string `json:"channel,omitempty"`
Account string `json:"account,omitempty"`
ChatID string `json:"chat_id,omitempty"`
TopicID string `json:"topic_id,omitempty"`
SpaceID string `json:"space_id,omitempty"`
SpaceType string `json:"space_type,omitempty"`
ChatType string `json:"chat_type,omitempty"`
SenderID string `json:"sender_id,omitempty"`
MessageID string `json:"message_id,omitempty"`
}
// Correlation carries cross-event tracing fields.
type Correlation struct {
TraceID string `json:"trace_id,omitempty"`
ParentTurnID string `json:"parent_turn_id,omitempty"`
RequestID string `json:"request_id,omitempty"`
ReplyToID string `json:"reply_to_id,omitempty"`
}
// Severity describes the operational severity of an event.
type Severity string
const (
// SeverityDebug is used for verbose diagnostic events.
SeverityDebug Severity = "debug"
// SeverityInfo is used for normal lifecycle and activity events.
SeverityInfo Severity = "info"
// SeverityWarn is used for recoverable abnormal events.
SeverityWarn Severity = "warn"
// SeverityError is used for failed operations and unrecoverable events.
SeverityError Severity = "error"
)
+53
View File
@@ -0,0 +1,53 @@
package gateway
import (
"time"
"github.com/sipeed/picoclaw/pkg/agent"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
type gatewayEventPayload struct {
DurationMS int64 `json:"duration_ms,omitempty"`
Error string `json:"error,omitempty"`
}
func publishGatewayEvent(
al *agent.AgentLoop,
kind runtimeevents.Kind,
startedAt time.Time,
err error,
) {
if al == nil || al.RuntimeEventBus() == nil {
return
}
severity := runtimeevents.SeverityInfo
payload := gatewayEventPayload{}
if !startedAt.IsZero() {
payload.DurationMS = time.Since(startedAt).Milliseconds()
}
if err != nil {
severity = runtimeevents.SeverityError
payload.Error = err.Error()
}
al.RuntimeEventBus().PublishNonBlocking(runtimeevents.Event{
Kind: kind,
Source: runtimeevents.Source{Component: "gateway"},
Severity: severity,
Payload: payload,
Attrs: gatewayEventAttrs(payload),
})
}
func gatewayEventAttrs(payload gatewayEventPayload) map[string]any {
attrs := map[string]any{}
if payload.DurationMS > 0 {
attrs["duration_ms"] = payload.DurationMS
}
if payload.Error != "" {
attrs["error"] = payload.Error
}
return attrs
}
+31 -4
View File
@@ -39,6 +39,7 @@ import (
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/cron"
"github.com/sipeed/picoclaw/pkg/devices"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/health"
"github.com/sipeed/picoclaw/pkg/heartbeat"
"github.com/sipeed/picoclaw/pkg/logger"
@@ -114,6 +115,7 @@ func (p *startupBlockedProvider) GetDefaultModel() string {
// Run starts the gateway runtime using the configuration loaded from configPath.
func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runErr error) {
startedAt := time.Now()
panicPath := filepath.Join(homePath, logPath, panicFile)
panicFunc, err := logger.InitPanic(panicPath)
if err != nil {
@@ -197,6 +199,8 @@ func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runEr
msgBus := bus.NewMessageBus()
agentLoop := agent.NewAgentLoop(cfg, msgBus, provider)
msgBus.SetEventPublisher(agentLoop.RuntimeEventBus())
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayStart, startedAt, nil)
fmt.Println("\n📦 Agent Status:")
startupInfo := agentLoop.GetStartupInfo()
@@ -216,6 +220,7 @@ func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runEr
if err != nil {
return err
}
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReady, startedAt, nil)
closeListeners = false
// Setup manual reload channel for /reload endpoint
@@ -262,7 +267,7 @@ func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runEr
select {
case <-sigChan:
logger.Info("Shutting down...")
shutdownGateway(runningServices, agentLoop, provider, true)
shutdownGateway(runningServices, agentLoop, provider, msgBus, true)
return nil
case newCfg := <-configReloadChan:
if !runningServices.reloading.CompareAndSwap(false, true) {
@@ -312,10 +317,20 @@ func executeReload(
msgBus *bus.MessageBus,
allowEmptyStartup bool,
debug bool,
) error {
) (err error) {
startedAt := time.Now()
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReloadStarted, startedAt, nil)
defer runningServices.reloading.Store(false)
defer func() {
if err != nil {
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReloadFailed, startedAt, err)
return
}
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReloadCompleted, startedAt, nil)
}()
return handleConfigReload(ctx, agentLoop, newCfg, provider, runningServices, msgBus, allowEmptyStartup, debug)
err = handleConfigReload(ctx, agentLoop, newCfg, provider, runningServices, msgBus, allowEmptyStartup, debug)
return err
}
func createStartupProvider(
@@ -383,7 +398,12 @@ func setupAndStartServices(
fms.Start()
}
runningServices.ChannelManager, err = channels.NewManager(cfg, msgBus, runningServices.MediaStore)
runningServices.ChannelManager, err = channels.NewManager(
cfg,
msgBus,
runningServices.MediaStore,
channels.WithRuntimeEvents(agentLoop.RuntimeEventBus()),
)
if err != nil {
if fms, ok := runningServices.MediaStore.(*media.FileMediaStore); ok {
fms.Stop()
@@ -490,14 +510,21 @@ func shutdownGateway(
runningServices *services,
agentLoop *agent.AgentLoop,
provider providers.LLMProvider,
msgBus *bus.MessageBus,
fullShutdown bool,
) {
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayShutdown, time.Time{}, nil)
if cp, ok := provider.(providers.StatefulProvider); ok && fullShutdown {
cp.Close()
}
stopAndCleanupServices(runningServices, gracefulShutdownTimeout, false)
if fullShutdown && msgBus != nil {
msgBus.Close()
}
agentLoop.Stop()
agentLoop.Close()
+103
View File
@@ -1,14 +1,20 @@
package gateway
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
"github.com/sipeed/picoclaw/pkg/agent"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func TestRun_StartupFailuresReturnErrorAndEmitStructuredLog(t *testing.T) {
@@ -106,3 +112,100 @@ func TestGatewayRunStartupFailureHelper(t *testing.T) {
fmt.Fprintln(os.Stdout, err.Error())
os.Exit(0)
}
func TestPublishGatewayEvent(t *testing.T) {
eventBus := runtimeevents.NewBus()
t.Cleanup(func() {
if err := eventBus.Close(); err != nil {
t.Fatalf("Close runtime event bus: %v", err)
}
})
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
sub, eventsCh, err := eventBus.Channel().OfKind(runtimeevents.KindGatewayStart).SubscribeChan(
ctx,
runtimeevents.SubscribeOptions{Name: "gateway-test", Buffer: 4},
)
if err != nil {
t.Fatalf("SubscribeChan() error = %v", err)
}
t.Cleanup(func() {
if err := sub.Close(); err != nil {
t.Fatalf("Close subscription: %v", err)
}
})
al := agent.NewAgentLoop(
config.DefaultConfig(),
bus.NewMessageBus(),
&startupBlockedProvider{reason: "not used"},
agent.WithRuntimeEvents(eventBus),
)
t.Cleanup(al.Close)
startedAt := time.Now().Add(-1500 * time.Millisecond)
publishGatewayEvent(al, runtimeevents.KindGatewayStart, startedAt, nil)
evt := receiveGatewayRuntimeEvent(t, eventsCh)
if evt.Kind != runtimeevents.KindGatewayStart ||
evt.Source.Component != "gateway" ||
evt.Severity != runtimeevents.SeverityInfo {
t.Fatalf("gateway event = %+v", evt)
}
payload, ok := evt.Payload.(gatewayEventPayload)
if !ok {
t.Fatalf("payload type = %T, want gatewayEventPayload", evt.Payload)
}
if payload.DurationMS <= 0 {
t.Fatalf("DurationMS = %d, want positive", payload.DurationMS)
}
if evt.Attrs["duration_ms"] == nil {
t.Fatalf("gateway event attrs missing duration_ms: %#v", evt.Attrs)
}
}
func TestShutdownGatewayClosesMessageBus(t *testing.T) {
msgBus := bus.NewMessageBus()
al := agent.NewAgentLoop(
config.DefaultConfig(),
msgBus,
&startupBlockedProvider{reason: "not used"},
)
msgBus.SetEventPublisher(al.RuntimeEventBus())
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sub, eventsCh, err := al.RuntimeEventBus().Channel().OfKind(runtimeevents.KindBusCloseCompleted).SubscribeChan(
ctx,
runtimeevents.SubscribeOptions{Name: "bus-close-test", Buffer: 4},
)
if err != nil {
t.Fatalf("SubscribeChan() error = %v", err)
}
defer func() {
_ = sub.Close()
}()
shutdownGateway(&services{}, al, &startupBlockedProvider{reason: "not used"}, msgBus, true)
evt := receiveGatewayRuntimeEvent(t, eventsCh)
if evt.Kind != runtimeevents.KindBusCloseCompleted {
t.Fatalf("shutdown event kind = %q, want %q", evt.Kind, runtimeevents.KindBusCloseCompleted)
}
if err := msgBus.PublishVoiceControl(context.Background(), bus.VoiceControl{}); !errors.Is(err, bus.ErrBusClosed) {
t.Fatalf("PublishVoiceControl after shutdown error = %v, want %v", err, bus.ErrBusClosed)
}
}
func receiveGatewayRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
t.Helper()
select {
case evt := <-ch:
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for gateway runtime event")
return runtimeevents.Event{}
}
}
+92
View File
@@ -0,0 +1,92 @@
package mcp
import (
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func (m *Manager) publishServerEvent(
kind runtimeevents.Kind,
serverName string,
cfg config.MCPServerConfig,
toolCount int,
err error,
) {
if m == nil || m.runtimeEvents == nil {
return
}
severity := runtimeevents.SeverityInfo
if err != nil {
severity = runtimeevents.SeverityError
}
payload := ServerEventPayload{
Server: serverName,
Type: mcpTransportType(cfg),
URL: cfg.URL,
Command: cfg.Command,
ToolCount: toolCount,
}
if err != nil {
payload.Error = err.Error()
}
m.runtimeEvents.PublishNonBlocking(runtimeevents.Event{
Kind: kind,
Source: runtimeevents.Source{Component: "mcp", Name: serverName},
Severity: severity,
Payload: payload,
Attrs: mcpServerEventAttrs(payload),
})
}
func (m *Manager) publishToolDiscovered(serverName string, cfg config.MCPServerConfig, toolName string) {
if m == nil || m.runtimeEvents == nil {
return
}
payload := ServerEventPayload{
Server: serverName,
Type: mcpTransportType(cfg),
URL: cfg.URL,
Command: cfg.Command,
Tool: toolName,
}
m.runtimeEvents.PublishNonBlocking(runtimeevents.Event{
Kind: runtimeevents.KindMCPToolDiscovered,
Source: runtimeevents.Source{Component: "mcp", Name: serverName},
Severity: runtimeevents.SeverityInfo,
Payload: payload,
Attrs: mcpServerEventAttrs(payload),
})
}
func mcpServerEventAttrs(payload ServerEventPayload) map[string]any {
attrs := map[string]any{}
setMCPAttrString(attrs, "server", payload.Server)
setMCPAttrString(attrs, "type", payload.Type)
setMCPAttrString(attrs, "tool", payload.Tool)
if payload.ToolCount > 0 {
attrs["tool_count"] = payload.ToolCount
}
setMCPAttrString(attrs, "error", payload.Error)
return attrs
}
func setMCPAttrString(attrs map[string]any, key, value string) {
if value != "" {
attrs[key] = value
}
}
func mcpTransportType(cfg config.MCPServerConfig) string {
if cfg.Type != "" {
return cfg.Type
}
if cfg.URL != "" {
return "sse"
}
if cfg.Command != "" {
return "stdio"
}
return ""
}
+46 -6
View File
@@ -16,6 +16,7 @@ import (
"github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
)
@@ -127,19 +128,47 @@ type ServerConnection struct {
// Manager manages multiple MCP server connections
type Manager struct {
servers map[string]*ServerConnection
mu sync.RWMutex
closed atomic.Bool // changed from bool to atomic.Bool to avoid TOCTOU race
wg sync.WaitGroup // tracks in-flight CallTool calls
servers map[string]*ServerConnection
runtimeEvents runtimeevents.Bus
mu sync.RWMutex
closed atomic.Bool // changed from bool to atomic.Bool to avoid TOCTOU race
wg sync.WaitGroup // tracks in-flight CallTool calls
}
var connectServerFunc = connectServer
// ManagerOption configures an MCP manager.
type ManagerOption func(*Manager)
// WithRuntimeEvents injects the runtime event bus used for MCP observations.
func WithRuntimeEvents(eventBus runtimeevents.Bus) ManagerOption {
return func(m *Manager) {
m.runtimeEvents = eventBus
}
}
// ServerEventPayload describes MCP server connection events.
type ServerEventPayload struct {
Server string `json:"server"`
Type string `json:"type,omitempty"`
URL string `json:"url,omitempty"`
Command string `json:"command,omitempty"`
Tool string `json:"tool,omitempty"`
ToolCount int `json:"tool_count,omitempty"`
Error string `json:"error,omitempty"`
}
// NewManager creates a new MCP manager
func NewManager() *Manager {
return &Manager{
func NewManager(opts ...ManagerOption) *Manager {
m := &Manager{
servers: make(map[string]*ServerConnection),
}
for _, opt := range opts {
if opt != nil {
opt(m)
}
}
return m
}
// LoadFromConfig loads MCP servers from configuration
@@ -264,8 +293,10 @@ func (m *Manager) ConnectServer(
name string,
cfg config.MCPServerConfig,
) error {
m.publishServerEvent(runtimeevents.KindMCPServerConnecting, name, cfg, 0, nil)
conn, err := connectServerFunc(ctx, name, cfg)
if err != nil {
m.publishServerEvent(runtimeevents.KindMCPServerFailed, name, cfg, 0, err)
return err
}
@@ -274,10 +305,19 @@ func (m *Manager) ConnectServer(
if m.closed.Load() {
_ = conn.Session.Close()
m.publishServerEvent(runtimeevents.KindMCPServerFailed, name, cfg, 0, fmt.Errorf("manager is closed"))
return fmt.Errorf("manager is closed")
}
m.servers[name] = conn
for _, tool := range conn.Tools {
toolName := ""
if tool != nil {
toolName = tool.Name
}
m.publishToolDiscovered(name, cfg, toolName)
}
m.publishServerEvent(runtimeevents.KindMCPServerConnected, name, cfg, len(conn.Tools), nil)
return nil
}
+91
View File
@@ -10,11 +10,13 @@ import (
"strings"
"sync"
"testing"
"time"
"github.com/modelcontextprotocol/go-sdk/jsonrpc"
sdkmcp "github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/sipeed/picoclaw/pkg/config"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
)
func TestLoadEnvFile(t *testing.T) {
@@ -248,6 +250,95 @@ func TestNewManager_InitialState(t *testing.T) {
}
}
func TestConnectServerPublishesRuntimeEvents(t *testing.T) {
originalConnectServerFunc := connectServerFunc
t.Cleanup(func() {
connectServerFunc = originalConnectServerFunc
})
eventBus := runtimeevents.NewBus()
defer func() {
if err := eventBus.Close(); err != nil {
t.Errorf("event bus close failed: %v", err)
}
}()
_, eventsCh, err := eventBus.Channel().OfKind(
runtimeevents.KindMCPServerConnected,
runtimeevents.KindMCPServerFailed,
).SubscribeChan(t.Context(), runtimeevents.SubscribeOptions{Name: "mcp-events", Buffer: 2})
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
connectServerFunc = func(
_ context.Context,
name string,
cfg config.MCPServerConfig,
) (*ServerConnection, error) {
if name == "bad" {
return nil, fmt.Errorf("connect failed")
}
return &ServerConnection{
Name: name,
Config: cfg,
Tools: []*sdkmcp.Tool{{Name: "echo"}},
}, nil
}
mgr := NewManager(WithRuntimeEvents(eventBus))
err = mgr.ConnectServer(context.Background(), "good", config.MCPServerConfig{
Type: "stdio",
Command: "echo",
})
if err != nil {
t.Fatalf("ConnectServer(good) error = %v", err)
}
connected := receiveMCPRuntimeEvent(t, eventsCh)
if connected.Kind != runtimeevents.KindMCPServerConnected ||
connected.Source.Name != "good" ||
connected.Severity != runtimeevents.SeverityInfo {
t.Fatalf("connected event = %+v", connected)
}
if connected.Attrs["server"] != "good" ||
connected.Attrs["type"] != "stdio" ||
connected.Attrs["tool_count"] != 1 {
t.Fatalf("connected attrs = %#v", connected.Attrs)
}
err = mgr.ConnectServer(context.Background(), "bad", config.MCPServerConfig{
Type: "stdio",
Command: "echo",
})
if err == nil {
t.Fatal("expected ConnectServer(bad) to fail")
}
failed := receiveMCPRuntimeEvent(t, eventsCh)
if failed.Kind != runtimeevents.KindMCPServerFailed ||
failed.Source.Name != "bad" ||
failed.Severity != runtimeevents.SeverityError {
t.Fatalf("failed event = %+v", failed)
}
if failed.Attrs["server"] != "bad" || failed.Attrs["error"] != "connect failed" {
t.Fatalf("failed attrs = %#v", failed.Attrs)
}
}
func receiveMCPRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
t.Helper()
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("runtime event channel closed before expected event")
}
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for runtime event")
return runtimeevents.Event{}
}
}
func TestLoadFromMCPConfig_DisabledOrEmptyServers(t *testing.T) {
mgr := NewManager()
+78
View File
@@ -13,6 +13,7 @@ import (
"github.com/modelcontextprotocol/go-sdk/mcp"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/media"
toolshared "github.com/sipeed/picoclaw/pkg/tools/shared"
@@ -36,6 +37,16 @@ type MCPTool struct {
mediaStore media.MediaStore
workspace string
maxInlineTextRunes int
runtimeEvents runtimeevents.Bus
}
// MCPToolCallPayload describes MCP tool execution runtime events.
type MCPToolCallPayload struct {
Server string `json:"server"`
Tool string `json:"tool"`
DurationMS int64 `json:"duration_ms,omitempty"`
IsError bool `json:"is_error,omitempty"`
Error string `json:"error,omitempty"`
}
// NewMCPTool creates a new MCP tool wrapper
@@ -62,6 +73,11 @@ func (t *MCPTool) SetMaxInlineTextRunes(limit int) {
}
}
// SetEventPublisher injects the runtime event bus used for MCP tool observations.
func (t *MCPTool) SetEventPublisher(eventBus runtimeevents.Bus) {
t.runtimeEvents = eventBus
}
const maxMCPInlineTextRunes = 16 * 1024
// sanitizeIdentifierComponent normalizes a string so it can be safely used
@@ -237,26 +253,88 @@ func (t *MCPTool) Parameters() map[string]any {
// Execute executes the MCP tool
func (t *MCPTool) Execute(ctx context.Context, args map[string]any) *ToolResult {
startedAt := time.Now()
t.publishRuntimeEvent(ctx, runtimeevents.KindMCPToolCallStart, startedAt, false, "")
result, err := t.manager.CallTool(ctx, t.serverName, t.tool.Name, args)
if err != nil {
t.publishRuntimeEvent(ctx, runtimeevents.KindMCPToolCallEnd, startedAt, true, err.Error())
return ErrorResult(fmt.Sprintf("MCP tool execution failed: %v", err)).WithError(err)
}
if result == nil {
nilErr := fmt.Errorf("MCP tool returned nil result without error")
t.publishRuntimeEvent(ctx, runtimeevents.KindMCPToolCallEnd, startedAt, true, nilErr.Error())
return ErrorResult("MCP tool execution failed: nil result").WithError(nilErr)
}
// Handle error result from server
if result.IsError {
errMsg := extractContentText(result.Content)
t.publishRuntimeEvent(ctx, runtimeevents.KindMCPToolCallEnd, startedAt, true, errMsg)
return ErrorResult(fmt.Sprintf("MCP tool returned error: %s", errMsg)).
WithError(fmt.Errorf("MCP tool error: %s", errMsg))
}
t.publishRuntimeEvent(ctx, runtimeevents.KindMCPToolCallEnd, startedAt, false, "")
return t.normalizeResultContent(ctx, result.Content)
}
func (t *MCPTool) publishRuntimeEvent(
ctx context.Context,
kind runtimeevents.Kind,
startedAt time.Time,
isError bool,
errMsg string,
) {
if t == nil || t.runtimeEvents == nil {
return
}
scope := runtimeevents.Scope{
AgentID: toolshared.ToolAgentID(ctx),
SessionKey: toolshared.ToolSessionKey(ctx),
Channel: toolshared.ToolChannel(ctx),
ChatID: toolshared.ToolChatID(ctx),
MessageID: toolshared.ToolMessageID(ctx),
}
payload := MCPToolCallPayload{
Server: t.serverName,
Tool: t.tool.Name,
DurationMS: time.Since(startedAt).Milliseconds(),
IsError: isError,
Error: errMsg,
}
severity := runtimeevents.SeverityInfo
if isError {
severity = runtimeevents.SeverityError
}
t.runtimeEvents.PublishNonBlocking(runtimeevents.Event{
Kind: kind,
Source: runtimeevents.Source{Component: "mcp", Name: t.serverName},
Scope: scope,
Severity: severity,
Payload: payload,
Attrs: mcpToolCallEventAttrs(payload),
})
}
func mcpToolCallEventAttrs(payload MCPToolCallPayload) map[string]any {
attrs := map[string]any{
"server": payload.Server,
"tool": payload.Tool,
"duration_ms": payload.DurationMS,
}
if payload.IsError {
attrs["is_error"] = payload.IsError
}
if payload.Error != "" {
attrs["error"] = payload.Error
}
return attrs
}
// extractContentText extracts text from MCP content array
func extractContentText(content []mcp.Content) string {
var parts []string
+73
View File
@@ -7,9 +7,11 @@ import (
"path/filepath"
"strings"
"testing"
"time"
"github.com/modelcontextprotocol/go-sdk/mcp"
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
"github.com/sipeed/picoclaw/pkg/media"
toolshared "github.com/sipeed/picoclaw/pkg/tools/shared"
)
@@ -299,6 +301,77 @@ func TestMCPTool_Execute_Success(t *testing.T) {
}
}
func TestMCPTool_Execute_PublishesRuntimeEvents(t *testing.T) {
eventBus := runtimeevents.NewBus()
defer func() {
if err := eventBus.Close(); err != nil {
t.Errorf("event bus close failed: %v", err)
}
}()
_, eventsCh, err := eventBus.Channel().OfKind(
runtimeevents.KindMCPToolCallStart,
runtimeevents.KindMCPToolCallEnd,
).SubscribeChan(t.Context(), runtimeevents.SubscribeOptions{Name: "mcp-tool-events", Buffer: 2})
if err != nil {
t.Fatalf("SubscribeChan failed: %v", err)
}
manager := &MockMCPManager{}
mcpTool := NewMCPTool(manager, "github", &mcp.Tool{Name: "search_repos"})
mcpTool.SetEventPublisher(eventBus)
ctx := toolshared.WithToolContext(context.Background(), "telegram", "chat-1")
ctx = toolshared.WithToolMessageContext(ctx, "msg-1", "")
ctx = toolshared.WithToolSessionContext(ctx, "main", "session-1", nil)
result := mcpTool.Execute(ctx, map[string]any{"query": "picoclaw"})
if result == nil || result.IsError {
t.Fatalf("Execute result = %+v", result)
}
started := receiveMCPToolRuntimeEvent(t, eventsCh)
if started.Kind != runtimeevents.KindMCPToolCallStart ||
started.Scope.AgentID != "main" ||
started.Scope.SessionKey != "session-1" ||
started.Scope.Channel != "telegram" ||
started.Scope.ChatID != "chat-1" ||
started.Scope.MessageID != "msg-1" {
t.Fatalf("started event = %+v", started)
}
ended := receiveMCPToolRuntimeEvent(t, eventsCh)
if ended.Kind != runtimeevents.KindMCPToolCallEnd || ended.Severity != runtimeevents.SeverityInfo {
t.Fatalf("ended event = %+v", ended)
}
payload, ok := ended.Payload.(MCPToolCallPayload)
if !ok {
t.Fatalf("ended payload = %T, want MCPToolCallPayload", ended.Payload)
}
if payload.Server != "github" || payload.Tool != "search_repos" || payload.IsError {
t.Fatalf("ended payload = %+v", payload)
}
if ended.Attrs["server"] != "github" ||
ended.Attrs["tool"] != "search_repos" ||
ended.Attrs["duration_ms"] == nil {
t.Fatalf("ended attrs = %#v", ended.Attrs)
}
}
func receiveMCPToolRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
t.Helper()
select {
case evt, ok := <-ch:
if !ok {
t.Fatal("runtime event channel closed before expected event")
}
return evt
case <-time.After(time.Second):
t.Fatal("timed out waiting for runtime event")
return runtimeevents.Event{}
}
}
// TestMCPTool_Execute_ManagerError tests execution when manager returns error
func TestMCPTool_Execute_ManagerError(t *testing.T) {
manager := &MockMCPManager{