mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
feat(gateway): publish lifecycle runtime events
Emit `gateway.start`, `gateway.ready`, and `gateway.shutdown` on the shared runtime event bus, and publish the existing reload events through the same helper. Update the SubTurn architecture docs to refer to runtime event kinds instead of the removed agent EventBus names. Validation: `GOCACHE=/tmp/picoclaw-go-cache go test ./pkg/gateway ./pkg/events`; `GOCACHE=/tmp/picoclaw-go-cache go test ./pkg/bus ./pkg/channels ./pkg/mcp ./pkg/tools/integration ./pkg/events ./pkg/gateway`; `make lint`
This commit is contained in:
@@ -135,16 +135,16 @@ The agent loop polls for async SubTurn results at two points per iteration:
|
|||||||
|
|
||||||
All active turns are registered in `AgentLoop.activeTurnStates` (`sync.Map`, keyed by session key). A reservation sentinel is stored atomically via `LoadOrStore` before the worker starts, then replaced with the real `*turnState` when `runTurn` registers. This prevents a TOCTOU race where multiple messages for the same session could spawn concurrent workers. The sentinel is cleaned up by the worker's deferred cleanup. This allows `HardAbort` and `/subagents` observability commands to find and operate on active turns.
|
All active turns are registered in `AgentLoop.activeTurnStates` (`sync.Map`, keyed by session key). A reservation sentinel is stored atomically via `LoadOrStore` before the worker starts, then replaced with the real `*turnState` when `runTurn` registers. This prevents a TOCTOU race where multiple messages for the same session could spawn concurrent workers. The sentinel is cleaned up by the worker's deferred cleanup. This allows `HardAbort` and `/subagents` observability commands to find and operate on active turns.
|
||||||
|
|
||||||
## Event Bus Integration
|
## Runtime Event Integration
|
||||||
|
|
||||||
SubTurns emit specific events to the PicoClaw `EventBus` for observability and debugging:
|
SubTurns emit runtime events through `pkg/events` for observability and debugging:
|
||||||
|
|
||||||
| Event Kind | When Emitted | Payload |
|
| Event Kind | When Emitted | Payload |
|
||||||
|:------|:-------------|:--------|
|
|:------|:-------------|:--------|
|
||||||
| `subturn_spawn` | Sub-turn successfully initialized | `SubTurnSpawnPayload{AgentID, Label, ParentTurnID}` |
|
| `agent.subturn.spawn` | Sub-turn successfully initialized | `SubTurnSpawnPayload{AgentID, Label, ParentTurnID}` |
|
||||||
| `subturn_end` | Sub-turn finishes (success or error) | `SubTurnEndPayload{AgentID, Status}` |
|
| `agent.subturn.end` | Sub-turn finishes (success or error) | `SubTurnEndPayload{AgentID, Status}` |
|
||||||
| `subturn_result_delivered` | Async result successfully delivered to parent | `SubTurnResultDeliveredPayload{TargetChannel, TargetChatID, ContentLen}` |
|
| `agent.subturn.result_delivered` | Async result successfully delivered to parent | `SubTurnResultDeliveredPayload{TargetChannel, TargetChatID, ContentLen}` |
|
||||||
| `subturn_orphan` | Result cannot be delivered (parent finished or channel full) | `SubTurnOrphanPayload{ParentTurnID, ChildTurnID, Reason}` |
|
| `agent.subturn.orphan` | Result cannot be delivered (parent finished or channel full) | `SubTurnOrphanPayload{ParentTurnID, ChildTurnID, Reason}` |
|
||||||
|
|
||||||
## API Reference
|
## API Reference
|
||||||
|
|
||||||
@@ -240,13 +240,13 @@ An orphan result occurs when:
|
|||||||
2. The `pendingResults` channel is full (buffer size: 16)
|
2. The `pendingResults` channel is full (buffer size: 16)
|
||||||
|
|
||||||
When a result becomes an orphan:
|
When a result becomes an orphan:
|
||||||
- `SubTurnOrphanResultEvent` is emitted to EventBus
|
- `agent.subturn.orphan` is emitted to the runtime event bus
|
||||||
- The result is **NOT** delivered to the LLM context
|
- The result is **NOT** delivered to the LLM context
|
||||||
- External systems can listen to this event for custom handling
|
- External systems can listen to this event for custom handling
|
||||||
|
|
||||||
### Preventing Orphan Results
|
### Preventing Orphan Results
|
||||||
- Use `Critical: true` for important SubTurns that must complete
|
- Use `Critical: true` for important SubTurns that must complete
|
||||||
- Monitor `SubTurnOrphanResultEvent` for observability
|
- Monitor `agent.subturn.orphan` for observability
|
||||||
- Consider the 16-buffer limit when spawning many async SubTurns
|
- Consider the 16-buffer limit when spawning many async SubTurns
|
||||||
|
|
||||||
## Tool Inheritance
|
## Tool Inheritance
|
||||||
|
|||||||
@@ -75,6 +75,12 @@ const (
|
|||||||
// KindBusCloseDrained is emitted when message bus close drains buffered messages.
|
// KindBusCloseDrained is emitted when message bus close drains buffered messages.
|
||||||
KindBusCloseDrained Kind = "bus.close.drained"
|
KindBusCloseDrained Kind = "bus.close.drained"
|
||||||
|
|
||||||
|
// KindGatewayStart is emitted when gateway startup reaches runtime bootstrap.
|
||||||
|
KindGatewayStart Kind = "gateway.start"
|
||||||
|
// KindGatewayReady is emitted when gateway services are started and ready.
|
||||||
|
KindGatewayReady Kind = "gateway.ready"
|
||||||
|
// KindGatewayShutdown is emitted when gateway shutdown starts.
|
||||||
|
KindGatewayShutdown Kind = "gateway.shutdown"
|
||||||
// KindGatewayReloadStarted is emitted when gateway reload starts.
|
// KindGatewayReloadStarted is emitted when gateway reload starts.
|
||||||
KindGatewayReloadStarted Kind = "gateway.reload.started"
|
KindGatewayReloadStarted Kind = "gateway.reload.started"
|
||||||
// KindGatewayReloadCompleted is emitted when gateway reload completes.
|
// KindGatewayReloadCompleted is emitted when gateway reload completes.
|
||||||
|
|||||||
@@ -10,12 +10,12 @@ import (
|
|||||||
|
|
||||||
const gatewayEventPublishTimeout = 100 * time.Millisecond
|
const gatewayEventPublishTimeout = 100 * time.Millisecond
|
||||||
|
|
||||||
type gatewayReloadPayload struct {
|
type gatewayEventPayload struct {
|
||||||
DurationMS int64 `json:"duration_ms,omitempty"`
|
DurationMS int64 `json:"duration_ms,omitempty"`
|
||||||
Error string `json:"error,omitempty"`
|
Error string `json:"error,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func publishGatewayReloadEvent(
|
func publishGatewayEvent(
|
||||||
al *agent.AgentLoop,
|
al *agent.AgentLoop,
|
||||||
kind runtimeevents.Kind,
|
kind runtimeevents.Kind,
|
||||||
startedAt time.Time,
|
startedAt time.Time,
|
||||||
@@ -26,7 +26,7 @@ func publishGatewayReloadEvent(
|
|||||||
}
|
}
|
||||||
|
|
||||||
severity := runtimeevents.SeverityInfo
|
severity := runtimeevents.SeverityInfo
|
||||||
payload := gatewayReloadPayload{}
|
payload := gatewayEventPayload{}
|
||||||
if !startedAt.IsZero() {
|
if !startedAt.IsZero() {
|
||||||
payload.DurationMS = time.Since(startedAt).Milliseconds()
|
payload.DurationMS = time.Since(startedAt).Milliseconds()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -115,6 +115,7 @@ func (p *startupBlockedProvider) GetDefaultModel() string {
|
|||||||
|
|
||||||
// Run starts the gateway runtime using the configuration loaded from configPath.
|
// Run starts the gateway runtime using the configuration loaded from configPath.
|
||||||
func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runErr error) {
|
func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runErr error) {
|
||||||
|
startedAt := time.Now()
|
||||||
panicPath := filepath.Join(homePath, logPath, panicFile)
|
panicPath := filepath.Join(homePath, logPath, panicFile)
|
||||||
panicFunc, err := logger.InitPanic(panicPath)
|
panicFunc, err := logger.InitPanic(panicPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -199,6 +200,7 @@ func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runEr
|
|||||||
msgBus := bus.NewMessageBus()
|
msgBus := bus.NewMessageBus()
|
||||||
agentLoop := agent.NewAgentLoop(cfg, msgBus, provider)
|
agentLoop := agent.NewAgentLoop(cfg, msgBus, provider)
|
||||||
msgBus.SetEventPublisher(agentLoop.RuntimeEventBus())
|
msgBus.SetEventPublisher(agentLoop.RuntimeEventBus())
|
||||||
|
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayStart, startedAt, nil)
|
||||||
|
|
||||||
fmt.Println("\n📦 Agent Status:")
|
fmt.Println("\n📦 Agent Status:")
|
||||||
startupInfo := agentLoop.GetStartupInfo()
|
startupInfo := agentLoop.GetStartupInfo()
|
||||||
@@ -218,6 +220,7 @@ func Run(debug bool, homePath, configPath string, allowEmptyStartup bool) (runEr
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReady, startedAt, nil)
|
||||||
closeListeners = false
|
closeListeners = false
|
||||||
|
|
||||||
// Setup manual reload channel for /reload endpoint
|
// Setup manual reload channel for /reload endpoint
|
||||||
@@ -316,14 +319,14 @@ func executeReload(
|
|||||||
debug bool,
|
debug bool,
|
||||||
) (err error) {
|
) (err error) {
|
||||||
startedAt := time.Now()
|
startedAt := time.Now()
|
||||||
publishGatewayReloadEvent(agentLoop, runtimeevents.KindGatewayReloadStarted, startedAt, nil)
|
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReloadStarted, startedAt, nil)
|
||||||
defer runningServices.reloading.Store(false)
|
defer runningServices.reloading.Store(false)
|
||||||
defer func() {
|
defer func() {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
publishGatewayReloadEvent(agentLoop, runtimeevents.KindGatewayReloadFailed, startedAt, err)
|
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReloadFailed, startedAt, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
publishGatewayReloadEvent(agentLoop, runtimeevents.KindGatewayReloadCompleted, startedAt, nil)
|
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayReloadCompleted, startedAt, nil)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
err = handleConfigReload(ctx, agentLoop, newCfg, provider, runningServices, msgBus, allowEmptyStartup, debug)
|
err = handleConfigReload(ctx, agentLoop, newCfg, provider, runningServices, msgBus, allowEmptyStartup, debug)
|
||||||
@@ -509,6 +512,8 @@ func shutdownGateway(
|
|||||||
provider providers.LLMProvider,
|
provider providers.LLMProvider,
|
||||||
fullShutdown bool,
|
fullShutdown bool,
|
||||||
) {
|
) {
|
||||||
|
publishGatewayEvent(agentLoop, runtimeevents.KindGatewayShutdown, time.Time{}, nil)
|
||||||
|
|
||||||
if cp, ok := provider.(providers.StatefulProvider); ok && fullShutdown {
|
if cp, ok := provider.(providers.StatefulProvider); ok && fullShutdown {
|
||||||
cp.Close()
|
cp.Close()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,19 @@
|
|||||||
package gateway
|
package gateway
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/sipeed/picoclaw/pkg/agent"
|
||||||
|
"github.com/sipeed/picoclaw/pkg/bus"
|
||||||
"github.com/sipeed/picoclaw/pkg/config"
|
"github.com/sipeed/picoclaw/pkg/config"
|
||||||
|
runtimeevents "github.com/sipeed/picoclaw/pkg/events"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRun_StartupFailuresReturnErrorAndEmitStructuredLog(t *testing.T) {
|
func TestRun_StartupFailuresReturnErrorAndEmitStructuredLog(t *testing.T) {
|
||||||
@@ -106,3 +111,64 @@ func TestGatewayRunStartupFailureHelper(t *testing.T) {
|
|||||||
fmt.Fprintln(os.Stdout, err.Error())
|
fmt.Fprintln(os.Stdout, err.Error())
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPublishGatewayEvent(t *testing.T) {
|
||||||
|
eventBus := runtimeevents.NewBus()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
if err := eventBus.Close(); err != nil {
|
||||||
|
t.Fatalf("Close runtime event bus: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
t.Cleanup(cancel)
|
||||||
|
sub, eventsCh, err := eventBus.Channel().OfKind(runtimeevents.KindGatewayStart).SubscribeChan(
|
||||||
|
ctx,
|
||||||
|
runtimeevents.SubscribeOptions{Name: "gateway-test", Buffer: 4},
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("SubscribeChan() error = %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
if err := sub.Close(); err != nil {
|
||||||
|
t.Fatalf("Close subscription: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
al := agent.NewAgentLoop(
|
||||||
|
config.DefaultConfig(),
|
||||||
|
bus.NewMessageBus(),
|
||||||
|
&startupBlockedProvider{reason: "not used"},
|
||||||
|
agent.WithRuntimeEvents(eventBus),
|
||||||
|
)
|
||||||
|
t.Cleanup(al.Close)
|
||||||
|
|
||||||
|
startedAt := time.Now().Add(-1500 * time.Millisecond)
|
||||||
|
publishGatewayEvent(al, runtimeevents.KindGatewayStart, startedAt, nil)
|
||||||
|
|
||||||
|
evt := receiveGatewayRuntimeEvent(t, eventsCh)
|
||||||
|
if evt.Kind != runtimeevents.KindGatewayStart ||
|
||||||
|
evt.Source.Component != "gateway" ||
|
||||||
|
evt.Severity != runtimeevents.SeverityInfo {
|
||||||
|
t.Fatalf("gateway event = %+v", evt)
|
||||||
|
}
|
||||||
|
payload, ok := evt.Payload.(gatewayEventPayload)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("payload type = %T, want gatewayEventPayload", evt.Payload)
|
||||||
|
}
|
||||||
|
if payload.DurationMS <= 0 {
|
||||||
|
t.Fatalf("DurationMS = %d, want positive", payload.DurationMS)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func receiveGatewayRuntimeEvent(t *testing.T, ch <-chan runtimeevents.Event) runtimeevents.Event {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case evt := <-ch:
|
||||||
|
return evt
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
t.Fatal("timed out waiting for gateway runtime event")
|
||||||
|
return runtimeevents.Event{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user