ci(release): split tag creation and release into separate workflows

- Add `create-tag.yml`: creates annotated tag at a specified commit or latest main HEAD, with duplicate tag and commit validation
- Simplify `release.yml`: only accepts existing tags, removes create_tag toggle, validates tag via GitHub API before checkout
- Always checkout main branch (fetch-depth: 0 fetches full history), then create tag at the specified commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
refactor(web): switch dashboard auth from tokens to passwords (#2608)
2026-08-01 01:26:19 +00:00 · 2026-04-21 19:01:47 -07:00 · 2026-04-21 18:04:15 +08:00 · 2026-04-21 18:01:16 +08:00 · 2026-04-21 16:30:02 +08:00 · 2026-04-21 16:28:29 +08:00
866 changed files with 103249 additions and 20138 deletions
@@ -16,5 +16,5 @@ jobs:
        with:
          go-version-file: go.mod

-      - name: Build
+      - name: Build core binaries
        run: make build-all
@@ -0,0 +1,67 @@
+name: Create Tag
+
+on:
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Tag name (required, e.g. v0.2.0)"
+        required: true
+        type: string
+      commit:
+        description: "Target commit SHA (leave empty for latest main)"
+        required: false
+        type: string
+        default: ""
+
+jobs:
+  create-tag:
+    name: Create Git Tag
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    # User-supplied inputs reach the scripts only through environment
+    # variables. Interpolating ${{ inputs.* }} directly into a `run:` script
+    # would let a crafted tag or commit value execute arbitrary shell code.
+    env:
+      TAG_NAME: ${{ inputs.tag }}
+      TARGET_COMMIT: ${{ inputs.commit }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: main
+
+      - name: Validate commit exists
+        if: ${{ inputs.commit != '' }}
+        shell: bash
+        run: |
+          # The ^{commit} suffix rejects blobs and trees, not only unknown objects.
+          if ! git cat-file -e "${TARGET_COMMIT}^{commit}" 2>/dev/null; then
+            echo "::error::Commit '${TARGET_COMMIT}' does not exist."
+            exit 1
+          fi
+
+      - name: Check tag does not already exist
+        shell: bash
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if gh api "repos/${{ github.repository }}/git/ref/tags/${TAG_NAME}" --silent 2>/dev/null; then
+            echo "::error::Tag '${TAG_NAME}' already exists."
+            exit 1
+          fi
+
+      - name: Create and push tag
+        shell: bash
+        run: |
+          TARGET="${TARGET_COMMIT:-HEAD}"
+          COMMIT_SHA=$(git rev-parse --verify "${TARGET}^{commit}")
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git tag -a "$TAG_NAME" "$COMMIT_SHA" -m "Release $TAG_NAME"
+          git push origin "$TAG_NAME"
+          echo "### Tag Created" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Tag:** \`${TAG_NAME}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Commit:** \`${COMMIT_SHA}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Branch:** \`$(git branch -r --contains "$COMMIT_SHA" | head -1 | xargs)\`" >> "$GITHUB_STEP_SUMMARY"
@@ -0,0 +1,77 @@
+name: Create macOS DMG
+on:
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build ${{ matrix.arch }}
+    runs-on: macos-latest
+    # The job only reads the repository; no write scopes are requested.
+    permissions:
+      contents: read
+    strategy:
+      matrix:
+        # One job per target architecture; the two jobs run in parallel
+        arch: [arm64, amd64]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          ref: main
+
+      # 1. Install Go from go.mod
+      - name: Setup Go
+        uses: actions/setup-go@v6
+        with:
+          go-version-file: go.mod
+
+      # 2. Install pnpm and Node.js (dependency cache keyed on web/frontend/pnpm-lock.yaml)
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v6
+        with:
+          version: 10.33.0
+          run_install: false
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: 22
+          cache: pnpm
+          cache-dependency-path: web/frontend/pnpm-lock.yaml
+
+      # 3. Build the application bundle
+      - name: Build with Make
+        run: make build ARCH=${{ matrix.arch }} && make build-macos-app ARCH=${{ matrix.arch }}
+
+      # 4. Apply ad-hoc signing
+      - name: Ad-hoc Sign
+        run: codesign --force --deep --sign - "build/PicoClaw Launcher.app"
+
+      # 5. Install the DMG packaging tool
+      - name: Install create-dmg
+        run: brew install create-dmg
+
+      # 6. Create the DMG
+      - name: Create DMG
+        run: |
+          mkdir -p dist
+          create-dmg \
+            --volname "PicoClaw Installer" \
+            --window-pos 200 120 \
+            --window-size 800 400 \
+            --icon-size 100 \
+            --icon "PicoClaw Launcher.app" 200 190 \
+            --hide-extension "PicoClaw Launcher.app" \
+            --app-drop-link 600 185 \
+            "dist/picoclaw-${{ matrix.arch }}.dmg" \
+            "build/PicoClaw Launcher.app"
+
+      # 7. Upload the DMG as a GitHub artifact
+      - name: Upload DMG
+        uses: actions/upload-artifact@v7
+        with:
+          name: macos-dmg-${{ matrix.arch }}
+          path: dist/*.dmg
+          # Fail the job instead of only warning when no DMG was produced.
+          if-no-files-found: error
@@ -47,13 +47,18 @@ jobs:
        with:
          go-version-file: go.mod

+      - name: Setup pnpm
+        uses: pnpm/action-setup@v6
+        with:
+          version: 10.33.0
+          run_install: false
+
      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 22
-
-      - name: Setup pnpm
-        run: corepack enable && corepack prepare pnpm@latest --activate
+          cache: pnpm
+          cache-dependency-path: web/frontend/pnpm-lock.yaml

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v4
@@ -75,6 +80,9 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

+      - name: Install zip
+        run: sudo apt-get install -y zip
+
      - name: Create local tag for GoReleaser
        run: git tag "${{ steps.version.outputs.version }}"

@@ -90,6 +98,7 @@ jobs:
          DOCKERHUB_IMAGE_NAME: ${{ vars.DOCKERHUB_REPOSITORY }}
          GOVERSION: ${{ steps.setup-go.outputs.go-version }}
          GORELEASER_CURRENT_TAG: ${{ steps.version.outputs.version }}
+          INCLUDE_ANDROID_BUNDLE: "true"
          NIGHTLY_BUILD: "true"
          MACOS_SIGN_P12: ${{ secrets.MACOS_SIGN_P12 }}
          MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_SIGN_PASSWORD }}
@@ -123,7 +132,7 @@ jobs:

          # Collect release artifacts from goreleaser dist/
          ASSETS=()
-          for f in dist/*.tar.gz dist/*.zip dist/*.deb dist/*.rpm dist/checksums.txt; do
+          for f in dist/*.tar.gz dist/*.zip dist/*.deb dist/*.rpm dist/checksums.txt build/picoclaw-android-universal.zip; do
            [ -f "$f" ] && ASSETS+=("$f")
          done

@@ -135,4 +144,3 @@ jobs:
            --prerelease \
            --latest=false \
            "${ASSETS[@]}"
-
@@ -41,10 +41,11 @@ jobs:
        with:
          go-version-file: go.mod

+      - name: Install govulncheck
+        run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4
+
      - name: Run Govulncheck
-        uses: golang/govulncheck-action@v1
-        with:
-          go-package: ./...
+        run: govulncheck -C . -format text ./...

  test:
    name: Tests
@@ -1,10 +1,10 @@
-name: Create Tag and Release
+name: Release

 on:
  workflow_dispatch:
    inputs:
      tag:
-        description: "Release tag (required, e.g. v0.2.0)"
+        description: "Existing tag to release (e.g. v0.2.0)"
        required: true
        type: string
      prerelease:
@@ -24,35 +24,26 @@ on:
        default: true

 jobs:
-  create-tag:
-    name: Create Git Tag
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-
-      - name: Create and push tag
-        shell: bash
-        env:
-          RELEASE_TAG: ${{ inputs.tag }}
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          git tag -a "$RELEASE_TAG" -m "Release $RELEASE_TAG"
-          git push origin "$RELEASE_TAG"
-
  release:
    name: GoReleaser Release
-    needs: create-tag
    runs-on: ubuntu-latest
    permissions:
      contents: write
      packages: write
    steps:
+      - name: Verify tag exists
+        shell: bash
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Pass the user-supplied tag through the environment; expanding
+          # the input expression inside the script would allow shell injection.
+          RELEASE_TAG: ${{ inputs.tag }}
+        run: |
+          if ! gh api "repos/${{ github.repository }}/git/ref/tags/${RELEASE_TAG}" --silent 2>/dev/null; then
+            echo "::error::Tag '${RELEASE_TAG}' does not exist. Create it first using the 'Create Tag' workflow."
+            exit 1
+          fi
+
      - name: Checkout tag
        uses: actions/checkout@v6
        with:
@@ -65,13 +53,18 @@ jobs:
        with:
          go-version-file: go.mod

+      - name: Setup pnpm
+        uses: pnpm/action-setup@v6
+        with:
+          version: 10.33.0
+          run_install: false
+
      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 22
-
-      - name: Setup pnpm
-        run: corepack enable && corepack prepare pnpm@latest --activate
+          cache: pnpm
+          cache-dependency-path: web/frontend/pnpm-lock.yaml

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v4
@@ -93,6 +86,9 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

+      - name: Install zip
+        run: sudo apt-get install -y zip
+
      - name: Run GoReleaser
        uses: goreleaser/goreleaser-action@v7
        with:
@@ -104,6 +100,7 @@ jobs:
          GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
          DOCKERHUB_IMAGE_NAME: ${{ vars.DOCKERHUB_REPOSITORY }}
          GOVERSION: ${{ steps.setup-go.outputs.go-version }}
+          INCLUDE_ANDROID_BUNDLE: "true"
          MACOS_SIGN_P12: ${{ secrets.MACOS_SIGN_P12 }}
          MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_SIGN_PASSWORD }}
          MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
@@ -25,6 +25,9 @@ build/
 # Secrets & Config (keep templates, ignore actual secrets)
 .env
 config/config.json
+.security.yml
+onboard
+

 # Test
 coverage.txt
@@ -64,3 +67,5 @@ web/backend/dist/*
 .claude/

 docker/data
+
+.omc/
@@ -12,6 +12,7 @@ linters:
    - exhaustruct
    - funcorder
    - gochecknoglobals
+    - gosmopolitan  # Project legitimately uses CJK text in tests (FTS5, token counting)
    - godot
    - intrange
    - ireturn
@@ -61,6 +62,9 @@ linters:
    - usestdlibvars
    - usetesting
  settings:
+    gomoddirectives:
+      replace-allow-list:
+        - github.com/bwmarrin/discordgo
    errcheck:
      check-type-assertions: true
      check-blank: true
@@ -2,13 +2,17 @@
 # vim: set ts=2 sw=2 tw=0 fo=cnqoj
 version: 2

+git:
+  ignore_tags:
+    - nightly
+    - ".*-nightly.*"
+
 before:
  hooks:
-    - go mod tidy
    - go generate ./...
-    - sh -c 'cd web/frontend && pnpm install && pnpm build:backend'
-    - go install github.com/tc-hib/go-winres@latest
-    - go-winres make --in web/backend/winres/winres.json --out web/backend/rsrc --product-version={{ .Version }} --file-version={{ .Version }}
+    - sh -c 'cd web/frontend && CI=true pnpm install --frozen-lockfile && pnpm build:backend'
+    - sh -c 'GOBIN="$(go env GOPATH)/bin"; mkdir -p "$GOBIN"; go install github.com/tc-hib/go-winres@v0.3.3 && "$GOBIN/go-winres" make --in web/backend/winres/winres.json --out web/backend/rsrc --product-version={{ .Version }} --file-version={{ .Version }}'
+    - sh -c 'if [ "${INCLUDE_ANDROID_BUNDLE:-}" = "true" ]; then make build-android-bundle; fi'

 builds:
  - id: picoclaw
@@ -22,7 +26,7 @@ builds:
      - -X github.com/sipeed/picoclaw/pkg/config.Version={{ .Version }}
      - -X github.com/sipeed/picoclaw/pkg/config.GitCommit={{ .ShortCommit }}
      - -X github.com/sipeed/picoclaw/pkg/config.BuildTime={{ .Date }}
-      - -X github.com/sipeed/picoclaw/pkg/config.GoVersion={{ .Env.GOVERSION }}
+      - -X github.com/sipeed/picoclaw/pkg/config.GoVersion={{ with index .Env "GOVERSION" }}{{ . }}{{ else }}unknown{{ end }}
    goos:
      - linux
      - windows
@@ -62,6 +66,10 @@ builds:
      - stdjson
    ldflags:
      - -s -w
+      - -X github.com/sipeed/picoclaw/pkg/config.Version={{ .Version }}
+      - -X github.com/sipeed/picoclaw/pkg/config.GitCommit={{ .ShortCommit }}
+      - -X github.com/sipeed/picoclaw/pkg/config.BuildTime={{ .Date }}
+      - -X github.com/sipeed/picoclaw/pkg/config.GoVersion={{ with index .Env "GOVERSION" }}{{ . }}{{ else }}unknown{{ end }}
    goos:
      - linux
      - windows
@@ -101,6 +109,10 @@ builds:
      - stdjson
    ldflags:
      - -s -w
+      - -X github.com/sipeed/picoclaw/pkg/config.Version={{ .Version }}
+      - -X github.com/sipeed/picoclaw/pkg/config.GitCommit={{ .ShortCommit }}
+      - -X github.com/sipeed/picoclaw/pkg/config.BuildTime={{ .Date }}
+      - -X github.com/sipeed/picoclaw/pkg/config.GoVersion={{ with index .Env "GOVERSION" }}{{ . }}{{ else }}unknown{{ end }}
    goos:
      - linux
      - windows
@@ -240,6 +252,8 @@ changelog:

 release:
  disable: '{{ isEnvSet "NIGHTLY_BUILD" }}'
+  extra_files:
+    - glob: ./build/picoclaw-android-universal.zip
  footer: >-

    ---
@@ -35,6 +35,8 @@ We are committed to maintaining a welcoming and respectful community. Be kind, c

 For substantial new features, please open an issue first to discuss the design before writing code. This prevents wasted effort and ensures alignment with the project's direction.

+For documentation contributions, prefer the layout and naming conventions in [`docs/README.md`](docs/README.md). Run `make lint-docs` after adding or moving Markdown files to catch common consistency issues early.
+
 ---

 ## Getting Started
@@ -64,7 +66,7 @@ For substantial new features, please open an issue first to discuss the design b
 ```bash
 make build       # Build binary (runs go generate first)
 make generate    # Run go generate only
-make check       # Full pre-commit check: deps + fmt + vet + test
+make check       # Full pre-commit check: deps + fmt + vet + test + docs consistency checks
 ```

 ### Running Tests
@@ -81,9 +83,10 @@ go test -bench=. -benchmem -run='^$' ./...  # Run benchmarks
 make fmt   # Format code
 make vet   # Static analysis
 make lint  # Full linter run
+make lint-docs  # Check common documentation layout and naming conventions
 ```

-All CI checks must pass before a PR can be merged. Run `make check` locally before pushing to catch issues early.
+All CI checks must pass before a PR can be merged. Run `make check` locally before pushing to catch issues early, including the common docs consistency checks from `make lint-docs`.

 ---

@@ -108,7 +111,7 @@ Use descriptive branch names, e.g. `fix/telegram-timeout`, `feat/ollama-provider
 - Reference the related issue when relevant: `Fix session leak (#123)`.
 - Keep commits focused. One logical change per commit is preferred.
 - For minor cleanups or typo fixes, squash them into a single commit before opening a PR.
- Refer to https://www.conventionalcommits.org/zh-hans/v1.0.0/
+- Refer to [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/)

 ### Keeping Up to Date

@@ -1,10 +1,11 @@
-.PHONY: all build install uninstall clean help test
+.PHONY: all build install uninstall clean help test build-all lint-docs

 # Build variables
 BINARY_NAME=picoclaw
 BUILD_DIR=build
 CMD_DIR=cmd/$(BINARY_NAME)
 MAIN_GO=$(CMD_DIR)/main.go
+EXT=

 # Version
 VERSION?=$(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
@@ -47,6 +48,13 @@ define PATCH_MIPS_FLAGS
 	fi
 endef

+# Patch creack/pty for loong64 support (upstream doesn't have ztypes_loong64.go)
+PTY_PATCH_LOONG64=pty_dir=$$(go env GOMODCACHE)/github.com/creack/pty@v1.1.9; \
+	if [ -d "$$pty_dir" ] && [ ! -f "$$pty_dir/ztypes_loong64.go" ]; then \
+		chmod +w "$$pty_dir" 2>/dev/null || true; \
+		printf '//go:build linux && loong64\npackage pty\ntype (_C_int int32; _C_uint uint32)\n' > "$$pty_dir/ztypes_loong64.go"; \
+	fi
+
 # Golangci-lint
 GOLANGCI_LINT?=golangci-lint

@@ -62,9 +70,11 @@ WORKSPACE_DIR?=$(PICOCLAW_HOME)/workspace
 WORKSPACE_SKILLS_DIR=$(WORKSPACE_DIR)/skills
 BUILTIN_SKILLS_DIR=$(CURDIR)/skills

+LNCMD=ln -sf
+
 # OS detection
-UNAME_S:=$(shell uname -s)
-UNAME_M:=$(shell uname -m)
+UNAME_S?=$(shell uname -s)
+UNAME_M?=$(shell uname -m)

 # Platform-specific settings
 ifeq ($(UNAME_S),Linux)
@@ -86,17 +96,30 @@ ifeq ($(UNAME_S),Linux)
 	endif
 else ifeq ($(UNAME_S),Darwin)
 	PLATFORM=darwin
-	WEB_GO=CGO_ENABLED=1 go
+	WEB_GO=CGO_LDFLAGS="-mmacosx-version-min=10.11" CGO_CFLAGS="-mmacosx-version-min=10.11" CGO_ENABLED=1 go
 	ifeq ($(UNAME_M),x86_64)
-		ARCH=amd64
+		ARCH?=amd64
 	else ifeq ($(UNAME_M),arm64)
-		ARCH=arm64
+		ARCH?=arm64
 	else
-		ARCH=$(UNAME_M)
+		ARCH?=$(UNAME_M)
 	endif
 else
 	PLATFORM=$(UNAME_S)
-	ARCH=$(UNAME_M)
+	ifeq ($(UNAME_M),x86_64)
+		ARCH?=amd64
+	else
+	    ARCH?=$(UNAME_M)
+	endif
+	# Detect Windows (Git Bash / MSYS2)
+    IS_WINDOWS:=$(if $(findstring MINGW,$(UNAME_S)),yes,$(if $(findstring MSYS,$(UNAME_S)),yes,$(if $(findstring CYGWIN,$(UNAME_S)),yes,no)))
+	ifeq ($(IS_WINDOWS),yes)
+	    EXT=.exe
+	    LNCMD=cp
+	else ifeq ($(UNAME_S),windows) # failsafe for force windows build in other OS using UNAME_S=windows
+		EXT=.exe
+	endif
+
 endif

 BINARY_PATH=$(BUILD_DIR)/$(BINARY_NAME)-$(PLATFORM)-$(ARCH)
@@ -113,23 +136,26 @@ generate:

 ## build: Build the picoclaw binary for current platform
 build: generate
-	@echo "Building $(BINARY_NAME) for $(PLATFORM)/$(ARCH)..."
+	@echo "Building $(BINARY_NAME)$(EXT) for $(PLATFORM)/$(ARCH)..."
 	@mkdir -p $(BUILD_DIR)
-	@$(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BINARY_PATH) ./$(CMD_DIR)
-	@echo "Build complete: $(BINARY_PATH)"
-	@ln -sf $(BINARY_NAME)-$(PLATFORM)-$(ARCH) $(BUILD_DIR)/$(BINARY_NAME)
+	@GOARCH=${ARCH} $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BINARY_PATH)$(EXT) ./$(CMD_DIR)
+	@echo "Build complete: $(BINARY_PATH)$(EXT)"
+	@$(LNCMD) $(BINARY_NAME)-$(PLATFORM)-$(ARCH)$(EXT) $(BUILD_DIR)/$(BINARY_NAME)$(EXT)

 ## build-launcher: Build the picoclaw-launcher (web console) binary
 build-launcher:
 	@echo "Building picoclaw-launcher for $(PLATFORM)/$(ARCH)..."
 	@mkdir -p $(BUILD_DIR)
-	@if [ ! -f web/backend/dist/index.html ]; then \
-		echo "Building frontend..."; \
-		cd web/frontend && pnpm install && pnpm build:backend; \
-	fi
-	@$(WEB_GO) build $(GOFLAGS) -o $(BUILD_DIR)/picoclaw-launcher-$(PLATFORM)-$(ARCH) ./web/backend
-	@ln -sf picoclaw-launcher-$(PLATFORM)-$(ARCH) $(BUILD_DIR)/picoclaw-launcher
-	@echo "Build complete: $(BUILD_DIR)/picoclaw-launcher"
+	@GOARCH=${ARCH} $(MAKE) -C web build \
+		OUTPUT="$(CURDIR)/$(BUILD_DIR)/picoclaw-launcher-$(PLATFORM)-$(ARCH)$(EXT)" \
+		WEB_GO='$(WEB_GO)' \
+		GO_BUILD_TAGS='$(GO_BUILD_TAGS)' \
+		LDFLAGS='$(LDFLAGS)'
+	@$(LNCMD) picoclaw-launcher-$(PLATFORM)-$(ARCH)$(EXT) $(BUILD_DIR)/picoclaw-launcher$(EXT)
+	@echo "Build complete: $(BUILD_DIR)/picoclaw-launcher$(EXT)"
+
+build-launcher-frontend:
+	@$(MAKE) -C web build-frontend

 ## build-launcher-tui: Build the picoclaw-launcher TUI binary
 build-launcher-tui:
@@ -179,17 +205,51 @@ build-linux-mipsle: generate
 	$(call PATCH_MIPS_FLAGS,$(BUILD_DIR)/$(BINARY_NAME)-linux-mipsle)
 	@echo "Build complete: $(BUILD_DIR)/$(BINARY_NAME)-linux-mipsle"

+## build-android-arm64: Build core for Android ARM64
+build-android-arm64: generate
+	@echo "Building for android/arm64..."
+	@mkdir -p $(BUILD_DIR)
+	GOOS=android GOARCH=arm64 $(GO) build -tags stdjson -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-android-arm64 ./$(CMD_DIR)
+	@echo "Build complete: $(BUILD_DIR)/$(BINARY_NAME)-android-arm64"
+
+## build-launcher-android-arm64: Build launcher for Android ARM64
+build-launcher-android-arm64:
+	@echo "Building picoclaw-launcher for android/arm64..."
+	@mkdir -p $(BUILD_DIR)
+	@$(MAKE) -C web build-android-arm64 \
+		OUTPUT_ANDROID_ARM64="$(CURDIR)/$(BUILD_DIR)/picoclaw-launcher-android-arm64" \
+		GO='$(GO)' \
+		LDFLAGS='$(LDFLAGS)'
+	@echo "Build complete: $(BUILD_DIR)/picoclaw-launcher-android-arm64"
+
+## build-android-bundle: Build core and launcher for all Android architectures and package as universal zip
+build-android-bundle: generate
+	@echo "Building core for all Android architectures..."
+	@mkdir -p $(BUILD_DIR)
+	GOOS=android GOARCH=arm64 $(GO) build -tags stdjson -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-android-arm64 ./$(CMD_DIR)
+	@echo "Building launcher for Android arm64..."
+	@$(MAKE) build-launcher-android-arm64
+	@echo "Staging JNI libs..."
+	@rm -rf $(BUILD_DIR)/android-staging
+	@mkdir -p $(BUILD_DIR)/android-staging/arm64-v8a
+	@cp $(BUILD_DIR)/$(BINARY_NAME)-android-arm64 $(BUILD_DIR)/android-staging/arm64-v8a/libpicoclaw.so
+	@cp $(BUILD_DIR)/picoclaw-launcher-android-arm64 $(BUILD_DIR)/android-staging/arm64-v8a/libpicoclaw-web.so
+	@cd $(BUILD_DIR)/android-staging && zip -r ../picoclaw-android-universal.zip .
+	@rm -rf $(BUILD_DIR)/android-staging
+	@echo "All Android builds complete: $(BUILD_DIR)/picoclaw-android-universal.zip"
+
 ## build-pi-zero: Build for Raspberry Pi Zero 2 W (32-bit and 64-bit)
 build-pi-zero: build-linux-arm build-linux-arm64
 	@echo "Pi Zero 2 W builds: $(BUILD_DIR)/$(BINARY_NAME)-linux-arm (32-bit), $(BUILD_DIR)/$(BINARY_NAME)-linux-arm64 (64-bit)"

-## build-all: Build picoclaw for all platforms
+## build-all: Build the picoclaw core binary for all Makefile-managed platforms
 build-all: generate
 	@echo "Building for multiple platforms..."
 	@mkdir -p $(BUILD_DIR)
 	GOOS=linux GOARCH=amd64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-linux-amd64 ./$(CMD_DIR)
 	GOOS=linux GOARCH=arm GOARM=7 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-linux-arm ./$(CMD_DIR)
 	GOOS=linux GOARCH=arm64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-linux-arm64 ./$(CMD_DIR)
+	@$(PTY_PATCH_LOONG64)
 	GOOS=linux GOARCH=loong64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-linux-loong64 ./$(CMD_DIR)
 	GOOS=linux GOARCH=riscv64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-linux-riscv64 ./$(CMD_DIR)
 	GOOS=linux GOARCH=mipsle GOMIPS=softfloat $(GO) build $(GOFLAGS_NO_GOOLM) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-linux-mipsle ./$(CMD_DIR)
@@ -199,7 +259,7 @@ build-all: generate
 	GOOS=windows GOARCH=amd64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-windows-amd64.exe ./$(CMD_DIR)
 	GOOS=netbsd GOARCH=amd64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-netbsd-amd64 ./$(CMD_DIR)
 	GOOS=netbsd GOARCH=arm64 $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o $(BUILD_DIR)/$(BINARY_NAME)-netbsd-arm64 ./$(CMD_DIR)
-	@echo "All builds complete"
+	@echo "Core builds complete"

 ## install: Install picoclaw to system and copy builtin skills
 install: build
@@ -248,9 +308,14 @@ test: generate
 fmt:
 	@$(GOLANGCI_LINT) fmt

+## lint-docs: Check common documentation layout and naming conventions
+lint-docs:
+	@./scripts/lint-docs.sh
+
 ## lint: Run linters
 lint:
 	@$(GOLANGCI_LINT) run --build-tags $(GO_BUILD_TAGS)
+	@./scripts/lint-docs.sh

 ## fix: Fix linting issues
 fix:
@@ -266,8 +331,8 @@ update-deps:
 	@$(GO) get -u ./...
 	@$(GO) mod tidy

-## check: Run vet, fmt, and verify dependencies
-check: deps fmt vet test
+## check: Run deps, fmt, vet, tests, and docs consistency checks
+check: deps fmt vet test lint-docs

 ## run: Build and run picoclaw
 run: build
@@ -313,16 +378,36 @@ docker-clean:


 ## build-macos-app: Build PicoClaw macOS .app bundle (no terminal window)
-build-macos-app:
+build-macos-app: build-launcher
 	@echo "Building macOS .app bundle..."
 	@if [ "$(UNAME_S)" != "Darwin" ]; then \
 		echo "Error: This target is only available on macOS"; \
 		exit 1; \
 	fi
-	@cd web && $(MAKE) build && cd ..
-	@./scripts/build-macos-app.sh $(BINARY_NAME)-$(PLATFORM)-$(ARCH)
+	@./scripts/build-macos-app.sh $(PLATFORM)-$(ARCH)
 	@echo "macOS .app bundle created: $(BUILD_DIR)/PicoClaw.app"

+## mem: Build membench, download LOCOMO data (if needed), run benchmark, and show results
+mem:
+	@echo "Building membench..."
+	@mkdir -p $(BUILD_DIR)
+	@$(GO) build -o $(BUILD_DIR)/membench ./cmd/membench
+	@echo "Build complete: $(BUILD_DIR)/membench"
+# A failed or truncated download is deleted so that the next run retries it
+# instead of treating the leftover file as a valid cached dataset.
+	@if [ ! -f $(BUILD_DIR)/memdata/locomo10.json ]; then \
+		echo "Downloading LOCOMO dataset..."; \
+		mkdir -p $(BUILD_DIR)/memdata; \
+		curl -sfL "https://raw.githubusercontent.com/snap-research/locomo/main/data/locomo10.json" \
+			-o $(BUILD_DIR)/memdata/locomo10.json && [ -s $(BUILD_DIR)/memdata/locomo10.json ] || { rm -f $(BUILD_DIR)/memdata/locomo10.json; echo "Error: LOCOMO download failed"; exit 1; }; \
+		echo "Download complete"; \
+	else \
+		echo "LOCOMO dataset already exists, skipping download"; \
+	fi
+	@echo "Running benchmark..."
+	@rm -rf $(BUILD_DIR)/memout
+	@$(BUILD_DIR)/membench run --data $(BUILD_DIR)/memdata --out $(BUILD_DIR)/memout --budget 4000
+
 ## help: Show this help message
 help:
 	@echo "picoclaw Makefile"
@@ -18,7 +18,7 @@
    <a href="https://discord.gg/V4sAZ9XWpN"><img src="https://img.shields.io/badge/Discord-Community-4c60eb?style=flat&logo=discord&logoColor=white" alt="Discord"></a>
  </p>

-[中文](README.zh.md) | [日本語](README.ja.md) | [Português](README.pt-br.md) | [Tiếng Việt](README.vi.md) | [Français](README.fr.md) | [Italiano](README.it.md) | [Bahasa Indonesia](README.id.md) | **English**
+[中文](docs/project/README.zh.md) | [日本語](docs/project/README.ja.md) | [한국어](docs/project/README.ko.md) | [Português](docs/project/README.pt-br.md) | [Tiếng Việt](docs/project/README.vi.md) | [Français](docs/project/README.fr.md) | [Italiano](docs/project/README.it.md) | [Bahasa Indonesia](docs/project/README.id.md) | [Malay](docs/project/README.ms.md) | **English**

 </div>

@@ -56,17 +56,21 @@

 ## 📢 News

+2026-03-31 📱 **Android Support!** PicoClaw now runs on Android! Download the APK at [picoclaw.io](https://picoclaw.io/download)
+
+2026-03-25 🚀 **v0.2.4 Released!** Agent architecture overhaul (SubTurn, Hooks, Steering, EventBus), WeChat/WeCom integration, security hardening (.security.yml, sensitive data filtering), new providers (AWS Bedrock, Azure, Xiaomi MiMo), and 35 bug fixes. PicoClaw has reached **26K Stars**!
+
 2026-03-17 🚀 **v0.2.3 Released!** System tray UI (Windows & Linux), sub-agent status query (`spawn_status`), experimental Gateway hot-reload, Cron security gating, and 2 security fixes. PicoClaw has reached **25K Stars**!

 2026-03-09 🎉 **v0.2.1 — Biggest update yet!** MCP protocol support, 4 new channels (Matrix/IRC/WeCom/Discord Proxy), 3 new providers (Kimi/Minimax/Avian), vision pipeline, JSONL memory store, model routing.

 2026-02-28 📦 **v0.2.0** released with Docker Compose and Web UI Launcher support.

-2026-02-26 🎉 PicoClaw hits **20K Stars** in just 17 days! Channel auto-orchestration and capability interfaces are live.
-
 <details>
 <summary>Earlier news...</summary>

+2026-02-26 🎉 PicoClaw hits **20K Stars** in just 17 days! Channel auto-orchestration and capability interfaces are live.
+
 2026-02-16 🎉 PicoClaw breaks 12K Stars in one week! Community maintainer roles and [Roadmap](ROADMAP.md) officially launched.

 2026-02-13 🎉 PicoClaw breaks 5000 Stars in 4 days! Project roadmap and developer groups in progress.
@@ -108,7 +112,7 @@ _*Recent builds may use 10-20MB due to rapid PR merges. Resource optimization is

 </div>

-> **[Hardware Compatibility List](docs/hardware-compatibility.md)** — See all tested boards, from $5 RISC-V to Raspberry Pi to Android phones. Your board not listed? Submit a PR!
+> **[Hardware Compatibility List](docs/guides/hardware-compatibility.md)** — See all tested boards, from $5 RISC-V to Raspberry Pi to Android phones. Your board not listed? Submit a PR!

 <p align="center">
 <img src="assets/hardware-banner.jpg" alt="PicoClaw Hardware Compatibility" width="100%">
@@ -160,22 +164,32 @@ Alternatively, download the binary for your platform from the [GitHub Releases](

 ### Build from source (for development)

+Prerequisites:
+
+- Go 1.25+
+- Node.js 22+ and pnpm 10.33.0+ for Web UI / launcher builds
+
 ```bash
 git clone https://github.com/sipeed/picoclaw.git

 cd picoclaw
 make deps

-# Build core binary
+# Install frontend dependencies
+(cd web/frontend && pnpm install --frozen-lockfile)
+
+# Build the core binary for the current platform
 make build

-# Build Web UI Launcher (required for WebUI mode)
+# Build the Web UI Launcher (required for WebUI mode)
 make build-launcher

-# Build for multiple platforms
+# Build core binaries for all Makefile-managed platforms
 make build-all

-# Build for Raspberry Pi Zero 2 W (32-bit: make build-linux-arm; 64-bit: make build-linux-arm64)
+# Build for Raspberry Pi Zero 2 W
+# 32-bit: make build-linux-arm
+# 64-bit: make build-linux-arm64
 make build-pi-zero

 # Build and install
@@ -211,7 +225,7 @@ picoclaw-launcher
 <img src="assets/launcher-webui.jpg" alt="WebUI Launcher" width="600">
 </p>

-**Getting started:** 
+**Getting started:**

 Open the WebUI, then: **1)** Configure a Provider (add your LLM API key) -> **2)** Configure a Channel (e.g., Telegram) -> **3)** Start the Gateway -> **4)** Chat!

@@ -254,6 +268,29 @@ docker compose -f docker/docker-compose.yml --profile launcher up -d

 </details>

+<details>
+<summary><b>macOS — First Launch Security Warning</b></summary>
+
+macOS may block `picoclaw-launcher` on first launch because it was downloaded from the internet and is not distributed through the Mac App Store or notarized by Apple.
+
+**Step 1:** Double-click `picoclaw-launcher`. You will see a security warning:
+
+<p align="center">
+<img src="assets/macos-gatekeeper-warning.jpg" alt="macOS Gatekeeper warning" width="400">
+</p>
+
+> *"picoclaw-launcher" Not Opened — Apple could not verify "picoclaw-launcher" is free of malware that may harm your Mac or compromise your privacy.*
+
+**Step 2:** Open **System Settings** → **Privacy & Security** → scroll down to the **Security** section → click **Open Anyway** → confirm by clicking **Open Anyway** in the dialog.
+
+<p align="center">
+<img src="assets/macos-gatekeeper-allow.jpg" alt="macOS Privacy & Security — Open Anyway" width="600">
+</p>
+
+After this one-time step, `picoclaw-launcher` will open normally on subsequent launches.
+
+</details>
+
 ### 💻 TUI Launcher (Recommended for Headless / SSH)

 The TUI (Terminal UI) Launcher provides a full-featured terminal interface for configuration and management. Ideal for servers, Raspberry Pi, and other headless environments.
@@ -266,17 +303,36 @@ picoclaw-launcher-tui
 <img src="assets/launcher-tui.jpg" alt="TUI Launcher" width="600">
 </p>

-**Getting started:** 
+**Getting started:**

 Use the TUI menus to: **1)** Configure a Provider -> **2)** Configure a Channel -> **3)** Start the Gateway -> **4)** Chat!

 For detailed TUI documentation, see [docs.picoclaw.io](https://docs.picoclaw.io).

+<a id="-run-on-old-android-phones"></a>
 ### 📱 Android

 Give your decade-old phone a second life! Turn it into a smart AI Assistant with PicoClaw.

-**Option 1: Termux (available now)**
+**Option 1: APK Install**
+
+Preview:
+
+<table>
+  <tr>
+    <td><img src="assets/fui_main_page.jpg" width="200"></td>
+    <td><img src="assets/fui_web_page.jpg" width="200"></td>
+    <td><img src="assets/fui_log_page.jpg" width="200"></td>
+    <td><img src="assets/fui_setting_page.jpg" width="200"></td>
+  </tr>
+</table>
+
+Download the APK from [picoclaw.io](https://picoclaw.io/download/) and install directly. No Termux required!
+
+**Option 2: Termux**
+
+<details>
+<summary><b>Terminal Launcher (for resource-constrained environments)</b></summary>

 1. Install [Termux](https://github.com/termux/termux-app) (download from [GitHub Releases](https://github.com/termux/termux-app/releases), or search in F-Droid / Google Play)
 2. Run the following commands:
@@ -293,13 +349,6 @@ Then follow the Terminal Launcher section below to complete configuration.

 <img src="assets/termux.jpg" alt="PicoClaw on Termux" width="512">

-**Option 2: APK Install (coming soon)**
-
-A standalone Android APK with built-in WebUI is in development. Stay tuned!
-
-<details>
-<summary><b>Terminal Launcher (for resource-constrained environments)</b></summary>
-
 For minimal environments where only the `picoclaw` core binary is available (no Launcher UI), you can configure everything via the command line and a JSON config file.

 **1. Initialize**
@@ -330,8 +379,8 @@ This creates `~/.picoclaw/config.json` and the workspace directory.
 ```

 > See `config/config.example.json` in the repo for a complete configuration template with all available options.
-> 
-> Please note: config.example.json format is version 0, with sensitive codes in it, and will be auto migrated to version 1+, then, the config.json will only store insensitive data, the sensitive codes will be stored in .security.yml, if you need manually modify the codes, please see `docs/security_configuration.md` for more details.
+>
+> Please note: `config.example.json` uses the version 0 format, which contains sensitive values. It is automatically migrated to version 1+; after migration, `config.json` stores only non-sensitive data and the sensitive values are stored in `.security.yml`. If you need to edit those values manually, see `docs/security/security_configuration.md` for details.


 **3. Chat**
@@ -370,6 +419,7 @@ PicoClaw supports 30+ LLM providers through the `model_list` configuration. Use
 | [NVIDIA NIM](https://build.nvidia.com/) | `nvidia/` | Required | NVIDIA hosted models |
 | [Cerebras](https://cloud.cerebras.ai/) | `cerebras/` | Required | Fast inference |
 | [Novita AI](https://novita.ai/) | `novita/` | Required | Various open models |
+| [Xiaomi MiMo](https://platform.xiaomimimo.com/) | `mimo/` | Required | MiMo models |
 | [Ollama](https://ollama.com/) | `ollama/` | Not needed | Local models, self-hosted |
 | [vLLM](https://docs.vllm.ai/) | `vllm/` | Not needed | Local deployment, OpenAI-compatible |
 | [LiteLLM](https://docs.litellm.ai/) | `litellm/` | Varies | Proxy for 100+ providers |
@@ -409,30 +459,29 @@ PicoClaw supports 30+ LLM providers through the `model_list` configuration. Use
 }
 ```

-For full provider configuration details, see [Providers & Models](docs/providers.md).
+For full provider configuration details, see [Providers & Models](docs/guides/providers.md).

 </details>

 ## 💬 Channels (Chat Apps)

-Talk to your PicoClaw through 17+ messaging platforms:
+Talk to your PicoClaw through 18+ messaging platforms:

 | Channel | Setup | Protocol | Docs |
 |---------|-------|----------|------|
 | **Telegram** | Easy (bot token) | Long polling | [Guide](docs/channels/telegram/README.md) |
 | **Discord** | Easy (bot token + intents) | WebSocket | [Guide](docs/channels/discord/README.md) |
-| **WhatsApp** | Easy (QR scan or bridge URL) | Native / Bridge | [Guide](docs/chat-apps.md#whatsapp) |
-| **Weixin** | Easy (Native QR scan) | iLink API | [Guide](docs/chat-apps.md#weixin) |
+| **WhatsApp** | Easy (QR scan or bridge URL) | Native / Bridge | [Guide](docs/guides/chat-apps.md#whatsapp) |
+| **Weixin** | Easy (Native QR scan) | iLink API | [Guide](docs/guides/chat-apps.md#weixin) |
 | **QQ** | Easy (AppID + AppSecret) | WebSocket | [Guide](docs/channels/qq/README.md) |
 | **Slack** | Easy (bot + app token) | Socket Mode | [Guide](docs/channels/slack/README.md) |
 | **Matrix** | Medium (homeserver + token) | Sync API | [Guide](docs/channels/matrix/README.md) |
 | **DingTalk** | Medium (client credentials) | Stream | [Guide](docs/channels/dingtalk/README.md) |
 | **Feishu / Lark** | Medium (App ID + Secret) | WebSocket/SDK | [Guide](docs/channels/feishu/README.md) |
 | **LINE** | Medium (credentials + webhook) | Webhook | [Guide](docs/channels/line/README.md) |
-| **WeCom Bot** | Medium (webhook URL) | Webhook | [Guide](docs/channels/wecom/wecom_bot/README.md) |
-| **WeCom App** | Medium (corp credentials) | Webhook | [Guide](docs/channels/wecom/wecom_app/README.md) |
-| **WeCom AI Bot** | Medium (token + AES key) | WebSocket / Webhook | [Guide](docs/channels/wecom/wecom_aibot/README.md) |
-| **IRC** | Medium (server + nick) | IRC protocol | [Guide](docs/chat-apps.md#irc) |
+| **WeCom** | Easy (QR login or manual) | WebSocket | [Guide](docs/channels/wecom/README.md) |
+| **VK** | Easy (group token) | Long Poll | [Guide](docs/channels/vk/README.md) |
+| **IRC** | Medium (server + nick) | IRC protocol | [Guide](docs/guides/chat-apps.md#irc) |
 | **OneBot** | Medium (WebSocket URL) | OneBot v11 | [Guide](docs/channels/onebot/README.md) |
 | **MaixCam** | Easy (enable) | TCP socket | [Guide](docs/channels/maixcam/README.md) |
 | **Pico** | Easy (enable) | Native protocol | Built-in |
@@ -440,7 +489,9 @@ Talk to your PicoClaw through 17+ messaging platforms:

 > All webhook-based channels share a single Gateway HTTP server (`gateway.host`:`gateway.port`, default `127.0.0.1:18790`). Feishu uses WebSocket/SDK mode and does not use the shared HTTP server.

-For detailed channel setup instructions, see [Chat Apps Configuration](docs/chat-apps.md).
+> Log verbosity is controlled by `gateway.log_level` (default: `warn`). Supported values: `debug`, `info`, `warn`, `error`, `fatal`. Can also be set via `PICOCLAW_LOG_LEVEL`. See [Configuration](docs/guides/configuration.md#gateway-log-level) for details.
+
+For detailed channel setup instructions, see [Chat Apps Configuration](docs/guides/chat-apps.md).

 ## 🔧 Tools

@@ -460,7 +511,7 @@ PicoClaw can search the web to provide up-to-date information. Configure in `too

 ### ⚙️ Other Tools

-PicoClaw includes built-in tools for file operations, code execution, scheduling, and more. See [Tools Configuration](docs/tools_configuration.md) for details.
+PicoClaw includes built-in tools for file operations, code execution, scheduling, and more. See [Tools Configuration](docs/reference/tools_configuration.md) for details.

 ## 🎯 Skills

@@ -473,7 +524,7 @@ picoclaw skills search "web scraping"
 picoclaw skills install <skill-name>
 ```

-**Configure ClawHub token** (optional, for higher rate limits):
+**Configure skill registries**:

 Add to your `config.json`:
 ```json
@@ -483,6 +534,11 @@ Add to your `config.json`:
      "registries": {
        "clawhub": {
          "auth_token": "your-clawhub-token"
+        },
+        "github": {
+          "base_url": "https://github.com",
+          "auth_token": "your-github-token",
+          "proxy": ""
        }
      }
    }
@@ -490,7 +546,9 @@ Add to your `config.json`:
 }
 ```

-For more details, see [Tools Configuration - Skills](docs/tools_configuration.md#skills-tool).
+`tools.skills.github.*` is deprecated. Use `tools.skills.registries.github.*` instead.
+
+For more details, see [Tools Configuration - Skills](docs/reference/tools_configuration.md#skills-tool).

 ## 🔗 MCP (Model Context Protocol)

@@ -513,7 +571,7 @@ PicoClaw natively supports [MCP](https://modelcontextprotocol.io/) — connect a
 }
 ```

-For full MCP configuration (stdio, SSE, HTTP transports, Tool Discovery), see [Tools Configuration - MCP](docs/tools_configuration.md#mcp-tool).
+For full MCP configuration (stdio, SSE, HTTP transports, Tool Discovery), see [Tools Configuration - MCP](docs/reference/tools_configuration.md#mcp-tool).

 ## <img src="assets/clawdchat-icon.png" width="24" height="24" alt="ClawdChat"> Join the Agent Social Network

@@ -550,23 +608,26 @@ PicoClaw supports scheduled reminders and recurring tasks through the `cron` too
 * **Recurring tasks**: "Remind me every 2 hours" -> triggers every 2 hours
 * **Cron expressions**: "Remind me at 9am daily" -> uses cron expression

+See [docs/reference/cron.md](docs/reference/cron.md) for current schedule types, execution modes, command-job gates, and persistence details.
+
 ## 📚 Documentation

 For detailed guides beyond this README:

 | Topic | Description |
 |-------|-------------|
-| [Docker & Quick Start](docs/docker.md) | Docker Compose setup, Launcher/Agent modes |
-| [Chat Apps](docs/chat-apps.md) | All 17+ channel setup guides |
-| [Configuration](docs/configuration.md) | Environment variables, workspace layout, security sandbox |
-| [Providers & Models](docs/providers.md) | 30+ LLM providers, model routing, model_list configuration |
-| [Spawn & Async Tasks](docs/spawn-tasks.md) | Quick tasks, long tasks with spawn, async sub-agent orchestration |
-| [Hooks](docs/hooks/README.md) | Event-driven hook system: observers, interceptors, approval hooks |
-| [Steering](docs/steering.md) | Inject messages into a running agent loop between tool calls |
-| [SubTurn](docs/subturn.md) | Subagent coordination, concurrency control, lifecycle |
-| [Troubleshooting](docs/troubleshooting.md) | Common issues and solutions |
-| [Tools Configuration](docs/tools_configuration.md) | Per-tool enable/disable, exec policies, MCP, Skills |
-| [Hardware Compatibility](docs/hardware-compatibility.md) | Tested boards, minimum requirements |
+| [Docker & Quick Start](docs/guides/docker.md) | Docker Compose setup, Launcher/Agent modes |
+| [Chat Apps](docs/guides/chat-apps.md) | All 18+ channel setup guides |
+| [Configuration](docs/guides/configuration.md) | Environment variables, workspace layout, security sandbox |
+| [Scheduled Tasks and Cron Jobs](docs/reference/cron.md) | Cron schedule types, deliver modes, command gates, job storage |
+| [Providers & Models](docs/guides/providers.md) | 30+ LLM providers, model routing, model_list configuration |
+| [Spawn & Async Tasks](docs/guides/spawn-tasks.md) | Quick tasks, long tasks with spawn, async sub-agent orchestration |
+| [Hooks](docs/architecture/hooks/README.md) | Event-driven hook system: observers, interceptors, approval hooks |
+| [Steering](docs/architecture/steering.md) | Inject messages into a running agent loop between tool calls |
+| [SubTurn](docs/architecture/subturn.md) | Subagent coordination, concurrency control, lifecycle |
+| [Troubleshooting](docs/operations/troubleshooting.md) | Common issues and solutions |
+| [Tools Configuration](docs/reference/tools_configuration.md) | Per-tool enable/disable, exec policies, MCP, Skills |
+| [Hardware Compatibility](docs/guides/hardware-compatibility.md) | Tested boards, minimum requirements |

 ## 🤝 Contribute & Roadmap

@@ -0,0 +1,412 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"github.com/sipeed/picoclaw/pkg/seahorse"
+)
+
+// EvalResult holds per-sample evaluation results for one mode.
+// One value is produced per (mode, sample) pair and is what SaveResults
+// serializes to disk.
+type EvalResult struct {
+	// Mode is the evaluation mode label; this package emits "legacy",
+	// "seahorse", "legacy-llm" and "seahorse-llm".
+	Mode      string     `json:"mode"`
+	// SampleID identifies the sample these results belong to.
+	SampleID  string     `json:"sampleId"`
+	// QAResults holds one entry per QA pair of the sample.
+	QAResults []QAResult `json:"qaResults"`
+	// Agg is the aggregate of QAResults (see aggregateMetrics).
+	Agg       AggMetrics `json:"aggregated"`
+}
+
+// QAResult holds metrics for a single QA pair.
+type QAResult struct {
+	Question   string  `json:"question"`
+	Category   int     `json:"category"`
+	// GoldAnswer is the reference answer as returned by the QA's AnswerString.
+	GoldAnswer string  `json:"goldAnswer"`
+	// TokenF1 is the answer-quality score. In the token modes it is the
+	// TokenOverlapF1 value; in the LLM modes it carries the normalized judge
+	// score, or the sentinel -1.0 when generation or judging failed.
+	TokenF1    float64 `json:"tokenF1"`
+	// HitRate is the value returned by RecallHitRate for the retrieved context.
+	HitRate    float64 `json:"hitRate"`
+}
+
+// AggMetrics holds aggregated evaluation metrics.
+type AggMetrics struct {
+	// OverallF1 is the mean TokenF1 over results whose score is >= 0
+	// (negative sentinel scores are excluded); 0 when there are none.
+	OverallF1      float64             `json:"overallF1"`
+	// OverallHitRate is the mean HitRate over all questions.
+	OverallHitRate float64             `json:"overallHitRate"`
+	// ByCategory maps a QA category number to its metrics.
+	ByCategory     map[int]*CatMetrics `json:"byCategory"`
+	// TotalQuestions is the number of QA results aggregated.
+	TotalQuestions int                 `json:"totalQuestions"`
+	// ValidF1Count is the number of results that contributed to OverallF1.
+	ValidF1Count   int                 `json:"validF1Count"`
+}
+
+// CatMetrics holds metrics for a single category.
+type CatMetrics struct {
+	// F1 is the mean TokenF1 over the category's results with a score >= 0.
+	F1            float64 `json:"f1"`
+	// HitRate is the mean HitRate over all questions in the category.
+	HitRate       float64 `json:"hitRate"`
+	// QuestionCount is the number of questions in the category.
+	QuestionCount int     `json:"questionCount"`
+	// ValidF1Count is the number of results that contributed to F1.
+	ValidF1Count  int     `json:"validF1Count"`
+}
+
+// EvalLegacy evaluates using legacy session store (raw history + budget truncation).
+//
+// For every sample the stored history is flattened to its message contents,
+// truncated to budgetTokens, and the resulting context is scored against each
+// QA pair of the sample (token-overlap F1 and evidence hit rate).
+//
+// The truncated context does not depend on the question, so it is built once
+// per sample instead of once per QA pair (EvalLegacyLLM does the same).
+// ctx is accepted for signature parity with the other evaluators and is not
+// used here.
+//
+// Returns one EvalResult with Mode "legacy" per input sample, in input order.
+func EvalLegacy(
+	ctx context.Context,
+	samples []LocomoSample,
+	legacy *LegacyStore,
+	budgetTokens int,
+) []EvalResult {
+	results := make([]EvalResult, 0, len(samples))
+	for si := range samples {
+		sample := &samples[si]
+		history := legacy.GetHistory(sample.SampleID)
+
+		// Convert messages to content strings
+		allContent := make([]string, 0, len(history))
+		for _, msg := range history {
+			allContent = append(allContent, msg.Content)
+		}
+
+		// Budget truncate the full history once; the result is shared by all
+		// questions of this sample. The local is deliberately not named
+		// "context" so it does not shadow the imported package.
+		truncated, _ := BudgetTruncate(allContent, budgetTokens)
+		contextText := StringListToContent(truncated)
+
+		qaResults := make([]QAResult, 0, len(sample.QA))
+		for qi := range sample.QA {
+			qa := &sample.QA[qi]
+			gold := qa.AnswerString()
+
+			qaResults = append(qaResults, QAResult{
+				Question:   qa.Question,
+				Category:   qa.Category,
+				GoldAnswer: gold,
+				TokenF1:    TokenOverlapF1(contextText, gold),
+				HitRate:    RecallHitRate(qa.Evidence, sample, contextText),
+			})
+		}
+
+		results = append(results, EvalResult{
+			Mode:      "legacy",
+			SampleID:  sample.SampleID,
+			QAResults: qaResults,
+			Agg:       aggregateMetrics(qaResults),
+		})
+	}
+	return results
+}
+
+// EvalSeahorse evaluates using seahorse short memory (per-keyword search + expand).
+//
+// For each QA pair the question is split into keywords, every keyword is
+// searched individually within the sample's conversation, and the union of
+// hits is ordered by best rank, expanded to full message content, truncated to
+// budgetTokens and scored (token-overlap F1 and evidence hit rate).
+//
+// Samples without an entry in ir.ConvMap are logged and skipped, so the
+// returned slice may be shorter than samples. A question for which nothing is
+// retrieved (or expansion fails) is recorded with TokenF1 and HitRate of 0.
+//
+// Messages with equal rank are ordered by message ID so that the retrieved
+// context is the same on every run; map iteration order is random and
+// sort.Slice is not stable, so without the tie-break the order would vary.
+func EvalSeahorse(
+	ctx context.Context,
+	samples []LocomoSample,
+	ir *SeahorseIngestResult,
+	budgetTokens int,
+) []EvalResult {
+	store := ir.Engine.GetRetrieval().Store()
+	retrieval := ir.Engine.GetRetrieval()
+
+	results := make([]EvalResult, 0, len(samples))
+	for si := range samples {
+		sample := &samples[si]
+		convID, ok := ir.ConvMap[sample.SampleID]
+		if !ok {
+			log.Printf("WARN: no conversation ID for sample %s", sample.SampleID)
+			continue
+		}
+
+		qaResults := make([]QAResult, 0, len(sample.QA))
+		for qi := range sample.QA {
+			qa := &sample.QA[qi]
+			gold := qa.AnswerString()
+			keywords := ExtractKeywords(qa.Question)
+
+			// Search each keyword individually and union results,
+			// tracking best BM25 rank per message for relevance sorting.
+			bestRank := map[int64]float64{}
+			for _, kw := range keywords {
+				searchResults, err := store.SearchMessages(ctx, seahorse.SearchInput{
+					Pattern:        kw,
+					ConversationID: convID,
+					Limit:          20,
+				})
+				if err != nil {
+					log.Printf("WARN: search failed for keyword %q: %v", kw, err)
+					continue
+				}
+				for _, sr := range searchResults {
+					if sr.MessageID <= 0 {
+						continue
+					}
+					if prev, seen := bestRank[sr.MessageID]; !seen || sr.Rank < prev {
+						bestRank[sr.MessageID] = sr.Rank
+					}
+				}
+			}
+
+			// Sort messageIDs by rank ascending (best/most-negative first).
+			// BudgetTruncate walks from the front, keeping best-ranked messages.
+			// Note: SQLite FTS5 bm25() returns negative values where more
+			// negative = better match.
+			messageIDs := make([]int64, 0, len(bestRank))
+			for id := range bestRank {
+				messageIDs = append(messageIDs, id)
+			}
+			sort.Slice(messageIDs, func(i, j int) bool {
+				ri, rj := bestRank[messageIDs[i]], bestRank[messageIDs[j]]
+				if ri != rj {
+					return ri < rj
+				}
+				// Deterministic tie-break for equally ranked messages.
+				return messageIDs[i] < messageIDs[j]
+			})
+
+			// Expand messages to get full content
+			var contentParts []string
+			if len(messageIDs) > 0 {
+				expandResult, err := retrieval.ExpandMessages(ctx, messageIDs)
+				if err != nil {
+					log.Printf("WARN: expand failed for sample %s: %v", sample.SampleID, err)
+				} else {
+					for _, msg := range expandResult.Messages {
+						contentParts = append(contentParts, msg.Content)
+					}
+				}
+			}
+
+			if len(contentParts) == 0 {
+				// Nothing retrieved: score the question as a miss.
+				qaResults = append(qaResults, QAResult{
+					Question:   qa.Question,
+					Category:   qa.Category,
+					GoldAnswer: gold,
+					TokenF1:    0.0,
+					HitRate:    0.0,
+				})
+				continue
+			}
+
+			// Budget truncate (drop worst-ranked). The local is not named
+			// "context" so it does not shadow the imported package.
+			truncated, _ := BudgetTruncate(contentParts, budgetTokens)
+			contextText := StringListToContent(truncated)
+
+			qaResults = append(qaResults, QAResult{
+				Question:   qa.Question,
+				Category:   qa.Category,
+				GoldAnswer: gold,
+				TokenF1:    TokenOverlapF1(contextText, gold),
+				HitRate:    RecallHitRate(qa.Evidence, sample, contextText),
+			})
+		}
+
+		results = append(results, EvalResult{
+			Mode:      "seahorse",
+			SampleID:  sample.SampleID,
+			QAResults: qaResults,
+			Agg:       aggregateMetrics(qaResults),
+		})
+	}
+	return results
+}
+
+// aggregateMetrics folds per-question results into overall and per-category
+// averages.
+//
+// Hit rate is averaged over every question. F1 is averaged only over results
+// whose TokenF1 is >= 0: negative values are the sentinel written when an LLM
+// call or score parse failed, and must not drag the mean down. An empty input
+// yields zero-valued metrics with an empty ByCategory map.
+func aggregateMetrics(qaResults []QAResult) AggMetrics {
+	// bucket accumulates the running sums for one category.
+	type bucket struct {
+		f1Total  float64
+		f1N      int
+		hitTotal float64
+		n        int
+	}
+
+	var (
+		buckets  = map[int]*bucket{}
+		f1Total  float64
+		f1N      int
+		hitTotal float64
+	)
+
+	for i := range qaResults {
+		item := &qaResults[i]
+
+		b, found := buckets[item.Category]
+		if !found {
+			b = &bucket{}
+			buckets[item.Category] = b
+		}
+
+		hitTotal += item.HitRate
+		b.hitTotal += item.HitRate
+		b.n++
+
+		// Skip sentinel -1.0 scores (LLM API/parse failures) from F1 averaging.
+		if item.TokenF1 >= 0 {
+			f1Total += item.TokenF1
+			f1N++
+			b.f1Total += item.TokenF1
+			b.f1N++
+		}
+	}
+
+	perCategory := map[int]*CatMetrics{}
+	for cat, b := range buckets {
+		metrics := &CatMetrics{
+			QuestionCount: b.n,
+			ValidF1Count:  b.f1N,
+		}
+		if b.f1N > 0 {
+			metrics.F1 = b.f1Total / float64(b.f1N)
+		}
+		if b.n > 0 {
+			metrics.HitRate = b.hitTotal / float64(b.n)
+		}
+		perCategory[cat] = metrics
+	}
+
+	out := AggMetrics{
+		ByCategory:     perCategory,
+		TotalQuestions: len(qaResults),
+		ValidF1Count:   f1N,
+	}
+	if f1N > 0 {
+		out.OverallF1 = f1Total / float64(f1N)
+	}
+	if len(qaResults) > 0 {
+		out.OverallHitRate = hitTotal / float64(len(qaResults))
+	}
+	return out
+}
+
+// SaveResults persists every result as an indented JSON file named
+// eval_<mode>_<sampleID>.json inside outDir, creating the directory first if
+// needed. It stops at, and returns, the first error encountered.
+func SaveResults(results []EvalResult, outDir string) error {
+	if mkErr := os.MkdirAll(outDir, 0o755); mkErr != nil {
+		return fmt.Errorf("create output dir: %w", mkErr)
+	}
+
+	for i := range results {
+		res := results[i]
+
+		encoded, err := json.MarshalIndent(res, "", "  ")
+		if err != nil {
+			return fmt.Errorf("marshal result: %w", err)
+		}
+
+		fileName := "eval_" + res.Mode + "_" + res.SampleID + ".json"
+		err = os.WriteFile(filepath.Join(outDir, fileName), encoded, 0o644)
+		if err != nil {
+			return fmt.Errorf("write result: %w", err)
+		}
+	}
+	return nil
+}
+
+// SaveAggregated writes a combined results.json with all modes.
+//
+// Results are grouped by Mode, each group is reduced with computeModeAgg, and
+// the resulting mode -> AggMetrics map is written as indented JSON to
+// <outDir>/results.json.
+//
+// Like SaveResults, it creates outDir when it does not exist, so it can be
+// called on its own. Errors are wrapped with the step that failed; the
+// underlying error remains available through errors.Is / errors.As.
+func SaveAggregated(results []EvalResult, outDir string) error {
+	if err := os.MkdirAll(outDir, 0o755); err != nil {
+		return fmt.Errorf("create output dir: %w", err)
+	}
+
+	byMode := map[string][]EvalResult{}
+	for _, r := range results {
+		byMode[r.Mode] = append(byMode[r.Mode], r)
+	}
+
+	aggMap := make(map[string]AggMetrics, len(byMode))
+	for mode, modeResults := range byMode {
+		aggMap[mode] = computeModeAgg(modeResults)
+	}
+
+	data, err := json.MarshalIndent(aggMap, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal aggregated results: %w", err)
+	}
+	if err := os.WriteFile(filepath.Join(outDir, "results.json"), data, 0o644); err != nil {
+		return fmt.Errorf("write aggregated results: %w", err)
+	}
+	return nil
+}
+
+// computeModeAgg merges the per-sample aggregates of one mode into a single
+// AggMetrics using weighted averaging: F1 values are weighted by the number of
+// valid F1 scores and hit rates by the number of questions, both overall and
+// per category. All results passed in are expected to share the same Mode.
+func computeModeAgg(results []EvalResult) AggMetrics {
+	// f1Weight returns the weight to use for an F1 average.
+	// Backward compat: old eval JSON (token mode) without ValidF1Count → use
+	// the question count. LLM modes may legitimately have a valid count of 0
+	// (all failures), so they are never substituted.
+	f1Weight := func(mode string, valid, questions int) int {
+		if valid == 0 && questions > 0 && !strings.HasSuffix(mode, "-llm") {
+			return questions
+		}
+		return valid
+	}
+
+	combined := AggMetrics{ByCategory: map[int]*CatMetrics{}}
+
+	// Pass 1: accumulate weighted sums and counts.
+	for i := range results {
+		res := &results[i]
+		sampleAgg := &res.Agg
+
+		weight := f1Weight(res.Mode, sampleAgg.ValidF1Count, sampleAgg.TotalQuestions)
+		combined.OverallF1 += sampleAgg.OverallF1 * float64(weight)
+		combined.OverallHitRate += sampleAgg.OverallHitRate * float64(sampleAgg.TotalQuestions)
+		combined.TotalQuestions += sampleAgg.TotalQuestions
+		combined.ValidF1Count += weight
+
+		for cat, src := range sampleAgg.ByCategory {
+			dst, found := combined.ByCategory[cat]
+			if !found {
+				dst = &CatMetrics{}
+				combined.ByCategory[cat] = dst
+			}
+			catWeight := f1Weight(res.Mode, src.ValidF1Count, src.QuestionCount)
+			dst.F1 += src.F1 * float64(catWeight)
+			dst.HitRate += src.HitRate * float64(src.QuestionCount)
+			dst.QuestionCount += src.QuestionCount
+			dst.ValidF1Count += catWeight
+		}
+	}
+
+	// Pass 2: turn the weighted sums into means, leaving zero-weight sums as-is.
+	if combined.ValidF1Count > 0 {
+		combined.OverallF1 /= float64(combined.ValidF1Count)
+	}
+	if combined.TotalQuestions > 0 {
+		combined.OverallHitRate /= float64(combined.TotalQuestions)
+	}
+	for _, metrics := range combined.ByCategory {
+		if metrics.ValidF1Count > 0 {
+			metrics.F1 /= float64(metrics.ValidF1Count)
+		}
+		if metrics.QuestionCount > 0 {
+			metrics.HitRate /= float64(metrics.QuestionCount)
+		}
+	}
+	return combined
+}
+
+// printSection prints a single comparison table section to stdout.
+//
+// Results are grouped by Mode and reduced with computeModeAgg; one row is
+// printed per mode (sorted by name) with the overall hit rate, the overall F1
+// and then one column per category (sorted numerically). The per-category
+// columns show the category hit rate; "N/A" marks a category a mode has no
+// data for.
+//
+// The Mode column is at least 10 characters wide and grows to fit the longest
+// mode name (e.g. "seahorse-llm" is 12 characters), and the separator line is
+// sized to the actual table width, so the columns stay aligned.
+func printSection(title string, results []EvalResult) {
+	fmt.Printf("\n--- %s ---\n", title)
+
+	byMode := map[string][]EvalResult{}
+	for _, r := range results {
+		byMode[r.Mode] = append(byMode[r.Mode], r)
+	}
+
+	modes := make(map[string]AggMetrics, len(byMode))
+	modeKeys := make([]string, 0, len(byMode))
+	catSet := map[int]bool{}
+	modeWidth := 10 // minimum width of the Mode column
+	for mode, modeResults := range byMode {
+		agg := computeModeAgg(modeResults)
+		modes[mode] = agg
+		modeKeys = append(modeKeys, mode)
+		// Collect all category keys across modes
+		for cat := range agg.ByCategory {
+			catSet[cat] = true
+		}
+		// Mode names are ASCII labels, so byte length equals display width.
+		if len(mode) > modeWidth {
+			modeWidth = len(mode)
+		}
+	}
+	sort.Strings(modeKeys)
+
+	cats := make([]int, 0, len(catSet))
+	for cat := range catSet {
+		cats = append(cats, cat)
+	}
+	sort.Ints(cats)
+
+	fmt.Printf("%-*s %-8s %-8s", modeWidth, "Mode", "HitRate", "F1")
+	for _, cat := range cats {
+		fmt.Printf(" %-7s", fmt.Sprintf("C%d", cat))
+	}
+	fmt.Println()
+
+	// Mode column + " HitRate" + " F1" columns, then (space + 7) per category.
+	tableWidth := modeWidth + (1 + 8) + (1 + 8) + (1+7)*len(cats)
+	fmt.Println(strings.Repeat("-", tableWidth))
+
+	for _, mode := range modeKeys {
+		agg := modes[mode]
+		fmt.Printf("%-*s %-8.4f %-8.4f", modeWidth, mode, agg.OverallHitRate, agg.OverallF1)
+		for _, cat := range cats {
+			if cm, ok := agg.ByCategory[cat]; ok {
+				fmt.Printf(" %-7.4f", cm.HitRate)
+			} else {
+				fmt.Printf(" %-7s", "N/A")
+			}
+		}
+		fmt.Println()
+	}
+}
+
+// PrintComparison writes human-readable comparison tables to stdout: one
+// section for the results scored without LLM generation and one for the
+// LLM-generated results. A section is omitted when its result slice is empty.
+func PrintComparison(results []EvalResult, llmResults []EvalResult) {
+	sections := []struct {
+		title string
+		rows  []EvalResult
+	}{
+		{title: "No LLM generation", rows: results},
+		{title: "With LLM", rows: llmResults},
+	}
+	for _, section := range sections {
+		if len(section.rows) == 0 {
+			continue
+		}
+		printSection(section.title, section.rows)
+	}
+}
@@ -0,0 +1,346 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/sipeed/picoclaw/pkg/seahorse"
+)
+
+// answerSystemPrompt is the system prompt generateAnswer sends to the
+// answering LLM together with the retrieved context and the question.
+const answerSystemPrompt = `You are a helpful assistant. Given conversation context, answer the question concisely and accurately. If the answer is not in the context, say "I don't know". Answer in 1-3 sentences maximum.`
+
+// judgeSystemPrompt is the system prompt judgeAnswer sends to the judge LLM.
+// It asks for a bare integer from 1 to 5, which judgeAnswer extracts with
+// scoreRe and normalizes to the 0.0-1.0 range.
+const judgeSystemPrompt = `You are an impartial judge evaluating answer quality.
+Compare the candidate answer against the reference answer.
+Consider semantic equivalence — different wording expressing the same meaning should score high.
+
+Output ONLY a single integer score from 1 to 5:
+1 = completely wrong or irrelevant
+2 = partially related but mostly incorrect
+3 = partially correct, missing key details
+4 = mostly correct with minor omissions
+5 = fully correct, semantically equivalent
+
+Output ONLY the number, nothing else.`
+
+// generateAnswer asks the LLM to answer a question given retrieved context.
+func generateAnswer(ctx context.Context, client *LLMClient, contextText, question string) (string, error) {
+	// Truncate context to avoid exceeding model limits while preserving valid UTF-8.
+	contextRunes := []rune(contextText)
+	if len(contextRunes) > 6000 {
+		contextText = string(contextRunes[:6000]) + "\n... [truncated]"
+	}
+
+	userPrompt := fmt.Sprintf("## Conversation Context\n\n%s\n\n## Question\n\n%s", contextText, question)
+	return client.Complete(ctx, answerSystemPrompt, userPrompt)
+}
+
+// scoreRe matches a standalone digit in the range 1-5 and captures it. The
+// word boundaries on both sides mean a digit that is part of a longer number
+// (e.g. either digit of "15") does not match. judgeAnswer takes the leftmost
+// match in the judge response.
+var scoreRe = regexp.MustCompile(`\b([1-5])\b`)
+
+// judgeAnswer has the judge LLM grade candidateAnswer against goldAnswer.
+//
+// The judge's 1-5 verdict is mapped linearly onto 0.0-1.0. The result is -1.0
+// in two cases: the LLM call failed (the error is returned as well), or no
+// score could be found in the response (logged, nil error).
+func judgeAnswer(
+	ctx context.Context,
+	judgeClient *LLMClient,
+	question, goldAnswer, candidateAnswer string,
+) (float64, error) {
+	prompt := fmt.Sprintf(
+		"Question: %s\n\nReference Answer: %s\n\nCandidate Answer: %s\n\nScore:",
+		question, goldAnswer, candidateAnswer,
+	)
+
+	raw, err := judgeClient.Complete(ctx, judgeSystemPrompt, prompt)
+	if err != nil {
+		return -1.0, err
+	}
+
+	verdict := strings.TrimSpace(raw)
+	match := scoreRe.FindStringSubmatch(verdict)
+	if len(match) != 2 {
+		log.Printf("WARNING: could not parse judge score from: %q, returning -1", verdict)
+		return -1.0, nil
+	}
+
+	// The capture group is a single digit 1-5, so Atoi cannot fail here.
+	grade, _ := strconv.Atoi(match[1])
+	// Normalize 1-5 to 0.0-1.0
+	return float64(grade-1) / 4.0, nil
+}
+
+// qaWork describes one QA evaluation unit: everything evalQAWorker needs to
+// answer, judge and log a single question.
+type qaWork struct {
+	sampleID    string // ID of the sample the question belongs to (used in logs)
+	qaIndex     int    // index of the question within the sample's QA slice
+	globalIndex int    // position across all samples, printed as "q=<globalIndex>/<totalQA>"
+	totalQA     int    // total number of QA pairs across all samples
+	qa          *LocomoQA
+	contextText string        // retrieved context handed to the answering LLM
+	sample      *LocomoSample // owning sample, passed to RecallHitRate
+}
+
+// qaResultOut collects one QA evaluation output produced by evalQAWorker.
+type qaResultOut struct {
+	index  int // position in the flat QA list for ordering
+	result QAResult
+	answer string  // answer generated by the LLM; empty when generation failed
+	score  float64 // judge score, or -1.0 when no score was obtained
+}
+
+// evalQAWorker processes a single QA item: generate answer + judge score.
+func evalQAWorker(
+	ctx context.Context,
+	w qaWork,
+	answerClient, judgeClient *LLMClient,
+	logPrefix string,
+) qaResultOut {
+	llmAnswer, err := generateAnswer(ctx, answerClient, w.contextText, w.qa.Question)
+	if err != nil {
+		log.Printf("WARN: LLM generation failed for sample %s Q%d: %v", w.sampleID, w.qaIndex, err)
+		llmAnswer = ""
+	}
+
+	score := -1.0
+	if llmAnswer != "" {
+		score, err = judgeAnswer(ctx, judgeClient, w.qa.Question, w.qa.AnswerString(), llmAnswer)
+		if err != nil {
+			log.Printf("WARN: LLM judge failed for sample %s Q%d: %v", w.sampleID, w.qaIndex, err)
+		}
+	}
+
+	hitRate := RecallHitRate(w.qa.Evidence, w.sample, w.contextText)
+
+	log.Printf("[%s] sample=%s q=%d/%d score=%.2f answer=%q",
+		logPrefix, w.sampleID, w.globalIndex, w.totalQA, score, truncateStr(llmAnswer, 80))
+
+	return qaResultOut{
+		index: w.globalIndex,
+		result: QAResult{
+			Question:   w.qa.Question,
+			Category:   w.qa.Category,
+			GoldAnswer: w.qa.AnswerString(),
+			TokenF1:    score,
+			HitRate:    hitRate,
+		},
+		answer: llmAnswer,
+		score:  score,
+	}
+}
+
+// EvalLegacyLLM evaluates legacy store using LLM generation + LLM-as-Judge.
+//
+// For every sample the stored history is truncated to budgetTokens once and
+// used as the context for all of the sample's questions. Each question is
+// answered by answerClient and scored by judgeClient (see evalQAWorker).
+//
+// concurrency is the maximum number of questions of one sample evaluated in
+// parallel; values below 1 are treated as 1 (sequential). Samples themselves
+// are always processed one after another.
+//
+// Returns one EvalResult with Mode "legacy-llm" per input sample, in input
+// order, with QAResults in the sample's question order.
+func EvalLegacyLLM(
+	ctx context.Context,
+	samples []LocomoSample,
+	legacy *LegacyStore,
+	budgetTokens int,
+	answerClient, judgeClient *LLMClient,
+	concurrency int,
+) []EvalResult {
+	if concurrency < 1 {
+		concurrency = 1
+	}
+	totalQA := countTotalQA(samples)
+	results := make([]EvalResult, 0, len(samples))
+
+	// qaOffset is the number of QA pairs in the samples handled so far. The
+	// global index must be derived from this running total: samples can have
+	// different numbers of questions, so si*len(sample.QA) would produce
+	// wrong (and possibly > totalQA) positions.
+	qaOffset := 0
+
+	for si := range samples {
+		sample := &samples[si]
+		base := qaOffset
+		qaOffset += len(sample.QA)
+
+		history := legacy.GetHistory(sample.SampleID)
+		allContent := make([]string, 0, len(history))
+		for _, msg := range history {
+			allContent = append(allContent, msg.Content)
+		}
+
+		truncated, _ := BudgetTruncate(allContent, budgetTokens)
+		contextText := StringListToContent(truncated)
+
+		qaResults := make([]QAResult, len(sample.QA))
+
+		// evalOne evaluates question qi and stores its result at index qi.
+		evalOne := func(qi int) {
+			out := evalQAWorker(ctx, qaWork{
+				sampleID: sample.SampleID, qaIndex: qi,
+				globalIndex: base + qi + 1, totalQA: totalQA,
+				qa: &sample.QA[qi], contextText: contextText, sample: sample,
+			}, answerClient, judgeClient, "legacy-llm")
+			qaResults[qi] = out.result
+		}
+
+		if concurrency <= 1 {
+			for qi := range sample.QA {
+				evalOne(qi)
+			}
+		} else {
+			// sem bounds the number of questions in flight.
+			sem := make(chan struct{}, concurrency)
+			var wg sync.WaitGroup
+			for qi := range sample.QA {
+				wg.Add(1)
+				// qi is passed as an argument so each goroutine gets its own
+				// copy regardless of the Go version's loop-variable semantics.
+				go func(qi int) {
+					defer wg.Done()
+					sem <- struct{}{}
+					defer func() { <-sem }()
+					evalOne(qi) // safe: each goroutine writes a distinct index
+				}(qi)
+			}
+			wg.Wait()
+		}
+
+		results = append(results, EvalResult{
+			Mode:      "legacy-llm",
+			SampleID:  sample.SampleID,
+			QAResults: qaResults,
+			Agg:       aggregateMetrics(qaResults),
+		})
+	}
+	return results
+}
+
+// buildSeahorseContext retrieves context for a seahorse QA item.
+//
+// The question is split into keywords, each keyword is searched individually
+// within the sample's conversation, and the union of hits is ordered by best
+// rank (lower is better), expanded to full message content and truncated to
+// budgetTokens.
+//
+// Returns the assembled context, or "" when nothing was retrieved or the
+// expansion failed. Search and expand failures are logged (matching
+// EvalSeahorse) rather than silently dropped, so an empty context caused by a
+// backend error can be told apart from a genuine miss.
+//
+// The caller is expected to have verified that ir.ConvMap contains
+// sample.SampleID; a missing entry yields the zero conversation ID.
+func buildSeahorseContext(
+	ctx context.Context,
+	ir *SeahorseIngestResult,
+	sample *LocomoSample,
+	qa *LocomoQA,
+	budgetTokens int,
+) string {
+	store := ir.Engine.GetRetrieval().Store()
+	retrieval := ir.Engine.GetRetrieval()
+	convID := ir.ConvMap[sample.SampleID]
+
+	// Union the per-keyword hits, keeping the best (lowest) rank per message.
+	keywords := ExtractKeywords(qa.Question)
+	bestRank := map[int64]float64{}
+	for _, kw := range keywords {
+		searchResults, err := store.SearchMessages(ctx, seahorse.SearchInput{
+			Pattern:        kw,
+			ConversationID: convID,
+			Limit:          20,
+		})
+		if err != nil {
+			log.Printf("WARN: search failed for keyword %q: %v", kw, err)
+			continue
+		}
+		for _, sr := range searchResults {
+			if sr.MessageID <= 0 {
+				continue
+			}
+			if prev, seen := bestRank[sr.MessageID]; !seen || sr.Rank < prev {
+				bestRank[sr.MessageID] = sr.Rank
+			}
+		}
+	}
+
+	messageIDs := make([]int64, 0, len(bestRank))
+	for id := range bestRank {
+		messageIDs = append(messageIDs, id)
+	}
+	sort.Slice(messageIDs, func(i, j int) bool {
+		ri, rj := bestRank[messageIDs[i]], bestRank[messageIDs[j]]
+		if ri != rj {
+			return ri < rj
+		}
+		// Map iteration is random and sort.Slice is not stable; break ties on
+		// the message ID so the context is identical on every run.
+		return messageIDs[i] < messageIDs[j]
+	})
+
+	var contentParts []string
+	if len(messageIDs) > 0 {
+		expandResult, err := retrieval.ExpandMessages(ctx, messageIDs)
+		if err != nil {
+			log.Printf("WARN: expand failed for sample %s: %v", sample.SampleID, err)
+		} else {
+			for _, msg := range expandResult.Messages {
+				contentParts = append(contentParts, msg.Content)
+			}
+		}
+	}
+	if len(contentParts) == 0 {
+		return ""
+	}
+	truncated, _ := BudgetTruncate(contentParts, budgetTokens)
+	return StringListToContent(truncated)
+}
+
+// EvalSeahorseLLM evaluates seahorse retrieval using LLM generation + LLM-as-Judge.
+//
+// For each question a context is retrieved with buildSeahorseContext, answered
+// by answerClient and scored by judgeClient (see evalQAWorker). A question
+// with no retrieved context is recorded with a score and hit rate of 0 without
+// calling either LLM.
+//
+// Samples without an entry in ir.ConvMap are logged and skipped, so the
+// returned slice may be shorter than samples. concurrency is the maximum
+// number of questions of one sample evaluated in parallel; values below 1 are
+// treated as 1 (sequential).
+//
+// Returns one EvalResult with Mode "seahorse-llm" per evaluated sample, with
+// QAResults in the sample's question order.
+func EvalSeahorseLLM(
+	ctx context.Context,
+	samples []LocomoSample,
+	ir *SeahorseIngestResult,
+	budgetTokens int,
+	answerClient, judgeClient *LLMClient,
+	concurrency int,
+) []EvalResult {
+	if concurrency < 1 {
+		concurrency = 1
+	}
+	totalQA := countTotalQA(samples)
+	results := make([]EvalResult, 0, len(samples))
+
+	// qaOffset is the number of QA pairs in the samples seen so far. Samples
+	// can have different numbers of questions, so the global index has to be
+	// derived from this running total rather than from si*len(sample.QA).
+	qaOffset := 0
+
+	for si := range samples {
+		sample := &samples[si]
+		// Advance the offset before the skip check: totalQA counts the
+		// questions of skipped samples too, so positions stay consistent.
+		base := qaOffset
+		qaOffset += len(sample.QA)
+
+		if _, ok := ir.ConvMap[sample.SampleID]; !ok {
+			log.Printf("WARN: no conversation ID for sample %s", sample.SampleID)
+			continue
+		}
+
+		qaResults := make([]QAResult, len(sample.QA))
+
+		// evalOne evaluates question qi and stores its result at index qi.
+		evalOne := func(qi int) {
+			qa := &sample.QA[qi]
+			contextText := buildSeahorseContext(ctx, ir, sample, qa, budgetTokens)
+			if contextText == "" {
+				qaResults[qi] = QAResult{
+					Question:   qa.Question,
+					Category:   qa.Category,
+					GoldAnswer: qa.AnswerString(),
+					TokenF1:    0.0,
+					HitRate:    0.0,
+				}
+				log.Printf("[seahorse-llm] sample=%s q=%d/%d score=0.00 answer=(no context)",
+					sample.SampleID, base+qi+1, totalQA)
+				return
+			}
+			out := evalQAWorker(ctx, qaWork{
+				sampleID: sample.SampleID, qaIndex: qi,
+				globalIndex: base + qi + 1, totalQA: totalQA,
+				qa: qa, contextText: contextText, sample: sample,
+			}, answerClient, judgeClient, "seahorse-llm")
+			qaResults[qi] = out.result
+		}
+
+		if concurrency <= 1 {
+			for qi := range sample.QA {
+				evalOne(qi)
+			}
+		} else {
+			// sem bounds the number of questions in flight.
+			sem := make(chan struct{}, concurrency)
+			var wg sync.WaitGroup
+			for qi := range sample.QA {
+				wg.Add(1)
+				// qi is passed as an argument so each goroutine gets its own
+				// copy regardless of the Go version's loop-variable semantics.
+				go func(qi int) {
+					defer wg.Done()
+					sem <- struct{}{}
+					defer func() { <-sem }()
+					evalOne(qi) // safe: each goroutine writes a distinct index
+				}(qi)
+			}
+			wg.Wait()
+		}
+
+		results = append(results, EvalResult{
+			Mode:      "seahorse-llm",
+			SampleID:  sample.SampleID,
+			QAResults: qaResults,
+			Agg:       aggregateMetrics(qaResults),
+		})
+	}
+	return results
+}
+
+// countTotalQA returns the total number of QA pairs across all samples.
+func countTotalQA(samples []LocomoSample) int {
+	total := 0
+	for idx := 0; idx < len(samples); idx++ {
+		total += len(samples[idx].QA)
+	}
+	return total
+}
+
+// truncateStr flattens s onto one line (newlines become spaces) and, when the
+// result is longer than maxLen runes, cuts it to maxLen runes and appends
+// "...". Counting runes rather than bytes keeps multi-byte characters intact.
+func truncateStr(s string, maxLen int) string {
+	flat := strings.ReplaceAll(s, "\n", " ")
+	if chars := []rune(flat); len(chars) > maxLen {
+		return string(chars[:maxLen]) + "..."
+	}
+	return flat
+}
@@ -0,0 +1,182 @@
+package main
+
+import (
+	"math"
+	"testing"
+)
+
+// TestComputeModeAggAllCategories feeds computeModeAgg one sample holding a
+// single question in each of categories 1-5 and checks that every category is
+// reported with a question count of 1 and the F1 value of its one question.
+func TestComputeModeAggAllCategories(t *testing.T) {
+	sample := EvalResult{
+		Mode:     "test",
+		SampleID: "s1",
+		QAResults: []QAResult{
+			{Category: 1, TokenF1: 0.5, HitRate: 0.8},
+			{Category: 2, TokenF1: 0.3, HitRate: 0.6},
+			{Category: 3, TokenF1: 0.1, HitRate: 0.4},
+			{Category: 4, TokenF1: 0.7, HitRate: 0.9},
+			{Category: 5, TokenF1: 0.2, HitRate: 0.1},
+		},
+	}
+	sample.Agg = aggregateMetrics(sample.QAResults)
+
+	agg := computeModeAgg([]EvalResult{sample})
+
+	// Expected F1 per category; each category has exactly one question.
+	expectedF1 := map[int]float64{1: 0.5, 2: 0.3, 3: 0.1, 4: 0.7, 5: 0.2}
+
+	for cat := 1; cat <= 5; cat++ {
+		entry, found := agg.ByCategory[cat]
+		if !found {
+			t.Errorf("ByCategory missing category %d", cat)
+			continue
+		}
+		if entry.QuestionCount != 1 {
+			t.Errorf("ByCategory[%d].QuestionCount = %d, want 1", cat, entry.QuestionCount)
+		}
+		if want := expectedF1[cat]; math.Abs(entry.F1-want) > 1e-9 {
+			t.Errorf("ByCategory[%d].F1 = %.4f, want %.4f", cat, entry.F1, want)
+		}
+	}
+}
+
+func TestComputeModeAgg(t *testing.T) {
+	// Two samples with different question counts:
+	//   sample-a: 2 questions, F1 = [0.4, 0.6] → avg 0.5
+	//   sample-b: 8 questions, F1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] → avg 0.1
+	//
+	// Unweighted (PrintComparison bug): (0.5 + 0.1) / 2 = 0.3
+	// Weighted (correct):              (0.4+0.6 + 0.1*8) / 10 = 1.8 / 10 = 0.18
+	results := []EvalResult{
+		{
+			Mode:     "test",
+			SampleID: "sample-a",
+			QAResults: []QAResult{
+				{TokenF1: 0.4, HitRate: 0.5},
+				{TokenF1: 0.6, HitRate: 0.7},
+			},
+		},
+		{
+			Mode:     "test",
+			SampleID: "sample-b",
+			QAResults: []QAResult{
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+				{TokenF1: 0.1, HitRate: 0.2},
+			},
+		},
+	}
+	// Compute per-sample aggregates
+	for i := range results {
+		results[i].Agg = aggregateMetrics(results[i].QAResults)
+	}
+
+	got := computeModeAgg(results)
+
+	// Weighted: (0.4+0.6+0.1*8) / 10 = 1.8/10 = 0.18
+	wantF1 := 0.18
+	if math.Abs(got.OverallF1-wantF1) > 1e-9 {
+		t.Errorf("OverallF1 = %.6f, want %.6f (weighted average)", got.OverallF1, wantF1)
+	}
+
+	// Weighted: (0.5+0.7+0.2*8) / 10 = 2.8/10 = 0.28
+	wantRecall := 0.28
+	if math.Abs(got.OverallHitRate-wantRecall) > 1e-9 {
+		t.Errorf("OverallHitRate = %.6f, want %.6f (weighted average)", got.OverallHitRate, wantRecall)
+	}
+
+	if got.TotalQuestions != 10 {
+		t.Errorf("TotalQuestions = %d, want 10", got.TotalQuestions)
+	}
+}
+
+// TestAggregateMetricsSentinel verifies that a TokenF1 of -1.0 is excluded
+// from the F1 average and from ValidF1Count, while the same question still
+// counts toward TotalQuestions and the hit-rate average.
+func TestAggregateMetricsSentinel(t *testing.T) {
+	got := aggregateMetrics([]QAResult{
+		{Category: 1, TokenF1: 0.8, HitRate: 0.5},
+		{Category: 1, TokenF1: -1.0, HitRate: 0.3},
+		{Category: 1, TokenF1: 0.4, HitRate: 0.7},
+	})
+
+	if got.ValidF1Count != 2 {
+		t.Errorf("ValidF1Count = %d, want 2", got.ValidF1Count)
+	}
+	if got.TotalQuestions != 3 {
+		t.Errorf("TotalQuestions = %d, want 3", got.TotalQuestions)
+	}
+
+	// F1 averages the two valid entries only.
+	expectedF1 := (0.8 + 0.4) / 2.0
+	if diff := math.Abs(got.OverallF1 - expectedF1); diff > 1e-9 {
+		t.Errorf("OverallF1 = %.6f, want %.6f", got.OverallF1, expectedF1)
+	}
+
+	// Hit rate averages all three entries.
+	expectedHR := (0.5 + 0.3 + 0.7) / 3.0
+	if diff := math.Abs(got.OverallHitRate - expectedHR); diff > 1e-9 {
+		t.Errorf("OverallHitRate = %.6f, want %.6f", got.OverallHitRate, expectedHR)
+	}
+}
+
+// TestAggregateMetricsAllSentinel verifies that when every TokenF1 is the
+// -1.0 sentinel, aggregateMetrics reports no valid F1 entries and an overall
+// F1 of exactly 0.
+func TestAggregateMetricsAllSentinel(t *testing.T) {
+	got := aggregateMetrics([]QAResult{
+		{Category: 1, TokenF1: -1.0, HitRate: 0.5},
+		{Category: 1, TokenF1: -1.0, HitRate: 0.3},
+	})
+
+	if count := got.ValidF1Count; count != 0 {
+		t.Errorf("ValidF1Count = %d, want 0", count)
+	}
+	if f1 := got.OverallF1; f1 != 0 {
+		t.Errorf("OverallF1 = %.6f, want 0", f1)
+	}
+}
+
+func TestComputeModeAggSentinelWeighting(t *testing.T) {
+	results := []EvalResult{
+		{
+			Mode:     "test",
+			SampleID: "s1",
+			QAResults: []QAResult{
+				{Category: 1, TokenF1: 0.8, HitRate: 0.5},
+				{Category: 1, TokenF1: -1.0, HitRate: 0.3},
+			},
+		},
+		{
+			Mode:     "test",
+			SampleID: "s2",
+			QAResults: []QAResult{
+				{Category: 1, TokenF1: 0.4, HitRate: 0.6},
+				{Category: 1, TokenF1: 0.6, HitRate: 0.8},
+			},
+		},
+	}
+	for i := range results {
+		results[i].Agg = aggregateMetrics(results[i].QAResults)
+	}
+
+	got := computeModeAgg(results)
+
+	// s1: ValidF1Count=1, F1=0.8; s2: ValidF1Count=2, F1=0.5
+	// Weighted: (0.8*1 + 0.5*2) / 3 = 1.8/3 = 0.6
+	wantF1 := 0.6
+	if math.Abs(got.OverallF1-wantF1) > 1e-9 {
+		t.Errorf("OverallF1 = %.6f, want %.6f", got.OverallF1, wantF1)
+	}
+	if got.ValidF1Count != 3 {
+		t.Errorf("ValidF1Count = %d, want 3", got.ValidF1Count)
+	}
+	if got.TotalQuestions != 4 {
+		t.Errorf("TotalQuestions = %d, want 4", got.TotalQuestions)
+	}
+}
@@ -0,0 +1,85 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/sipeed/picoclaw/pkg/seahorse"
+)
+
+// ConvMap stores the mapping from sampleID to seahorse ConversationID.
+// IngestSeahorse fills it with one entry per sample, whether the conversation
+// was newly ingested or already present in the database.
+type ConvMap map[string]int64
+
+// SeahorseIngestResult holds the results of ingesting into seahorse.
+// It is returned by IngestSeahorse.
+type SeahorseIngestResult struct {
+	// Engine is the engine created by seahorse.NewEngine for the given DB path.
+	Engine  *seahorse.Engine
+	ConvMap ConvMap // sampleID → conversationID
+}
+
+// IngestSeahorse loads all LOCOMO samples into a seahorse Engine backed by dbPath.
+// Returns the engine and a mapping from sampleID to conversationID for scoped retrieval.
+//
+// Each sample is stored under the session key "locomo-"+SampleID. A sample
+// whose session key already has a conversation is skipped and its existing
+// conversation ID is reused, so calling this repeatedly on the same database
+// does not ingest the turns again.
+//
+// On success the caller owns the returned engine and is responsible for
+// closing it. On any error the engine is closed before returning, so a failed
+// ingest does not leak it.
+func IngestSeahorse(ctx context.Context, samples []LocomoSample, dbPath string) (*SeahorseIngestResult, error) {
+	// The engine requires a completion callback; this one always returns an
+	// empty completion and no error.
+	noopFn := func(ctx context.Context, prompt string, opts seahorse.CompleteOptions) (string, error) {
+		return "", nil
+	}
+
+	engine, err := seahorse.NewEngine(seahorse.Config{
+		DBPath: dbPath,
+	}, noopFn)
+	if err != nil {
+		return nil, fmt.Errorf("create seahorse engine: %w", err)
+	}
+
+	// Release the engine on every failure path below; on success ownership
+	// passes to the caller through the returned result.
+	succeeded := false
+	defer func() {
+		if !succeeded {
+			engine.Close()
+		}
+	}()
+
+	store := engine.GetRetrieval().Store()
+	convMap := make(ConvMap)
+
+	for si := range samples {
+		sample := &samples[si]
+		sessionKey := "locomo-" + sample.SampleID
+
+		// Check if conversation already exists (idempotent).
+		// The lookup error is ignored here and treated as "not found".
+		// NOTE(review): confirm the store returns (nil, nil) for a missing key;
+		// otherwise a genuine lookup failure is silently followed by a re-ingest.
+		existing, _ := store.GetConversationBySessionKey(ctx, sessionKey)
+		if existing != nil {
+			convMap[sample.SampleID] = existing.ConversationID
+			log.Printf("Skipping existing sample %s: convID=%d", sample.SampleID, existing.ConversationID)
+			continue
+		}
+
+		turns := GetTurns(sample)
+
+		// Convert turns to seahorse messages
+		msgs := make([]seahorse.Message, 0, len(turns))
+		for _, turn := range turns {
+			content := turn.Speaker + ": " + turn.Text
+			msgs = append(msgs, seahorse.Message{
+				Role:    "user",
+				Content: content,
+				// Rough estimate: one token per four bytes of the turn text
+				// (the speaker prefix is not counted).
+				TokenCount: len(turn.Text) / 4,
+			})
+		}
+
+		// Ingest all turns for this sample
+		if _, err := engine.Ingest(ctx, sessionKey, msgs); err != nil {
+			return nil, fmt.Errorf("ingest sample %s: %w", sample.SampleID, err)
+		}
+
+		// Get the conversation ID for scoped retrieval
+		conv, err := store.GetConversationBySessionKey(ctx, sessionKey)
+		if err != nil {
+			return nil, fmt.Errorf("get conversation for %s: %w", sample.SampleID, err)
+		}
+		if conv == nil {
+			return nil, fmt.Errorf("conversation not found for %s after ingest", sample.SampleID)
+		}
+		convMap[sample.SampleID] = conv.ConversationID
+		log.Printf("Ingested sample %s: %d turns, convID=%d", sample.SampleID, len(turns), conv.ConversationID)
+	}
+
+	log.Printf("Seahorse ingestion complete: %d samples, %d conversations", len(samples), len(convMap))
+	succeeded = true
+	return &SeahorseIngestResult{
+		Engine:  engine,
+		ConvMap: convMap,
+	}, nil
+}
@@ -0,0 +1,79 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"path/filepath"
+	"testing"
+
+	"github.com/sipeed/picoclaw/pkg/seahorse"
+)
+
+// TestIngestSeahorseIdempotent ingests the same single-sample dataset twice
+// into one database file and checks that the second run reuses the first
+// run's conversation instead of creating a new one or duplicating messages.
+func TestIngestSeahorseIdempotent(t *testing.T) {
+	ctx := context.Background()
+	tmpDir := t.TempDir()
+	dbPath := filepath.Join(tmpDir, "test.db")
+
+	// Minimal test data
+	samples := []LocomoSample{
+		{
+			SampleID: "test-1",
+			Conversation: map[string]json.RawMessage{
+				"session_1": json.RawMessage(`[
+					{"speaker":"A","dia_id":"D1:1","text":"hello world this is a test message"},
+					{"speaker":"B","dia_id":"D1:2","text":"another message for testing purposes"}
+				]`),
+			},
+		},
+	}
+
+	// First ingestion
+	result1, err := IngestSeahorse(ctx, samples, dbPath)
+	if err != nil {
+		t.Fatalf("first ingest failed: %v", err)
+	}
+	convCount1 := len(result1.ConvMap)
+	// Close the first engine before reopening the same DB file; result1.ConvMap
+	// is a plain map and is still read below.
+	result1.Engine.Close()
+
+	// Second ingestion on same DB — should reuse existing data
+	result2, err := IngestSeahorse(ctx, samples, dbPath)
+	if err != nil {
+		t.Fatalf("second ingest failed: %v", err)
+	}
+	defer result2.Engine.Close()
+
+	// ConvMap should have same number of entries (no duplicates)
+	if len(result2.ConvMap) != convCount1 {
+		t.Errorf("second ingest convMap has %d entries, want %d (same as first)",
+			len(result2.ConvMap), convCount1)
+	}
+
+	// Verify conversation IDs are the same (reused, not new ones)
+	for id, cid1 := range result1.ConvMap {
+		cid2, ok := result2.ConvMap[id]
+		if !ok {
+			t.Errorf("sample %s missing from second ConvMap", id)
+			continue
+		}
+		if cid2 != cid1 {
+			t.Errorf("sample %s: second ingest got convID %d, want %d (reused)", id, cid2, cid1)
+		}
+	}
+
+	// Verify no duplicate messages by counting
+	store := result2.Engine.GetRetrieval().Store()
+	for _, convID := range result2.ConvMap {
+		msgs, err := store.SearchMessages(ctx, seahorse.SearchInput{
+			Pattern:        "test",
+			ConversationID: convID,
+			Limit:          100,
+		})
+		if err != nil {
+			t.Fatalf("search failed: %v", err)
+		}
+		// The fixture has only two turns, so at most 2 messages can match
+		// "test"; more than 2 hits means the second ingest duplicated messages.
+		// NOTE(review): whether "testing" in the second turn matches depends on
+		// SearchMessages' matching rules, hence the upper bound rather than an
+		// exact count.
+		if len(msgs) > 2 {
+			t.Errorf("found %d messages for 'test' in conv %d, expected ≤2 (no duplicates)", len(msgs), convID)
+		}
+	}
+}
@@ -0,0 +1,34 @@
+package main
+
+import (
+	"github.com/sipeed/picoclaw/pkg/providers"
+	"github.com/sipeed/picoclaw/pkg/session"
+)
+
+// LegacyStore wraps session.SessionManager for legacy baseline.
+// Messages for a sample are kept under the session key "locomo-"+sampleID.
+type LegacyStore struct {
+	sm *session.SessionManager // underlying session manager, created by NewLegacyStore
+}
+
+// NewLegacyStore returns a LegacyStore backed by a fresh session manager that
+// is constructed with an empty storage path.
+func NewLegacyStore() *LegacyStore {
+	manager := session.NewSessionManager("")
+	return &LegacyStore{sm: manager}
+}
+
+// IngestSample appends every turn of a LOCOMO sample to the legacy session
+// store. Each turn is stored as a "user" message whose content is
+// "<speaker>: <text>", under the session key "locomo-"+SampleID.
+func (ls *LegacyStore) IngestSample(sample *LocomoSample) {
+	key := "locomo-" + sample.SampleID
+	for _, t := range GetTurns(sample) {
+		ls.sm.AddMessage(key, "user", t.Speaker+": "+t.Text)
+	}
+}
+
+// GetHistory returns the messages stored for the given sample, looked up under
+// the session key "locomo-"+sampleID.
+func (ls *LegacyStore) GetHistory(sampleID string) []providers.Message {
+	return ls.sm.GetHistory("locomo-" + sampleID)
+}
@@ -0,0 +1,198 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// LLMClient wraps an OpenAI-compatible chat completion endpoint.
+// Use NewLLMClient to build one with the timeout and retry defaults applied.
+type LLMClient struct {
+	// BaseURL is the API root; Complete appends "/chat/completions" to it.
+	BaseURL    string
+	// Model is sent as the "model" field of every request.
+	Model      string
+	// APIKey, when non-empty, is sent as an "Authorization: Bearer" header.
+	APIKey     string
+	NoThinking bool // ask the server to disable thinking: Complete sets the llama.cpp, Ollama and GLM request fields and prefixes a non-empty system prompt with /no_think
+	MaxRetries int  // max retry attempts for transient errors (0 = no retry)
+	// Client performs the HTTP requests; its Timeout bounds each attempt.
+	Client     *http.Client
+}
+
+// LLMClientOptions configures the LLM client.
+// It is the input to NewLLMClient.
+type LLMClientOptions struct {
+	// BaseURL is the API root; trailing slashes are trimmed by NewLLMClient.
+	BaseURL    string
+	Model      string
+	APIKey     string
+	// Timeout is the per-request HTTP timeout; zero selects 120 seconds.
+	Timeout    time.Duration
+	NoThinking bool
+	MaxRetries int // max retry attempts; 0 means no retries, a negative value selects the default of 3
+}
+
+// NewLLMClient builds an LLMClient for an OpenAI-compatible chat completion API.
+//
+// Defaults applied to opts:
+//   - a zero Timeout becomes 120 seconds;
+//   - a negative MaxRetries becomes 3 (zero is kept and means "no retries");
+//   - trailing slashes are trimmed from BaseURL.
+func NewLLMClient(opts LLMClientOptions) *LLMClient {
+	timeout := opts.Timeout
+	if timeout == 0 {
+		timeout = 120 * time.Second
+	}
+
+	retries := opts.MaxRetries
+	if retries < 0 {
+		retries = 3
+	}
+
+	httpClient := &http.Client{Timeout: timeout}
+	return &LLMClient{
+		BaseURL:    strings.TrimRight(opts.BaseURL, "/"),
+		Model:      opts.Model,
+		APIKey:     opts.APIKey,
+		NoThinking: opts.NoThinking,
+		MaxRetries: retries,
+		Client:     httpClient,
+	}
+}
+
+// chatRequest is the JSON body that Complete posts to /chat/completions.
+// The last three fields are alternative, server-specific switches for
+// disabling thinking output; they are omitted from the JSON when unset.
+type chatRequest struct {
+	Model              string         `json:"model"`
+	Messages           []chatMessage  `json:"messages"`
+	Temperature        float64        `json:"temperature"`
+	MaxTokens          int            `json:"max_tokens"`
+	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"` // llama.cpp
+	Think              *bool          `json:"think,omitempty"`                // Ollama
+	Thinking           map[string]any `json:"thinking,omitempty"`             // GLM (Zhipu AI)
+}
+
+// chatMessage is one entry of chatRequest.Messages. Complete uses the roles
+// "system" and "user".
+type chatMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+// chatResponse is the subset of the chat-completion response that Complete
+// reads: the message of each choice. Only the first choice is used.
+type chatResponse struct {
+	Choices []struct {
+		Message struct {
+			Content          string `json:"content"`
+			// ReasoningContent is used by Complete as a fallback reply when
+			// Content is empty.
+			ReasoningContent string `json:"reasoning_content,omitempty"`
+		} `json:"message"`
+	} `json:"choices"`
+}
+
+// Complete sends one chat-completion request and returns the assistant's
+// reply text.
+//
+// The request carries an optional system message (omitted when systemPrompt is
+// empty) followed by the user message, with temperature 0.1 and max_tokens 512.
+// When c.NoThinking is set, the llama.cpp, Ollama and GLM "disable thinking"
+// fields are all populated and a non-empty system prompt is prefixed with
+// "/no_think".
+//
+// Transport errors, body-read errors, HTTP 429 and HTTP 5xx are retried up to
+// c.MaxRetries times with exponential backoff (1s, 2s, 4s, ...); any other
+// non-200 status is returned immediately. Cancelling ctx during a backoff
+// wait returns ctx.Err().
+//
+// From the first choice, everything up to and including the first "</think>"
+// is dropped; if the remaining content is empty, reasoning_content is used
+// instead. An error is returned when the response cannot be parsed, has no
+// choices, or yields an empty reply.
+func (c *LLMClient) Complete(ctx context.Context, systemPrompt, userPrompt string) (string, error) {
+	msgs := make([]chatMessage, 0, 2)
+	if systemPrompt != "" {
+		sys := systemPrompt
+		if c.NoThinking {
+			// Prepend /no_think tag — works with Ollama /v1 endpoint and
+			// Qwen chat templates where the JSON think field is ignored.
+			sys = "/no_think\n" + sys
+		}
+		msgs = append(msgs, chatMessage{Role: "system", Content: sys})
+	}
+	msgs = append(msgs, chatMessage{Role: "user", Content: userPrompt})
+
+	payload := chatRequest{
+		Model:       c.Model,
+		Messages:    msgs,
+		Temperature: 0.1,
+		MaxTokens:   512,
+	}
+	if c.NoThinking {
+		disabled := false
+		// llama.cpp: chat_template_kwargs
+		payload.ChatTemplateKwargs = map[string]any{"enable_thinking": false}
+		// Ollama (0.9+): think field
+		payload.Think = &disabled
+		// GLM: thinking field
+		payload.Thinking = map[string]any{"type": "disabled"}
+	}
+
+	encoded, err := json.Marshal(payload)
+	if err != nil {
+		return "", fmt.Errorf("marshal request: %w", err)
+	}
+
+	endpoint := strings.TrimRight(c.BaseURL, "/") + "/chat/completions"
+
+	// newRequest builds a fresh POST with its own body reader; a request
+	// cannot be reused once its body has been consumed by an attempt.
+	newRequest := func() (*http.Request, error) {
+		r, buildErr := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewReader(encoded))
+		if buildErr != nil {
+			return nil, fmt.Errorf("create request: %w", buildErr)
+		}
+		r.Header.Set("Content-Type", "application/json")
+		if c.APIKey != "" {
+			r.Header.Set("Authorization", "Bearer "+c.APIKey)
+		}
+		return r, nil
+	}
+
+	req, err := newRequest()
+	if err != nil {
+		return "", err
+	}
+
+	var (
+		respBody []byte
+		lastErr  error
+	)
+	for attempt := 0; attempt <= c.MaxRetries; attempt++ {
+		if attempt > 0 {
+			backoff := time.Duration(1<<(attempt-1)) * time.Second // 1s, 2s, 4s, ...
+			log.Printf("LLM retry %d/%d after %v: %v", attempt, c.MaxRetries, backoff, lastErr)
+			select {
+			case <-ctx.Done():
+				return "", ctx.Err()
+			case <-time.After(backoff):
+			}
+			if req, err = newRequest(); err != nil {
+				return "", err
+			}
+		}
+
+		resp, doErr := c.Client.Do(req)
+		if doErr != nil {
+			lastErr = doErr
+			continue // network/timeout error → retry
+		}
+
+		respBody, lastErr = io.ReadAll(resp.Body)
+		resp.Body.Close()
+		if lastErr != nil {
+			continue
+		}
+
+		switch code := resp.StatusCode; {
+		case code == 429 || code >= 500:
+			// rate limit or server error → retry
+			lastErr = fmt.Errorf("API error %d: %s", code, string(respBody))
+			continue
+		case code != 200:
+			return "", fmt.Errorf("API error %d: %s", code, string(respBody))
+		}
+
+		lastErr = nil
+		break
+	}
+	if lastErr != nil {
+		return "", fmt.Errorf("after %d retries: %w", c.MaxRetries, lastErr)
+	}
+
+	var parsed chatResponse
+	if err := json.Unmarshal(respBody, &parsed); err != nil {
+		return "", fmt.Errorf("parse response: %w", err)
+	}
+	if len(parsed.Choices) == 0 {
+		return "", fmt.Errorf("no choices in response")
+	}
+
+	first := parsed.Choices[0].Message
+	reply := strings.TrimSpace(first.Content)
+	// Strip any residual <think>...</think> blocks
+	if end := strings.Index(reply, "</think>"); end >= 0 {
+		reply = strings.TrimSpace(reply[end+len("</think>"):])
+	}
+	// Fallback: GLM/DeepSeek put thinking output in reasoning_content when thinking is enabled
+	if reply == "" {
+		reply = strings.TrimSpace(first.ReasoningContent)
+	}
+	if reply == "" {
+		return "", fmt.Errorf("empty LLM response")
+	}
+	return reply, nil
+}
@@ -0,0 +1,142 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// LocomoSample represents one conversation sample from the LOCOMO dataset.
+type LocomoSample struct {
+	SampleID     string                     `json:"sample_id"`
+	// Conversation holds raw JSON values keyed by name. The keys read in this
+	// file are "session_N" (an array of turns, see GetTurns), "speaker_a" and
+	// "speaker_b" (see GetSpeakers); keys containing "_date_time" are skipped
+	// when listing sessions.
+	Conversation map[string]json.RawMessage `json:"conversation"`
+	QA           []LocomoQA                 `json:"qa"`
+}
+
+// LocomoTurn represents a single turn in a conversation.
+type LocomoTurn struct {
+	Speaker string `json:"speaker"`
+	DiaID   string `json:"dia_id"` // turn identifier such as "D1:3"; looked up by GetTurnByDiaID
+	Text    string `json:"text"`
+}
+
+// LocomoQA represents a question-answer pair with evidence.
+// Use AnswerString to obtain the gold answer as text.
+//
+// NOTE(review): the Answer comment refers to categories 1-4, but the Category
+// comment does not describe category 4 — confirm the category meanings against
+// the dataset documentation.
+type LocomoQA struct {
+	Question          string          `json:"question"`
+	Answer            json.RawMessage `json:"answer"`             // can be string or int (category 1-4)
+	AdversarialAnswer string          `json:"adversarial_answer"` // category 5 only
+	Evidence          []string        `json:"evidence"`
+	Category          int             `json:"category"` // 1=single-hop, 2=multi-hop, 3=open-ended, 5=adversarial
+}
+
+// AnswerString returns the gold answer as a string.
+//
+// The "answer" field is preferred: a JSON string is returned as-is, a JSON
+// number is returned in its original textual form, and any other JSON value
+// is returned as its raw text with surrounding double quotes trimmed.
+//
+// When "answer" is absent or is an explicit JSON null, the adversarial_answer
+// field is returned instead. (A null used to decode "successfully" into an
+// empty string, which hid the adversarial answer.)
+func (qa *LocomoQA) AnswerString() string {
+	raw := strings.TrimSpace(string(qa.Answer))
+	if len(qa.Answer) == 0 || raw == "null" {
+		// Fallback to adversarial_answer (category 5)
+		return qa.AdversarialAnswer
+	}
+
+	var s string
+	if err := json.Unmarshal(qa.Answer, &s); err == nil {
+		return s
+	}
+	// json.Number keeps the number's literal text, so integers are not
+	// reformatted as floats.
+	var n json.Number
+	if err := json.Unmarshal(qa.Answer, &n); err == nil {
+		return n.String()
+	}
+	return strings.Trim(string(qa.Answer), `"`)
+}
+
+// LoadDataset parses every regular file in dataDir whose name ends in ".json"
+// and returns the concatenated samples. Each file must contain a JSON array of
+// samples. Subdirectories are not descended into. The first read or parse
+// failure aborts the load and is returned with the offending path.
+func LoadDataset(dataDir string) ([]LocomoSample, error) {
+	dirEntries, err := os.ReadDir(dataDir)
+	if err != nil {
+		return nil, fmt.Errorf("read data dir %s: %w", dataDir, err)
+	}
+
+	var loaded []LocomoSample
+	for _, de := range dirEntries {
+		if de.IsDir() || !strings.HasSuffix(de.Name(), ".json") {
+			continue
+		}
+
+		file := filepath.Join(dataDir, de.Name())
+		raw, readErr := os.ReadFile(file)
+		if readErr != nil {
+			return nil, fmt.Errorf("read file %s: %w", file, readErr)
+		}
+
+		var parsed []LocomoSample
+		if parseErr := json.Unmarshal(raw, &parsed); parseErr != nil {
+			return nil, fmt.Errorf("parse file %s: %w", file, parseErr)
+		}
+		loaded = append(loaded, parsed...)
+	}
+	return loaded, nil
+}
+
+// GetSessionNames returns the session keys (session_1, session_2, ...) of a
+// conversation, sorted by their numeric suffix so that session_10 follows
+// session_2.
+//
+// A key qualifies when it starts with "session_" and does not contain
+// "_date_time". Keys whose suffix is not a plain number all sort as 0; ties
+// are broken by key name so the result does not depend on map iteration
+// order.
+func GetSessionNames(conv map[string]json.RawMessage) []string {
+	var names []string
+	for k := range conv {
+		if strings.HasPrefix(k, "session_") && !strings.Contains(k, "_date_time") {
+			names = append(names, k)
+		}
+	}
+	sort.Slice(names, func(i, j int) bool {
+		ni, nj := sessionNum(names[i]), sessionNum(names[j])
+		if ni != nj {
+			return ni < nj
+		}
+		// Equal numbers: fall back to the name for a deterministic order.
+		return names[i] < names[j]
+	})
+	return names
+}
+
+// sessionNum extracts the number after the first underscore of a session key:
+// "session_1" → 1, "session_10" → 10. It returns 0 when the key has no
+// underscore or when the remainder is not a valid integer.
+func sessionNum(key string) int {
+	_, suffix, found := strings.Cut(key, "_")
+	if !found {
+		return 0
+	}
+	num, _ := strconv.Atoi(suffix)
+	return num
+}
+
+// GetTurns concatenates the turns of every session of the sample, visiting
+// sessions in the order returned by GetSessionNames. A session whose JSON
+// cannot be decoded as a list of turns is logged and skipped.
+func GetTurns(sample *LocomoSample) []LocomoTurn {
+	var flat []LocomoTurn
+	for _, sessionKey := range GetSessionNames(sample.Conversation) {
+		payload, present := sample.Conversation[sessionKey]
+		if !present {
+			continue
+		}
+
+		var sessionTurns []LocomoTurn
+		if err := json.Unmarshal(payload, &sessionTurns); err != nil {
+			log.Printf("WARNING: unmarshal failed for session %q in sample %s: %v", sessionKey, sample.SampleID, err)
+			continue
+		}
+		flat = append(flat, sessionTurns...)
+	}
+	return flat
+}
+
+// GetTurnByDiaID returns the first turn of the sample whose dia_id equals
+// diaID (e.g. "D1:3"), or nil when no turn matches. The returned pointer
+// refers to a freshly decoded copy, not to data stored in the sample.
+func GetTurnByDiaID(sample *LocomoSample, diaID string) *LocomoTurn {
+	all := GetTurns(sample)
+	for idx := range all {
+		if candidate := &all[idx]; candidate.DiaID == diaID {
+			return candidate
+		}
+	}
+	return nil
+}
+
+// GetSpeakers returns the values stored under "speaker_a" and "speaker_b" in
+// the conversation metadata. A name that is missing or is not a JSON string
+// comes back as the empty string; decode errors are deliberately ignored.
+func GetSpeakers(conv map[string]json.RawMessage) (string, string) {
+	readName := func(key string) string {
+		var name string
+		_ = json.Unmarshal(conv[key], &name)
+		return name
+	}
+	return readName("speaker_a"), readName("speaker_b")
+}
@@ -0,0 +1,67 @@
+package main
+
+import (
+	"encoding/json"
+	"testing"
+)
+
+// TestAnswerString decodes QA entries from JSON and checks the string that
+// AnswerString produces for string answers, integer answers, adversarial-only
+// entries, and entries carrying both fields.
+func TestAnswerString(t *testing.T) {
+	cases := []struct {
+		label    string
+		payload  string
+		expected string
+	}{
+		{
+			label:    "string answer",
+			payload:  `{"question":"Q","answer":"Paris","evidence":[],"category":1}`,
+			expected: "Paris",
+		},
+		{
+			label:    "int answer",
+			payload:  `{"question":"Q","answer":42,"evidence":[],"category":1}`,
+			expected: "42",
+		},
+		{
+			label:    "adversarial answer (category 5)",
+			payload:  `{"question":"Q","evidence":[],"category":5,"adversarial_answer":"self-care is important"}`,
+			expected: "self-care is important",
+		},
+		{
+			label:    "both answer and adversarial_answer present",
+			payload:  `{"question":"Q","answer":"normal","evidence":[],"category":5,"adversarial_answer":"adversarial"}`,
+			expected: "normal",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.label, func(t *testing.T) {
+			var decoded LocomoQA
+			if err := json.Unmarshal([]byte(tc.payload), &decoded); err != nil {
+				t.Fatalf("unmarshal: %v", err)
+			}
+			if actual := decoded.AnswerString(); actual != tc.expected {
+				t.Errorf("AnswerString() = %q, want %q", actual, tc.expected)
+			}
+		})
+	}
+}
+
+// TestGetSessionNames checks that only plain session keys are returned
+// (date-time and speaker keys are dropped) and that they are ordered
+// numerically, so session_10 comes after session_2.
+func TestGetSessionNames(t *testing.T) {
+	got := GetSessionNames(map[string]json.RawMessage{
+		"session_2":           {},
+		"session_1":           {},
+		"session_10":          {},
+		"session_1_date_time": {},
+		"speaker_a":           {},
+	})
+	expected := []string{"session_1", "session_2", "session_10"}
+
+	if len(got) != len(expected) {
+		t.Fatalf("got %v, want %v", got, expected)
+	}
+	for idx := range got {
+		if got[idx] != expected[idx] {
+			t.Errorf("names[%d] = %q, want %q", idx, got[idx], expected[idx])
+		}
+	}
+}
@@ -0,0 +1,361 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/spf13/cobra"
+
+	"github.com/sipeed/picoclaw/pkg/logger"
+)
+
+// Command-line flag values. main binds each variable to the like-named flag
+// on the subcommands that accept it; the run* functions and buildLLMOptions
+// read them after cobra has parsed the command line.
+var (
+	flagData         string
+	flagOut          string
+	flagMode         string
+	flagBudget       int
+	flagEvalMode     string
+	flagAPIBase      string
+	flagAPIKey       string
+	flagModel        string
+	flagNoThinking   bool
+	flagLimit        int
+	flagTimeout      int
+	flagRetries      int
+	flagJudgeModel   string
+	flagJudgeAPIBase string
+	flagJudgeAPIKey  string
+	flagConcurrency  int
+)
+
+// main wires up the membench CLI: the ingest, eval, report and run
+// subcommands and their flags, then executes the root command. The process
+// exits with status 1 when the command returns an error.
+//
+// The eval and run subcommands accept the same evaluation flags, so those are
+// registered through one shared helper instead of two hand-copied lists that
+// could drift apart.
+func main() {
+	// Suppress seahorse INFO logs during benchmark
+	logger.SetLevel(logger.WARN)
+
+	rootCmd := &cobra.Command{
+		Use:   "membench",
+		Short: "Memory benchmark tool for picoclaw",
+	}
+
+	// addDatasetFlags registers --data, --out and --mode; only the --mode help
+	// text differs between subcommands.
+	addDatasetFlags := func(cmd *cobra.Command, modeUsage string) {
+		fs := cmd.Flags()
+		fs.StringVar(&flagData, "data", "", "LOCOMO dataset directory (required)")
+		fs.StringVar(&flagOut, "out", "./bench-out", "output working directory")
+		fs.StringVar(&flagMode, "mode", "all", modeUsage)
+	}
+
+	// addEvalFlags registers the evaluation flags shared by eval and run.
+	addEvalFlags := func(cmd *cobra.Command) {
+		fs := cmd.Flags()
+		fs.IntVar(&flagBudget, "budget", 4000, "token budget for retrieval")
+		fs.StringVar(&flagEvalMode, "eval-mode", "token",
+			"evaluation mode: token (direct match) or llm (LLM-as-Judge)")
+		fs.StringVar(&flagAPIBase, "api-base", "",
+			"API base URL with version path, e.g. http://host/v1 (default: http://127.0.0.1:8080/v1, env: MEMBENCH_API_BASE)")
+		fs.StringVar(&flagAPIKey, "api-key", "", "API key for the LLM endpoint (env: MEMBENCH_API_KEY)")
+		fs.StringVar(&flagModel, "model", "", "model name for LLM eval (env: MEMBENCH_MODEL)")
+		fs.BoolVar(&flagNoThinking, "no-thinking", false,
+			"disable thinking mode via chat_template_kwargs (llama.cpp + Qwen)")
+		fs.IntVar(&flagLimit, "limit", 0, "max QA questions per sample (0 = all)")
+		fs.IntVar(&flagTimeout, "timeout", 120, "HTTP timeout in seconds for LLM requests")
+		fs.IntVar(&flagRetries, "retries", 3, "max retry attempts for transient LLM errors (timeout/5xx/429)")
+		fs.StringVar(&flagJudgeModel, "judge-model", "", "model for judge scoring (defaults to --model)")
+		fs.StringVar(&flagJudgeAPIBase, "judge-api-base", "",
+			"API base URL for judge model (defaults to --api-base)")
+		fs.StringVar(&flagJudgeAPIKey, "judge-api-key", "", "API key for judge model (defaults to --api-key)")
+		fs.IntVar(&flagConcurrency, "concurrency", 1, "number of concurrent QA evaluations")
+	}
+
+	ingestCmd := &cobra.Command{
+		Use:   "ingest",
+		Short: "Load LOCOMO data into storage backends",
+		RunE:  runIngest,
+	}
+	addDatasetFlags(ingestCmd, "modes to ingest: legacy, seahorse, or all")
+
+	evalCmd := &cobra.Command{
+		Use:   "eval",
+		Short: "Run QA evaluation against ingested data",
+		RunE:  runEval,
+	}
+	addDatasetFlags(evalCmd, "modes to evaluate: legacy, seahorse, or all")
+	addEvalFlags(evalCmd)
+
+	reportCmd := &cobra.Command{
+		Use:   "report",
+		Short: "Output comparison results from evaluation",
+		RunE:  runReport,
+	}
+	reportCmd.Flags().StringVar(&flagOut, "out", "./bench-out", "output working directory")
+
+	runCmd := &cobra.Command{
+		Use:   "run",
+		Short: "Convenience: eval + report (ingestion is done inline)",
+		RunE:  runAll,
+	}
+	addDatasetFlags(runCmd, "modes to run: legacy, seahorse, or all")
+	addEvalFlags(runCmd)
+
+	rootCmd.AddCommand(ingestCmd, evalCmd, reportCmd, runCmd)
+
+	if err := rootCmd.Execute(); err != nil {
+		os.Exit(1)
+	}
+}
+
+// modesFromFlag expands the --mode flag into the list of modes to process:
+// "all" (case-insensitive) becomes legacy followed by seahorse; any other
+// value is returned lower-cased as a single-element list, without validation.
+func modesFromFlag() []string {
+	mode := strings.ToLower(flagMode)
+	if mode == "all" {
+		return []string{"legacy", "seahorse"}
+	}
+	return []string{mode}
+}
+
+// runIngest implements the "ingest" subcommand: it loads the LOCOMO dataset
+// from --data and ingests it into each backend selected by --mode.
+//
+// For seahorse the database is written to <out>/seahorse.db (the directory is
+// created if needed) and the engine is closed once ingestion finishes, so the
+// database handle is released before the command returns. The legacy store
+// is built only for the duration of the call; nothing in this function
+// persists it.
+//
+// Returns an error when --data is missing, the dataset cannot be loaded,
+// ingestion fails, or --mode names an unknown backend.
+func runIngest(cmd *cobra.Command, args []string) error {
+	if flagData == "" {
+		return fmt.Errorf("--data is required")
+	}
+	modes := modesFromFlag()
+	if len(modes) == 0 {
+		return nil
+	}
+
+	ctx := context.Background()
+	samples, err := LoadDataset(flagData)
+	if err != nil {
+		return fmt.Errorf("load dataset: %w", err)
+	}
+	log.Printf("Loaded %d samples from %s", len(samples), flagData)
+
+	for _, mode := range modes {
+		switch mode {
+		case "legacy":
+			legacy := NewLegacyStore()
+			for i := range samples {
+				legacy.IngestSample(&samples[i])
+			}
+			log.Printf("legacy: ingested %d samples", len(samples))
+		case "seahorse":
+			dbPath := filepath.Join(flagOut, "seahorse.db")
+			if err := os.MkdirAll(flagOut, 0o755); err != nil {
+				return fmt.Errorf("create out dir: %w", err)
+			}
+			result, err := IngestSeahorse(ctx, samples, dbPath)
+			if err != nil {
+				return fmt.Errorf("ingest seahorse: %w", err)
+			}
+			// Only the on-disk database is needed afterwards; release the engine.
+			result.Engine.Close()
+		default:
+			// Previously an unrecognised mode was silently ignored.
+			return fmt.Errorf("unknown --mode %q: must be legacy, seahorse, or all", mode)
+		}
+	}
+	return nil
+}
+
+// runEval implements the "eval" subcommand (and "run", via runAll): it loads
+// the dataset, ingests it inline into each backend selected by --mode,
+// evaluates every QA pair, saves per-sample and aggregated results under
+// --out, and prints the comparison table.
+//
+// --eval-mode selects token matching or LLM-as-judge scoring. In LLM mode the
+// judge shares the answer client unless any of --judge-model,
+// --judge-api-base or --judge-api-key is given; each of those overrides only
+// its own setting and the rest fall back to the answer client's
+// configuration, as the flag help describes.
+//
+// Returns an error for a missing --data, an invalid --eval-mode, an unknown
+// --mode, or any failure while loading, ingesting or saving.
+func runEval(cmd *cobra.Command, args []string) error {
+	if flagData == "" {
+		return fmt.Errorf("--data is required")
+	}
+	modes := modesFromFlag()
+	if len(modes) == 0 {
+		return nil
+	}
+
+	ctx := context.Background()
+	samples, err := LoadDataset(flagData)
+	if err != nil {
+		return fmt.Errorf("load dataset: %w", err)
+	}
+	log.Printf("Loaded %d samples", len(samples))
+
+	if flagLimit > 0 {
+		for i := range samples {
+			if len(samples[i].QA) > flagLimit {
+				samples[i].QA = samples[i].QA[:flagLimit]
+			}
+		}
+		log.Printf("Limited to %d QA per sample", flagLimit)
+	}
+
+	evalMode := strings.ToLower(strings.TrimSpace(flagEvalMode))
+	var useLLM bool
+	switch evalMode {
+	case "token":
+		useLLM = false
+	case "llm":
+		useLLM = true
+	default:
+		return fmt.Errorf("invalid --eval-mode %q: must be token or llm", flagEvalMode)
+	}
+	var answerClient, judgeClient *LLMClient
+	if useLLM {
+		opts, err := buildLLMOptions()
+		if err != nil {
+			return err
+		}
+		answerClient = NewLLMClient(opts)
+		judgeClient = answerClient // default: same client
+		// Any judge-specific flag yields a dedicated judge client. Previously
+		// --judge-api-base and --judge-api-key were ignored unless
+		// --judge-model was also set.
+		if flagJudgeModel != "" || flagJudgeAPIBase != "" || flagJudgeAPIKey != "" {
+			jOpts := opts // copy base settings
+			if flagJudgeModel != "" {
+				jOpts.Model = flagJudgeModel
+			}
+			if flagJudgeAPIBase != "" {
+				jOpts.BaseURL = flagJudgeAPIBase
+			}
+			if flagJudgeAPIKey != "" {
+				jOpts.APIKey = flagJudgeAPIKey
+			}
+			judgeClient = NewLLMClient(jOpts)
+			log.Printf("Judge model: model=%s base=%s no-thinking=%v", jOpts.Model, jOpts.BaseURL, jOpts.NoThinking)
+		}
+		log.Printf("LLM eval mode: model=%s base=%s no-thinking=%v concurrency=%d",
+			opts.Model, opts.BaseURL, opts.NoThinking, flagConcurrency)
+	}
+
+	var tokenResults, llmResults []EvalResult
+
+	for _, mode := range modes {
+		switch mode {
+		case "legacy":
+			legacy := NewLegacyStore()
+			for i := range samples {
+				legacy.IngestSample(&samples[i])
+			}
+			if useLLM {
+				results := EvalLegacyLLM(ctx, samples, legacy, flagBudget, answerClient, judgeClient, flagConcurrency)
+				llmResults = append(llmResults, results...)
+				log.Printf("legacy-llm: evaluated %d samples", len(results))
+			} else {
+				results := EvalLegacy(ctx, samples, legacy, flagBudget)
+				tokenResults = append(tokenResults, results...)
+				log.Printf("legacy: evaluated %d samples", len(results))
+			}
+		case "seahorse":
+			dbPath := filepath.Join(flagOut, "seahorse.db")
+			// Same as runIngest: make sure the directory for the database
+			// exists before the engine opens it.
+			if err := os.MkdirAll(flagOut, 0o755); err != nil {
+				return fmt.Errorf("create out dir: %w", err)
+			}
+			ir, err := IngestSeahorse(ctx, samples, dbPath)
+			if err != nil {
+				return fmt.Errorf("ingest seahorse: %w", err)
+			}
+			// The engine is needed until evaluation is done; release it when
+			// runEval returns.
+			defer ir.Engine.Close()
+			if useLLM {
+				results := EvalSeahorseLLM(ctx, samples, ir, flagBudget, answerClient, judgeClient, flagConcurrency)
+				llmResults = append(llmResults, results...)
+				log.Printf("seahorse-llm: evaluated %d samples", len(results))
+			} else {
+				results := EvalSeahorse(ctx, samples, ir, flagBudget)
+				tokenResults = append(tokenResults, results...)
+				log.Printf("seahorse: evaluated %d samples", len(results))
+			}
+		default:
+			// Previously an unrecognised mode was skipped and empty results saved.
+			return fmt.Errorf("unknown --mode %q: must be legacy, seahorse, or all", mode)
+		}
+	}
+
+	// Build the combined list in a fresh slice so appending never writes into
+	// spare capacity of tokenResults.
+	allResults := make([]EvalResult, 0, len(tokenResults)+len(llmResults))
+	allResults = append(allResults, tokenResults...)
+	allResults = append(allResults, llmResults...)
+	if err := SaveResults(allResults, flagOut); err != nil {
+		return fmt.Errorf("save results: %w", err)
+	}
+	if err := SaveAggregated(allResults, flagOut); err != nil {
+		return fmt.Errorf("save aggregated: %w", err)
+	}
+
+	PrintComparison(tokenResults, llmResults)
+	return nil
+}
+
+// runReport implements the "report" subcommand: it reads every
+// eval_*.json file in --out, splits the results into token and LLM groups by
+// the "-llm" suffix of their mode, and prints the comparison. Files that
+// cannot be read or parsed are logged and skipped; finding no usable result
+// at all is an error.
+func runReport(cmd *cobra.Command, args []string) error {
+	dirEntries, err := os.ReadDir(flagOut)
+	if err != nil {
+		return fmt.Errorf("read out dir: %w", err)
+	}
+
+	var tokenResults, llmResults []EvalResult
+	loaded := 0
+	for _, de := range dirEntries {
+		name := de.Name()
+		if de.IsDir() || !strings.HasPrefix(name, "eval_") || !strings.HasSuffix(name, ".json") {
+			continue
+		}
+
+		file := filepath.Join(flagOut, name)
+		raw, readErr := os.ReadFile(file)
+		if readErr != nil {
+			log.Printf("WARN: read %s: %v", file, readErr)
+			continue
+		}
+		var result EvalResult
+		if parseErr := json.Unmarshal(raw, &result); parseErr != nil {
+			log.Printf("WARN: parse %s: %v", file, parseErr)
+			continue
+		}
+
+		loaded++
+		if strings.HasSuffix(result.Mode, "-llm") {
+			llmResults = append(llmResults, result)
+		} else {
+			tokenResults = append(tokenResults, result)
+		}
+	}
+
+	if loaded == 0 {
+		return fmt.Errorf("no eval results found in %s", flagOut)
+	}
+
+	PrintComparison(tokenResults, llmResults)
+	return nil
+}
+
+// runAll implements the "run" subcommand by delegating directly to runEval,
+// which already ingests inline and prints the comparison after evaluating.
+func runAll(cmd *cobra.Command, args []string) error {
+	return runEval(cmd, args)
+}
+
+// envOrFlag resolves a setting that can come from a flag or the environment:
+// a non-empty flag value wins; otherwise the value of the environment
+// variable envKey is returned (empty when it is unset).
+func envOrFlag(flag, envKey string) string {
+	if flag == "" {
+		return os.Getenv(envKey)
+	}
+	return flag
+}
+
+// buildLLMOptions resolves LLM client configuration from flags and environment
+// variables. Flag values take precedence over environment variables.
+//
+// Environment variables:
+//
+//	MEMBENCH_API_BASE  – OpenAI-compatible base URL  (default http://127.0.0.1:8080/v1)
+//	MEMBENCH_API_KEY   – Bearer token for the endpoint
+//	MEMBENCH_MODEL     – Model name to send in the request
+func buildLLMOptions() (LLMClientOptions, error) {
+	base := envOrFlag(flagAPIBase, "MEMBENCH_API_BASE")
+	if base == "" {
+		base = "http://127.0.0.1:8080/v1"
+	}
+	model := envOrFlag(flagModel, "MEMBENCH_MODEL")
+	if model == "" {
+		return LLMClientOptions{}, fmt.Errorf(
+			"--model or MEMBENCH_MODEL is required for LLM eval mode",
+		)
+	}
+	apiKey := envOrFlag(flagAPIKey, "MEMBENCH_API_KEY")
+
+	if flagTimeout <= 0 {
+		return LLMClientOptions{}, fmt.Errorf("--timeout must be > 0, got %d", flagTimeout)
+	}
+
+	return LLMClientOptions{
+		BaseURL:    base,
+		Model:      model,
+		APIKey:     apiKey,
+		NoThinking: flagNoThinking,
+		Timeout:    time.Duration(flagTimeout) * time.Second,
+		MaxRetries: flagRetries,
+	}, nil
+}
@@ -0,0 +1,227 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"regexp"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// diaIDRe matches a complete dia_id such as "D1:3" or "D30:5". The pattern is
+// anchored at both ends; capture group 1 is the digit run after "D" and
+// capture group 2 is the digit run after the colon.
+var diaIDRe = regexp.MustCompile(`^D(\d+):(\d+)$`)
+
+// SplitEvidenceIDs breaks an evidence string into its individual dia_ids.
+// Semicolons and any whitespace act as separators; tokens that do not match
+// diaIDRe are dropped and the survivors are passed through NormalizeDiaID.
+// The result is nil when no valid ID is present.
+// Example: "D8:6; D9:17" → ["D8:6", "D9:17"]
+// Example: "D9:1 D4:4 D4:6" → ["D9:1", "D4:4", "D4:6"]
+func SplitEvidenceIDs(evidence string) []string {
+	isSeparator := func(r rune) bool {
+		return r == ';' || unicode.IsSpace(r)
+	}
+
+	var valid []string
+	for _, candidate := range strings.FieldsFunc(evidence, isSeparator) {
+		if !diaIDRe.MatchString(candidate) {
+			continue
+		}
+		valid = append(valid, NormalizeDiaID(candidate))
+	}
+	return valid
+}
+
+// NormalizeDiaID strips leading zeros from the number parts of a dia_id.
+// "D30:05" → "D30:5", "D10:003" → "D10:3"
+//
+// The input is returned unchanged when it does not match diaIDRe, and also
+// when either number does not fit in an int: strconv.Atoi reports a range
+// error together with a clamped value, and formatting that value would
+// silently turn the ID into a different one.
+func NormalizeDiaID(id string) string {
+	m := diaIDRe.FindStringSubmatch(id)
+	if m == nil {
+		return id
+	}
+	session, sessionErr := strconv.Atoi(m[1])
+	turn, turnErr := strconv.Atoi(m[2])
+	if sessionErr != nil || turnErr != nil {
+		// The regexp guarantees digits only, so the only failure is overflow.
+		return id
+	}
+	return fmt.Sprintf("D%d:%d", session, turn)
+}
+
+// stopwords is a fixed set of lowercase English stopwords used by
+// ExtractKeywords for deterministic keyword extraction. It is used purely as
+// a membership set (the struct{} values carry no data). Entries are grouped
+// by kind: articles, auxiliaries, question words, prepositions/conjunctions,
+// quantifiers, modals, and pronouns.
+var stopwords = map[string]struct{}{
+	"a": {}, "an": {}, "the": {},
+	"is": {}, "are": {}, "was": {}, "were": {},
+	"did": {}, "does": {}, "do": {},
+	"when": {}, "where": {}, "what": {}, "who": {},
+	"how": {}, "why": {},
+	"to": {}, "of": {}, "in": {}, "on": {}, "at": {},
+	"for": {}, "and": {}, "or": {}, "but": {}, "not": {},
+	"it": {}, "this": {}, "that": {}, "with": {},
+	"from": {}, "by": {}, "as": {},
+	"if": {}, "then": {}, "than": {}, "so": {},
+	"no": {}, "yes": {},
+	"all": {}, "any": {}, "each": {}, "every": {},
+	"some": {}, "such": {},
+	"about": {}, "into": {}, "over": {},
+	"after": {}, "before": {}, "between": {},
+	"through": {}, "during": {}, "until": {},
+	"would": {}, "could": {}, "should": {},
+	"may": {}, "might": {}, "can": {},
+	"will": {}, "shall": {}, "must": {},
+	"have": {}, "has": {}, "had": {},
+	"been": {}, "being": {}, "be": {},
+	"go": {}, "went": {}, "gone": {},
+	"i": {}, "you": {}, "me": {}, "my": {}, "your": {},
+	"we": {}, "they": {}, "them": {}, "our": {},
+	"its": {}, "their": {}, "he": {}, "she": {},
+	"his": {}, "her": {},
+}
+
+// ExtractKeywords lowercases question, splits it on every rune that is
+// neither a letter nor a digit, and returns the first six resulting words
+// that are at least two bytes long and not in the stopwords set.
+// The result is nil when nothing qualifies. No LLM is involved, so the
+// output is deterministic.
+func ExtractKeywords(question string) []string {
+	const maxKeywords = 6
+
+	isDelimiter := func(r rune) bool {
+		return !(unicode.IsLetter(r) || unicode.IsDigit(r))
+	}
+
+	var picked []string
+	for _, word := range strings.FieldsFunc(strings.ToLower(question), isDelimiter) {
+		if len(picked) == maxKeywords {
+			break
+		}
+		_, isStop := stopwords[word]
+		if isStop || len(word) < 2 {
+			continue
+		}
+		picked = append(picked, word)
+	}
+	return picked
+}
+
+// TokenOverlapF1 computes token-level F1 between prediction and reference.
+// Both strings are lowercased and split on whitespace.
+// NOTE: This metric underestimates quality for multi-hop (cat 2) and
+// open-ended (cat 3) questions where the gold answer uses different phrasing
+// than the source text. LLM-Judge scoring is a v2 follow-up.
+func TokenOverlapF1(prediction, reference string) float64 {
+	predTokens := tokenize(prediction)
+	refTokens := tokenize(reference)
+
+	if len(predTokens) == 0 && len(refTokens) == 0 {
+		return 1.0
+	}
+	if len(predTokens) == 0 || len(refTokens) == 0 {
+		return 0.0
+	}
+
+	// Count matches
+	refCount := map[string]int{}
+	for _, t := range refTokens {
+		refCount[t]++
+	}
+
+	predCount := map[string]int{}
+	for _, t := range predTokens {
+		predCount[t]++
+	}
+
+	var matches float64
+	for token, pc := range predCount {
+		if rc, ok := refCount[token]; ok {
+			matches += float64(min(pc, rc))
+		}
+	}
+
+	precision := matches / float64(len(predTokens))
+	recall := matches / float64(len(refTokens))
+
+	if precision+recall == 0 {
+		return 0.0
+	}
+	return 2 * precision * recall / (precision + recall)
+}
+
+// tokenize lowercases s and splits it into whitespace-separated tokens.
+func tokenize(s string) []string {
+	return strings.Fields(strings.ToLower(s))
+}
+
+// RecallHitRate computes the fraction of evidence turns whose text appears
+// (case-insensitively, as a substring) in retrievedContent.
+//
+// Each entry of evidenceIDs may hold several dia_ids (e.g. "D8:6; D9:17");
+// entries are expanded with SplitEvidenceIDs. The rules are:
+//   - no evidence IDs at all: 1.0 (nothing was required);
+//   - no valid dia_id after expansion: 0.0;
+//   - dia_ids that are missing from the sample, or whose turn text is empty
+//     or whitespace-only, are logged and excluded from the denominator;
+//   - no evaluable dia_id left: 0.0.
+//
+// A warning is logged for turns with text shorter than 20 bytes because
+// short texts have a higher false-positive risk in a substring match.
+func RecallHitRate(evidenceIDs []string, sample *LocomoSample, retrievedContent string) float64 {
+	if len(evidenceIDs) == 0 {
+		return 1.0 // no evidence required = perfect
+	}
+
+	// Expand any multi-ID evidence entries (e.g. "D8:6; D9:17" or "D9:1 D4:4")
+	var expanded []string
+	for _, id := range evidenceIDs {
+		expanded = append(expanded, SplitEvidenceIDs(id)...)
+	}
+	if len(expanded) == 0 {
+		log.Printf("WARNING: no valid dia_ids after expanding evidence %v", evidenceIDs)
+		return 0.0
+	}
+
+	// Build turn index once (avoids re-parsing JSON per ID). Keys go through
+	// NormalizeDiaID because the expanded evidence IDs are normalized too;
+	// without this a zero-padded dia_id in the sample could never be found.
+	turns := GetTurns(sample)
+	turnMap := make(map[string]*LocomoTurn, len(turns))
+	for i := range turns {
+		turnMap[NormalizeDiaID(turns[i].DiaID)] = &turns[i]
+	}
+
+	lowerRetrieved := strings.ToLower(retrievedContent)
+	found := 0
+	resolvable := 0
+	for _, diaID := range expanded {
+		turn, ok := turnMap[diaID]
+		if !ok {
+			log.Printf("WARNING: dia_id %q not found in sample %s", diaID, sample.SampleID)
+			continue
+		}
+		// An empty needle always satisfies strings.Contains, and a blank one
+		// matches almost any content, so such a turn would be a guaranteed
+		// false hit. Treat it as not evaluable instead.
+		if strings.TrimSpace(turn.Text) == "" {
+			log.Printf("WARNING: empty turn text for dia_id %s in sample %s; skipping",
+				diaID, sample.SampleID)
+			continue
+		}
+		resolvable++
+		if len(turn.Text) < 20 {
+			log.Printf("WARNING: short turn text (%d chars) for dia_id %s: %q",
+				len(turn.Text), diaID, turn.Text)
+		}
+		if strings.Contains(lowerRetrieved, strings.ToLower(turn.Text)) {
+			found++
+		}
+	}
+	if resolvable == 0 {
+		return 0.0 // no resolvable evidence = can't evaluate
+	}
+	return float64(found) / float64(resolvable)
+}
+
+// BudgetTruncate keeps the leading messages that fit into budgetTokens and
+// returns them together with their estimated token total. A message is
+// estimated at len(message)/4 tokens. Messages are taken from the front
+// (best first) and the scan stops at the first one that would exceed the
+// budget; the very first message is always kept, even when it alone is over
+// budget.
+func BudgetTruncate(messages []string, budgetTokens int) ([]string, int) {
+	var kept []string
+	used := 0
+	for _, msg := range messages {
+		cost := len(msg) / 4
+		overBudget := used+cost > budgetTokens
+		if overBudget && len(kept) != 0 {
+			break
+		}
+		kept = append(kept, msg)
+		used += cost
+	}
+	return kept, used
+}
+
+// StringListToContent joins a list of strings into a single content string.
+func StringListToContent(parts []string) string {
+	return strings.Join(parts, "\n")
+}
@@ -0,0 +1,239 @@
+package main
+
+import (
+	"encoding/json"
+	"math"
+	"testing"
+)
+
+func TestSplitEvidenceIDs(t *testing.T) {
+	tests := []struct {
+		input string
+		want  []string
+	}{
+		{"D1:3", []string{"D1:3"}},
+		{"D8:6; D9:17", []string{"D8:6", "D9:17"}},
+		{"D9:1 D4:4 D4:6", []string{"D9:1", "D4:4", "D4:6"}},
+		{"D22:1 D22:2 D9:10 D9:11", []string{"D22:1", "D22:2", "D9:10", "D9:11"}},
+		{"D21:18 D21:22 D11:15 D11:19", []string{"D21:18", "D21:22", "D11:15", "D11:19"}},
+		{"D30:05", []string{"D30:5"}},
+		{"D", nil},
+		{"D:", nil},
+		{"", nil},
+	}
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			got := SplitEvidenceIDs(tt.input)
+			if len(got) != len(tt.want) {
+				t.Fatalf("SplitEvidenceIDs(%q) = %v, want %v", tt.input, got, tt.want)
+			}
+			for i := range got {
+				if got[i] != tt.want[i] {
+					t.Errorf("[%d] = %q, want %q", i, got[i], tt.want[i])
+				}
+			}
+		})
+	}
+}
+
+// TestNormalizeDiaID checks that leading zeros are stripped from the numeric
+// parts and that already-normalized IDs (including a zero turn) are kept.
+func TestNormalizeDiaID(t *testing.T) {
+	cases := []struct {
+		input string
+		want  string
+	}{
+		{input: "D1:3", want: "D1:3"},
+		{input: "D30:05", want: "D30:5"},
+		{input: "D10:003", want: "D10:3"},
+		{input: "D1:0", want: "D1:0"},
+	}
+	for i := range cases {
+		tc := cases[i]
+		if got := NormalizeDiaID(tc.input); got != tc.want {
+			t.Errorf("NormalizeDiaID(%q) = %q, want %q", tc.input, got, tc.want)
+		}
+	}
+}
+
+func TestTokenOverlapF1(t *testing.T) {
+	tests := []struct {
+		name       string
+		prediction string
+		reference  string
+		want       float64
+	}{
+		{"exact match", "hello world", "hello world", 1.0},
+		{"no overlap", "foo bar", "baz qux", 0.0},
+		{"empty both", "", "", 1.0},
+		{"empty prediction", "", "hello", 0.0},
+		{"empty reference", "hello", "", 0.0},
+		{"partial overlap", "the cat sat on the mat", "the cat on the floor", 8.0 / 11.0},
+		{"case insensitive", "Hello World", "hello world", 1.0},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := TokenOverlapF1(tt.prediction, tt.reference)
+			if math.Abs(got-tt.want) > 1e-9 {
+				t.Errorf("TokenOverlapF1(%q, %q) = %.4f, want %.4f",
+					tt.prediction, tt.reference, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestBudgetTruncate(t *testing.T) {
+	t.Run("within budget returns all", func(t *testing.T) {
+		msgs := []string{"short", "message", "here"}
+		result, total := BudgetTruncate(msgs, 1000)
+		if len(result) != 3 {
+			t.Errorf("expected 3 messages, got %d", len(result))
+		}
+		if total == 0 {
+			t.Error("expected non-zero token count")
+		}
+	})
+
+	t.Run("over budget keeps best first", func(t *testing.T) {
+		msgs := []string{
+			"best message that is quite long and takes up tokens",
+			"good message also fairly long content",
+			"worst short",
+		}
+		result, _ := BudgetTruncate(msgs, 5) // very small budget
+		if len(result) == 0 {
+			t.Fatal("expected at least one message")
+		}
+		// Best-ranked (first) should be kept
+		if result[0] != "best message that is quite long and takes up tokens" {
+			t.Errorf("expected best message kept first, got %q", result[0])
+		}
+	})
+
+	t.Run("over budget keeps best ranked first", func(t *testing.T) {
+		// Messages are sorted by bm25 rank ascending (best/most-negative first).
+		// When budget is insufficient, BudgetTruncate must keep the front
+		// (best-ranked) messages, not the tail (worst-ranked).
+		msgs := []string{
+			"best ranked message with some content here",
+			"second best message also has content",
+			"third message here too",
+			"worst ranked short",
+		}
+		// Budget only fits ~1 message (~10 tokens per message, budget=12)
+		result, _ := BudgetTruncate(msgs, 12)
+		if len(result) == 0 {
+			t.Fatal("expected at least one message")
+		}
+		if result[0] != "best ranked message with some content here" {
+			t.Errorf("expected best-ranked (first) message kept, got %q", result[0])
+		}
+		// Worst-ranked (last) must NOT appear
+		for _, m := range result {
+			if m == "worst ranked short" {
+				t.Error("worst-ranked message should have been truncated")
+			}
+		}
+	})
+
+	t.Run("preserves original order", func(t *testing.T) {
+		msgs := []string{"alpha", "beta", "gamma"}
+		result, _ := BudgetTruncate(msgs, 100)
+		for i, got := range result {
+			if got != msgs[i] {
+				t.Errorf("result[%d] = %q, want %q", i, got, msgs[i])
+			}
+		}
+	})
+
+	t.Run("empty input", func(t *testing.T) {
+		result, total := BudgetTruncate(nil, 100)
+		if len(result) != 0 {
+			t.Errorf("expected 0 messages, got %d", len(result))
+		}
+		if total != 0 {
+			t.Errorf("expected 0 tokens, got %d", total)
+		}
+	})
+}
+
+func TestRecallHitRate(t *testing.T) {
+	// Build a sample with known turns
+	sample := &LocomoSample{
+		SampleID: "test-sample",
+		Conversation: map[string]json.RawMessage{
+			"session_1": json.RawMessage(`[
+				{"speaker":"A","dia_id":"D1:1","text":"hello world this is a test message with enough length"},
+				{"speaker":"B","dia_id":"D1:2","text":"another message for testing recall computation purposes here"},
+				{"speaker":"A","dia_id":"D1:3","text":"third turn with some more content to test"}
+			]`),
+		},
+	}
+
+	t.Run("all evidence found", func(t *testing.T) {
+		retrieved := "hello world this is a test message with enough length another message for testing recall computation purposes here"
+		got := RecallHitRate([]string{"D1:1", "D1:2"}, sample, retrieved)
+		if math.Abs(got-1.0) > 1e-9 {
+			t.Errorf("RecallHitRate all found = %.4f, want 1.0", got)
+		}
+	})
+
+	t.Run("partial evidence found", func(t *testing.T) {
+		retrieved := "hello world this is a test message with enough length"
+		got := RecallHitRate([]string{"D1:1", "D1:2"}, sample, retrieved)
+		if math.Abs(got-0.5) > 1e-9 {
+			t.Errorf("RecallHitRate partial = %.4f, want 0.5", got)
+		}
+	})
+
+	t.Run("no evidence required", func(t *testing.T) {
+		got := RecallHitRate(nil, sample, "anything")
+		if got != 1.0 {
+			t.Errorf("RecallHitRate no evidence = %.4f, want 1.0", got)
+		}
+	})
+
+	t.Run("missing turn excluded from denominator", func(t *testing.T) {
+		// D1:1 is found, D99:1 does not exist in sample
+		// Should only count resolvable turns in denominator
+		retrieved := "hello world this is a test message with enough length"
+		got := RecallHitRate([]string{"D1:1", "D99:1"}, sample, retrieved)
+		if math.Abs(got-1.0) > 1e-9 {
+			t.Errorf("RecallHitRate missing turn = %.4f, want 1.0 (unresolvable excluded)", got)
+		}
+	})
+}
+
+func TestExtractKeywords(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  []string
+	}{
+		{"simple", "What is the capital of France", []string{"capital", "france"}},
+		{
+			"stops removed",
+			"Who is the president of the United States",
+			[]string{"president", "united", "states"},
+		},
+		{
+			"max 6 keywords",
+			"one two three four five six seven eight nine ten",
+			[]string{"one", "two", "three", "four", "five", "six"},
+		},
+		{"short words filtered", "I am a go to the store", []string{"am", "store"}},
+		{"empty", "", nil},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := ExtractKeywords(tt.input)
+			if len(got) != len(tt.want) {
+				t.Fatalf("ExtractKeywords(%q) = %v (len %d), want %v (len %d)",
+					tt.input, got, len(got), tt.want, len(tt.want))
+			}
+			for i := range got {
+				if got[i] != tt.want[i] {
+					t.Errorf("[%d] = %q, want %q", i, got[i], tt.want[i])
+				}
+			}
+		})
+	}
+}
@@ -7,9 +7,7 @@ package ui

 import (
 	"fmt"
-	"os"
 	"os/exec"
-	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
@@ -17,61 +15,30 @@ import (

 	"github.com/gdamore/tcell/v2"
 	"github.com/rivo/tview"
-)

-const pidFileName = "gateway.pid"
+	"github.com/sipeed/picoclaw/pkg/config"
+	ppid "github.com/sipeed/picoclaw/pkg/pid"
+)

 type gatewayStatus struct {
 	running bool
 	pid     int
+	version string
 }

-func getPidPath() string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		home = "."
-	}
-	return filepath.Join(home, ".picoclaw", pidFileName)
-}
-
-func isProcessRunning(pid int) bool {
-	if runtime.GOOS == "windows" {
-		cmd := exec.Command("tasklist", "/FI", fmt.Sprintf("PID eq %d", pid))
-		output, err := cmd.Output()
-		if err != nil {
-			return false
-		}
-		return strings.Contains(string(output), strconv.Itoa(pid))
-	} else if runtime.GOOS == "darwin" {
-		cmd := exec.Command("ps", "aux")
-		output, err := cmd.Output()
-		if err != nil {
-			return false
-		}
-		return strings.Contains(string(output), fmt.Sprintf(" %d ", pid))
-	}
-	// Linux
-	_, err := os.Stat(fmt.Sprintf("/proc/%d", pid))
-	return err == nil
+func picoHome() string {
+	return config.GetHome()
 }

 func getGatewayStatus() gatewayStatus {
-	pidPath := getPidPath()
-	data, err := os.ReadFile(pidPath)
-	if err != nil {
-		return gatewayStatus{running: false}
-	}
-	pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
-	if err != nil {
-		return gatewayStatus{running: false}
-	}
-	if !isProcessRunning(pid) {
-		os.Remove(pidPath)
+	data := ppid.ReadPidFileWithCheck(picoHome())
+	if data == nil {
 		return gatewayStatus{running: false}
 	}
 	return gatewayStatus{
 		running: true,
-		pid:     pid,
+		pid:     data.PID,
+		version: data.Version,
 	}
 }

@@ -81,13 +48,12 @@ func startGateway() error {
 		return fmt.Errorf("gateway is already running (PID: %d)", status.pid)
 	}

-	pidPath := getPidPath()
 	var cmd *exec.Cmd

 	if runtime.GOOS == "windows" {
 		cmd = exec.Command("cmd", "/C", "start /B picoclaw gateway > NUL 2>&1")
 	} else {
-		cmd = exec.Command("sh", "-c", "nohup picoclaw gateway > /dev/null 2>&1 & echo $! > "+pidPath)
+		cmd = exec.Command("sh", "-c", "nohup picoclaw gateway > /dev/null 2>&1 &")
 	}

 	err := cmd.Start()
@@ -116,9 +82,8 @@ func startGateway() error {
 			if line == "" {
 				continue
 			}
-			pid, err := strconv.Atoi(line)
+			_, err := strconv.Atoi(line)
 			if err == nil {
-				os.WriteFile(pidPath, []byte(strconv.Itoa(pid)), 0o600)
 				break
 			}
 		}
@@ -141,21 +106,20 @@ func stopGateway() error {
 	if runtime.GOOS == "windows" {
 		err = exec.Command("taskkill", "/F", "/PID", strconv.Itoa(status.pid)).Run()
 	} else {
-		err = exec.Command("kill", "-9", strconv.Itoa(status.pid)).Run()
+		err = exec.Command("kill", strconv.Itoa(status.pid)).Run()
 	}
 	if err != nil {
 		return err
 	}

-	// 多次尝试确认进程已停止
+	// Wait for process to stop (ReadPidFileWithCheck cleans up stale pid file)
 	for i := 0; i < 5; i++ {
-		if !isProcessRunning(status.pid) {
+		if !getGatewayStatus().running {
 			break
 		}
 		time.Sleep(200 * time.Millisecond)
 	}

-	os.Remove(getPidPath())
 	return nil
 }

@@ -217,7 +181,11 @@ func (a *App) newGatewayPage() tview.Primitive {
 	updateStatus = func() {
 		status := getGatewayStatus()
 		if status.running {
-			statusTV.SetText(fmt.Sprintf("[#39ff14::b]GATEWAY RUNNING[-]\n\nPID: %d", status.pid))
+			versionInfo := ""
+			if status.version != "" {
+				versionInfo = fmt.Sprintf("\nVersion: %s", status.version)
+			}
+			statusTV.SetText(fmt.Sprintf("[#39ff14::b]GATEWAY RUNNING[-]\n\nPID: %d%s", status.pid, versionInfo))
 			buttons.SetItemText(0, " [gray]START[white]   ", "")
 			buttons.SetItemText(1, " [red]STOP[white]    ", "")
 		} else {
@@ -28,6 +28,8 @@ func agentCmd(message, sessionKey, model string, debug bool) error {
 		return fmt.Errorf("error loading config: %w", err)
 	}

+	logger.ConfigureFromEnv()
+
 	if debug {
 		logger.SetLevel(logger.DEBUG)
 		fmt.Println("🔍 Debug mode enabled")
@@ -17,24 +17,24 @@ import (
 )

 const (
-	supportedProvidersMsg = "supported providers: openai, anthropic, google-antigravity"
+	supportedProvidersMsg = "supported providers: openai, anthropic, google-antigravity, antigravity"
 	defaultAnthropicModel = "claude-sonnet-4.6"
 )

-func authLoginCmd(provider string, useDeviceCode bool, useOauth bool) error {
+func authLoginCmd(provider string, useDeviceCode bool, useOauth bool, noBrowser bool) error {
 	switch provider {
 	case "openai":
-		return authLoginOpenAI(useDeviceCode)
+		return authLoginOpenAI(useDeviceCode, noBrowser)
 	case "anthropic":
 		return authLoginAnthropic(useOauth)
 	case "google-antigravity", "antigravity":
-		return authLoginGoogleAntigravity()
+		return authLoginGoogleAntigravity(noBrowser)
 	default:
 		return fmt.Errorf("unsupported provider: %s (%s)", provider, supportedProvidersMsg)
 	}
 }

-func authLoginOpenAI(useDeviceCode bool) error {
+func authLoginOpenAI(useDeviceCode bool, noBrowser bool) error {
 	cfg := auth.OpenAIOAuthConfig()

 	var cred *auth.AuthCredential
@@ -43,7 +43,7 @@ func authLoginOpenAI(useDeviceCode bool) error {
 	if useDeviceCode {
 		cred, err = auth.LoginDeviceCode(cfg)
 	} else {
-		cred, err = auth.LoginBrowser(cfg)
+		cred, err = auth.LoginBrowserWithOptions(cfg, auth.LoginBrowserOptions{NoBrowser: noBrowser})
 	}

 	if err != nil {
@@ -92,10 +92,10 @@ func authLoginOpenAI(useDeviceCode bool) error {
 	return nil
 }

-func authLoginGoogleAntigravity() error {
+func authLoginGoogleAntigravity(noBrowser bool) error {
 	cfg := auth.GoogleAntigravityOAuthConfig()

-	cred, err := auth.LoginBrowser(cfg)
+	cred, err := auth.LoginBrowserWithOptions(cfg, auth.LoginBrowserOptions{NoBrowser: noBrowser})
 	if err != nil {
 		return fmt.Errorf("login failed: %w", err)
 	}
@@ -7,6 +7,7 @@ func newLoginCommand() *cobra.Command {
 		provider      string
 		useDeviceCode bool
 		useOauth      bool
+		noBrowser     bool
 	)

 	cmd := &cobra.Command{
@@ -14,12 +15,15 @@ func newLoginCommand() *cobra.Command {
 		Short: "Login via OAuth or paste token",
 		Args:  cobra.NoArgs,
 		RunE: func(cmd *cobra.Command, _ []string) error {
-			return authLoginCmd(provider, useDeviceCode, useOauth)
+			return authLoginCmd(provider, useDeviceCode, useOauth, noBrowser)
 		},
 	}

-	cmd.Flags().StringVarP(&provider, "provider", "p", "", "Provider to login with (openai, anthropic)")
+	cmd.Flags().StringVarP(
+		&provider, "provider", "p", "", "Provider to login with (openai, anthropic, google-antigravity, antigravity)",
+	)
 	cmd.Flags().BoolVar(&useDeviceCode, "device-code", false, "Use device code flow (for headless environments)")
+	cmd.Flags().BoolVar(&noBrowser, "no-browser", false, "Do not auto-open a browser during OAuth login")
 	cmd.Flags().BoolVar(
 		&useOauth, "setup-token", false,
 		"Use setup-token flow for Anthropic (from `claude setup-token`)",
@@ -18,6 +18,7 @@ func TestNewLoginSubCommand(t *testing.T) {
 	assert.True(t, cmd.HasFlags())

 	assert.NotNil(t, cmd.Flags().Lookup("device-code"))
+	assert.NotNil(t, cmd.Flags().Lookup("no-browser"))

 	providerFlag := cmd.Flags().Lookup("provider")
 	require.NotNil(t, providerFlag)
@@ -1,12 +1,53 @@
 package auth

 import (
+	"bytes"
+	"encoding/json"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
 	"testing"
+	"time"

 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
+	pkgauth "github.com/sipeed/picoclaw/pkg/auth"
+	"github.com/sipeed/picoclaw/pkg/config"
 )

+// captureAuthStdout runs fn with os.Stdout redirected into a pipe and returns
+// everything fn wrote to it.
+//
+// The pipe is drained by a goroutine while fn is running. Reading only after
+// fn returns would block fn forever as soon as its output exceeds the OS pipe
+// buffer. os.Stdout is restored both on the normal path and via t.Cleanup;
+// the cleanup also closes the write end so the reader goroutine terminates
+// if fn aborts the test.
+func captureAuthStdout(t *testing.T, fn func()) string {
+	t.Helper()
+
+	oldStdout := os.Stdout
+	r, w, err := os.Pipe()
+	require.NoError(t, err)
+	os.Stdout = w
+	t.Cleanup(func() {
+		os.Stdout = oldStdout
+		// Already closed on the normal path; the error is irrelevant here.
+		_ = w.Close()
+	})
+
+	var buf bytes.Buffer
+	copyDone := make(chan error, 1)
+	go func() {
+		_, copyErr := io.Copy(&buf, r)
+		copyDone <- copyErr
+	}()
+
+	fn()
+
+	// Closing the write end delivers EOF to the reader goroutine.
+	require.NoError(t, w.Close())
+	os.Stdout = oldStdout
+
+	// Receiving from copyDone orders the goroutine's writes to buf before
+	// the read of buf below.
+	require.NoError(t, <-copyDone)
+	require.NoError(t, r.Close())
+	return buf.String()
+}
+
+// setAuthStatusTestHome points the config.EnvHome environment variable at a
+// ".picoclaw" directory inside a fresh temporary directory for the duration
+// of the test and returns that temporary directory (the parent, not the
+// ".picoclaw" path itself).
+func setAuthStatusTestHome(t *testing.T) string {
+	t.Helper()
+
+	root := t.TempDir()
+	home := filepath.Join(root, ".picoclaw")
+	t.Setenv(config.EnvHome, home)
+	return root
+}
+
 func TestNewStatusSubcommand(t *testing.T) {
 	cmd := newStatusCommand()

@@ -16,3 +57,47 @@ func TestNewStatusSubcommand(t *testing.T) {

 	assert.False(t, cmd.HasFlags())
 }
+
+// TestAuthStatusCmdShowsCanonicalGoogleAntigravityAfterLegacyRefresh seeds an
+// auth.json that holds a credential under the legacy "antigravity" key,
+// stores a fresh credential under "google-antigravity", and then asserts
+// that the status output lists exactly one provider, under the canonical
+// name, with the refreshed project and expiry.
+func TestAuthStatusCmdShowsCanonicalGoogleAntigravityAfterLegacyRefresh(t *testing.T) {
+	homeRoot := setAuthStatusTestHome(t)
+
+	// Step 1: write a legacy-format credential store straight to disk.
+	legacyCredential := map[string]any{
+		"access_token": "legacy-token",
+		"expires_at":   time.Date(2026, 4, 16, 10, 0, 0, 0, time.UTC).Format(time.RFC3339),
+		"provider":     "antigravity",
+		"auth_method":  "oauth",
+		"project_id":   "legacy-project",
+	}
+	payload, err := json.Marshal(map[string]any{
+		"credentials": map[string]any{
+			"antigravity": legacyCredential,
+		},
+	})
+	require.NoError(t, err)
+
+	storeDir := filepath.Join(homeRoot, ".picoclaw")
+	require.NoError(t, os.MkdirAll(storeDir, 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(storeDir, "auth.json"), payload, 0o600))
+
+	// Step 2: store the refreshed credential under the canonical name.
+	fresh := &pkgauth.AuthCredential{
+		AccessToken: "fresh-token",
+		ExpiresAt:   time.Date(2026, 4, 16, 12, 30, 0, 0, time.UTC),
+		Provider:    "google-antigravity",
+		AuthMethod:  "oauth",
+		ProjectID:   "fresh-project",
+	}
+	require.NoError(t, pkgauth.SetCredential("google-antigravity", fresh))
+
+	// Step 3: render the status and inspect what was printed.
+	printed := captureAuthStdout(t, func() {
+		require.NoError(t, authStatusCmd())
+	})
+
+	assert.Contains(t, printed, "\nAuthenticated Providers:")
+	assert.Contains(t, printed, "\n  google-antigravity:\n")
+	assert.NotContains(t, printed, "\n  antigravity:\n")
+	assert.Contains(t, printed, "    Project: fresh-project")
+	assert.Contains(t, printed, "    Expires: 2026-04-16 12:30")
+	// Exactly one provider section may be present.
+	assert.Equal(t, 1, strings.Count(printed, ":\n    Method: oauth"))
+}
@@ -19,6 +19,7 @@ import (

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
 	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/logger"
 )

 const (
@@ -155,11 +156,31 @@ func defaultWeComQRFlowOptions(timeout time.Duration) wecomQRFlowOptions {
 }

 func applyWeComAuthResult(cfg *config.Config, botInfo wecomQRBotInfo) {
-	cfg.Channels.WeCom.Enabled = true
-	cfg.Channels.WeCom.BotID = botInfo.BotID
-	cfg.Channels.WeCom.SetSecret(botInfo.Secret)
-	if strings.TrimSpace(cfg.Channels.WeCom.WebSocketURL) == "" {
-		cfg.Channels.WeCom.WebSocketURL = wecomDefaultWebSocketURL
+	bc := cfg.Channels.GetByType(config.ChannelWeCom)
+	if bc == nil {
+		bc = &config.Channel{Type: config.ChannelWeCom}
+		cfg.Channels["wecom"] = bc
+	}
+	bc.Enabled = true
+
+	decoded, err := bc.GetDecoded()
+	if err != nil {
+		logger.ErrorCF("wecom", "failed to decode WeCom settings", map[string]any{
+			"error": err.Error(),
+		})
+		return
+	}
+	wecomCfg, ok := decoded.(*config.WeComSettings)
+	if !ok {
+		logger.ErrorCF("wecom", "unexpected WeCom settings type", map[string]any{
+			"got": fmt.Sprintf("%T", decoded),
+		})
+		return
+	}
+	wecomCfg.BotID = botInfo.BotID
+	wecomCfg.Secret = *config.NewSecureString(botInfo.Secret)
+	if strings.TrimSpace(wecomCfg.WebSocketURL) == "" {
+		wecomCfg.WebSocketURL = wecomDefaultWebSocketURL
 	}
 }

@@ -112,17 +112,23 @@ func TestPollWeComQRCodeResult(t *testing.T) {

 func TestApplyWeComAuthResult(t *testing.T) {
 	cfg := config.DefaultConfig()
-	cfg.Channels.WeCom.WebSocketURL = ""
+	require.NoError(t, config.InitChannelList(cfg.Channels))
+	wecom := cfg.Channels["wecom"]
+	t.Logf("wecom: %+v", wecom)
+	decoded, err := wecom.GetDecoded()
+	require.NoError(t, err)
+	weCfg := decoded.(*config.WeComSettings)
+	weCfg.WebSocketURL = ""

 	applyWeComAuthResult(cfg, wecomQRBotInfo{
 		BotID:  "bot-1",
 		Secret: "secret-1",
 	})

-	assert.True(t, cfg.Channels.WeCom.Enabled)
-	assert.Equal(t, "bot-1", cfg.Channels.WeCom.BotID)
-	assert.Equal(t, "secret-1", cfg.Channels.WeCom.Secret())
-	assert.Equal(t, wecomDefaultWebSocketURL, cfg.Channels.WeCom.WebSocketURL)
+	assert.True(t, wecom.Enabled)
+	assert.Equal(t, "bot-1", weCfg.BotID)
+	assert.Equal(t, "secret-1", weCfg.Secret.String())
+	assert.Equal(t, wecomDefaultWebSocketURL, weCfg.WebSocketURL)
 }

 func TestAuthWeComCmdWithScanner(t *testing.T) {
@@ -149,9 +155,13 @@ func TestAuthWeComCmdWithScanner(t *testing.T) {

 	cfg, err := config.LoadConfig(internal.GetConfigPath())
 	require.NoError(t, err)
-	assert.True(t, cfg.Channels.WeCom.Enabled)
-	assert.Equal(t, "bot-1", cfg.Channels.WeCom.BotID)
-	assert.Equal(t, "secret-1", cfg.Channels.WeCom.Secret())
-	assert.Equal(t, wecomDefaultWebSocketURL, cfg.Channels.WeCom.WebSocketURL)
+	wecom := cfg.Channels["wecom"]
+	decoded, err := wecom.GetDecoded()
+	require.NoError(t, err)
+	weCfg := decoded.(*config.WeComSettings)
+	assert.True(t, wecom.Enabled)
+	assert.Equal(t, "bot-1", weCfg.BotID)
+	assert.Equal(t, "secret-1", weCfg.Secret.String())
+	assert.Equal(t, wecomDefaultWebSocketURL, weCfg.WebSocketURL)
 	assert.Contains(t, output.String(), "WeCom connected.")
 }
@@ -95,14 +95,24 @@ func saveWeixinConfig(token, baseURL, proxy string) error {
 		return fmt.Errorf("failed to load config: %w", err)
 	}

-	cfg.Channels.Weixin.Enabled = true
-	cfg.Channels.Weixin.SetToken(token)
-	const defaultBase = "https://ilinkai.weixin.qq.com/"
-	if baseURL != "" && baseURL != defaultBase {
-		cfg.Channels.Weixin.BaseURL = baseURL
+	bc := cfg.Channels.GetByType(config.ChannelWeixin)
+	if bc == nil {
+		bc = &config.Channel{Type: config.ChannelWeixin}
+		cfg.Channels[config.ChannelWeixin] = bc
 	}
-	if proxy != "" {
-		cfg.Channels.Weixin.Proxy = proxy
+	bc.Enabled = true
+
+	if decoded, err := bc.GetDecoded(); err == nil && decoded != nil {
+		if weixinCfg, ok := decoded.(*config.WeixinSettings); ok {
+			weixinCfg.Token = *config.NewSecureString(token)
+			const defaultBase = "https://ilinkai.weixin.qq.com/"
+			if baseURL != "" && baseURL != defaultBase {
+				weixinCfg.BaseURL = baseURL
+			}
+			if proxy != "" {
+				weixinCfg.Proxy = proxy
+			}
+		}
 	}

 	return config.SaveConfig(cfgPath, cfg)
@@ -0,0 +1,147 @@
+// Package cliui renders human-oriented CLI output: bordered panels and columns
+// on wide interactive terminals. Layout (boxes/columns) is independent of ANSI
+// color: use --no-color or NO_COLOR to disable colors only; narrow or non-TTY
+// stdout falls back to plain line-oriented output.
+package cliui
+
+import (
+	"os"
+	"sync"
+
+	"github.com/charmbracelet/lipgloss"
+	"github.com/muesli/termenv"
+	"golang.org/x/term"
+)
+
+// minWidthFancy is the minimum terminal width (columns) for bordered /
+// structured layout. Below this, plain line-oriented output is used so boxes
+// do not wrap badly.
+const minWidthFancy = 88
+
+// minWidthColumns is the minimum width to lay out some views in two columns
+// (e.g. status providers).
+const minWidthColumns = 104
+
+// initMu serializes calls to Init.
+var initMu sync.Mutex
+
+// Init selects the lipgloss color profile for this process. With
+// disableAnsiColors set (the caller's decision, e.g. for --no-color or
+// NO_COLOR) the ASCII profile is used, which turns off color only; otherwise
+// the profile is detected from the environment. Calls are serialized by
+// initMu.
+func Init(disableAnsiColors bool) {
+	initMu.Lock()
+	defer initMu.Unlock()
+
+	profile := termenv.Ascii
+	if !disableAnsiColors {
+		profile = termenv.EnvColorProfile()
+	}
+	lipgloss.SetColorProfile(profile)
+}
+
+// StdoutWidth returns the terminal width or a sane default if unknown.
+func StdoutWidth() int {
+	w, _, err := term.GetSize(int(os.Stdout.Fd()))
+	if err != nil || w < 20 {
+		return 80
+	}
+	return w
+}
+
+// UseFancyLayout reports whether styled boxes/columns should be used: stdout
+// must be a terminal and at least minWidthFancy columns wide.
+func UseFancyLayout() bool {
+	stdoutIsTTY := term.IsTerminal(int(os.Stdout.Fd()))
+	return stdoutIsTTY && StdoutWidth() >= minWidthFancy
+}
+
+// UseColumnLayout reports whether a second content column is viable: the
+// fancy layout must be active and stdout at least minWidthColumns wide.
+func UseColumnLayout() bool {
+	if !UseFancyLayout() {
+		return false
+	}
+	return StdoutWidth() >= minWidthColumns
+}
+
+// InnerWidth returns the target content width inside borders/margins on
+// stdout: the terminal width minus a fixed budget of 8 columns for border and
+// padding, but never less than 48.
+func InnerWidth() int {
+	const (
+		borderBudget  = 8 // rounded border plus horizontal padding
+		minInnerWidth = 48
+	)
+
+	inner := StdoutWidth() - borderBudget
+	if inner <= minInnerWidth {
+		return minInnerWidth
+	}
+	return inner
+}
+
+// StderrWidth returns stderr terminal width or a sane default.
+func StderrWidth() int {
+	w, _, err := term.GetSize(int(os.Stderr.Fd()))
+	if err != nil || w < 20 {
+		return 80
+	}
+	return w
+}
+
+// UseFancyStderr reports whether boxed errors can be shown on stderr without
+// ugly wraps: stderr must be a terminal and at least minWidthFancy columns
+// wide.
+func UseFancyStderr() bool {
+	stderrIsTTY := term.IsTerminal(int(os.Stderr.Fd()))
+	return stderrIsTTY && StderrWidth() >= minWidthFancy
+}
+
+// InnerStderrWidth is the stderr counterpart of InnerWidth: the stderr width
+// minus a fixed budget of 8 columns for border and padding, but never less
+// than 48.
+func InnerStderrWidth() int {
+	const (
+		borderBudget  = 8
+		minInnerWidth = 48
+	)
+
+	inner := StderrWidth() - borderBudget
+	if inner <= minInnerWidth {
+		return minInnerWidth
+	}
+	return inner
+}
+
+// Color palette shared by the style helpers in this file.
+var (
+	accentBlue = lipgloss.Color("#3E5DB9") // borders, key/value keys, help intro and identifiers
+	accentRed  = lipgloss.Color("#D54646") // title bar and usage placeholders
+	colorMuted = lipgloss.Color("#6B6B6B") // foreground of mutedStyle
+	colorOK    = lipgloss.Color("#2E7D32") // NOTE(review): not referenced in this part of the file — presumably a success color
+)
+
+// borderStyle is the panel frame: a rounded border drawn in accentBlue with
+// one column of horizontal padding and no vertical padding.
+func borderStyle() lipgloss.Style {
+	frame := lipgloss.NewStyle()
+	frame = frame.Border(lipgloss.RoundedBorder())
+	frame = frame.BorderForeground(accentBlue)
+	return frame.Padding(0, 1)
+}
+
+// titleBarStyle renders titles in bold accentRed.
+func titleBarStyle() lipgloss.Style {
+	title := lipgloss.NewStyle().Foreground(accentRed)
+	return title.Bold(true)
+}
+
+// mutedStyle renders text with the colorMuted foreground.
+func mutedStyle() lipgloss.Style {
+	muted := lipgloss.NewStyle()
+	return muted.Foreground(colorMuted)
+}
+
+// bodyStyle is the unstyled default used for body text.
+func bodyStyle() lipgloss.Style {
+	plain := lipgloss.NewStyle()
+	return plain
+}
+
+// kvKeyStyle renders the key of a key/value pair in bold accentBlue.
+func kvKeyStyle() lipgloss.Style {
+	key := lipgloss.NewStyle().Foreground(accentBlue)
+	return key.Bold(true)
+}
+
+// kvValStyle is the unstyled default used for the value of a key/value pair.
+func kvValStyle() lipgloss.Style {
+	plain := lipgloss.NewStyle()
+	return plain
+}
+
+// helpIntroStyle styles the top tagline of the help output in bold
+// accentBlue (PicoClaw blue, matching the left side of the ASCII banner).
+func helpIntroStyle() lipgloss.Style {
+	intro := lipgloss.NewStyle().Foreground(accentBlue)
+	return intro.Bold(true)
+}
+
+// helpIdentStyle styles the left help column (command and flag identifiers)
+// in bold accentBlue.
+func helpIdentStyle() lipgloss.Style {
+	ident := lipgloss.NewStyle().Foreground(accentBlue)
+	return ident.Bold(true)
+}
+
+// helpPlaceholderStyle makes <placeholders> in usage lines stand out in bold
+// accentRed.
+func helpPlaceholderStyle() lipgloss.Style {
+	placeholder := lipgloss.NewStyle().Foreground(accentRed)
+	return placeholder.Bold(true)
+}
@@ -0,0 +1,180 @@
+package cliui
+
+import (
+	"strings"
+	"testing"
+
+	flag "github.com/spf13/pflag"
+)
+
+// init runs once when the test binary starts, before any test function, so
+// every test in this package sees the same rendering setup.
+func init() {
+	// Disable ANSI colors in tests so output is predictable plain text.
+	Init(true)
+}
+
+// ---------------------------------------------------------------------------
+// showErrHint
+// ---------------------------------------------------------------------------
+
+// TestShowErrHint checks which error messages make showErrHint request the
+// usage hint. Flag/argument parse errors and messages that embed "usage:"
+// must match; ordinary sentences that merely contain words such as "flag",
+// "invalid" or "required" must not.
+func TestShowErrHint(t *testing.T) {
+	cases := []struct {
+		msg  string
+		want bool
+	}{
+		// Cobra flag errors — should show hint
+		{"unknown flag: --foo", true},
+		{"unknown shorthand flag: 'f' in -f", true},
+		{"flag needs an argument: --output", true},
+		{"required flag(s) \"model\" not set", true},
+		// Generic invalid-argument errors — should show hint
+		{"invalid argument \"abc\" for --count", true},
+		// Matching is case-insensitive — should show hint
+		{"Unknown Flag: --foo", true},
+		// usage: in message — should show hint
+		{"bad input\nusage: picoclaw ...", true},
+		// Should NOT false-positive on broad words
+		{"connection flagged by remote", false},
+		{"feature flag not set", false},
+		{"invalid API key provided", false},
+		{"authentication required", false},
+		// Unrelated messages — no hint
+		{"something went wrong", false},
+		{"network timeout", false},
+	}
+
+	for _, tc := range cases {
+		got := showErrHint(tc.msg)
+		if got != tc.want {
+			t.Errorf("showErrHint(%q) = %v, want %v", tc.msg, got, tc.want)
+		}
+	}
+}
+
+// ---------------------------------------------------------------------------
+// styleUsageTokens
+// ---------------------------------------------------------------------------
+
+func TestStyleUsageTokensContainsTokens(t *testing.T) {
+	cases := []struct {
+		input    string
+		contains []string // substrings that must appear in plain output
+	}{
+		{
+			"picoclaw agent <message>",
+			[]string{"picoclaw agent", "<message>"},
+		},
+		{
+			"picoclaw [command] [flags]",
+			[]string{"picoclaw", "[command]", "[flags]"},
+		},
+		{
+			"picoclaw",
+			[]string{"picoclaw"},
+		},
+		{
+			"cmd <arg1> [--flag]",
+			[]string{"cmd", "<arg1>", "[--flag]"},
+		},
+	}
+
+	for _, tc := range cases {
+		out := styleUsageTokens(tc.input)
+		for _, sub := range tc.contains {
+			if !containsStripped(out, sub) {
+				t.Errorf("styleUsageTokens(%q): output %q does not contain %q", tc.input, out, sub)
+			}
+		}
+	}
+}
+
+// containsStripped reports whether sub occurs in plain. Despite its name it
+// does not strip ANSI escapes itself: the tests call Init(true) first, which
+// is expected to make lipgloss emit plain text, so a direct substring check
+// is sufficient.
+func containsStripped(plain, sub string) bool {
+	return strings.Contains(plain, sub)
+}
+
+// findSubstring reports whether sub occurs anywhere in s. An empty sub
+// always matches.
+func findSubstring(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
+
+// ---------------------------------------------------------------------------
+// collectFlagRows
+// ---------------------------------------------------------------------------
+
+// TestCollectFlagRows_Empty: a FlagSet without flags yields no rows.
+func TestCollectFlagRows_Empty(t *testing.T) {
+	empty := flag.NewFlagSet("test", flag.ContinueOnError)
+	if got := len(collectFlagRows(empty)); got != 0 {
+		t.Fatalf("expected 0 rows for empty FlagSet, got %d", got)
+	}
+}
+
+// TestCollectFlagRows_BasicFlags: three plain flags produce three rows whose
+// left-column labels are in ascending order.
+func TestCollectFlagRows_BasicFlags(t *testing.T) {
+	set := flag.NewFlagSet("test", flag.ContinueOnError)
+	set.String("output", "", "output file path")
+	set.Bool("verbose", false, "enable verbose mode")
+	set.Int("count", 1, "number of items")
+
+	got := collectFlagRows(set)
+	if len(got) != 3 {
+		t.Fatalf("expected 3 rows, got %d", len(got))
+	}
+
+	// Rows must be sorted alphabetically by flag name.
+	labels := []string{got[0][0], got[1][0], got[2][0]}
+	ascending := labels[0] <= labels[1] && labels[1] <= labels[2]
+	if !ascending {
+		t.Errorf("rows not sorted: %v", labels)
+	}
+}
+
+// TestCollectFlagRows_Shorthand: a flag with a shorthand shows both the short
+// and the long form in the left column.
+func TestCollectFlagRows_Shorthand(t *testing.T) {
+	set := flag.NewFlagSet("test", flag.ContinueOnError)
+	set.StringP("model", "m", "", "model name")
+
+	got := collectFlagRows(set)
+	if n := len(got); n != 1 {
+		t.Fatalf("expected 1 row, got %d", n)
+	}
+
+	label := got[0][0]
+	hasShort := findSubstring(label, "-m")
+	hasLong := findSubstring(label, "--model")
+	if !(hasShort && hasLong) {
+		t.Errorf("expected shorthand and long form in %q", label)
+	}
+}
+
+// TestCollectFlagRows_HiddenFlagsExcluded: a flag marked hidden produces no
+// row, leaving only the visible flag.
+func TestCollectFlagRows_HiddenFlagsExcluded(t *testing.T) {
+	set := flag.NewFlagSet("test", flag.ContinueOnError)
+	set.String("visible", "", "this shows up")
+	set.String("hidden", "", "this should not show up")
+	_ = set.MarkHidden("hidden")
+
+	got := collectFlagRows(set)
+	if n := len(got); n != 1 {
+		t.Fatalf("expected 1 row (hidden excluded), got %d", n)
+	}
+	if label := got[0][0]; !findSubstring(label, "visible") {
+		t.Errorf("expected visible flag in rows, got %q", label)
+	}
+}
+
+func TestCollectFlagRows_UsageInRightColumn(t *testing.T) {
+	fs := flag.NewFlagSet("test", flag.ContinueOnError)
+	fs.String("format", "json", "output format: json or text")
+
+	rows := collectFlagRows(fs)
+	if len(rows) != 1 {
+		t.Fatalf("expected 1 row, got %d", len(rows))
+	}
+	if rows[0][1] != "output format: json or text" {
+		t.Errorf("expected usage in right column, got %q", rows[0][1])
+	}
+}
@@ -0,0 +1,298 @@
+package cliui
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/charmbracelet/lipgloss"
+	"github.com/spf13/cobra"
+	flag "github.com/spf13/pflag"
+)
+
+// RenderCommandHelp returns the help text for c. When UseFancyLayout() is
+// false it returns plainCommandHelp(c). Otherwise it renders an intro followed
+// by bordered panels — Usage, Examples, Commands, local flags and inherited
+// ("Global") flags — omitting every panel that has no content.
+func RenderCommandHelp(c *cobra.Command) string {
+	if !UseFancyLayout() {
+		return plainCommandHelp(c)
+	}
+	syncFlags(c)
+
+	var out strings.Builder
+
+	// Intro: headline, muted description, then one blank separator line.
+	headline, detail := helpIntro(c)
+	if headline != "" {
+		out.WriteString(helpIntroStyle().Render(headline) + "\n")
+	}
+	if detail != "" {
+		out.WriteString(mutedStyle().Render(detail) + "\n")
+	}
+	if headline != "" || detail != "" {
+		out.WriteString("\n")
+	}
+
+	panelW := InnerWidth()
+	textW := panelW - 6
+	if textW < 36 {
+		textW = 36
+	}
+	// addPanel appends one titled panel followed by a newline.
+	addPanel := func(title, body string) {
+		out.WriteString(sectionPanel(title, body, panelW))
+		out.WriteString("\n")
+	}
+
+	// Usage
+	addPanel("Usage", bodyStyle().MaxWidth(textW).Render(styleUsageTokens(c.UseLine())))
+
+	// Examples
+	if example := strings.TrimSpace(c.Example); example != "" {
+		addPanel("Examples", bodyStyle().Width(textW).Render(example))
+	}
+
+	// Subcommands
+	if children := visibleSubcommands(c); len(children) > 0 {
+		pairs := make([][2]string, 0, len(children))
+		for _, child := range children {
+			label := child.Name()
+			if len(child.Aliases) > 0 {
+				label += " (" + strings.Join(child.Aliases, ", ") + ")"
+			}
+			pairs = append(pairs, [2]string{label, child.Short})
+		}
+		addPanel("Commands", renderTwoColPairs(pairs, textW))
+	}
+
+	// Local options: titled "Flags" on the root command, "Options" elsewhere.
+	if localRows := collectFlagRows(c.LocalFlags()); len(localRows) > 0 {
+		heading := "Flags"
+		if c.HasParent() {
+			heading = "Options"
+		}
+		addPanel(heading, renderTwoColPairs(localRows, textW))
+	}
+
+	// Global (inherited) options
+	if c.HasAvailableInheritedFlags() {
+		if inherited := collectFlagRows(c.InheritedFlags()); len(inherited) > 0 {
+			addPanel("Global options", renderTwoColPairs(inherited, textW))
+		}
+	}
+
+	return out.String()
+}
+
+// RenderCommandQuickRef returns a compact reference for c — Usage, Aliases,
+// local flags and inherited ("Global") flags — as bordered panels outerW
+// wide, meant to be embedded after an error on stderr. It returns "" when c
+// is nil or outerW is below 40. outerW is typically InnerStderrWidth().
+func RenderCommandQuickRef(c *cobra.Command, outerW int) string {
+	if c == nil {
+		return ""
+	}
+	if outerW < 40 {
+		return ""
+	}
+	syncFlags(c)
+
+	textW := outerW - 6
+	if textW < 36 {
+		textW = 36
+	}
+
+	var out strings.Builder
+	// addPanel appends one titled panel followed by a newline.
+	addPanel := func(title, body string) {
+		out.WriteString(sectionPanel(title, body, outerW))
+		out.WriteString("\n")
+	}
+
+	addPanel("Usage", bodyStyle().MaxWidth(textW).Render(styleUsageTokens(c.UseLine())))
+
+	if len(c.Aliases) > 0 {
+		aliasLine := "Aliases: " + strings.Join(c.Aliases, ", ")
+		addPanel("Aliases", mutedStyle().MaxWidth(textW).Render(aliasLine))
+	}
+
+	// Local flags: titled "Flags" on the root command, "Options" elsewhere.
+	if localRows := collectFlagRows(c.LocalFlags()); len(localRows) > 0 {
+		heading := "Flags"
+		if c.HasParent() {
+			heading = "Options"
+		}
+		addPanel(heading, renderTwoColPairs(localRows, textW))
+	}
+
+	if c.HasAvailableInheritedFlags() {
+		if inherited := collectFlagRows(c.InheritedFlags()); len(inherited) > 0 {
+			addPanel("Global options", renderTwoColPairs(inherited, textW))
+		}
+	}
+	return out.String()
+}
+
+// syncFlags calls the command's flag-set accessors and discards the results;
+// only their side effects matter here.
+// NOTE(review): presumably this makes cobra merge persistent/inherited flags
+// before they are rendered — confirm against the cobra version in use.
+func syncFlags(c *cobra.Command) {
+	c.LocalFlags()
+	if !c.HasAvailableInheritedFlags() {
+		return
+	}
+	c.InheritedFlags()
+}
+
+// plainCommandHelp builds unstyled help text: the command's Long description
+// (Short when Long is empty) with trailing whitespace removed and followed by
+// a blank line, then cobra's usage string when the command is runnable or has
+// subcommands.
+func plainCommandHelp(c *cobra.Command) string {
+	summary := c.Long
+	if summary == "" {
+		summary = c.Short
+	}
+	summary = strings.TrimRight(summary, " \t\n\r")
+
+	var out strings.Builder
+	if summary != "" {
+		fmt.Fprintf(&out, "%s\n\n", summary)
+	}
+	if c.Runnable() || c.HasSubCommands() {
+		out.WriteString(c.UsageString())
+	}
+	return out.String()
+}
+
+// helpIntro splits a command's descriptions into a headline and sub-text.
+// head is the trimmed Short description. sub is the trimmed Long description
+// with every line trimmed, blank lines dropped, and its first line removed
+// when that line merely repeats head. sub is "" when Long is empty or equal
+// to head.
+func helpIntro(c *cobra.Command) (head, sub string) {
+	head = strings.TrimSpace(c.Short)
+	body := strings.TrimSpace(c.Long)
+	if body == "" || body == head {
+		return head, ""
+	}
+
+	var kept []string
+	for idx, raw := range strings.Split(body, "\n") {
+		line := strings.TrimSpace(raw)
+		switch {
+		case line == "":
+			// blank line: dropped
+		case idx == 0 && line == head:
+			// Long opens by repeating Short: drop the duplicate
+		default:
+			kept = append(kept, line)
+		}
+	}
+	return head, strings.Join(kept, "\n")
+}
+
+// visibleSubcommands returns c's direct subcommands that are not marked
+// Hidden, ordered by name.
+func visibleSubcommands(c *cobra.Command) []*cobra.Command {
+	var shown []*cobra.Command
+	for _, child := range c.Commands() {
+		if !child.Hidden {
+			shown = append(shown, child)
+		}
+	}
+	byName := func(a, b int) bool { return shown[a].Name() < shown[b].Name() }
+	sort.Slice(shown, byName)
+	return shown
+}
+
+// sectionPanel renders one bordered panel of the given width: the styled
+// title, a blank line, then body.
+func sectionPanel(title, body string, width int) string {
+	heading := titleBarStyle().Render(title)
+	content := heading + "\n\n" + body
+	return borderStyle().Width(width).Render(content)
+}
+
+// styleUsageTokens styles a usage line piece by piece: text outside brackets
+// is rendered with helpIdentStyle, and each <...> or [...] token (up to its
+// first matching closer) with helpPlaceholderStyle. An opener without a
+// closer makes the remainder of the line render as identifier text.
+func styleUsageTokens(s string) string {
+	var out strings.Builder
+	rest := s
+	for rest != "" {
+		open := strings.IndexAny(rest, "<[")
+		if open < 0 {
+			// No further tokens: everything left is command text.
+			out.WriteString(helpIdentStyle().Render(rest))
+			break
+		}
+		if open > 0 {
+			out.WriteString(helpIdentStyle().Render(rest[:open]))
+		}
+		rest = rest[open:]
+
+		// rest now starts with the opener; pick the closer that matches it.
+		closer := ">"
+		if rest[0] == '[' {
+			closer = "]"
+		}
+		end := strings.Index(rest, closer)
+		if end < 0 {
+			// Unterminated token: treat the remainder as command text.
+			out.WriteString(helpIdentStyle().Render(rest))
+			break
+		}
+		out.WriteString(helpPlaceholderStyle().Render(rest[:end+1]))
+		rest = rest[end+1:]
+	}
+	return out.String()
+}
+
+// collectFlagRows returns one {left, right} pair per non-hidden flag in fs,
+// ordered by flag name. left comes from formatFlagLeft; right is the flag's
+// usage text, with " (deprecated: …)" appended when a deprecation note is set.
+func collectFlagRows(fs *flag.FlagSet) [][2]string {
+	byName := make(map[string][2]string)
+	var order []string
+	fs.VisitAll(func(f *flag.Flag) {
+		if f.Hidden {
+			return
+		}
+		usage := f.Usage
+		if note := f.Deprecated; note != "" {
+			usage += " (deprecated: " + note + ")"
+		}
+		byName[f.Name] = [2]string{formatFlagLeft(f), usage}
+		order = append(order, f.Name)
+	})
+	sort.Strings(order)
+
+	rows := make([][2]string, len(order))
+	for i, name := range order {
+		rows[i] = byName[name]
+	}
+	return rows
+}
+
+// formatFlagLeft returns the left-column label for a flag: "--name", or
+// "-s, --name" when the flag has a shorthand.
+func formatFlagLeft(f *flag.Flag) string {
+	if f.Shorthand == "" {
+		return "--" + f.Name
+	}
+	label := "-" + f.Shorthand + ", --" + f.Name
+	return label
+}
+
+// renderTwoColPairs lays rows out as two columns separated by a two-space
+// gap. The left column is as wide as the widest label, clamped to 16–34
+// columns; the right column takes the rest of contentW but is never narrower
+// than 24. Rows are joined with newlines, with no trailing newline. It
+// returns "" for an empty rows slice.
+func renderTwoColPairs(rows [][2]string, contentW int) string {
+	if len(rows) == 0 {
+		return ""
+	}
+
+	const (
+		minLeft  = 16
+		maxLeft  = 34
+		minRight = 24
+	)
+	gap := "  "
+
+	// Widest label, clamped to [minLeft, maxLeft].
+	leftW := minLeft
+	for _, row := range rows {
+		if w := lipgloss.Width(row[0]); w > leftW {
+			leftW = w
+		}
+	}
+	if leftW > maxLeft {
+		leftW = maxLeft
+	}
+
+	rightW := contentW - leftW - lipgloss.Width(gap)
+	if rightW < minRight {
+		rightW = minRight
+	}
+
+	lines := make([]string, len(rows))
+	for i, row := range rows {
+		label := helpIdentStyle().Width(leftW).Align(lipgloss.Left).Render(row[0])
+		text := bodyStyle().Width(rightW).Render(strings.TrimSpace(row[1]))
+		lines[i] = lipgloss.JoinHorizontal(lipgloss.Top, label, gap, text)
+	}
+	return strings.TrimRight(strings.Join(lines, "\n"), "\n")
+}
@@ -0,0 +1,75 @@
+package cliui
+
+import (
+	"strings"
+
+	"github.com/spf13/cobra"
+)
+
+// FormatCLIError formats an error message for display on stderr.
+//
+// Trailing newlines are removed from msg first. When UseFancyStderr() is
+// false the result is the plain line "Error: <msg>". Otherwise msg is drawn
+// in a bordered box in the same style as the help panels: the first non-empty
+// line in body style and every following non-empty line muted.
+//
+// Extra help is added only when showErrHint(msg) is true (flag/argument
+// style errors). With a non-nil ctx — the command that was running — plain
+// mode appends plainCommandHelp(ctx), while fancy mode adds a "Full command
+// help" footer and appends RenderCommandQuickRef panels. With a nil ctx,
+// fancy mode shows a generic tip instead and plain mode adds nothing.
+func FormatCLIError(msg string, ctx *cobra.Command) string {
+	msg = strings.TrimRight(msg, "\n")
+	// Evaluate once; the answer is needed in several places below.
+	hint := showErrHint(msg)
+	if !UseFancyStderr() {
+		s := "Error: " + msg + "\n"
+		if ctx != nil && hint {
+			s += "\n" + plainCommandHelp(ctx)
+		}
+		return s
+	}
+	w := InnerStderrWidth()
+	contentW := w - 6
+	if contentW < 36 {
+		contentW = 36
+	}
+
+	title := titleBarStyle().Render("Error") + "\n\n"
+
+	var body strings.Builder
+	// Track the first non-empty line rather than the raw split index, so a
+	// message that begins with a blank line still gets body styling on its
+	// first real line and no stray leading newline.
+	first := true
+	for _, p := range strings.Split(msg, "\n") {
+		p = strings.TrimRight(p, " \t\r")
+		if p == "" {
+			continue
+		}
+		if first {
+			body.WriteString(bodyStyle().Width(contentW).Render(p))
+			first = false
+			continue
+		}
+		body.WriteString("\n")
+		body.WriteString(mutedStyle().Width(contentW).Render(p))
+	}
+
+	foot := ""
+	if hint {
+		if ctx != nil {
+			foot = "\n\n" + mutedStyle().Width(contentW).
+				Render("Full command help: "+ctx.CommandPath()+" --help")
+		} else {
+			foot = "\n\n" + mutedStyle().Width(contentW).
+				Render("Tip: picoclaw --help   ·   picoclaw <command> --help")
+		}
+	}
+
+	out := borderStyle().Width(w).Render(title+body.String()+foot) + "\n"
+	if ctx != nil && hint {
+		if ref := RenderCommandQuickRef(ctx, w); ref != "" {
+			out += "\n" + ref
+		}
+	}
+	return out
+}
+
+// showErrHint reports whether msg looks like a flag/argument usage error.
+// It is a case-insensitive check for any of a fixed set of phrases.
+func showErrHint(msg string) bool {
+	markers := []string{
+		"unknown flag",
+		"unknown shorthand flag",
+		"flag needs an argument",
+		"invalid argument",
+		"required flag",
+		"usage:",
+	}
+	lowered := strings.ToLower(msg)
+	for _, marker := range markers {
+		if strings.Contains(lowered, marker) {
+			return true
+		}
+	}
+	return false
+}
@@ -0,0 +1,110 @@
+package cliui
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/charmbracelet/lipgloss"
+)
+
+// PrintOnboardComplete prints the post-onboard "ready" message and the next
+// steps, using the boxed layout when UseFancyLayout() is true and plain text
+// otherwise.
+func PrintOnboardComplete(logo string, encrypt bool, configPath string) {
+	render := printOnboardPlain
+	if UseFancyLayout() {
+		render = printOnboardFancy
+	}
+	render(logo, encrypt, configPath)
+}
+
+// printOnboardPlain writes the onboarding summary to stdout as plain text:
+// the ready line, then numbered next steps. With encrypt set, a passphrase
+// step is inserted first and the remaining steps are renumbered.
+func printOnboardPlain(logo string, encrypt bool, configPath string) {
+	fmt.Printf("\n%s picoclaw is ready!\n", logo)
+	fmt.Println("\nNext steps:")
+
+	apiKeyStep := "  1. Add your API key to"
+	chatLine := "  2. Chat: picoclaw agent -m \"Hello!\""
+	if encrypt {
+		passphraseLines := []string{
+			"  1. Set your encryption passphrase before starting picoclaw:",
+			"       export PICOCLAW_KEY_PASSPHRASE=<your-passphrase>   # Linux/macOS",
+			"       set PICOCLAW_KEY_PASSPHRASE=<your-passphrase>      # Windows cmd",
+			"",
+		}
+		for _, line := range passphraseLines {
+			fmt.Println(line)
+		}
+		apiKeyStep = "  2. Add your API key to"
+		chatLine = "  3. Chat: picoclaw agent -m \"Hello!\""
+	}
+	fmt.Println(apiKeyStep, configPath)
+
+	recommended := []string{
+		"",
+		"     Recommended:",
+		"     - OpenRouter: https://openrouter.ai/keys (access 100+ models)",
+		"     - Ollama:     https://ollama.com (local, free)",
+		"",
+		"     See README.md for 17+ supported providers.",
+		"",
+	}
+	for _, line := range recommended {
+		fmt.Println(line)
+	}
+
+	fmt.Println(chatLine)
+}
+
+// printOnboardFancy writes the onboarding summary to stdout as bordered
+// panels: a "ready" banner, then either two side-by-side panels (next steps
+// and recommendations) plus a full-width chat panel when UseColumnLayout()
+// is true, or a single panel holding all three texts in order.
+func printOnboardFancy(logo string, encrypt bool, configPath string) {
+	inner := InnerWidth()
+
+	// Banner.
+	banner := titleBarStyle().Render(logo+" picoclaw is ready!") + "\n"
+	bannerBox := borderStyle().MaxWidth(inner + 8).Width(inner)
+	fmt.Println()
+	fmt.Println(bannerBox.Render(strings.TrimSpace(banner)))
+	fmt.Println()
+
+	stepsText := buildOnboardingSteps(encrypt, configPath)
+	recText := recommendedBlock()
+	chatText := chatStep(encrypt)
+
+	if !UseColumnLayout() {
+		// Same order as plain output: numbered steps → recommended → chat line.
+		combined := stepsText + "\n\n" + recText + "\n\n" + chatText
+		panel := titleBarStyle().Render("Next steps") + "\n\n" +
+			bodyStyle().Width(inner-4).Render(combined)
+		fmt.Println(borderStyle().Width(inner).Render(panel))
+		return
+	}
+
+	leftW := min(inner/2-2, 52)
+	rightW := inner - leftW - 4
+	if rightW < 36 {
+		rightW = 36
+	}
+
+	stepsPanel := titleBarStyle().Render("Next steps") + "\n\n" +
+		bodyStyle().Width(leftW).Render(stepsText)
+	leftBlock := borderStyle().MaxWidth(leftW + 8).Width(leftW).Render(stepsPanel)
+
+	recPanel := mutedStyle().Bold(true).Render("Recommended") + "\n\n" +
+		bodyStyle().Width(rightW).Render(recText)
+	rightBlock := borderStyle().MaxWidth(rightW + 8).Width(rightW).Render(recPanel)
+
+	fmt.Println(lipgloss.JoinHorizontal(lipgloss.Top, leftBlock, "  ", rightBlock))
+	fmt.Println()
+
+	chatPanel := bodyStyle().Width(inner - 4).Render(chatText)
+	fmt.Println(borderStyle().Width(inner).Render(chatPanel))
+}
+
+// buildOnboardingSteps returns the numbered setup steps for the fancy
+// onboarding panel. With encrypt set, step 1 explains how to export the
+// passphrase and the API-key step becomes step 2; otherwise the API-key step
+// is step 1. configPath is printed on its own indented line.
+func buildOnboardingSteps(encrypt bool, configPath string) string {
+	var sb strings.Builder
+	apiKeyLead := "1. Add your API key to\n   "
+	if encrypt {
+		sb.WriteString("1. Set your encryption passphrase before starting picoclaw:\n" +
+			"   export PICOCLAW_KEY_PASSPHRASE=<your-passphrase>   # Linux/macOS\n" +
+			"   set PICOCLAW_KEY_PASSPHRASE=<your-passphrase>      # Windows cmd\n\n")
+		apiKeyLead = "2. Add your API key to\n   "
+	}
+	sb.WriteString(apiKeyLead + configPath + "\n")
+	return sb.String()
+}
+
+func recommendedBlock() string {
+	return "• OpenRouter: https://openrouter.ai/keys\n  (access 100+ models)\n\n" +
+		"• Ollama: https://ollama.com\n  (local, free)\n\n" +
+		"See README.md for 17+ supported providers."
+}
+
+// chatStep returns the final "Chat" step; it is step 3 when the encryption
+// passphrase step is present and step 2 otherwise.
+func chatStep(encrypt bool) string {
+	step := "2. Chat:\n   picoclaw agent -m \"Hello!\""
+	if encrypt {
+		step = "3. Chat:\n   picoclaw agent -m \"Hello!\""
+	}
+	return step
+}
@@ -0,0 +1,168 @@
+package cliui
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/charmbracelet/lipgloss"
+)
+
+// ProviderRow holds one provider's display name and status value. It is one
+// line of the provider listing printed by PrintStatus.
+type ProviderRow struct {
+	Name string // label shown in the left/key position
+	Val  string // status text; "✓", "✓ …" and "not set" get dedicated styling in fancy mode
+}
+
+// StatusReport is a structured status view for PrintStatus. Model, Providers
+// and OAuthLines are only printed when ConfigOK is true.
+type StatusReport struct {
+	Logo          string // printed in front of " picoclaw Status"
+	Version       string // always printed
+	Build         string // printed only when non-empty
+	ConfigPath    string // printed with a ✓/✗ mark taken from ConfigOK
+	ConfigOK      bool   // also gates Model, Providers and OAuthLines output
+	WorkspacePath string // printed with a ✓/✗ mark taken from WorkspaceOK
+	WorkspaceOK   bool
+	Model         string
+	Providers     []ProviderRow
+	OAuthLines    []string // each full line "provider (method): state"
+}
+
+// PrintStatus writes the status report to stdout, using the boxed layout when
+// UseFancyLayout() is true and plain text otherwise.
+func PrintStatus(r StatusReport) {
+	if UseFancyLayout() {
+		printStatusFancy(r)
+		return
+	}
+	printStatusPlain(r)
+}
+
+// printStatusPlain writes the report to stdout as plain lines: header,
+// version, optional build, config and workspace paths with ✓/✗ marks, and —
+// only when the config is OK — the model, providers and OAuth lines.
+func printStatusPlain(r StatusReport) {
+	fmt.Printf("%s picoclaw Status\n", r.Logo)
+	fmt.Printf("Version: %s\n", r.Version)
+	if r.Build != "" {
+		fmt.Printf("Build: %s\n", r.Build)
+	}
+	fmt.Println()
+
+	printPathLine("Config", r.ConfigPath, r.ConfigOK)
+	printPathLine("Workspace", r.WorkspacePath, r.WorkspaceOK)
+
+	// Everything below depends on a usable config.
+	if !r.ConfigOK {
+		return
+	}
+
+	fmt.Printf("Model: %s\n", r.Model)
+	for i := range r.Providers {
+		row := r.Providers[i]
+		fmt.Printf("%s: %s\n", row.Name, row.Val)
+	}
+
+	if len(r.OAuthLines) == 0 {
+		return
+	}
+	fmt.Println("\nOAuth/Token Auth:")
+	for _, entry := range r.OAuthLines {
+		fmt.Printf("  %s\n", entry)
+	}
+}
+
+func printPathLine(label, path string, ok bool) {
+	mark := "✗"
+	if ok {
+		mark = "✓"
+	}
+	fmt.Println(label+":", path, mark)
+}
+
+// printStatusFancy writes the report to stdout as bordered panels: a header
+// box (title, version, optional build), then the path panel and — when the
+// config is OK and providers exist — the provider table, side by side if
+// UseColumnLayout() is true and stacked otherwise. An OAuth panel follows
+// when there are OAuth lines and the config is OK.
+func printStatusFancy(r StatusReport) {
+	inner := InnerWidth()
+	topBox := borderStyle().Width(inner)
+
+	var head strings.Builder
+	head.WriteString(titleBarStyle().Render(r.Logo + " picoclaw Status"))
+	head.WriteString("\n\n")
+	head.WriteString(kvKeyStyle().Render("Version") + "  " + kvValStyle().Render(r.Version))
+	if r.Build != "" {
+		head.WriteString("\n")
+		head.WriteString(kvKeyStyle().Render("Build") + "     " + kvValStyle().Render(r.Build))
+	}
+	fmt.Println(topBox.Render(head.String()))
+	fmt.Println()
+
+	if UseColumnLayout() && len(r.Providers) > 0 && r.ConfigOK {
+		leftW := (inner - 2) / 2
+		rightW := inner - leftW - 2
+		pathsNarrow := pathStatusPanel(r, leftW)
+		prov := providerTablePanel(r, rightW)
+		gap := strings.Repeat(" ", 2)
+		fmt.Println(lipgloss.JoinHorizontal(lipgloss.Top, pathsNarrow, gap, prov))
+	} else {
+		fmt.Println(pathStatusPanel(r, inner))
+		if len(r.Providers) > 0 && r.ConfigOK {
+			fmt.Println(providerTablePanel(r, inner))
+		}
+	}
+
+	if len(r.OAuthLines) > 0 && r.ConfigOK {
+		var ob strings.Builder
+		ob.WriteString(titleBarStyle().Render("OAuth / token auth") + "\n\n")
+		for _, line := range r.OAuthLines {
+			ob.WriteString("  • " + line + "\n")
+		}
+		fmt.Println()
+		// Drop the final newline so the box does not end with an empty row,
+		// the same way providerTablePanel trims its content before rendering.
+		fmt.Println(borderStyle().Width(inner).Render(strings.TrimRight(ob.String(), "\n")))
+	}
+}
+
+// pathStatusPanel renders a bordered panel, inner columns wide, listing the
+// config and workspace paths with their ✓/✗ marks and, when the config is OK,
+// the model name.
+func pathStatusPanel(r StatusReport, inner int) string {
+	cfgMark := statusMark(r.ConfigOK)
+	wsMark := statusMark(r.WorkspaceOK)
+	var b strings.Builder
+	b.WriteString(kvKeyStyle().Render("Config") + "\n")
+	b.WriteString(mutedStyle().Render(r.ConfigPath))
+	b.WriteString(" " + cfgMark + "\n\n")
+	b.WriteString(kvKeyStyle().Render("Workspace") + "\n")
+	b.WriteString(mutedStyle().Render(r.WorkspacePath))
+	b.WriteString(" " + wsMark)
+	if r.ConfigOK {
+		// The separator is only written when a Model line follows, so the
+		// panel never ends with a newline (which would render as an empty
+		// last row when the config is not OK).
+		b.WriteString("\n\n")
+		b.WriteString(kvKeyStyle().Render("Model") + "  " + kvValStyle().Render(r.Model))
+	}
+	return borderStyle().Width(inner).Render(b.String())
+}
+
+// statusMark returns a coloured check mark: ✓ in colorOK when ok is true,
+// ✗ in accentRed otherwise.
+func statusMark(ok bool) string {
+	glyph, tint := "✗", accentRed
+	if ok {
+		glyph, tint = "✓", colorOK
+	}
+	return lipgloss.NewStyle().Foreground(tint).Render(glyph)
+}
+
+func providerTablePanel(r StatusReport, colW int) string {
+	if len(r.Providers) == 0 {
+		return ""
+	}
+	keyW := min(22, colW/3)
+	if keyW < 14 {
+		keyW = 14
+	}
+	valW := colW - keyW - 3
+	if valW < 12 {
+		valW = 12
+	}
+
+	var b strings.Builder
+	b.WriteString(titleBarStyle().Render("Providers & local") + "\n\n")
+	for _, p := range r.Providers {
+		k := lipgloss.NewStyle().Foreground(accentBlue).Bold(true).Width(keyW).Render(p.Name)
+		v := styleProviderVal(p.Val).Width(valW).Render(p.Val)
+		b.WriteString(lipgloss.JoinHorizontal(lipgloss.Top, k, "  ", v))
+		b.WriteString("\n")
+	}
+	return borderStyle().Width(colW).Render(strings.TrimRight(b.String(), "\n"))
+}
+
+// styleProviderVal picks the style for a provider status value: colorOK for
+// "✓" or anything starting with "✓ ", muted for "not set", and an unstyled
+// default for everything else.
+func styleProviderVal(s string) lipgloss.Style {
+	switch {
+	case s == "✓", strings.HasPrefix(s, "✓ "):
+		return lipgloss.NewStyle().Foreground(colorOK)
+	case s == "not set":
+		return mutedStyle()
+	default:
+		return lipgloss.NewStyle()
+	}
+}
@@ -0,0 +1,61 @@
+package cliui
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/charmbracelet/lipgloss"
+)
+
+// PrintVersion prints the version line, plus the build info and Go toolchain
+// lines when they are non-empty, to stdout.
+//
+// Without UseFancyLayout() the output is plain text. Otherwise it is a
+// bordered box titled "<logo> picoclaw": with UseColumnLayout() the keys are
+// right-aligned in a 12-column key column, otherwise keys and values are
+// written as simple "Key  value" lines.
+func PrintVersion(logo, versionLine string, build, goVer string) {
+	if !UseFancyLayout() {
+		fmt.Printf("%s %s\n", logo, versionLine)
+		if build != "" {
+			fmt.Printf("  Build: %s\n", build)
+		}
+		if goVer != "" {
+			fmt.Printf("  Go: %s\n", goVer)
+		}
+		return
+	}
+
+	inner := InnerWidth()
+	box := borderStyle().Width(inner)
+
+	if UseColumnLayout() {
+		leftCol := kvKeyStyle().Width(12).Align(lipgloss.Right)
+		rightW := inner - 16
+		rightStyle := kvValStyle().Width(rightW)
+
+		// row renders one aligned "key  value" line.
+		row := func(key, val string) string {
+			return lipgloss.JoinHorizontal(lipgloss.Top, leftCol.Render(key), "  ", rightStyle.Render(val))
+		}
+		rows := []string{row("Version", versionLine)}
+		if build != "" {
+			rows = append(rows, row("Build", build))
+		}
+		if goVer != "" {
+			rows = append(rows, row("Go", goVer))
+		}
+		header := titleBarStyle().Render(logo+" picoclaw") + "\n\n"
+		// Join the rows instead of terminating each with "\n": a trailing
+		// newline would render as an empty last row inside the box, which
+		// the single-column branch of this function does not have.
+		fmt.Println(box.Render(header + strings.Join(rows, "\n")))
+		return
+	}
+
+	var lines []string
+	lines = append(lines, titleBarStyle().Render(logo+" picoclaw"))
+	lines = append(lines, "")
+	lines = append(lines, kvKeyStyle().Render("Version")+"  "+kvValStyle().Render(versionLine))
+	if build != "" {
+		lines = append(lines, kvKeyStyle().Render("Build")+"     "+kvValStyle().Render(build))
+	}
+	if goVer != "" {
+		lines = append(lines, kvKeyStyle().Render("Go")+"        "+kvValStyle().Render(goVer))
+	}
+	fmt.Println(box.Render(strings.Join(lines, "\n")))
+}
@@ -14,7 +14,6 @@ func newAddCommand(storePath func() string) *cobra.Command {
 		message string
 		every   int64
 		cronExp string
-		deliver bool
 		channel string
 		to      string
 	)
@@ -37,7 +36,7 @@ func newAddCommand(storePath func() string) *cobra.Command {
 			}

 			cs := cron.NewCronService(storePath(), nil)
-			job, err := cs.AddJob(name, schedule, message, deliver, channel, to)
+			job, err := cs.AddJob(name, schedule, message, channel, to)
 			if err != nil {
 				return fmt.Errorf("error adding job: %w", err)
 			}
@@ -52,7 +51,6 @@ func newAddCommand(storePath func() string) *cobra.Command {
 	cmd.Flags().StringVarP(&message, "message", "m", "", "Message for agent")
 	cmd.Flags().Int64VarP(&every, "every", "e", 0, "Run every N seconds")
 	cmd.Flags().StringVarP(&cronExp, "cron", "c", "", "Cron expression (e.g. '0 9 * * *')")
-	cmd.Flags().BoolVarP(&deliver, "deliver", "d", false, "Deliver response to channel")
 	cmd.Flags().StringVar(&to, "to", "", "Recipient for delivery")
 	cmd.Flags().StringVar(&channel, "channel", "", "Channel for delivery")

@@ -21,7 +21,6 @@ func TestNewAddSubcommand(t *testing.T) {

 	assert.NotNil(t, cmd.Flags().Lookup("every"))
 	assert.NotNil(t, cmd.Flags().Lookup("cron"))
-	assert.NotNil(t, cmd.Flags().Lookup("deliver"))
 	assert.NotNil(t, cmd.Flags().Lookup("to"))
 	assert.NotNil(t, cmd.Flags().Lookup("channel"))

@@ -2,19 +2,34 @@ package gateway

 import (
 	"fmt"
+	"os"

 	"github.com/spf13/cobra"

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
+	"github.com/sipeed/picoclaw/pkg/config"
 	"github.com/sipeed/picoclaw/pkg/gateway"
 	"github.com/sipeed/picoclaw/pkg/logger"
+	"github.com/sipeed/picoclaw/pkg/netbind"
 	"github.com/sipeed/picoclaw/pkg/utils"
 )

+// resolveGatewayHostOverride returns the normalized --host value to apply.
+// When explicit is false (the flag was not set on the command line) it
+// returns "" and no error. Otherwise host is passed through
+// netbind.NormalizeHostInput, and a failure is wrapped in an
+// "invalid --host value" error.
+func resolveGatewayHostOverride(explicit bool, host string) (string, error) {
+	if explicit {
+		normalized, err := netbind.NormalizeHostInput(host)
+		if err == nil {
+			return normalized, nil
+		}
+		return "", fmt.Errorf("invalid --host value: %w", err)
+	}
+	return "", nil
+}
+
 func NewGatewayCommand() *cobra.Command {
 	var debug bool
 	var noTruncate bool
 	var allowEmpty bool
+	var host string

 	cmd := &cobra.Command{
 		Use:     "gateway",
@@ -33,7 +48,25 @@ func NewGatewayCommand() *cobra.Command {

 			return nil
 		},
-		RunE: func(_ *cobra.Command, _ []string) error {
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			resolvedHost, err := resolveGatewayHostOverride(cmd.Flags().Changed("host"), host)
+			if err != nil {
+				return err
+			}
+			if resolvedHost != "" {
+				prevHost, hadPrev := os.LookupEnv(config.EnvGatewayHost)
+				if err := os.Setenv(config.EnvGatewayHost, resolvedHost); err != nil {
+					return fmt.Errorf("failed to set %s: %w", config.EnvGatewayHost, err)
+				}
+				defer func() {
+					if hadPrev {
+						_ = os.Setenv(config.EnvGatewayHost, prevHost)
+						return
+					}
+					_ = os.Unsetenv(config.EnvGatewayHost)
+				}()
+			}
+
 			return gateway.Run(debug, internal.GetPicoclawHome(), internal.GetConfigPath(), allowEmpty)
 		},
 	}
@@ -47,6 +80,12 @@ func NewGatewayCommand() *cobra.Command {
 		false,
 		"Continue starting even when no default model is configured",
 	)
+	cmd.Flags().StringVar(
+		&host,
+		"host",
+		"",
+		"Host address for gateway binding (overrides gateway.host for this run)",
+	)

 	return cmd
 }
@@ -29,4 +29,38 @@ func TestNewGatewayCommand(t *testing.T) {
 	assert.True(t, cmd.HasFlags())
 	assert.NotNil(t, cmd.Flags().Lookup("debug"))
 	assert.NotNil(t, cmd.Flags().Lookup("allow-empty"))
+	assert.NotNil(t, cmd.Flags().Lookup("host"))
+}
+
+// TestResolveGatewayHostOverride is a table-driven check of
+// resolveGatewayHostOverride: an unset flag yields an empty host without an
+// error, an explicit blank value is rejected, and explicit values come back
+// trimmed and normalized (bracketed IPv6 unwrapped, comma lists compacted).
+func TestResolveGatewayHostOverride(t *testing.T) {
+	tests := []struct {
+		name     string
+		explicit bool
+		host     string
+		wantHost string
+		wantErr  bool
+	}{
+		{name: "implicit empty host is allowed", explicit: false, host: "", wantHost: "", wantErr: false},
+		{name: "explicit empty host rejected", explicit: true, host: "   ", wantHost: "", wantErr: true},
+		{name: "explicit localhost kept", explicit: true, host: " localhost ", wantHost: "localhost", wantErr: false},
+		{
+			name:     "explicit multi host normalized",
+			explicit: true,
+			host:     " [::1] , 127.0.0.1 ",
+			wantHost: "::1,127.0.0.1",
+			wantErr:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := resolveGatewayHostOverride(tt.explicit, tt.host)
+			// Compare only whether an error occurred, not its text.
+			if (err != nil) != tt.wantErr {
+				t.Fatalf("resolveGatewayHostOverride() err = %v, wantErr %t", err, tt.wantErr)
+			}
+			if got != tt.wantHost {
+				t.Fatalf("resolveGatewayHostOverride() host = %q, want %q", got, tt.wantHost)
+			}
+		})
+	}
 }
@@ -14,11 +14,7 @@ const Logo = pkg.Logo
 // GetPicoclawHome returns the picoclaw home directory.
 // Priority: $PICOCLAW_HOME > ~/.picoclaw
 func GetPicoclawHome() string {
-	if home := os.Getenv(config.EnvHome); home != "" {
-		return home
-	}
-	home, _ := os.UserHomeDir()
-	return filepath.Join(home, pkg.DefaultPicoClawHome)
+	return config.GetHome()
 }

 func GetConfigPath() string {
@@ -81,7 +81,7 @@ func listAvailableModels(cfg *config.Config) {
 		if model.ModelName == defaultModel {
 			marker = "> "
 		}
-		if model.APIKey() == "" {
+		if !model.Enabled {
 			continue
 		}
 		fmt.Printf("%s- %s (%s)\n", marker, model.ModelName, model.Model)
@@ -92,7 +92,7 @@ func setDefaultModel(configPath string, cfg *config.Config, modelName string) er
 	// Validate that the model exists in model_list
 	modelFound := false
 	for _, model := range cfg.ModelList {
-		if model.APIKey() != "" && model.ModelName == modelName {
+		if model.Enabled && model.ModelName == modelName {
 			modelFound = true
 			break
 		}
@@ -58,24 +58,27 @@ func TestNewModelCommand(t *testing.T) {
 }

 func TestShowCurrentModel_WithDefaultModel(t *testing.T) {
-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "gpt-4",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "gpt-4", Model: "openai/gpt-4"},
-			{ModelName: "claude-3", Model: "anthropic/claude-3"},
+			{
+				ModelName: "gpt-4",
+				Model:     "openai/gpt-4",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
+			{
+				ModelName: "claude-3",
+				Model:     "anthropic/claude-3",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"gpt-4": {
-			APIKeys: []string{"test"},
-		},
-		"claude-3": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	output := captureStdout(func() {
 		showCurrentModel(cfg)
@@ -88,20 +91,21 @@ func TestShowCurrentModel_WithDefaultModel(t *testing.T) {
 }

 func TestShowCurrentModel_NoDefaultModel(t *testing.T) {
-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "gpt-4", Model: "openai/gpt-4"},
+			{
+				ModelName: "gpt-4",
+				Model:     "openai/gpt-4",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"gpt-4": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	output := captureStdout(func() {
 		showCurrentModel(cfg)
@@ -124,25 +128,28 @@ func TestListAvailableModels_Empty(t *testing.T) {
 }

 func TestListAvailableModels_WithModels(t *testing.T) {
-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "gpt-4",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "gpt-4", Model: "openai/gpt-4"},
-			{ModelName: "claude-3", Model: "anthropic/claude-3"},
+			{
+				ModelName: "gpt-4",
+				Model:     "openai/gpt-4",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
+			{
+				ModelName: "claude-3",
+				Model:     "anthropic/claude-3",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 			{ModelName: "no-key-model", Model: "openai/test"},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"gpt-4": {
-			APIKeys: []string{"test"},
-		},
-		"claude-3": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	output := captureStdout(func() {
 		listAvailableModels(cfg)
@@ -157,24 +164,27 @@ func TestListAvailableModels_WithModels(t *testing.T) {
 func TestSetDefaultModel_ValidModel(t *testing.T) {
 	initTest(t)

-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "old-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "new-model", Model: "openai/new-model"},
-			{ModelName: "old-model", Model: "openai/old-model"},
+			{
+				ModelName: "new-model",
+				Model:     "openai/new-model",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
+			{
+				ModelName: "old-model",
+				Model:     "openai/old-model",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"new-model": {
-			APIKeys: []string{"test"},
-		},
-		"old-model": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	output := captureStdout(func() {
 		err := setDefaultModel(configPath, cfg, "new-model")
@@ -192,20 +202,21 @@ func TestSetDefaultModel_ValidModel(t *testing.T) {
 func TestSetDefaultModel_InvalidModel(t *testing.T) {
 	initTest(t)

-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "existing-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "existing-model", Model: "openai/existing"},
+			{
+				ModelName: "existing-model",
+				Model:     "openai/existing",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"existing-model": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	assert.Error(t, setDefaultModel(configPath, cfg, "nonexistent-model"))
 }
@@ -213,24 +224,22 @@ func TestSetDefaultModel_InvalidModel(t *testing.T) {
 func TestSetDefaultModel_ModelWithoutAPIKey(t *testing.T) {
 	initTest(t)

-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "existing-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "existing-model", Model: "openai/existing"},
+			{
+				ModelName: "existing-model",
+				Model:     "openai/existing",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 			{ModelName: "no-key-model", Model: "openai/nokey"},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"existing-model": {
-			APIKeys: []string{"test"},
-		},
-		"no-key-model": {
-			APIKeys: []string{""},
-		},
-	}})
+	}

 	assert.Error(t, setDefaultModel(configPath, cfg, "no-key-model"))
 }
@@ -239,20 +248,21 @@ func TestSetDefaultModel_SaveConfigError(t *testing.T) {
 	// Use an invalid path to trigger save error
 	invalidPath := "/nonexistent/directory/config.json"

-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "old-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "new-model", Model: "openai/new-model"},
+			{
+				ModelName: "new-model",
+				Model:     "openai/new-model",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"new-model": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	err := setDefaultModel(invalidPath, cfg, "new-model")

@@ -284,20 +294,21 @@ func TestModelCommandExecution_Show(t *testing.T) {
 	initTest(t)

 	// Create a test config
-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "test-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "test-model", Model: "openai/test"},
+			{
+				ModelName: "test-model",
+				Model:     "openai/test",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"test-model": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	err := config.SaveConfig(configPath, cfg)
 	require.NoError(t, err)
@@ -315,25 +326,27 @@ func TestModelCommandExecution_Show(t *testing.T) {
 func TestModelCommandExecution_Set(t *testing.T) {
 	initTest(t)

-	sec := &config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"old-model": {
-			APIKeys: []string{"test"},
-		},
-		"new-model": {
-			APIKeys: []string{"test"},
-		},
-	}}
-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "old-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "old-model", Model: "openai/old"},
-			{ModelName: "new-model", Model: "openai/new"},
+			{
+				ModelName: "old-model",
+				Model:     "openai/old",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
+			{
+				ModelName: "new-model",
+				Model:     "openai/new",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(sec)
+	}

 	err := config.SaveConfig(configPath, cfg)
 	require.NoError(t, err)
@@ -357,28 +370,33 @@ func TestModelCommandExecution_TooManyArgs(t *testing.T) {
 }

 func TestListAvailableModels_MarkerLogic(t *testing.T) {
-	cfg := (&config.Config{
+	cfg := &config.Config{
 		Agents: config.AgentsConfig{
 			Defaults: config.AgentDefaults{
 				ModelName: "middle-model",
 			},
 		},
 		ModelList: []*config.ModelConfig{
-			{ModelName: "first-model", Model: "openai/first"},
-			{ModelName: "middle-model", Model: "openai/middle"},
-			{ModelName: "last-model", Model: "openai/last"},
+			{
+				ModelName: "first-model",
+				Model:     "openai/first",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
+			{
+				ModelName: "middle-model",
+				Model:     "openai/middle",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
+			{
+				ModelName: "last-model",
+				Model:     "openai/last",
+				APIKeys:   config.SecureStrings{config.NewSecureString("test")},
+				Enabled:   true,
+			},
 		},
-	}).WithSecurity(&config.SecurityConfig{ModelList: map[string]config.ModelSecurityEntry{
-		"first-model": {
-			APIKeys: []string{"test"},
-		},
-		"middle-model": {
-			APIKeys: []string{"test"},
-		},
-		"last-model": {
-			APIKeys: []string{"test"},
-		},
-	}})
+	}

 	output := captureStdout(func() {
 		listAvailableModels(cfg)
@@ -9,6 +9,7 @@ import (
 	"golang.org/x/term"

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
+	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/cliui"
 	"github.com/sipeed/picoclaw/pkg/config"
 	"github.com/sipeed/picoclaw/pkg/credential"
 )
@@ -79,25 +80,7 @@ func onboard(encrypt bool) {
 	workspace := cfg.WorkspacePath()
 	createWorkspaceTemplates(workspace)

-	fmt.Printf("\n%s picoclaw is ready!\n", internal.Logo)
-	fmt.Println("\nNext steps:")
-	if encrypt {
-		fmt.Println("  1. Set your encryption passphrase before starting picoclaw:")
-		fmt.Println("       export PICOCLAW_KEY_PASSPHRASE=<your-passphrase>   # Linux/macOS")
-		fmt.Println("       set PICOCLAW_KEY_PASSPHRASE=<your-passphrase>      # Windows cmd")
-		fmt.Println("")
-		fmt.Println("  2. Add your API key to", configPath)
-	} else {
-		fmt.Println("  1. Add your API key to", configPath)
-	}
-	fmt.Println("")
-	fmt.Println("     Recommended:")
-	fmt.Println("     - OpenRouter: https://openrouter.ai/keys (access 100+ models)")
-	fmt.Println("     - Ollama:     https://ollama.com (local, free)")
-	fmt.Println("")
-	fmt.Println("     See README.md for 17+ supported providers.")
-	fmt.Println("")
-	fmt.Println("  3. Chat: picoclaw agent -m \"Hello!\"")
+	cliui.PrintOnboardComplete(internal.Logo, encrypt, configPath)
 }

 // promptPassphrase reads the encryption passphrase twice from the terminal
@@ -189,6 +172,9 @@ func copyEmbeddedToTarget(targetDir string) error {
 		if err != nil {
 			return fmt.Errorf("Failed to get relative path for %s: %v\n", path, err)
 		}
+		if new_path == "AGENTS.md" || new_path == "IDENTITY.md" {
+			return nil
+		}

 		// Build target file path
 		targetPath := filepath.Join(targetDir, new_path)
@@ -12,7 +12,6 @@ import (

 type deps struct {
 	workspace    string
-	installer    *skills.SkillInstaller
 	skillsLoader *skills.SkillsLoader
 }

@@ -29,15 +28,6 @@ func NewSkillsCommand() *cobra.Command {
 			}

 			d.workspace = cfg.WorkspacePath()
-			installer, err := skills.NewSkillInstaller(
-				d.workspace,
-				cfg.Tools.Skills.Github.Token(),
-				cfg.Tools.Skills.Github.Proxy,
-			)
-			if err != nil {
-				return fmt.Errorf("error creating skills installer: %w", err)
-			}
-			d.installer = installer

 			// get global config directory and builtin skills directory
 			globalDir := filepath.Dir(internal.GetConfigPath())
@@ -52,13 +42,6 @@ func NewSkillsCommand() *cobra.Command {
 		},
 	}

-	installerFn := func() (*skills.SkillInstaller, error) {
-		if d.installer == nil {
-			return nil, fmt.Errorf("skills installer is not initialized")
-		}
-		return d.installer, nil
-	}
-
 	loaderFn := func() (*skills.SkillsLoader, error) {
 		if d.skillsLoader == nil {
 			return nil, fmt.Errorf("skills loader is not initialized")
@@ -75,10 +58,10 @@ func NewSkillsCommand() *cobra.Command {

 	cmd.AddCommand(
 		newListCommand(loaderFn),
-		newInstallCommand(installerFn),
+		newInstallCommand(),
 		newInstallBuiltinCommand(workspaceFn),
 		newListBuiltinCommand(),
-		newRemoveCommand(installerFn),
+		newRemoveCommand(),
 		newSearchCommand(),
 		newShowCommand(loaderFn),
 	)
@@ -2,6 +2,7 @@ package skills

 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"io"
 	"os"
@@ -11,12 +12,23 @@ import (

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
 	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/fileutil"
 	"github.com/sipeed/picoclaw/pkg/skills"
 	"github.com/sipeed/picoclaw/pkg/utils"
 )

 const skillsSearchMaxResults = 20

+type installedSkillOriginMeta struct {
+	Version          int    `json:"version"`
+	OriginKind       string `json:"origin_kind,omitempty"`
+	Registry         string `json:"registry,omitempty"`
+	Slug             string `json:"slug,omitempty"`
+	RegistryURL      string `json:"registry_url,omitempty"`
+	InstalledVersion string `json:"installed_version,omitempty"`
+	InstalledAt      int64  `json:"installed_at"`
+}
+
 func skillsListCmd(loader *skills.SkillsLoader) {
 	allSkills := loader.ListSkills()

@@ -35,61 +47,32 @@ func skillsListCmd(loader *skills.SkillsLoader) {
 	}
 }

-func skillsInstallCmd(installer *skills.SkillInstaller, repo string) error {
-	fmt.Printf("Installing skill from %s...\n", repo)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	if err := installer.InstallFromGitHub(ctx, repo); err != nil {
-		return fmt.Errorf("failed to install skill: %w", err)
-	}
-
-	fmt.Printf("\u2713 Skill '%s' installed successfully!\n", filepath.Base(repo))
-
-	return nil
-}
-
 // skillsInstallFromRegistry installs a skill from a named registry (e.g. clawhub).
-func skillsInstallFromRegistry(cfg *config.Config, registryName, slug string) error {
+func skillsInstallFromRegistry(cfg *config.Config, registryName, target string) error {
 	err := utils.ValidateSkillIdentifier(registryName)
 	if err != nil {
 		return fmt.Errorf("✗  invalid registry name: %w", err)
 	}

-	err = utils.ValidateSkillIdentifier(slug)
-	if err != nil {
-		return fmt.Errorf("✗  invalid slug: %w", err)
-	}
-
-	fmt.Printf("Installing skill '%s' from %s registry...\n", slug, registryName)
-
-	clawHubConfig := cfg.Tools.Skills.Registries.ClawHub
-	registryMgr := skills.NewRegistryManagerFromConfig(skills.RegistryConfig{
-		MaxConcurrentSearches: cfg.Tools.Skills.MaxConcurrentSearches,
-		ClawHub: skills.ClawHubConfig{
-			Enabled:         clawHubConfig.Enabled,
-			BaseURL:         clawHubConfig.BaseURL,
-			AuthToken:       clawHubConfig.AuthToken(),
-			SearchPath:      clawHubConfig.SearchPath,
-			SkillsPath:      clawHubConfig.SkillsPath,
-			DownloadPath:    clawHubConfig.DownloadPath,
-			Timeout:         clawHubConfig.Timeout,
-			MaxZipSize:      clawHubConfig.MaxZipSize,
-			MaxResponseSize: clawHubConfig.MaxResponseSize,
-		},
-	})
+	registryMgr := skills.NewRegistryManagerFromToolsConfig(cfg.Tools.Skills)

 	registry := registryMgr.GetRegistry(registryName)
 	if registry == nil {
 		return fmt.Errorf("✗  registry '%s' not found or not enabled. check your config.json.", registryName)
 	}

+	dirName, err := registry.ResolveInstallDirName(target)
+	if err != nil {
+		return fmt.Errorf("✗  invalid install target %q: %w", target, err)
+	}
+
+	fmt.Printf("Installing skill '%s' from %s registry...\n", target, registryName)
+
 	workspace := cfg.WorkspacePath()
-	targetDir := filepath.Join(workspace, "skills", slug)
+	targetDir := filepath.Join(workspace, "skills", dirName)

 	if _, err = os.Stat(targetDir); err == nil {
-		return fmt.Errorf("\u2717 skill '%s' already installed at %s", slug, targetDir)
+		return fmt.Errorf("\u2717 skill '%s' already installed at %s", dirName, targetDir)
 	}

 	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
@@ -99,7 +82,7 @@ func skillsInstallFromRegistry(cfg *config.Config, registryName, slug string) er
 		return fmt.Errorf("\u2717 failed to create skills directory: %v", err)
 	}

-	result, err := registry.DownloadAndInstall(ctx, slug, "", targetDir)
+	result, err := registry.DownloadAndInstall(ctx, target, "", targetDir)
 	if err != nil {
 		rmErr := os.RemoveAll(targetDir)
 		if rmErr != nil {
@@ -114,14 +97,34 @@ func skillsInstallFromRegistry(cfg *config.Config, registryName, slug string) er
 			fmt.Printf("\u2717 Failed to remove partial install: %v\n", rmErr)
 		}

-		return fmt.Errorf("\u2717 Skill '%s' is flagged as malicious and cannot be installed.\n", slug)
+		return fmt.Errorf("\u2717 Skill '%s' is flagged as malicious and cannot be installed.\n", target)
 	}

 	if result.IsSuspicious {
-		fmt.Printf("\u26a0\ufe0f  Warning: skill '%s' is flagged as suspicious.\n", slug)
+		fmt.Printf("\u26a0\ufe0f  Warning: skill '%s' is flagged as suspicious.\n", target)
 	}

-	fmt.Printf("\u2713 Skill '%s' v%s installed successfully!\n", slug, result.Version)
+	if !workspaceHasValidSkillDirectory(workspace, dirName) {
+		_ = os.RemoveAll(targetDir)
+		return fmt.Errorf("✗ failed to install skill: registry archive for %q is not a valid skill", target)
+	}
+
+	normalizedSlug, registryURL := skills.BuildInstallMetadataForRegistryInstance(registry, target, result.Version)
+	installedAt := time.Now().UnixMilli()
+	if err := writeInstalledSkillOriginMeta(targetDir, installedSkillOriginMeta{
+		Version:          1,
+		OriginKind:       "third_party",
+		Registry:         registry.Name(),
+		Slug:             normalizedSlug,
+		RegistryURL:      registryURL,
+		InstalledVersion: result.Version,
+		InstalledAt:      installedAt,
+	}); err != nil {
+		_ = os.RemoveAll(targetDir)
+		return fmt.Errorf("✗ failed to persist skill metadata: %w", err)
+	}
+
+	fmt.Printf("\u2713 Skill '%s' v%s installed successfully!\n", dirName, result.Version)
 	if result.Summary != "" {
 		fmt.Printf("  %s\n", result.Summary)
 	}
@@ -129,15 +132,51 @@ func skillsInstallFromRegistry(cfg *config.Config, registryName, slug string) er
 	return nil
 }

-func skillsRemoveCmd(installer *skills.SkillInstaller, skillName string) {
-	fmt.Printf("Removing skill '%s'...\n", skillName)
-
-	if err := installer.Uninstall(skillName); err != nil {
-		fmt.Printf("✗ Failed to remove skill: %v\n", err)
-		os.Exit(1)
+func writeInstalledSkillOriginMeta(targetDir string, meta installedSkillOriginMeta) error {
+	data, err := json.MarshalIndent(meta, "", "  ")
+	if err != nil {
+		return err
 	}
+	return fileutil.WriteFileAtomic(filepath.Join(targetDir, ".skill-origin.json"), data, 0o600)
+}

-	fmt.Printf("✓ Skill '%s' removed successfully!\n", skillName)
+func workspaceHasValidSkillDirectory(workspace, directory string) bool {
+	loader := skills.NewSkillsLoader(workspace, "", "")
+	for _, skill := range loader.ListSkills() {
+		if skill.Source != "workspace" {
+			continue
+		}
+		if filepath.Base(filepath.Dir(skill.Path)) == directory {
+			return true
+		}
+	}
+	return false
+}
+
+// skillsRemoveFromWorkspace deletes the directory <workspace>/skills/<name>.
+//
+// skillName is trimmed of surrounding whitespace and slashes. If it still
+// contains "/", it is passed to skills.GitHubInstallDirNameFromToolsConfig
+// together with toolsConfig and the returned directory name is used instead.
+//
+// An error is returned when the name is empty, cannot be resolved, is not a
+// single path element, does not exist under the skills directory, or when the
+// removal itself fails.
+func skillsRemoveFromWorkspace(workspace string, toolsConfig config.SkillsToolsConfig, skillName string) error {
+	name := strings.TrimSpace(skillName)
+	name = strings.Trim(name, "/")
+	if name == "" {
+		return fmt.Errorf("skill name is required")
+	}
+	if strings.Contains(name, "/") {
+		dirName, err := skills.GitHubInstallDirNameFromToolsConfig(toolsConfig, name)
+		if err != nil || dirName == "" {
+			return fmt.Errorf("invalid skill name %q", skillName)
+		}
+		name = dirName
+	}
+	// The final name must be exactly one path element. Otherwise filepath.Join
+	// below could clean it to a directory outside skills/ (for example `..\x`
+	// on Windows, where "\" is also a path separator) and RemoveAll would
+	// delete it.
+	if name == "." || name == ".." || filepath.Base(name) != name {
+		return fmt.Errorf("invalid skill name %q", skillName)
+	}
+	skillDir := filepath.Join(workspace, "skills", name)
+	if _, err := os.Stat(skillDir); os.IsNotExist(err) {
+		return fmt.Errorf("skill '%s' not found", name)
+	}
+	if err := os.RemoveAll(skillDir); err != nil {
+		return fmt.Errorf("failed to remove skill '%s': %w", name, err)
+	}
+	return nil
 }

 func skillsInstallBuiltinCmd(workspace string) {
@@ -237,21 +276,7 @@ func skillsSearchCmd(query string) {
 		return
 	}

-	clawHubConfig := cfg.Tools.Skills.Registries.ClawHub
-	registryMgr := skills.NewRegistryManagerFromConfig(skills.RegistryConfig{
-		MaxConcurrentSearches: cfg.Tools.Skills.MaxConcurrentSearches,
-		ClawHub: skills.ClawHubConfig{
-			Enabled:         clawHubConfig.Enabled,
-			BaseURL:         clawHubConfig.BaseURL,
-			AuthToken:       clawHubConfig.AuthToken(),
-			SearchPath:      clawHubConfig.SearchPath,
-			SkillsPath:      clawHubConfig.SkillsPath,
-			DownloadPath:    clawHubConfig.DownloadPath,
-			Timeout:         clawHubConfig.Timeout,
-			MaxZipSize:      clawHubConfig.MaxZipSize,
-			MaxResponseSize: clawHubConfig.MaxResponseSize,
-		},
-	})
+	registryMgr := skills.NewRegistryManagerFromToolsConfig(cfg.Tools.Skills)

 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
@@ -0,0 +1,191 @@
+package skills
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/sipeed/picoclaw/pkg/config"
+)
+
+// TestSkillsInstallFromRegistryWritesOriginMetadata installs a skill from a
+// stub GitHub server and checks the fields persisted to .skill-origin.json in
+// the installed skill directory.
+func TestSkillsInstallFromRegistryWritesOriginMetadata(t *testing.T) {
+	workspace := t.TempDir()
+	cfg := config.DefaultConfig()
+	cfg.Agents.Defaults.Workspace = workspace
+
+	// The handler runs on the HTTP server's goroutines, so it reports failures
+	// with assert (t.Errorf) rather than require (t.FailNow), which is only
+	// valid on the goroutine running the test function.
+	var server *httptest.Server
+	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/v3/repos/foo/bar":
+			assert.NoError(t, json.NewEncoder(w).Encode(map[string]any{"default_branch": "master"}))
+		case "/api/v3/repos/foo/bar/contents/.agents/skills/pr-review":
+			assert.Equal(t, "ref=master", r.URL.RawQuery)
+			assert.NoError(t, json.NewEncoder(w).Encode([]map[string]any{{
+				"type":         "file",
+				"name":         "SKILL.md",
+				"download_url": server.URL + "/raw/foo/bar/master/.agents/skills/pr-review/SKILL.md",
+			}}))
+		case "/raw/foo/bar/master/.agents/skills/pr-review/SKILL.md":
+			_, _ = w.Write([]byte("---\nname: pr-review\ndescription: PR review skill\n---\n# PR Review\n"))
+		default:
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	// Point the built-in "github" registry at the stub server.
+	githubRegistry, ok := cfg.Tools.Skills.Registries.Get("github")
+	require.True(t, ok)
+	githubRegistry.BaseURL = server.URL
+	cfg.Tools.Skills.Registries.Set("github", githubRegistry)
+
+	target := server.URL + "/foo/bar/tree/master/.agents/skills/pr-review"
+	require.NoError(t, skillsInstallFromRegistry(cfg, "github", target))
+
+	metaPath := filepath.Join(workspace, "skills", "pr-review", ".skill-origin.json")
+	data, err := os.ReadFile(metaPath)
+	require.NoError(t, err)
+
+	var meta installedSkillOriginMeta
+	require.NoError(t, json.Unmarshal(data, &meta))
+	assert.Equal(t, "third_party", meta.OriginKind)
+	assert.Equal(t, "github", meta.Registry)
+	assert.Equal(t, "foo/bar/.agents/skills/pr-review", meta.Slug)
+	assert.Equal(t, server.URL+"/foo/bar/tree/master/.agents/skills/pr-review", meta.RegistryURL)
+	assert.Equal(t, "master", meta.InstalledVersion)
+	assert.NotZero(t, meta.InstalledAt)
+}
+
+// TestSkillsInstallFromRegistryRejectsInvalidSkillArchive serves a SKILL.md
+// whose front-matter name is "bad_skill" and expects the install to fail with
+// "is not a valid skill" and to leave no skills/pr-review directory behind.
+func TestSkillsInstallFromRegistryRejectsInvalidSkillArchive(t *testing.T) {
+	workspace := t.TempDir()
+	cfg := config.DefaultConfig()
+	cfg.Agents.Defaults.Workspace = workspace
+
+	// The handler runs on the HTTP server's goroutines, so it reports failures
+	// with assert (t.Errorf) rather than require (t.FailNow), which is only
+	// valid on the goroutine running the test function.
+	var server *httptest.Server
+	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/v3/repos/foo/bar":
+			assert.NoError(t, json.NewEncoder(w).Encode(map[string]any{"default_branch": "master"}))
+		case "/api/v3/repos/foo/bar/contents/.agents/skills/pr-review":
+			assert.NoError(t, json.NewEncoder(w).Encode([]map[string]any{{
+				"type":         "file",
+				"name":         "SKILL.md",
+				"download_url": server.URL + "/raw/foo/bar/master/.agents/skills/pr-review/SKILL.md",
+			}}))
+		case "/raw/foo/bar/master/.agents/skills/pr-review/SKILL.md":
+			_, _ = w.Write([]byte("---\nname: bad_skill\ndescription: Invalid skill name\n---\n# Invalid\n"))
+		default:
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	// Point the built-in "github" registry at the stub server.
+	githubRegistry, ok := cfg.Tools.Skills.Registries.Get("github")
+	require.True(t, ok)
+	githubRegistry.BaseURL = server.URL
+	cfg.Tools.Skills.Registries.Set("github", githubRegistry)
+
+	target := server.URL + "/foo/bar/tree/master/.agents/skills/pr-review"
+	err := skillsInstallFromRegistry(cfg, "github", target)
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "is not a valid skill")
+	_, statErr := os.Stat(filepath.Join(workspace, "skills", "pr-review"))
+	assert.True(t, os.IsNotExist(statErr))
+}
+
+func TestSkillsRemoveFromWorkspaceRejectsDotTarget(t *testing.T) {
+	workspace := t.TempDir()
+	skillsDir := filepath.Join(workspace, "skills")
+	require.NoError(t, os.MkdirAll(skillsDir, 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(skillsDir, "keep.txt"), []byte("keep"), 0o644))
+
+	err := skillsRemoveFromWorkspace(workspace, config.DefaultConfig().Tools.Skills, ".")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "invalid skill name")
+
+	_, statErr := os.Stat(skillsDir)
+	assert.NoError(t, statErr)
+	_, fileErr := os.Stat(filepath.Join(skillsDir, "keep.txt"))
+	assert.NoError(t, fileErr)
+}
+
+func TestSkillsRemoveFromWorkspaceUsesLastPathSegment(t *testing.T) {
+	workspace := t.TempDir()
+	targetDir := filepath.Join(workspace, "skills", "pr-review")
+	require.NoError(t, os.MkdirAll(targetDir, 0o755))
+
+	err := skillsRemoveFromWorkspace(
+		workspace,
+		config.DefaultConfig().Tools.Skills,
+		"https://github.com/foo/bar/tree/main/.agents/skills/pr-review",
+	)
+	require.NoError(t, err)
+
+	_, statErr := os.Stat(targetDir)
+	assert.True(t, os.IsNotExist(statErr))
+}
+
+func TestSkillsRemoveFromWorkspaceSupportsRepoRootGitHubBlobURL(t *testing.T) {
+	workspace := t.TempDir()
+	targetDir := filepath.Join(workspace, "skills", "bar")
+	require.NoError(t, os.MkdirAll(targetDir, 0o755))
+
+	err := skillsRemoveFromWorkspace(
+		workspace,
+		config.DefaultConfig().Tools.Skills,
+		"https://github.com/foo/bar/blob/feature/skills-registry/SKILL.md",
+	)
+	require.NoError(t, err)
+
+	_, statErr := os.Stat(targetDir)
+	assert.True(t, os.IsNotExist(statErr))
+}
+
+func TestSkillsRemoveFromWorkspaceSupportsGitHubEnterpriseURL(t *testing.T) {
+	workspace := t.TempDir()
+	targetDir := filepath.Join(workspace, "skills", "pr-review")
+	require.NoError(t, os.MkdirAll(targetDir, 0o755))
+
+	cfg := config.DefaultConfig()
+	githubRegistry, ok := cfg.Tools.Skills.Registries.Get("github")
+	require.True(t, ok)
+	githubRegistry.BaseURL = "https://ghe.example.com/git"
+	cfg.Tools.Skills.Registries.Set("github", githubRegistry)
+
+	err := skillsRemoveFromWorkspace(
+		workspace,
+		cfg.Tools.Skills,
+		"https://ghe.example.com/git/foo/bar/tree/main/.agents/skills/pr-review",
+	)
+	require.NoError(t, err)
+
+	_, statErr := os.Stat(targetDir)
+	assert.True(t, os.IsNotExist(statErr))
+}
+
+func TestSkillsRemoveFromWorkspaceDoesNotRequireEnabledGitHubRegistry(t *testing.T) {
+	workspace := t.TempDir()
+	targetDir := filepath.Join(workspace, "skills", "pr-review")
+	require.NoError(t, os.MkdirAll(targetDir, 0o755))
+
+	cfg := config.DefaultConfig()
+	githubRegistry, ok := cfg.Tools.Skills.Registries.Get("github")
+	require.True(t, ok)
+	githubRegistry.Enabled = false
+	cfg.Tools.Skills.Registries.Set("github", githubRegistry)
+
+	err := skillsRemoveFromWorkspace(
+		workspace,
+		cfg.Tools.Skills,
+		"https://github.com/foo/bar/tree/main/.agents/skills/pr-review",
+	)
+	require.NoError(t, err)
+
+	_, statErr := os.Stat(targetDir)
+	assert.True(t, os.IsNotExist(statErr))
+}
@@ -6,15 +6,14 @@ import (
 	"github.com/spf13/cobra"

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
-	"github.com/sipeed/picoclaw/pkg/skills"
 )

-func newInstallCommand(installerFn func() (*skills.SkillInstaller, error)) *cobra.Command {
+func newInstallCommand() *cobra.Command {
 	var registry string

 	cmd := &cobra.Command{
 		Use:   "install",
-		Short: "Install skill from GitHub",
+		Short: "Install skill from GitHub or a registry",
 		Example: `
 picoclaw skills install sipeed/picoclaw-skills/weather
 picoclaw skills install --registry clawhub github
@@ -34,21 +33,15 @@ picoclaw skills install --registry clawhub github
 			return nil
 		},
 		RunE: func(_ *cobra.Command, args []string) error {
-			installer, err := installerFn()
+			cfg, err := internal.LoadConfig()
 			if err != nil {
 				return err
 			}
-
 			if registry != "" {
-				cfg, err := internal.LoadConfig()
-				if err != nil {
-					return err
-				}
-
 				return skillsInstallFromRegistry(cfg, registry, args[0])
 			}

-			return skillsInstallCmd(installer, args[0])
+			return skillsInstallFromRegistry(cfg, "github", args[0])
 		},
 	}

@@ -8,12 +8,12 @@ import (
 )

 func TestNewInstallSubcommand(t *testing.T) {
-	cmd := newInstallCommand(nil)
+	cmd := newInstallCommand()

 	require.NotNil(t, cmd)

 	assert.Equal(t, "install", cmd.Use)
-	assert.Equal(t, "Install skill from GitHub", cmd.Short)
+	assert.Equal(t, "Install skill from GitHub or a registry", cmd.Short)

 	assert.Nil(t, cmd.Run)
 	assert.NotNil(t, cmd.RunE)
@@ -79,7 +79,7 @@ func TestInstallCommandArgs(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			cmd := newInstallCommand(nil)
+			cmd := newInstallCommand()

 			if tt.registry != "" {
 				require.NoError(t, cmd.Flags().Set("registry", tt.registry))
@@ -3,10 +3,10 @@ package skills
 import (
 	"github.com/spf13/cobra"

-	"github.com/sipeed/picoclaw/pkg/skills"
+	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
 )

-func newRemoveCommand(installerFn func() (*skills.SkillInstaller, error)) *cobra.Command {
+func newRemoveCommand() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:     "remove",
 		Aliases: []string{"rm", "uninstall"},
@@ -14,12 +14,11 @@ func newRemoveCommand(installerFn func() (*skills.SkillInstaller, error)) *cobra
 		Args:    cobra.ExactArgs(1),
 		Example: `picoclaw skills remove weather`,
 		RunE: func(_ *cobra.Command, args []string) error {
-			installer, err := installerFn()
+			cfg, err := internal.LoadConfig()
 			if err != nil {
 				return err
 			}
-			skillsRemoveCmd(installer, args[0])
-			return nil
+			return skillsRemoveFromWorkspace(cfg.WorkspacePath(), cfg.Tools.Skills, args[0])
 		},
 	}

@@ -8,7 +8,7 @@ import (
 )

 func TestNewRemoveSubcommand(t *testing.T) {
-	cmd := newRemoveCommand(nil)
+	cmd := newRemoveCommand()

 	require.NotNil(t, cmd)

@@ -3,8 +3,10 @@ package status
 import (
 	"fmt"
 	"os"
+	"strings"

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
+	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/cliui"
 	"github.com/sipeed/picoclaw/pkg/auth"
 	"github.com/sipeed/picoclaw/pkg/config"
 )
@@ -17,43 +19,125 @@ func statusCmd() {
 	}

 	configPath := internal.GetConfigPath()
-
-	fmt.Printf("%s picoclaw Status\n", internal.Logo)
-	fmt.Printf("Version: %s\n", config.FormatVersion())
 	build, _ := config.FormatBuildInfo()
-	if build != "" {
-		fmt.Printf("Build: %s\n", build)
-	}
-	fmt.Println()

-	if _, err := os.Stat(configPath); err == nil {
-		fmt.Println("Config:", configPath, "✓")
-	} else {
-		fmt.Println("Config:", configPath, "✗")
-	}
+	_, configStatErr := os.Stat(configPath)
+	configOK := configStatErr == nil

 	workspace := cfg.WorkspacePath()
-	if _, err := os.Stat(workspace); err == nil {
-		fmt.Println("Workspace:", workspace, "✓")
-	} else {
-		fmt.Println("Workspace:", workspace, "✗")
+	_, wsErr := os.Stat(workspace)
+	wsOK := wsErr == nil
+
+	report := cliui.StatusReport{
+		Logo:          internal.Logo,
+		Version:       config.FormatVersion(),
+		Build:         build,
+		ConfigPath:    configPath,
+		ConfigOK:      configOK,
+		WorkspacePath: workspace,
+		WorkspaceOK:   wsOK,
+		Model:         cfg.Agents.Defaults.GetModelName(),
 	}

-	if _, err := os.Stat(configPath); err == nil {
-		fmt.Printf("Model: %s\n", cfg.Agents.Defaults.GetModelName())
+	if configOK {
+		// PicoClaw moved to a model-centric configuration (model_list). Status should
+		// not depend on a legacy cfg.Providers field (which may not exist under some
+		// build tags). We infer provider availability from model_list entries.
+		hasProtocolKey := func(protocol string) bool {
+			prefix := protocol + "/"
+			for _, m := range cfg.ModelList {
+				if m == nil {
+					continue
+				}
+				if strings.HasPrefix(m.Model, prefix) && m.APIKey() != "" {
+					return true
+				}
+			}
+			return false
+		}
+		findLocalModelBase := func(modelName string) (string, bool) {
+			for _, m := range cfg.ModelList {
+				if m == nil {
+					continue
+				}
+				if m.ModelName == modelName && m.APIBase != "" {
+					return m.APIBase, true
+				}
+			}
+			return "", false
+		}
+		findProtocolBase := func(protocol string) (string, bool) {
+			prefix := protocol + "/"
+			for _, m := range cfg.ModelList {
+				if m == nil {
+					continue
+				}
+				if strings.HasPrefix(m.Model, prefix) && m.APIBase != "" {
+					return m.APIBase, true
+				}
+			}
+			return "", false
+		}
+
+		hasOpenRouter := hasProtocolKey("openrouter")
+		hasAnthropic := hasProtocolKey("anthropic")
+		hasOpenAI := hasProtocolKey("openai")
+		hasGemini := hasProtocolKey("gemini")
+		hasZhipu := hasProtocolKey("zhipu")
+		hasQwen := hasProtocolKey("qwen")
+		hasGroq := hasProtocolKey("groq")
+		hasMoonshot := hasProtocolKey("moonshot")
+		hasDeepSeek := hasProtocolKey("deepseek")
+		hasVolcEngine := hasProtocolKey("volcengine")
+		hasNvidia := hasProtocolKey("nvidia")
+
+		// Local endpoints: allow both the special reserved name and protocol-based entries.
+		vllmBase, hasVLLM := findLocalModelBase("local-model")
+		if !hasVLLM {
+			vllmBase, hasVLLM = findProtocolBase("vllm")
+		}
+		ollamaBase, hasOllama := findProtocolBase("ollama")
+
+		val := func(enabled bool, extra ...string) string {
+			if enabled {
+				if len(extra) > 0 && extra[0] != "" {
+					return "✓ " + extra[0]
+				}
+				return "✓"
+			}
+			return "not set"
+		}
+
+		report.Providers = []cliui.ProviderRow{
+			{Name: "OpenRouter API", Val: val(hasOpenRouter)},
+			{Name: "Anthropic API", Val: val(hasAnthropic)},
+			{Name: "OpenAI API", Val: val(hasOpenAI)},
+			{Name: "Gemini API", Val: val(hasGemini)},
+			{Name: "Zhipu API", Val: val(hasZhipu)},
+			{Name: "Qwen API", Val: val(hasQwen)},
+			{Name: "Groq API", Val: val(hasGroq)},
+			{Name: "Moonshot API", Val: val(hasMoonshot)},
+			{Name: "DeepSeek API", Val: val(hasDeepSeek)},
+			{Name: "VolcEngine API", Val: val(hasVolcEngine)},
+			{Name: "Nvidia API", Val: val(hasNvidia)},
+			{Name: "vLLM / local", Val: val(hasVLLM, vllmBase)},
+			{Name: "Ollama", Val: val(hasOllama, ollamaBase)},
+		}

 		store, _ := auth.LoadStore()
 		if store != nil && len(store.Credentials) > 0 {
-			fmt.Println("\nOAuth/Token Auth:")
 			for provider, cred := range store.Credentials {
-				status := "authenticated"
+				st := "authenticated"
 				if cred.IsExpired() {
-					status = "expired"
+					st = "expired"
 				} else if cred.NeedsRefresh() {
-					status = "needs refresh"
+					st = "needs refresh"
 				}
-				fmt.Printf("  %s (%s): %s\n", provider, cred.AuthMethod, status)
+				report.OAuthLines = append(report.OAuthLines,
+					fmt.Sprintf("%s (%s): %s", provider, cred.AuthMethod, st))
 			}
 		}
 	}
+
+	cliui.PrintStatus(report)
 }
@@ -1,11 +1,10 @@
 package version

 import (
-	"fmt"
-
 	"github.com/spf13/cobra"

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
+	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/cliui"
 	"github.com/sipeed/picoclaw/pkg/config"
 )

@@ -23,12 +22,6 @@ func NewVersionCommand() *cobra.Command {
 }

 func printVersion() {
-	fmt.Printf("%s picoclaw %s\n", internal.Logo, config.FormatVersion())
 	build, goVer := config.FormatBuildInfo()
-	if build != "" {
-		fmt.Printf("  Build: %s\n", build)
-	}
-	if goVer != "" {
-		fmt.Printf("  Go: %s\n", goVer)
-	}
+	cliui.PrintVersion(internal.Logo, "picoclaw "+config.FormatVersion(), build, goVer)
 }
@@ -9,12 +9,14 @@ package main
 import (
 	"fmt"
 	"os"
+	"time"

 	"github.com/spf13/cobra"

 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal"
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/agent"
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/auth"
+	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/cliui"
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/cron"
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/gateway"
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/migrate"
@@ -24,17 +26,60 @@ import (
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/status"
 	"github.com/sipeed/picoclaw/cmd/picoclaw/internal/version"
 	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/updater"
 )

+var rootNoColor bool
+
+// syncCliUIColor re-initializes cliui once the root command's persistent
+// flags can be read. Color is turned off when --no-color is set, when
+// NO_COLOR is non-empty, or when TERM is "dumb". The flag lookup error is
+// discarded; only the returned boolean is used.
+func syncCliUIColor(root *cobra.Command) {
+	colorOff, _ := root.PersistentFlags().GetBool("no-color")
+	if !colorOff {
+		// The flag did not ask for plain output; fall back to the environment.
+		colorOff = os.Getenv("NO_COLOR") != "" || os.Getenv("TERM") == "dumb"
+	}
+	cliui.Init(colorOff)
+}
+
+// earlyColorDisabled matches lipgloss/banner behavior from env and argv before Cobra parses flags.
+func earlyColorDisabled() bool {
+	if os.Getenv("NO_COLOR") != "" || os.Getenv("TERM") == "dumb" {
+		return true
+	}
+	for i := 1; i < len(os.Args); i++ {
+		arg := os.Args[i]
+		if arg == "--no-color" || arg == "--no-color=true" || arg == "--no-color=1" {
+			return true
+		}
+	}
+	return false
+}
+
 func NewPicoclawCommand() *cobra.Command {
-	short := fmt.Sprintf("%s picoclaw - Personal AI Assistant v%s\n\n", internal.Logo, config.GetVersion())
+	short := fmt.Sprintf("%s PicoClaw — personal AI assistant", internal.Logo)
+	long := fmt.Sprintf(`%s PicoClaw is a lightweight personal AI assistant.
+
+Version: %s`, internal.Logo, config.FormatVersion())

 	cmd := &cobra.Command{
-		Use:     "picoclaw",
-		Short:   short,
-		Example: "picoclaw version",
+		Use:   "picoclaw",
+		Short: short,
+		Long:  long,
+		Example: `picoclaw version
+picoclaw onboard
+picoclaw --no-color status`,
+		SilenceErrors: true,
+		// Avoid plain UsageString() on stderr/stdout when a command fails; cliui
+		// renders matching panels on stderr instead.
+		SilenceUsage: true,
+		PersistentPreRun: func(c *cobra.Command, _ []string) {
+			syncCliUIColor(c.Root())
+		},
 	}

+	cmd.PersistentFlags().BoolVar(&rootNoColor, "no-color", false,
+		"Disable colors (boxed layout unchanged)")
+
+	cmd.SetHelpFunc(func(c *cobra.Command, _ []string) {
+		syncCliUIColor(c.Root())
+		fmt.Fprint(c.OutOrStdout(), cliui.RenderCommandHelp(c))
+	})
+
 	cmd.AddCommand(
 		onboard.NewOnboardCommand(),
 		agent.NewAgentCommand(),
@@ -45,6 +90,7 @@ func NewPicoclawCommand() *cobra.Command {
 		migrate.NewMigrateCommand(),
 		skills.NewSkillsCommand(),
 		model.NewModelCommand(),
+		updater.NewUpdateCommand("picoclaw"),
 		version.NewVersionCommand(),
 	)

@@ -62,12 +108,44 @@ const (
 		colorBlue + "██║     ██║╚██████╗╚██████╔╝" + colorRed + "╚██████╗███████╗██║  ██║╚███╔███╔╝\n" +
 		colorBlue + "╚═╝     ╚═╝ ╚═════╝ ╚═════╝ " + colorRed + " ╚═════╝╚══════╝╚═╝  ╚═╝ ╚══╝╚══╝\n " +
 		"\033[0m\r\n"
+	plainBanner = "\r\n" +
+		"██████╗ ██╗ ██████╗ ██████╗  ██████╗██╗      █████╗ ██╗    ██╗\n" +
+		"██╔══██╗██║██╔════╝██╔═══██╗██╔════╝██║     ██╔══██╗██║    ██║\n" +
+		"██████╔╝██║██║     ██║   ██║██║     ██║     ███████║██║ █╗ ██║\n" +
+		"██╔═══╝ ██║██║     ██║   ██║██║     ██║     ██╔══██║██║███╗██║\n" +
+		"██║     ██║╚██████╗╚██████╔╝╚██████╗███████╗██║  ██║╚███╔███╔╝\n" +
+		"╚═╝     ╚═╝ ╚═════╝ ╚═════╝  ╚═════╝╚══════╝╚═╝  ╚═╝ ╚══╝╚══╝\n " +
+		"\r\n"
 )

 func main() {
-	fmt.Printf("%s", banner)
+	cliui.Init(earlyColorDisabled())
+
+	if earlyColorDisabled() {
+		fmt.Print(plainBanner)
+	} else {
+		fmt.Printf("%s", banner)
+	}
+
+	tzEnv := os.Getenv("TZ")
+	if tzEnv != "" {
+		fmt.Println("TZ environment:", tzEnv)
+		zoneinfoEnv := os.Getenv("ZONEINFO")
+		fmt.Println("ZONEINFO environment:", zoneinfoEnv)
+		loc, err := time.LoadLocation(tzEnv)
+		if err != nil {
+			fmt.Println("Error loading time zone:", err)
+		} else {
+			fmt.Println("Time zone loaded successfully:", loc)
+			time.Local = loc //nolint:gosmopolitan // We intentionally set local timezone from TZ env
+		}
+	}
+
 	cmd := NewPicoclawCommand()
-	if err := cmd.Execute(); err != nil {
+	last, err := cmd.ExecuteC()
+	if err != nil {
+		syncCliUIColor(cmd)
+		fmt.Fprint(os.Stderr, cliui.FormatCLIError(err.Error(), last))
 		os.Exit(1)
 	}
 }
@@ -3,6 +3,7 @@ package main
 import (
 	"fmt"
 	"slices"
+	"strings"
 	"testing"

 	"github.com/stretchr/testify/assert"
@@ -17,20 +18,22 @@ func TestNewPicoclawCommand(t *testing.T) {

 	require.NotNil(t, cmd)

-	short := fmt.Sprintf("%s picoclaw - Personal AI Assistant v%s\n\n", internal.Logo, config.GetVersion())
+	short := fmt.Sprintf("%s PicoClaw — personal AI assistant", internal.Logo)
+	longHas := strings.Contains(cmd.Long, config.FormatVersion())

 	assert.Equal(t, "picoclaw", cmd.Use)
 	assert.Equal(t, short, cmd.Short)
+	assert.True(t, longHas)

 	assert.True(t, cmd.HasSubCommands())
 	assert.True(t, cmd.HasAvailableSubCommands())

-	assert.False(t, cmd.HasFlags())
+	assert.True(t, cmd.PersistentFlags().Lookup("no-color") != nil)

 	assert.Nil(t, cmd.Run)
 	assert.Nil(t, cmd.RunE)

-	assert.Nil(t, cmd.PersistentPreRun)
+	assert.NotNil(t, cmd.PersistentPreRun)
 	assert.Nil(t, cmd.PersistentPostRun)

 	allowedCommands := []string{
@@ -43,6 +46,7 @@ func TestNewPicoclawCommand(t *testing.T) {
 		"onboard",
 		"skills",
 		"status",
+		"update",
 		"version",
 	}

@@ -10,6 +10,7 @@
      "max_tool_iterations": 20,
      "summarize_message_threshold": 20,
      "summarize_token_percent": 75,
+      "split_on_marker": false,
      "tool_feedback": {
        "enabled": false,
        "max_args_length": 300
@@ -47,6 +48,15 @@
      "model": "deepseek/deepseek-chat",
      "api_key": "sk-your-deepseek-key"
    },
+    {
+      "model_name": "venice-uncensored",
+      "model": "venice/venice-uncensored",
+      "api_key": "your-venice-api-key"
+    },
+    {
+      "model_name": "lmstudio-local",
+      "model": "lmstudio/openai/gpt-oss-20b"
+    },
    {
      "model_name": "longcat",
      "model": "longcat/LongCat-Flash-Thinking",
@@ -129,6 +139,10 @@
      "encrypt_key": "",
      "verification_token": "",
      "allow_from": [],
+      "placeholder": {
+        "enabled": true,
+        "text": ["Thinking...", "Processing...", "Typing..."]
+      },
      "reasoning_channel_id": "",
      "random_reaction_emoji": [],
      "is_lark": false
@@ -160,7 +174,7 @@
      },
      "placeholder": {
        "enabled": true,
-        "text": "Thinking... 💭"
+        "text": ["Thinking...", "Processing...", "Typing..."]
      },
      "reasoning_channel_id": "",
      "crypto_database_path": "",
@@ -223,13 +237,8 @@
      "nickserv_password": "",
      "sasl_user": "",
      "sasl_password": "",
-      "channels": [
-        "#mychannel"
-      ],
-      "request_caps": [
-        "server-time",
-        "message-tags"
-      ],
+      "channels": ["#mychannel"],
+      "request_caps": ["server-time", "message-tags"],
      "allow_from": [],
      "group_trigger": {
        "mention_only": true
@@ -251,9 +260,7 @@
      "brave": {
        "enabled": false,
        "api_key": "YOUR_BRAVE_API_KEY",
-        "api_keys": [
-          "YOUR_BRAVE_API_KEY"
-        ],
+        "api_keys": ["YOUR_BRAVE_API_KEY"],
        "max_results": 5
      },
      "tavily": {
@@ -262,16 +269,19 @@
        "base_url": "",
        "max_results": 0
      },
-      "duckduckgo": {
+      "provider": "auto",
+      "sogou": {
        "enabled": true,
        "max_results": 5
      },
+      "duckduckgo": {
+        "enabled": false,
+        "max_results": 5
+      },
      "perplexity": {
        "enabled": false,
        "api_key": "pplx-xxx",
-        "api_keys": [
-          "pplx-xxx"
-        ],
+        "api_keys": ["pplx-xxx"],
        "max_results": 5
      },
      "searxng": {
@@ -320,19 +330,12 @@
        "filesystem": {
          "enabled": false,
          "command": "npx",
-          "args": [
-            "-y",
-            "@modelcontextprotocol/server-filesystem",
-            "/tmp"
-          ]
+          "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]
        },
        "github": {
          "enabled": false,
          "command": "npx",
-          "args": [
-            "-y",
-            "@modelcontextprotocol/server-github"
-          ],
+          "args": ["-y", "@modelcontextprotocol/server-github"],
          "env": {
            "GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
          }
@@ -340,10 +343,7 @@
        "brave-search": {
          "enabled": false,
          "command": "npx",
-          "args": [
-            "-y",
-            "@modelcontextprotocol/server-brave-search"
-          ],
+          "args": ["-y", "@modelcontextprotocol/server-brave-search"],
          "env": {
            "BRAVE_API_KEY": "YOUR_BRAVE_API_KEY"
          }
@@ -360,10 +360,7 @@
        "slack": {
          "enabled": false,
          "command": "npx",
-          "args": [
-            "-y",
-            "@modelcontextprotocol/server-slack"
-          ],
+          "args": ["-y", "@modelcontextprotocol/server-slack"],
          "env": {
            "SLACK_BOT_TOKEN": "YOUR_SLACK_BOT_TOKEN",
            "SLACK_TEAM_ID": "YOUR_SLACK_TEAM_ID"
@@ -390,9 +387,16 @@
          "timeout": 0,
          "max_zip_size": 0,
          "max_response_size": 0
+        },
+        "github": {
+          "enabled": true,
+          "base_url": "https://github.com",
+          "auth_token": "",
+          "proxy": "http://127.0.0.1:7891"
        }
      },
      "github": {
+        "base_url": "https://github.com",
        "proxy": "http://127.0.0.1:7891",
        "token": ""
      },
@@ -429,7 +433,11 @@
      "enabled": true
    },
    "read_file": {
-      "enabled": true
+      "enabled": true,
+      "mode": "bytes"
+    },
+    "send_tts": {
+      "enabled": false
    },
    "spawn": {
      "enabled": true
@@ -469,7 +477,7 @@
  },
  "gateway": {
    "_comment": "Default log level is set to 'fatal'. Other available options are 'debug', 'info', 'warn' and 'error'.",
-    "host": "127.0.0.1",
+    "host": "localhost",
    "port": 18790,
    "hot_reload": false,
    "log_level": "fatal"
@@ -26,18 +26,9 @@ RUN apk add --no-cache ca-certificates tzdata curl
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD wget -q --spider http://localhost:18790/health || exit 1

-# Copy binary
+# Copy binary and first-run entrypoint (same as release image).
 COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw
+COPY docker/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh

-# Create non-root user and group
-RUN addgroup -g 1000 picoclaw && \
-    adduser -D -u 1000 -G picoclaw picoclaw
-
-# Switch to non-root user
-USER picoclaw
-
-# Run onboard to create initial directories and config
-RUN /usr/local/bin/picoclaw onboard
-
-ENTRYPOINT ["picoclaw"]
-CMD ["gateway"]
+ENTRYPOINT ["/entrypoint.sh"]
@@ -9,4 +9,4 @@ COPY $TARGETPLATFORM/picoclaw-launcher /usr/local/bin/picoclaw-launcher
 COPY $TARGETPLATFORM/picoclaw-launcher-tui /usr/local/bin/picoclaw-launcher-tui

 ENTRYPOINT ["picoclaw-launcher"]
-CMD ["-public", "-no-browser"]
+CMD ["-console", "-public", "-no-browser"]
@@ -48,20 +48,13 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
 # Copy binary
 COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw

-# Reuse existing node user (UID/GID 1000) — rename to picoclaw
-RUN deluser node 2>/dev/null; delgroup node 2>/dev/null; \
-    addgroup -g 1000 picoclaw 2>/dev/null; \
-    adduser -D -u 1000 -G picoclaw -h /home/picoclaw picoclaw 2>/dev/null || true
-
-USER picoclaw
-
 # Run onboard to create initial directories and config
 RUN /usr/local/bin/picoclaw onboard

 # Copy default workspace
-COPY --chown=picoclaw:picoclaw workspace/ /home/picoclaw/.picoclaw/workspace/
+COPY workspace/ /root/.picoclaw/workspace/

-VOLUME /home/picoclaw/.picoclaw/workspace
+VOLUME /root/.picoclaw/workspace

 ENTRYPOINT ["picoclaw"]
 CMD ["gateway"]
@@ -24,7 +24,7 @@ services:
  picoclaw-gateway:
    image: docker.io/sipeed/picoclaw:latest
    container_name: picoclaw-gateway
-    restart: on-failure
+    restart: unless-stopped
    profiles:
      - gateway
    # Uncomment to access host network; leave commented unless needed.
@@ -40,13 +40,16 @@ services:
  picoclaw-launcher:
    image: docker.io/sipeed/picoclaw:launcher
    container_name: picoclaw-launcher
-    restart: on-failure
+    restart: unless-stopped
    profiles:
      - launcher
    environment:
      - PICOCLAW_GATEWAY_HOST=0.0.0.0
+      # Set a fixed dashboard token instead of a random one each restart.
+      # If not set, a random token is generated and printed to the console on startup.
+      #- PICOCLAW_LAUNCHER_TOKEN=your-secret-token-here
    ports:
-      - "127.0.0.1:18800:18800"
-      - "127.0.0.1:18790:18790"
+      - "18800:18800"
+      - "18790:18790"
    volumes:
      - ./data:/root/.picoclaw
@@ -0,0 +1,132 @@
+# PicoClaw Documentation
+
+PicoClaw documentation is organized by document type first and language second.
+
+This file describes the recommended documentation layout, how translated files should be named, and what `make lint-docs` currently checks locally.
+
+These conventions are intended as contributor guidance for new or moved docs. Existing docs may still have historical exceptions, and `make lint-docs` only checks a common subset of the patterns described here.
+
+## Reader Navigation
+
+If you are browsing docs rather than reorganizing them, start with these directory indexes:
+
+- [Guides](guides/README.md): setup, configuration, provider, and workflow guides.
+- [Reference](reference/README.md): precise configuration and behavior reference.
+- [Operations](operations/README.md): debugging and troubleshooting material.
+- [Security](security/README.md): security-focused guides and controls.
+- [Architecture](architecture/README.md): implementation notes and internal design docs.
+- [Migration](migration/README.md): upgrade and migration notes.
+
+For channel-specific setup, start with [Chat Apps Configuration](guides/chat-apps.md) and then drill into `docs/channels/<name>/README.md` as needed.
+
+## Principles
+
+- Choose the document type directory first. Do not create language buckets such as `docs/zh/` or `docs/fr/`.
+- Keep each translated document next to its English source document.
+- Use English as the base filename with no locale suffix.
+- Use lowercase locale suffixes for translations, for example `configuration.zh.md` or `README.pt-br.md`.
+- Keep module-specific docs next to the code they describe instead of moving them into `docs/`.
+
+## Recommended Directories
+
+- `README.md`: English project entry document at the repository root.
+- `docs/project/`: translated project entry documents such as `README.zh.md` and `CONTRIBUTING.zh.md`.
+- `docs/guides/`: setup and usage guides.
+- `docs/reference/`: reference material and detailed configuration docs.
+- `docs/operations/`: debugging and troubleshooting docs.
+- `docs/security/`: security-related documentation.
+- `docs/architecture/`: architecture and internal design notes.
+- `docs/channels/`: channel-specific integration guides.
+- `docs/design/`: design proposals and investigations.
+- `docs/migration/`: migration notes.
+
+## Recommended Naming
+
+- English documents use the base filename:
+  - `README.md`
+  - `configuration.md`
+- Translations use `.<locale>.md`:
+  - `README.zh.md`
+  - `configuration.fr.md`
+  - `README.pt-br.md`
+- Code-adjacent translated READMEs follow the same rule:
+  - `pkg/audio/asr/README.zh.md`
+  - `pkg/isolation/README.zh.md`
+
+## Common Patterns To Avoid
+
+- Root-level translated entry docs such as `README.zh.md` or `CONTRIBUTING.fr.md`
+  - Use `docs/project/README.zh.md` or `docs/project/CONTRIBUTING.fr.md` instead.
+- Language directories under `docs/` such as `docs/zh/`, `docs/ZH/`, `docs/ja/`, or `docs/fr/`
+  - Use `docs/<type>/<name>.<locale>.md` instead.
+- Nested locale buckets such as `docs/guides/zh/configuration.md` or `docs/channels/telegram/zh/README.md`
+  - Keep translations beside the English source file instead.
+- Legacy translation filenames such as `README_zh.md` or `README_CN.md`
+  - Use `README.zh.md`.
+- Non-canonical locale suffixes such as `configuration_zh.md` or `configuration.ZH.md`
+  - Use lowercase `.<locale>.md`, for example `configuration.zh.md`.
+
+## Translation Placement
+
+- For docs under `docs/guides`, `docs/reference`, `docs/operations`, `docs/security`, `docs/architecture`, `docs/channels`, and `docs/migration`, keep translations beside the English source file.
+- For project entry translations, keep translated files in `docs/project/` and keep the English source in the repository root.
+- In most cases, each translated file should have an English source document:
+  - `docs/guides/configuration.zh.md` usually sits beside `docs/guides/configuration.md`
+  - `docs/project/README.zh.md` usually corresponds to `README.md`
+- Exception: `docs/design/` may contain locale-specific working notes without an English source document. The naming rules still apply there.
+
+## Code-Adjacent Docs
+
+Keep documentation next to the implementation when it primarily describes a package, command, example, or subproject.
+
+Examples:
+
+- `pkg/**/README.md`
+- `cmd/**/README.md`
+- `web/README.md`
+- `examples/**/README.md`
+
+These files still follow the same translation naming rules.
+
+## Adding a New Document
+
+1. Pick the correct document type directory.
+2. Create the English source file first.
+3. When the document has an English source in the same docs set, add translated siblings only after that source exists.
+4. Update links from existing docs when the new doc becomes a navigation target.
+5. Run `make lint-docs` locally when adding or moving docs.
+
+## Examples
+
+- New setup guide:
+  - `docs/guides/launcher-setup.md`
+  - `docs/guides/launcher-setup.zh.md`
+- New security guide:
+  - `docs/security/token-rotation.md`
+- New translated package README:
+  - `pkg/channels/README.zh.md`
+
+## Validation
+
+Run:
+
+```bash
+make lint-docs
+```
+
+The local docs linter currently checks these common cases:
+
+- no root-level translated `README` or `CONTRIBUTING` files
+- no `docs/<locale>/` language buckets, regardless of case
+- no nested locale buckets under typed docs directories
+- no legacy `README_*.md` filenames
+- no non-canonical translation-like filenames such as `_zh.md` or `.ZH.md`
+- no extra Markdown files directly under `docs/` except `docs/README.md`
+- every translated Markdown file has a matching English source file
+  - except for locale-specific working notes under `docs/design/`
+
+`make lint-docs` is a local consistency check for common naming and placement mistakes. It helps contributors stay close to the recommended layout, but it is not intended to describe every acceptable documentation pattern in the repository.
+
+When a check fails, `make lint-docs` prints the failing path, the reason, and a suggested fix.
+
+If you change these recommendations or want the local linter to reflect them more closely, update this file and `scripts/lint-docs.sh` together.
@@ -0,0 +1,12 @@
+# Architecture
+
+Internal architecture notes for major runtime mechanisms and subsystem design.
+
+- [Steering](steering.md): injecting messages into a running agent loop between tool calls.
+- [SubTurn Mechanism](subturn.md): sub-agent coordination, concurrency control, and lifecycle handling.
+- [Session System](session-system.md): session scope allocation, JSONL persistence, alias compatibility, and migration. ([ZH](session-system.zh.md))
+- [Routing System](routing-system.md): agent dispatch, session policy selection, and light/heavy model routing. ([ZH](routing-system.zh.md))
+- [Hook System Guide](hooks/README.md): current hook architecture and protocol details.
+- [Agent Refactor](agent-refactor/README.md): notes and checkpoints for the agent refactor work.
+
+For proposal-style or exploratory docs, also see [`../design/`](../design/).
@@ -0,0 +1,100 @@
+# Agent File Rename Plan
+
+## Goal
+
+Unify file naming in the `pkg/agent/` package to resolve the confusing `loop_*` prefix and the unclear responsibility boundaries between files.
+
+## Change Overview
+
+### File Renames (12 files)
+
+| Original | New | Description |
+|----------|-----|-------------|
+| `loop.go` | `agent.go` | AgentLoop main body + lifecycle methods |
+| `loop_message.go` | `agent_message.go` | Message handling and routing |
+| `loop_outbound.go` | `agent_outbound.go` | Response publishing |
+| `loop_event.go` | `agent_event.go` | Event system |
+| `loop_command.go` | `agent_command.go` | Command processing |
+| `loop_steering.go` | `agent_steering.go` | Steering message handling |
+| `loop_transcribe.go` | `agent_transcribe.go` | Audio transcription |
+| `loop_media.go` | `agent_media.go` | Media processing |
+| `loop_mcp.go` | `agent_mcp.go` | MCP initialization |
+| `loop_utils.go` | `agent_utils.go` | Utility functions |
+| `loop_inject.go` | `agent_inject.go` | Dependency injection |
+| `loop_turn.go` | `turn_coord.go` | Turn coordinator |
+
+### File Merges (2 → 1)
+
+| Original | New | Description |
+|----------|-----|-------------|
+| `turn.go` + `turn_exec.go` | `turn_state.go` | Turn-related type definitions |
+
+## Final File Structure
+
+```
+pkg/agent/
+├── agent.go              # AgentLoop + Run/Stop/Close lifecycle
+├── agent_message.go     # Message processing
+├── agent_outbound.go    # Response publishing
+├── agent_event.go       # Event system
+├── agent_command.go     # Command processing
+├── agent_steering.go    # Steering
+├── agent_transcribe.go  # Transcription
+├── agent_media.go       # Media processing
+├── agent_mcp.go         # MCP
+├── agent_utils.go       # Utility functions
+├── agent_inject.go      # Dependency injection
+├── turn_coord.go       # runTurn + coordinator
+├── turn_state.go       # turnState + turnExecution + Control + ToolControl + LLMPhase
+├── pipeline.go         # Pipeline struct + NewPipeline
+├── pipeline_setup.go
+├── pipeline_llm.go
+├── pipeline_execute.go
+└── pipeline_finalize.go
+```
+
+## Naming Convention
+
+| Prefix | Content | Example |
+|--------|---------|---------|
+| `agent_*` | AgentLoop method files | `agent_message.go`, `agent_event.go` |
+| `turn_*` | Turn lifecycle related | `turn_coord.go`, `turn_state.go` |
+| `pipeline_*` | Pipeline methods | `pipeline_setup.go`, `pipeline_llm.go` |
+| `context_*` | Context management | `context_manager.go`, `context_legacy.go` |
+| `hook_*` | Hook system | `hook_process.go`, `hook_mount.go` |
+
+## Architecture Layers
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                    AgentLoop (agent.go)                │
+│  - Message loop Run/Stop/Close                        │
+│  - Dependency injection (agent_inject.go)             │
+│  - Message routing (agent_message.go)                 │
+│  - Response publishing (agent_outbound.go)            │
+└─────────────────────────────────────────────────────────┘
+                          │
+                          ▼
+┌─────────────────────────────────────────────────────────┐
+│              Turn Coordinator (turn_coord.go)          │
+│  - runTurn(): main coordinator                         │
+│  - abortTurn(): abort                                 │
+│  - askSideQuestion(): side question                   │
+│  - selectCandidates(): model selection                │
+└─────────────────────────────────────────────────────────┘
+                          │
+                          ▼
+┌─────────────────────────────────────────────────────────┐
+│                 Pipeline (pipeline_*.go)               │
+│  - SetupTurn(): initialization                        │
+│  - CallLLM(): LLM call                               │
+│  - ExecuteTools(): tool execution                     │
+│  - Finalize(): finalization                          │
+└─────────────────────────────────────────────────────────┘
+```
+
+## Verification Results
+
+- ✅ `go build ./pkg/agent/...` - Pass
+- ✅ `go vet ./pkg/agent/...` - No warnings
+- ✅ `go test ./pkg/agent/... -skip "TestSeahorse|TestGlobalSkillFileContentChange"` - Pass
@@ -0,0 +1,100 @@
+# Agent 文件重命名计划
+
+## 目标
+
+统一 `pkg/agent/` 包的文件命名，解决 `loop_*` 前缀命名混乱、职责边界不清晰的问题。
+
+## 变更概览
+
+### 文件重命名（12 个）
+
+| 原文件 | 新文件 | 说明 |
+|--------|--------|------|
+| `loop.go` | `agent.go` | AgentLoop 主体 + 生命周期方法 |
+| `loop_message.go` | `agent_message.go` | 消息处理和路由 |
+| `loop_outbound.go` | `agent_outbound.go` | 响应发布 |
+| `loop_event.go` | `agent_event.go` | 事件系统 |
+| `loop_command.go` | `agent_command.go` | 命令处理 |
+| `loop_steering.go` | `agent_steering.go` | Steering 消息处理 |
+| `loop_transcribe.go` | `agent_transcribe.go` | 音频转录 |
+| `loop_media.go` | `agent_media.go` | 媒体处理 |
+| `loop_mcp.go` | `agent_mcp.go` | MCP 初始化 |
+| `loop_utils.go` | `agent_utils.go` | 工具函数 |
+| `loop_inject.go` | `agent_inject.go` | 依赖注入 |
+| `loop_turn.go` | `turn_coord.go` | Turn 协调器 |
+
+### 文件合并（2 → 1）
+
+| 原文件 | 新文件 | 说明 |
+|--------|--------|------|
+| `turn.go` + `turn_exec.go` | `turn_state.go` | Turn 相关类型定义 |
+
+## 最终文件结构
+
+```
+pkg/agent/
+├── agent.go              # AgentLoop + Run/Stop/Close 生命周期
+├── agent_message.go     # 消息处理
+├── agent_outbound.go    # 响应发布
+├── agent_event.go       # 事件系统
+├── agent_command.go     # 命令处理
+├── agent_steering.go    # Steering
+├── agent_transcribe.go  # 转录
+├── agent_media.go       # 媒体处理
+├── agent_mcp.go         # MCP
+├── agent_utils.go       # 工具函数
+├── agent_inject.go      # 依赖注入
+├── turn_coord.go       # runTurn + 协调器
+├── turn_state.go       # turnState + turnExecution + Control + ToolControl + LLMPhase
+├── pipeline.go         # Pipeline struct + NewPipeline
+├── pipeline_setup.go
+├── pipeline_llm.go
+├── pipeline_execute.go
+└── pipeline_finalize.go
+```
+
+## 命名约定
+
+| 前缀 | 内容 | 示例 |
+|------|------|------|
+| `agent_*` | AgentLoop 的方法文件 | `agent_message.go`, `agent_event.go` |
+| `turn_*` | Turn 生命周期相关 | `turn_coord.go`, `turn_state.go` |
+| `pipeline_*` | Pipeline 方法 | `pipeline_setup.go`, `pipeline_llm.go` |
+| `context_*` | 上下文管理 | `context_manager.go`, `context_legacy.go` |
+| `hook_*` | Hook 系统 | `hook_process.go`, `hook_mount.go` |
+
+## 架构层次
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                    AgentLoop (agent.go)                │
+│  - 消息循环 Run/Stop/Close                              │
+│  - 依赖注入 (agent_inject.go)                           │
+│  - 消息路由 (agent_message.go)                          │
+│  - 响应发布 (agent_outbound.go)                         │
+└─────────────────────────────────────────────────────────┘
+                          │
+                          ▼
+┌─────────────────────────────────────────────────────────┐
+│              Turn Coordinator (turn_coord.go)           │
+│  - runTurn(): 主协调器                                  │
+│  - abortTurn(): 中止                                  │
+│  - askSideQuestion(): 侧问                             │
+│  - selectCandidates(): 模型选择                        │
+└─────────────────────────────────────────────────────────┘
+                          │
+                          ▼
+┌─────────────────────────────────────────────────────────┐
+│                 Pipeline (pipeline_*.go)               │
+│  - SetupTurn(): 初始化                                 │
+│  - CallLLM(): LLM 调用                                │
+│  - ExecuteTools(): 工具执行                            │
+│  - Finalize(): 终结                                   │
+└─────────────────────────────────────────────────────────┘
+```
+
+## 验证结果
+
+- ✅ `go build ./pkg/agent/...` - 通过
+- ✅ `go vet ./pkg/agent/...` - 无警告
+- ✅ `go test ./pkg/agent/... -skip "TestSeahorse|TestGlobalSkillFileContentChange"` - 通过
@@ -0,0 +1,77 @@
+# AgentLoop File Split
+
+> **Note:** This document describes the file split that was completed in a previous phase. The `loop_*` files have since been renamed to `agent_*` and `turn_*`. See [agent-rename-plan.md](./agent-rename-plan.md) for the current file structure.
+
+## Overview
+
+The `pkg/agent/loop.go` file (originally 4384 lines) has been split into 12 focused source files. This is a pure refactoring with no behavioral changes.
+
+## Goals
+
+- Reduce cognitive load when navigating agent loop code
+- Enable parallel work by decoupling concerns
+- Maintain all existing functionality and tests
+- Keep imports minimal per file
+
+## Original File Map (Renamed in Phase 2)
+
+| Old File | New File | Responsibility |
+|----------|----------|----------------|
+| `loop.go` | `agent.go` | Core `AgentLoop` struct, `Run`, `Stop`, `Close` |
+| `loop_turn.go` | `turn_coord.go` + `pipeline_*.go` | Turn execution: coordinator + Pipeline methods |
+| `loop_utils.go` | `agent_utils.go` | Standalone utility functions |
+| `loop_init.go` | `agent_init.go` | `NewAgentLoop` constructor and tool registration |
+| `loop_message.go` | `agent_message.go` | Message handling and routing |
+| `loop_command.go` | `agent_command.go` | Command processing |
+| `loop_mcp.go` | `agent_mcp.go` | MCP runtime |
+| `loop_event.go` | `agent_event.go` | Event system helpers |
+| `loop_media.go` | `agent_media.go` | Media resolution |
+| `loop_outbound.go` | `agent_outbound.go` | Response publishing |
+| `loop_transcribe.go` | `agent_transcribe.go` | Audio transcription |
+| `loop_steering.go` | `agent_steering.go` | Steering queue |
+| `loop_inject.go` | `agent_inject.go` | Setter injection |
+
+## Current File Structure
+
+See [agent-rename-plan.md](./agent-rename-plan.md) for the complete current file structure.
+
+## Phase 2: Rename and Pipeline Restructuring
+
+Phase 2 completed the following:
+
+1. **File renaming**: All `loop_*` files renamed to `agent_*` or `turn_*`
+2. **Turn state merging**: `turn.go` + `turn_exec.go` → `turn_state.go`
+3. **Pipeline extraction**: Split large `runTurn` into Pipeline methods
+
+### Pipeline Architecture
+
+The Pipeline methods provide structured turn execution:
+
+| Method | File | Responsibility |
+|--------|------|----------------|
+| `SetupTurn()` | `pipeline_setup.go` | History assembly, message building, candidate selection |
+| `CallLLM()` | `pipeline_llm.go` | PreLLM hooks, fallback, retry, AfterLLM hooks |
+| `ExecuteTools()` | `pipeline_execute.go` | Tool execution with hooks |
+| `Finalize()` | `pipeline_finalize.go` | Session persistence, compression |
+
+## Core Principles Applied
+
+### 1. Same Package, Independent Files
+All files belong to the `agent` package and compile together. This preserves the original visibility rules.
+
+### 2. No Logic Changes
+All functions were moved verbatim. The extraction preserved behavioral equivalence.
+
+### 3. Shared Types in turn_state.go
+The `turnState`, `turnExecution`, `Control`, `ToolControl`, and `LLMPhase` types are centralized in `turn_state.go`.
+
+## Testing
+
+All existing tests pass except for 5 pre-existing failures (`TestGlobalSkillFileContentChange` and 4 Seahorse tests), which are unrelated to this refactor.
+
+Build status: `go build ./pkg/agent/...` passes with no errors.
+
+## See Also
+
+- [agent-rename-plan.md](./agent-rename-plan.md) — Current file naming convention
+- [context.md](context.md) — context management and session handling
@@ -0,0 +1,68 @@
+# Pipeline Restructuring Plan
+
+## Goal
+
+Split `pkg/agent/pipeline.go` (~1400 lines) into multiple files, each organized around a single responsibility.
+
+## Final File Structure
+
+```
+pkg/agent/
+├── pipeline.go           # Pipeline struct + NewPipeline (~39 lines)
+├── pipeline_setup.go   # SetupTurn method (~115 lines)
+├── pipeline_llm.go     # CallLLM method (~519 lines)
+├── pipeline_execute.go  # ExecuteTools method (~693 lines)
+└── pipeline_finalize.go # Finalize method (~78 lines)
+```
+
+## Actual Line Counts
+
+| File | Lines |
+|------|-------|
+| `pipeline.go` | 39 |
+| `pipeline_setup.go` | 115 |
+| `pipeline_llm.go` | 519 |
+| `pipeline_execute.go` | 693 |
+| `pipeline_finalize.go` | 78 |
+| **Total** | **1444** |
+
+## Responsibility Matrix
+
+| File | Method | Responsibility |
+|------|--------|----------------|
+| `pipeline.go` | `Pipeline` struct, `NewPipeline()` | Pipeline dependency container |
+| `pipeline_setup.go` | `SetupTurn()` | Turn initialization: history assembly, message building, candidate selection |
+| `pipeline_llm.go` | `CallLLM()` | LLM call: PreLLM hooks, fallback, retry, AfterLLM hooks |
+| `pipeline_execute.go` | `ExecuteTools()` | Tool execution: BeforeTool/ApproveTool/AfterTool hooks, media sending, steering handling |
+| `pipeline_finalize.go` | `Finalize()` | Turn finalization: session save, compression, status setting |
+
+## Relationship Between Pipeline and Turn Coordinator
+
+```
+AgentLoop (agent.go)
+    │
+    ├── runAgentLoop() ──────────────────┐
+    │                                    │
+    │    ┌───────────────────────────────▼───────────────────────────────┐
+    │    │                    Turn Coordinator (turn_coord.go)           │
+    │    │                                                           │
+    │    │   runTurn() {                                             │
+    │    │       exec = pipeline.SetupTurn()                          │
+    │    │       loop {                                              │
+    │    │           ctrl = pipeline.CallLLM()  ──► Pipeline (pipeline_*.go) │
+    │    │           if ctrl == ToolLoop {                            │
+    │    │               toolCtrl = pipeline.ExecuteTools()             │
+    │    │           }                                                │
+    │    │       }                                                    │
+    │    │       return pipeline.Finalize()                            │
+    │    │   }                                                         │
+    │    └─────────────────────────────────────────────────────────────┘
+    │
+    └── Publish response (agent_outbound.go)
+```
+
+## Verification Results
+
+- ✅ `go build ./pkg/agent/...` - Pass
+- ✅ `go vet ./pkg/agent/...` - No warnings
+- ✅ `go test ./pkg/agent/... -skip "TestSeahorse|TestGlobalSkillFileContentChange"` - Pass
@@ -0,0 +1,68 @@
+# Pipeline 重构文档
+
+## 目标
+
+将 `pkg/agent/pipeline.go`（约 1400 行）拆分为多个逻辑文件，代码按职责组织。
+
+## 最终文件结构
+
+```
+pkg/agent/
+├── pipeline.go           # Pipeline struct + NewPipeline (~39行)
+├── pipeline_setup.go   # SetupTurn 方法 (~115行)
+├── pipeline_llm.go     # CallLLM 方法 (~519行)
+├── pipeline_execute.go  # ExecuteTools 方法 (~693行)
+└── pipeline_finalize.go # Finalize 方法 (~78行)
+```
+
+## 实际行数
+
+| 文件 | 行数 |
+|------|------|
+| `pipeline.go` | 39 |
+| `pipeline_setup.go` | 115 |
+| `pipeline_llm.go` | 519 |
+| `pipeline_execute.go` | 693 |
+| `pipeline_finalize.go` | 78 |
+| **总计** | **1444** |
+
+## 职责说明
+
+| 文件 | 方法 | 职责 |
+|------|------|------|
+| `pipeline.go` | `Pipeline` struct, `NewPipeline()` | Pipeline 依赖容器 |
+| `pipeline_setup.go` | `SetupTurn()` | Turn 初始化：历史组装、消息构建、候选项选择 |
+| `pipeline_llm.go` | `CallLLM()` | LLM 调用：PreLLM hook、fallback、重试、AfterLLM hook |
+| `pipeline_execute.go` | `ExecuteTools()` | 工具执行：BeforeTool/ApproveTool/AfterTool hook、媒体发送、steering 处理 |
+| `pipeline_finalize.go` | `Finalize()` | Turn 终结：会话保存、压缩、状态设置 |
+
+## Pipeline 与 Turn Coordinator 的关系
+
+```
+AgentLoop (agent.go)
+    │
+    ├── runAgentLoop() ──────────────────┐
+    │                                    │
+    │    ┌───────────────────────────────▼───────────────────────────────┐
+    │    │                    Turn Coordinator (turn_coord.go)              │
+    │    │                                                           │
+    │    │   runTurn() {                                             │
+    │    │       exec = pipeline.SetupTurn()                           │
+    │    │       loop {                                               │
+    │    │           ctrl = pipeline.CallLLM()  ──► Pipeline (pipeline_*.go) │
+    │    │           if ctrl == ToolLoop {                             │
+    │    │               toolCtrl = pipeline.ExecuteTools()             │
+    │    │           }                                                 │
+    │    │       }                                                    │
+    │    │       return pipeline.Finalize()                            │
+    │    │   }                                                        │
+    │    └─────────────────────────────────────────────────────────────┘
+    │
+    └── 发布响应 (agent_outbound.go)
+```
+
+## 验证结果
+
+- ✅ `go build ./pkg/agent/...` - 通过
+- ✅ `go vet ./pkg/agent/...` - 无警告
+- ✅ `go test ./pkg/agent/... -skip "TestSeahorse|TestGlobalSkillFileContentChange"` - 通过
@@ -28,6 +28,69 @@ The currently exposed synchronous hook points are:

 Everything else is exposed as read-only events.

+## Hook Actions
+
+Hooks can return different actions to control the flow:
+
+| Action | Applicable Stages | Effect |
+| --- | --- | --- |
+| `continue` | All interceptors | Pass through without modification |
+| `modify` | `before_llm`, `after_llm`, `before_tool`, `after_tool` | Modify request/response and continue |
+| `respond` | `before_tool` | Return a tool result directly, skip actual tool execution |
+| `deny_tool` | `before_tool` | Deny tool execution, return error message |
+| `abort_turn` | All interceptors | Abort the current turn |
+| `hard_abort` | All interceptors | Force stop the entire agent loop |
+
+### The `respond` Action
+
+The `respond` action is special: it allows a `before_tool` hook to provide the tool result directly, skipping the actual tool execution. This is useful for:
+
+1. **Plugin tool injection**: External hooks can implement tools without registering them in the tool registry
+2. **Tool result caching**: Return cached results for repeated tool calls
+3. **Tool mocking**: Return mock results for testing purposes
+
+When a hook returns `respond` with a `HookResult`, the agent loop:
+1. Skips the actual tool execution
+2. Uses the provided result as the tool's result (note that `after_tool` hooks are not called for a `respond` result)
+3. Continues the turn normally with the result
+
+Example (Go in-process hook):
+
+```go
+func (h *MyHook) BeforeTool(
+    ctx context.Context,
+    call *agent.ToolCallHookRequest,
+) (*agent.ToolCallHookRequest, agent.HookDecision, error) {
+    if call.Tool == "my_plugin_tool" {
+        next := call.Clone()
+        next.HookResult = &tools.ToolResult{
+            ForLLM:  "Plugin tool executed successfully",
+            Silent:  false,
+            IsError: false,
+        }
+        return next, agent.HookDecision{Action: agent.HookActionRespond}, nil
+    }
+    return call, agent.HookDecision{Action: agent.HookActionContinue}, nil
+}
+```
+
+Example (Python process hook):
+
+```python
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    if tool == "my_plugin_tool":
+        return {
+            "action": "respond",
+            "result": {
+                "for_llm": "Plugin tool executed successfully",
+                "silent": False,
+                "is_error": False
+            }
+        }
+    return {"action": "continue"}
+```
+
 ## Execution Order

 `HookManager` sorts hooks like this:
@@ -28,6 +28,69 @@

 其余 lifecycle 通过事件形式只读暴露。

+## Hook Actions
+
+Hook 可以返回不同的 action 来控制流程：
+
+| Action | 适用阶段 | 效果 |
+| --- | --- | --- |
+| `continue` | 所有拦截型 | 放行，不做修改 |
+| `modify` | `before_llm`, `after_llm`, `before_tool`, `after_tool` | 改写请求/响应后放行 |
+| `respond` | `before_tool` | 直接返回工具结果，跳过实际工具执行 |
+| `deny_tool` | `before_tool` | 拒绝工具执行，返回错误信息 |
+| `abort_turn` | 所有拦截型 | 中止当前 turn |
+| `hard_abort` | 所有拦截型 | 强制终止整个 agent loop |
+
+### `respond` Action
+
+`respond` action 是特殊的：它允许 `before_tool` hook 直接提供工具结果，跳过实际工具执行。适用于：
+
+1. **插件工具注入**：外部 hook 可以实现工具，无需在 ToolRegistry 注册
+2. **工具结果缓存**：对重复调用返回缓存结果
+3. **工具模拟**：测试时返回模拟结果
+
+当 hook 返回 `respond` 并携带 `HookResult` 时，agent loop 会：
+1. 跳过实际工具执行
+2. 使用提供的结果作为工具执行结果（注意：`respond` 的结果不会再触发 `after_tool` hook）
+3. 正常继续 turn 流程
+
+示例（Go 进程内 hook）：
+
+```go
+func (h *MyHook) BeforeTool(
+    ctx context.Context,
+    call *agent.ToolCallHookRequest,
+) (*agent.ToolCallHookRequest, agent.HookDecision, error) {
+    if call.Tool == "my_plugin_tool" {
+        next := call.Clone()
+        next.HookResult = &tools.ToolResult{
+            ForLLM:  "Plugin tool executed successfully",
+            Silent:  false,
+            IsError: false,
+        }
+        return next, agent.HookDecision{Action: agent.HookActionRespond}, nil
+    }
+    return call, agent.HookDecision{Action: agent.HookActionContinue}, nil
+}
+```
+
+示例（Python process hook）：
+
+```python
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    if tool == "my_plugin_tool":
+        return {
+            "action": "respond",
+            "result": {
+                "for_llm": "Plugin tool executed successfully",
+                "silent": False,
+                "is_error": False
+            }
+        }
+    return {"action": "continue"}
+```
+
 ## 执行顺序

 HookManager 的排序规则是：
@@ -0,0 +1,568 @@
+# Hook JSON-RPC Protocol Details
+
+All hooks use `JSON-RPC 2.0` format, with one JSON message per line, transmitted via stdio.
+
+---
+
+## Basic Protocol Structure
+
+### Request (PicoClaw → Hook)
+
+```json
+{"jsonrpc":"2.0","id":1,"method":"hook.xxx","params":{...}}
+```
+
+### Response (Hook → PicoClaw)
+
+Success:
+```json
+{"jsonrpc":"2.0","id":1,"result":{...}}
+```
+
+Error:
+```json
+{"jsonrpc":"2.0","id":1,"error":{"code":-32000,"message":"error message"}}
+```
+
+---
+
+## 1. `hook.hello` (Handshake)
+
+The handshake must be completed at startup; otherwise, the hook process is terminated.
+
+### Request
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "method": "hook.hello",
+  "params": {
+    "name": "py_review_gate",
+    "version": 1,
+    "modes": ["observe", "tool", "approve"]
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `name` | hook name (from configuration) |
+| `version` | protocol version, currently `1` |
+| `modes` | capability modes supported by the hook |
+
+### Response
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": {
+    "ok": true,
+    "name": "python-review-gate"
+  }
+}
+```
+
+---
+
+## 2. `hook.before_llm`
+
+Triggered before the request is sent to the LLM. Can be used to inject tool definitions.
+
+### Request
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 2,
+  "method": "hook.before_llm",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "ParentTurnID": "",
+      "SessionKey": "session-1",
+      "Iteration": 0,
+      "TracePath": "runTurn",
+      "Source": "turn.llm.request"
+    },
+    "model": "claude-sonnet",
+    "messages": [
+      {"role": "user", "content": "hello"}
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "echo",
+          "description": "echo text",
+          "parameters": {"type": "object"}
+        }
+      }
+    ],
+    "options": {
+      "temperature": 0.7
+    },
+    "channel": "cli",
+    "chat_id": "chat-1",
+    "graceful_terminal": false
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `meta` | event metadata for tracing |
+| `model` | requested model name |
+| `messages` | conversation history |
+| `tools` | list of available tool definitions |
+| `options` | LLM parameters (temperature, max_tokens, etc.) |
+| `channel` | request source channel |
+| `chat_id` | session ID |
+
+### Response (Tool Injection Example)
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 2,
+  "result": {
+    "action": "modify",
+    "request": {
+      "model": "claude-sonnet",
+      "messages": [{"role": "user", "content": "hello"}],
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "echo",
+            "description": "echo",
+            "parameters": {}
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "my_plugin_tool",
+            "description": "Plugin injected tool",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "query": {"type": "string"}
+              }
+            }
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `action` | decision action (see the "Action Options" section below) |
+| `request` | modified request object |
+
+---
+
+## 3. `hook.after_llm`
+
+Triggered after receiving LLM response. Can modify response content.
+
+### Request
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 3,
+  "method": "hook.after_llm",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "model": "claude-sonnet",
+    "response": {
+      "role": "assistant",
+      "content": "Hi!",
+      "tool_calls": [
+        {
+          "id": "tc-1",
+          "type": "function",
+          "function": {
+            "name": "echo",
+            "arguments": "{\"text\":\"hi\"}"
+          }
+        }
+      ]
+    },
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+### Response
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 3,
+  "result": {
+    "action": "continue"
+  }
+}
+```
+
+---
+
+## 4. `hook.before_tool`
+
+Triggered before tool execution. Can modify tool name and arguments, deny execution, or return result directly.
+
+### Request
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "method": "hook.before_tool",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "tool": "echo_text",
+    "arguments": {
+      "text": "hello"
+    },
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `tool` | tool name |
+| `arguments` | tool arguments |
+
+### Response (Modify Arguments)
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "result": {
+    "action": "modify",
+    "call": {
+      "tool": "echo_text",
+      "arguments": {
+        "text": "modified hello"
+      }
+    }
+  }
+}
+```
+
+### Response (Deny Execution)
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "result": {
+    "action": "deny_tool",
+    "reason": "Invalid arguments"
+  }
+}
+```
+
+### Response (Return Result Directly - respond)
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "result": {
+    "action": "respond",
+    "call": {
+      "tool": "my_plugin_tool",
+      "arguments": {
+        "query": "hello"
+      }
+    },
+    "result": {
+      "for_llm": "Plugin tool executed successfully",
+      "for_user": "",
+      "silent": false,
+      "is_error": false
+    }
+  }
+}
+```
+
+The `respond` action allows hooks to return tool results directly, skipping actual tool execution. Use cases:
+1. **Plugin tool injection**: External hooks can implement tools without registering in ToolRegistry
+2. **Tool result caching**: Return cached results for repeated calls
+3. **Tool mocking**: Return mock results during testing
+
+| Field | Description |
+|-------|-------------|
+| `action` | must be `respond` |
+| `call` | modified call information (optional) |
+| `result` | tool result to return directly |
+
+---
+
+## 5. `hook.after_tool`
+
+Triggered after tool execution completes. Can modify the result returned to LLM.
+
+### Request
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 5,
+  "method": "hook.after_tool",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "tool": "echo_text",
+    "arguments": {
+      "text": "hello"
+    },
+    "result": {
+      "for_llm": "echoed: hello",
+      "for_user": "",
+      "silent": false,
+      "is_error": false,
+      "async": false,
+      "media": [],
+      "artifact_tags": [],
+      "response_handled": false
+    },
+    "duration": 15000000,
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `result.for_llm` | content returned to LLM |
+| `result.for_user` | content sent to user |
+| `result.silent` | whether silent (not sent to user) |
+| `result.is_error` | whether it's an error |
+| `result.async` | whether executed asynchronously |
+| `result.media` | list of media references |
+| `result.artifact_tags` | local artifact path tags |
+| `result.response_handled` | whether response has been handled |
+| `duration` | execution time (nanoseconds) |
+
+### Response
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 5,
+  "result": {
+    "action": "continue"
+  }
+}
+```
+
+---
+
+## 6. `hook.approve_tool`
+
+Approval hook for deciding whether to allow execution of sensitive tools.
+
+### Request
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 6,
+  "method": "hook.approve_tool",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "tool": "bash",
+    "arguments": {
+      "command": "rm -rf /"
+    },
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+### Response (Approved)
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 6,
+  "result": {
+    "approved": true
+  }
+}
+```
+
+### Response (Denied)
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 6,
+  "result": {
+    "approved": false,
+    "reason": "Dangerous command, execution denied"
+  }
+}
+```
+
+---
+
+## 7. `hook.event` (notification)
+
+Observer event, broadcast only, no response required. `id` is `0` or absent.
+
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "hook.event",
+  "params": {
+    "Kind": "tool_exec_start",
+    "Meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1"
+    },
+    "Payload": {
+      "Tool": "echo_text",
+      "Arguments": {"text": "hello"}
+    }
+  }
+}
+```
+
+Common `Kind` values:
+- `turn_start` / `turn_end`
+- `llm_request` / `llm_response`
+- `tool_exec_start` / `tool_exec_end` / `tool_exec_skipped`
+- `steering_injected`
+- `interrupt_received`
+- `error`
+
+---
+
+## Action Options
+
+| action | Applicable hooks | Effect |
+|--------|-----------------|--------|
+| `continue` | All interceptor types | Pass through without modification |
+| `modify` | `before_llm`, `before_tool`, `after_llm`, `after_tool` | Modify request/response and pass through |
+| `respond` | `before_tool` | Return tool result directly, skip actual execution. **Note: AfterTool is NOT called (design decision - respond provides final answer).** |
+| `deny_tool` | `before_tool` | Deny tool execution |
+| `abort_turn` | All interceptor types | Abort current turn, return error |
+| `hard_abort` | All interceptor types | Force stop entire agent loop |
+
+---
+
+## Complete Flow Example
+
+```json
+{"jsonrpc":"2.0","id":1,"method":"hook.hello","params":{"name":"my_hook","version":1,"modes":["tool","approve"]}}
+{"jsonrpc":"2.0","id":1,"result":{"ok":true,"name":"my_hook"}}
+{"jsonrpc":"2.0","id":2,"method":"hook.before_llm","params":{"model":"claude-sonnet","messages":[{"role":"user","content":"hello"}],"tools":[]}}
+{"jsonrpc":"2.0","id":2,"result":{"action":"continue"}}
+{"jsonrpc":"2.0","id":3,"method":"hook.before_tool","params":{"tool":"bash","arguments":{"command":"ls"}}}
+{"jsonrpc":"2.0","id":3,"result":{"action":"continue"}}
+{"jsonrpc":"2.0","id":4,"method":"hook.approve_tool","params":{"tool":"bash","arguments":{"command":"ls"}}}
+{"jsonrpc":"2.0","id":4,"result":{"approved":true}}
+{"jsonrpc":"2.0","id":5,"method":"hook.after_tool","params":{"tool":"bash","arguments":{"command":"ls"},"result":{"for_llm":"file1.txt\nfile2.txt"},"duration":5000000}}
+{"jsonrpc":"2.0","id":5,"result":{"action":"continue"}}
+{"jsonrpc":"2.0","id":6,"method":"hook.after_llm","params":{"model":"claude-sonnet","response":{"role":"assistant","content":"Files listed"}}}
+{"jsonrpc":"2.0","id":6,"result":{"action":"continue"}}
+```
+
+---
+
+## Plugin Tool Injection via `before_llm` and `before_tool`
+
+Standard flow for plugin tool injection:
+
+1. In `before_llm`, inject tool definition to let LLM know the tool is available
+2. In `before_tool`, use `respond` action to return tool execution result directly
+
+### `before_llm` Inject Tool Definition
+
+```python
+def handle_before_llm(params: dict) -> dict:
+    tools = params.get("tools", [])
+    
+    # Add plugin tool definition
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "my_plugin_tool",
+            "description": "Plugin provided tool",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "input": {"type": "string", "description": "Input content"}
+                },
+                "required": ["input"]
+            }
+        }
+    })
+    
+    return {
+        "action": "modify",
+        "request": {
+            "model": params["model"],
+            "messages": params["messages"],
+            "tools": tools,
+            "options": params.get("options", {})
+        }
+    }
+```
+
+### `before_tool` Return Execution Result
+
+```python
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    
+    if tool == "my_plugin_tool":
+        # Implement tool logic here
+        args = params.get("arguments", {})
+        input_text = args.get("input", "")
+        
+        # Return result directly, no need to register in ToolRegistry
+        return {
+            "action": "respond",
+            "result": {
+                "for_llm": f"Plugin tool executed successfully, input: {input_text}",
+                "silent": False,
+                "is_error": False
+            }
+        }
+    
+    return {"action": "continue"}
+```
+
+This way, external hooks can fully implement plugin tools without registering any tool implementation inside PicoClaw.
@@ -0,0 +1,568 @@
+# Hook JSON-RPC 协议详解
+
+所有 hook 使用 `JSON-RPC 2.0` 格式，每行一个 JSON 消息，通过 stdio 传输。
+
+---
+
+## 基础协议结构
+
+### 请求（PicoClaw → Hook）
+
+```json
+{"jsonrpc":"2.0","id":1,"method":"hook.xxx","params":{...}}
+```
+
+### 响应（Hook → PicoClaw）
+
+成功：
+```json
+{"jsonrpc":"2.0","id":1,"result":{...}}
+```
+
+错误：
+```json
+{"jsonrpc":"2.0","id":1,"error":{"code":-32000,"message":"错误信息"}}
+```
+
+---
+
+## 1. `hook.hello`（握手）
+
+启动时必须完成握手，否则 hook 进程会被终止。
+
+### 请求
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "method": "hook.hello",
+  "params": {
+    "name": "py_review_gate",
+    "version": 1,
+    "modes": ["observe", "tool", "approve"]
+  }
+}
+```
+
+| 字段 | 说明 |
+|------|------|
+| `name` | hook 名称（来自配置） |
+| `version` | 协议版本，当前为 `1` |
+| `modes` | hook 支持的能力模式 |
+
+### 响应
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": {
+    "ok": true,
+    "name": "python-review-gate"
+  }
+}
+```
+
+---
+
+## 2. `hook.before_llm`
+
+在发送请求给 LLM 之前触发。可用于注入工具。
+
+### 请求
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 2,
+  "method": "hook.before_llm",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "ParentTurnID": "",
+      "SessionKey": "session-1",
+      "Iteration": 0,
+      "TracePath": "runTurn",
+      "Source": "turn.llm.request"
+    },
+    "model": "claude-sonnet",
+    "messages": [
+      {"role": "user", "content": "hello"}
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "echo",
+          "description": "echo text",
+          "parameters": {"type": "object"}
+        }
+      }
+    ],
+    "options": {
+      "temperature": 0.7
+    },
+    "channel": "cli",
+    "chat_id": "chat-1",
+    "graceful_terminal": false
+  }
+}
+```
+
+| 字段 | 说明 |
+|------|------|
+| `meta` | 事件元数据，用于追踪 |
+| `model` | 请求的模型名称 |
+| `messages` | 对话历史 |
+| `tools` | 可用工具定义列表 |
+| `options` | LLM 参数（temperature、max_tokens 等） |
+| `channel` | 请求来源通道 |
+| `chat_id` | 会话 ID |
+
+### 响应（注入工具示例）
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 2,
+  "result": {
+    "action": "modify",
+    "request": {
+      "model": "claude-sonnet",
+      "messages": [{"role": "user", "content": "hello"}],
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "echo",
+            "description": "echo",
+            "parameters": {}
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "my_plugin_tool",
+            "description": "插件注入的工具",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "query": {"type": "string"}
+              }
+            }
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+| 字段 | 说明 |
+|------|------|
+| `action` | 决策动作（见下文「action 可选值」一节） |
+| `request` | 修改后的请求对象 |
+
+---
+
+## 3. `hook.after_llm`
+
+在收到 LLM 响应后触发。可修改响应内容。
+
+### 请求
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 3,
+  "method": "hook.after_llm",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "model": "claude-sonnet",
+    "response": {
+      "role": "assistant",
+      "content": "Hi!",
+      "tool_calls": [
+        {
+          "id": "tc-1",
+          "type": "function",
+          "function": {
+            "name": "echo",
+            "arguments": "{\"text\":\"hi\"}"
+          }
+        }
+      ]
+    },
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+### 响应
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 3,
+  "result": {
+    "action": "continue"
+  }
+}
+```
+
+---
+
+## 4. `hook.before_tool`
+
+在执行工具前触发。可修改工具名称和参数，或拒绝执行，或直接返回结果。
+
+### 请求
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "method": "hook.before_tool",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "tool": "echo_text",
+    "arguments": {
+      "text": "hello"
+    },
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+| 字段 | 说明 |
+|------|------|
+| `tool` | 工具名称 |
+| `arguments` | 工具参数 |
+
+### 响应（改写参数）
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "result": {
+    "action": "modify",
+    "call": {
+      "tool": "echo_text",
+      "arguments": {
+        "text": "modified hello"
+      }
+    }
+  }
+}
+```
+
+### 响应（拒绝执行）
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "result": {
+    "action": "deny_tool",
+    "reason": "参数不合法"
+  }
+}
+```
+
+### 响应（直接返回结果 - respond）
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 4,
+  "result": {
+    "action": "respond",
+    "call": {
+      "tool": "my_plugin_tool",
+      "arguments": {
+        "query": "hello"
+      }
+    },
+    "result": {
+      "for_llm": "Plugin tool executed successfully",
+      "for_user": "",
+      "silent": false,
+      "is_error": false
+    }
+  }
+}
+```
+
+`respond` action 允许 hook 直接返回工具结果，跳过实际工具执行。适用于：
+1. **插件工具注入**：外部 hook 可实现工具，无需在 ToolRegistry 注册
+2. **工具结果缓存**：对重复调用返回缓存结果
+3. **工具模拟**：测试时返回模拟结果
+
+| 字段 | 说明 |
+|------|------|
+| `action` | 必须为 `respond` |
+| `call` | 修改后的调用信息（可选） |
+| `result` | 直接返回的工具结果 |
+
+---
+
+## 5. `hook.after_tool`
+
+在工具执行完成后触发。可修改返回给 LLM 的结果。
+
+### 请求
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 5,
+  "method": "hook.after_tool",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "tool": "echo_text",
+    "arguments": {
+      "text": "hello"
+    },
+    "result": {
+      "for_llm": "echoed: hello",
+      "for_user": "",
+      "silent": false,
+      "is_error": false,
+      "async": false,
+      "media": [],
+      "artifact_tags": [],
+      "response_handled": false
+    },
+    "duration": 15000000,
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+| 字段 | 说明 |
+|------|------|
+| `result.for_llm` | 返回给 LLM 的内容 |
+| `result.for_user` | 发送给用户的内容 |
+| `result.silent` | 是否静默（不发送给用户） |
+| `result.is_error` | 是否为错误 |
+| `result.async` | 是否异步执行 |
+| `result.media` | 媒体引用列表 |
+| `result.artifact_tags` | 本地产物路径标签 |
+| `result.response_handled` | 是否已处理响应 |
+| `duration` | 执行耗时（纳秒） |
+
+### 响应
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 5,
+  "result": {
+    "action": "continue"
+  }
+}
+```
+
+---
+
+## 6. `hook.approve_tool`
+
+审批型 hook，用于决定是否允许执行敏感工具。
+
+### 请求
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 6,
+  "method": "hook.approve_tool",
+  "params": {
+    "meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1",
+      "SessionKey": "session-1"
+    },
+    "tool": "bash",
+    "arguments": {
+      "command": "rm -rf /"
+    },
+    "channel": "cli",
+    "chat_id": "chat-1"
+  }
+}
+```
+
+### 响应（批准）
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 6,
+  "result": {
+    "approved": true
+  }
+}
+```
+
+### 响应（拒绝）
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 6,
+  "result": {
+    "approved": false,
+    "reason": "危险命令，禁止执行"
+  }
+}
+```
+
+---
+
+## 7. `hook.event`（notification）
+
+观察型事件，仅广播，无需响应。`id` 为 `0` 或不存在。
+
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "hook.event",
+  "params": {
+    "Kind": "tool_exec_start",
+    "Meta": {
+      "AgentID": "agent-1",
+      "TurnID": "turn-1"
+    },
+    "Payload": {
+      "Tool": "echo_text",
+      "Arguments": {"text": "hello"}
+    }
+  }
+}
+```
+
+常见 `Kind` 值：
+- `turn_start` / `turn_end`
+- `llm_request` / `llm_response`
+- `tool_exec_start` / `tool_exec_end` / `tool_exec_skipped`
+- `steering_injected`
+- `interrupt_received`
+- `error`
+
+---
+
+## action 可选值
+
+| action | 适用 hook | 效果 |
+|--------|----------|------|
+| `continue` | 所有拦截型 | 放行，不做修改 |
+| `modify` | `before_llm`, `before_tool`, `after_llm`, `after_tool` | 改写请求/响应后放行 |
+| `respond` | `before_tool` | 直接返回工具结果，跳过实际执行。**注意：不会调用 AfterTool（设计决定——respond 提供的即为最终结果）。** |
+| `deny_tool` | `before_tool` | 拒绝执行该工具 |
+| `abort_turn` | 所有拦截型 | 中止当前 turn，返回错误 |
+| `hard_abort` | 所有拦截型 | 强制终止整个 agent loop |
+
+---
+
+## 完整流程示例
+
+```json
+{"jsonrpc":"2.0","id":1,"method":"hook.hello","params":{"name":"my_hook","version":1,"modes":["tool","approve"]}}
+{"jsonrpc":"2.0","id":1,"result":{"ok":true,"name":"my_hook"}}
+{"jsonrpc":"2.0","id":2,"method":"hook.before_llm","params":{"model":"claude-sonnet","messages":[{"role":"user","content":"hello"}],"tools":[]}}
+{"jsonrpc":"2.0","id":2,"result":{"action":"continue"}}
+{"jsonrpc":"2.0","id":3,"method":"hook.before_tool","params":{"tool":"bash","arguments":{"command":"ls"}}}
+{"jsonrpc":"2.0","id":3,"result":{"action":"continue"}}
+{"jsonrpc":"2.0","id":4,"method":"hook.approve_tool","params":{"tool":"bash","arguments":{"command":"ls"}}}
+{"jsonrpc":"2.0","id":4,"result":{"approved":true}}
+{"jsonrpc":"2.0","id":5,"method":"hook.after_tool","params":{"tool":"bash","arguments":{"command":"ls"},"result":{"for_llm":"file1.txt\nfile2.txt"},"duration":5000000}}
+{"jsonrpc":"2.0","id":5,"result":{"action":"continue"}}
+{"jsonrpc":"2.0","id":6,"method":"hook.after_llm","params":{"model":"claude-sonnet","response":{"role":"assistant","content":"已列出文件"}}}
+{"jsonrpc":"2.0","id":6,"result":{"action":"continue"}}
+```
+
+---
+
+## 通过 `before_llm` 和 `before_tool` 实现插件工具注入
+
+插件工具注入的标准流程：
+
+1. 在 `before_llm` 中注入工具定义，让 LLM 知道有这个工具可用
+2. 在 `before_tool` 中使用 `respond` action 直接返回工具执行结果
+
+### `before_llm` 注入工具定义
+
+```python
+def handle_before_llm(params: dict) -> dict:
+    tools = params.get("tools", [])
+    
+    # 添加插件工具定义
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "my_plugin_tool",
+            "description": "插件提供的工具",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "input": {"type": "string", "description": "输入内容"}
+                },
+                "required": ["input"]
+            }
+        }
+    })
+    
+    return {
+        "action": "modify",
+        "request": {
+            "model": params["model"],
+            "messages": params["messages"],
+            "tools": tools,
+            "options": params.get("options", {})
+        }
+    }
+```
+
+### `before_tool` 返回执行结果
+
+```python
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    
+    if tool == "my_plugin_tool":
+        # 在这里实现工具逻辑
+        args = params.get("arguments", {})
+        input_text = args.get("input", "")
+        
+        # 直接返回结果，无需在 ToolRegistry 注册
+        return {
+            "action": "respond",
+            "result": {
+                "for_llm": f"插件工具执行成功，输入: {input_text}",
+                "silent": False,
+                "is_error": False
+            }
+        }
+    
+    return {"action": "continue"}
+```
+
+通过这种方式，外部 hook 可以完全实现插件工具，无需在 PicoClaw 内部注册任何工具实现。
@@ -0,0 +1,587 @@
+# Plugin Tool Injection Example
+
+This document demonstrates how to use PicoClaw's hook system to implement external plugin tool injection, allowing the LLM to call tools implemented by external hook processes.
+
+---
+
+## Core Principle
+
+Through the hook system's `respond` action, external hooks can:
+
+1. Inject tool **definitions** in `before_llm`, so the LLM knows the tool is available
+2. Return tool **execution results** directly in `before_tool` using `respond` action, skipping ToolRegistry
+
+This way, external hooks can fully implement plugin tools without registering any tools inside PicoClaw.
+
+---
+
+## Complete Example: Weather Query Plugin
+
+Below is a complete Python hook example implementing a weather query plugin tool.
+
+### 1. Hook Script Implementation
+
+Save as `/tmp/weather_plugin.py`:
+
+```python
+#!/usr/bin/env python3
+"""Weather query plugin hook example"""
+from __future__ import annotations
+
+import json
+import sys
+import signal
+from typing import Any
+
+# Simulated weather data
+WEATHER_DATA = {
+    "Beijing": {"temp": 15, "weather": "Sunny", "humidity": 45},
+    "Shanghai": {"temp": 18, "weather": "Cloudy", "humidity": 60},
+    "Guangzhou": {"temp": 25, "weather": "Sunny", "humidity": 70},
+    "Shenzhen": {"temp": 26, "weather": "Cloudy", "humidity": 75},
+}
+
+
+def get_weather(city: str) -> dict:
+    """Get weather data (simulated)"""
+    data = WEATHER_DATA.get(city)
+    if data:
+        return {
+            "for_llm": f"{city} weather: {data['weather']}, temperature {data['temp']}°C, humidity {data['humidity']}%",
+            "for_user": "",
+            "silent": False,
+            "is_error": False,
+        }
+    return {
+        "for_llm": f"Weather data not found for city {city}",
+        "for_user": "",
+        "silent": False,
+        "is_error": True,
+    }
+
+
+def handle_hello(params: dict) -> dict:
+    return {"ok": True, "name": "weather-plugin"}
+
+
+def handle_before_llm(params: dict) -> dict:
+    """Inject weather query tool definition"""
+    tools = params.get("tools", [])
+    
+    # Add weather query tool
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Query weather information for a specified city",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "city": {
+                        "type": "string",
+                        "description": "City name, e.g.: Beijing, Shanghai, Guangzhou"
+                    }
+                },
+                "required": ["city"]
+            }
+        }
+    })
+    
+    return {
+        "action": "modify",
+        "request": {
+            "model": params.get("model"),
+            "messages": params.get("messages", []),
+            "tools": tools,
+            "options": params.get("options", {}),
+        }
+    }
+
+
+def handle_before_tool(params: dict) -> dict:
+    """Handle tool call, return result directly"""
+    tool = params.get("tool", "")
+    args = params.get("arguments", {})
+    
+    if tool == "get_weather":
+        city = args.get("city", "")
+        result = get_weather(city)
+        
+        # Use respond action to return result directly, skip ToolRegistry
+        return {
+            "action": "respond",
+            "result": result,
+        }
+    
+    # Other tools continue normal flow
+    return {"action": "continue"}
+
+
+def handle_request(method: str, params: dict) -> dict:
+    if method == "hook.hello":
+        return handle_hello(params)
+    if method == "hook.before_llm":
+        return handle_before_llm(params)
+    if method == "hook.before_tool":
+        return handle_before_tool(params)
+    if method == "hook.after_llm":
+        return {"action": "continue"}
+    if method == "hook.after_tool":
+        return {"action": "continue"}
+    if method == "hook.approve_tool":
+        return {"approved": True}
+    raise KeyError(f"method not found: {method}")
+
+
+def send_response(message_id: int, result: Any | None = None, error: str | None = None) -> None:
+    payload: dict[str, Any] = {
+        "jsonrpc": "2.0",
+        "id": message_id,
+    }
+    if error is not None:
+        payload["error"] = {"code": -32000, "message": error}
+    else:
+        payload["result"] = result if result is not None else {}
+    
+    sys.stdout.write(json.dumps(payload, ensure_ascii=True) + "\n")
+    sys.stdout.flush()
+
+
+def main() -> int:
+    for raw_line in sys.stdin:
+        line = raw_line.strip()
+        if not line:
+            continue
+        
+        try:
+            message = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        
+        method = message.get("method")
+        message_id = message.get("id", 0)
+        params = message.get("params") or {}
+        
+        if not message_id:
+            continue
+        
+        try:
+            result = handle_request(str(method or ""), params)
+            send_response(int(message_id), result=result)
+        except KeyError as exc:
+            send_response(int(message_id), error=str(exc))
+        except Exception as exc:
+            send_response(int(message_id), error=f"unexpected error: {exc}")
+    
+    return 0
+
+
+if __name__ == "__main__":
+    signal.signal(signal.SIGINT, lambda *_: sys.exit(0))
+    signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
+    raise SystemExit(main())
+```
+
+### 2. Configure PicoClaw
+
+Add hook configuration in the config file:
+
+```json
+{
+  "hooks": {
+    "enabled": true,
+    "processes": {
+      "weather_plugin": {
+        "enabled": true,
+        "priority": 100,
+        "transport": "stdio",
+        "command": ["python3", "/tmp/weather_plugin.py"],
+        "intercept": ["before_llm", "before_tool"]
+      }
+    }
+  }
+}
+```
+
+### 3. Test Results
+
+When the user asks "What's the weather in Beijing today?":
+
+1. PicoClaw sends `hook.before_llm`, hook injects `get_weather` tool definition
+2. LLM sees tool definition, decides to call `get_weather(city="Beijing")`
+3. PicoClaw sends `hook.before_tool`, hook uses `respond` action to return weather data
+4. LLM receives result, replies to user "Beijing is sunny today, temperature 15°C"
+
+---
+
+## Flow Diagram
+
+```
+User: "What's the weather in Beijing today?"
+        ↓
+    PicoClaw
+        ↓
+    hook.before_llm
+        ↓ (inject get_weather tool definition)
+    LLM request
+        ↓
+    LLM decides to call get_weather(city="Beijing")
+        ↓
+    hook.before_tool
+        ↓ (respond action returns weather data)
+    Return result directly to LLM
+        ↓ (skip ToolRegistry)
+    LLM replies: "Beijing is sunny today, temperature 15°C"
+```
+
+---
+
+## Key Points
+
+### `before_llm` Inject Tool Definition
+
+Tool definitions follow the OpenAI function-calling format:
+
+```json
+{
+  "type": "function",
+  "function": {
+    "name": "tool_name",
+    "description": "tool description",
+    "parameters": {
+      "type": "object",
+      "properties": {
+        "param_name": {
+          "type": "string",
+          "description": "parameter description"
+        }
+      },
+      "required": ["list of required parameters"]
+    }
+  }
+}
+```
+
+### `before_tool` Use respond Action
+
+`respond` action response format:
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "Content returned to LLM",
+    "for_user": "Optional, content sent to user",
+    "silent": false,
+    "is_error": false,
+    "media": ["Optional, media reference list"],
+    "response_handled": false
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `for_llm` | Required, LLM will see this content |
+| `for_user` | Optional, sent directly to user |
+| `silent` | When true, not sent to user |
+| `is_error` | When true, indicates execution failure |
+| `media` | Optional, media file references (images, files, etc.) |
+| `response_handled` | When true, indicates user request is handled, turn will end |
+
+---
+
+## Media File Handling
+
+The `respond` action supports returning media files (images, files, etc.). There are two processing modes:
+
+### 1. Automatic Delivery (`response_handled=true`)
+
+When `response_handled=true`, media files are automatically sent to the user and the turn ends:
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "Image sent to user",
+    "for_user": "",
+    "media": ["media://abc123"],
+    "response_handled": true
+  }
+}
+```
+
+Use cases:
+- Image generation plugin directly returning results
+- File download plugin sending files to user
+
+### 2. LLM Visible (`response_handled=false`)
+
+When `response_handled=false`, media references are passed to the LLM, which can see the content in the next request:
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "Image loaded, path: /tmp/image.png [file:/tmp/image.png]",
+    "media": ["media://abc123"]
+  }
+}
+```
+
+After seeing the content, the LLM can decide:
+- Use `send_file` tool to send to user
+- Analyze image content and reply to user
+- Other processing approaches
+
+### Media Reference Format
+
+Media references use the `media://` protocol:
+
+```
+media://<store-id>
+```
+
+These references are managed by PicoClaw's MediaStore and can be:
+- Sent to user via channel
+- Converted to base64 in LLM vision requests
+
+### Alternative: Use Existing Tools
+
+If the plugin generates files, you can return the file path and let the LLM call `send_file` or similar tools:
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "Image generated, saved at /tmp/generated_image.png. Use send_file tool to send to user.",
+    "for_user": "",
+    "silent": false
+  }
+}
+```
+
+This approach:
+- More decoupled, LLM decides when to send
+- Leverages existing tool mechanisms
+- Supports batch sending, delayed sending, etc.
+
+---
+
+## Multi-Tool Injection Example
+
+Multiple tools can be injected simultaneously:
+
+```python
+def handle_before_llm(params: dict) -> dict:
+    tools = params.get("tools", [])
+    
+    # Tool 1: Weather query
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Query city weather",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "city": {"type": "string", "description": "City name"}
+                },
+                "required": ["city"]
+            }
+        }
+    })
+    
+    # Tool 2: Calculator
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "calculate",
+            "description": "Perform mathematical calculations",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "expression": {"type": "string", "description": "Mathematical expression"}
+                },
+                "required": ["expression"]
+            }
+        }
+    })
+    
+    return {
+        "action": "modify",
+        "request": {
+            "model": params.get("model"),
+            "messages": params.get("messages", []),
+            "tools": tools,
+            "options": params.get("options", {}),
+        }
+    }
+
+
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    args = params.get("arguments", {})
+    
+    if tool == "get_weather":
+        return {
+            "action": "respond",
+            "result": get_weather(args.get("city", "")),
+        }
+    
+    if tool == "calculate":
+        # Simple calculation example
+        try:
+            expr = args.get("expression", "")
+            result = eval(expr)  # WARNING: eval() on LLM-supplied input can execute arbitrary code; use a restricted expression parser in real plugins
+            return {
+                "action": "respond",
+                "result": {
+                    "for_llm": f"Calculation result: {result}",
+                    "silent": False,
+                    "is_error": False,
+                },
+            }
+        except Exception as e:
+            return {
+                "action": "respond",
+                "result": {
+                    "for_llm": f"Calculation error: {e}",
+                    "silent": False,
+                    "is_error": True,
+                },
+            }
+    
+    return {"action": "continue"}
+```
+
+---
+
+## Coexistence with Built-in Tools
+
+Injected plugin tools coexist with PicoClaw built-in tools:
+
+- Built-in tools (like `bash`, `read_file`) execute normally through ToolRegistry
+- Plugin tools return results through hook's `respond` action
+- `handle_before_tool` only handles plugin tools, other tools return `continue`
+
+---
+
+## Go In-Process Hook Example
+
+If you need to implement plugin tool injection in Go code:
+
+```go
+package myhooks
+
+import (
+    "context"
+    "github.com/sipeed/picoclaw/pkg/agent"
+    "github.com/sipeed/picoclaw/pkg/tools"
+)
+
+type WeatherPluginHook struct{}
+
+func (h *WeatherPluginHook) BeforeLLM(
+    ctx context.Context,
+    req *agent.LLMHookRequest,
+) (*agent.LLMHookRequest, agent.HookDecision, error) {
+    // Inject tool definition
+    req.Tools = append(req.Tools, agent.ToolDefinition{
+        Type: "function",
+        Function: agent.FunctionDefinition{
+            Name:        "get_weather",
+            Description: "Query city weather",
+            Parameters: map[string]any{
+                "type": "object",
+                "properties": map[string]any{
+                    "city": map[string]any{
+                        "type":        "string",
+                        "description": "City name",
+                    },
+                },
+                "required": []string{"city"},
+            },
+        },
+    })
+    
+    return req, agent.HookDecision{Action: agent.HookActionContinue}, nil
+}
+
+func (h *WeatherPluginHook) BeforeTool(
+    ctx context.Context,
+    call *agent.ToolCallHookRequest,
+) (*agent.ToolCallHookRequest, agent.HookDecision, error) {
+    if call.Tool == "get_weather" {
+        city, _ := call.Arguments["city"].(string) // comma-ok form: a missing or non-string "city" yields "" instead of panicking
+        
+        // Set HookResult, use respond action
+        next := call.Clone()
+        next.HookResult = &tools.ToolResult{
+            ForLLM:  getWeatherData(city),
+            Silent:  false,
+            IsError: false,
+        }
+        
+        return next, agent.HookDecision{Action: agent.HookActionRespond}, nil
+    }
+    
+    return call, agent.HookDecision{Action: agent.HookActionContinue}, nil
+}
+
+func getWeatherData(city string) string {
+    // Implement weather query logic
+    return city + " weather: Sunny, temperature 20°C"
+}
+```
+
+---
+
+## Summary
+
+Through the hook system's `respond` action, external processes can:
+
+1. **Inject tool definitions**: Let LLM know new tools are available
+2. **Provide tool implementation**: Return execution results directly, no need to register in ToolRegistry
+3. **Coexist with built-in tools**: Does not affect normal operation of PicoClaw's original tools
+
+This provides a flexible and elegant solution for plugin development.
+
+---
+
+## Security Boundaries
+
+### Bypassing Approval Checks
+
+**Important**: The `respond` action bypasses `ApproveTool` approval checks.
+
+This means:
+- A `before_tool` hook can return `respond` for **any tool name**, including sensitive tools (like `bash`)
+- The tool won't go through the approval process, directly returning the hook-provided result
+- This is designed for plugin tools but introduces security risks
+
+### Security Recommendations
+
+1. **Review hook configuration**: Ensure only trusted hook processes are enabled
+2. **Limit hook scope**: Add your own security checks in hook implementation
+3. **Use `deny_tool` for rejection**: To deny execution, return the `deny_tool` action rather than a `respond` action carrying an error result
+
+### Example: Hook-Internal Security Check
+
+```python
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    args = params.get("arguments", {})
+    
+    # Security check: only handle plugin tools
+    if tool in ["get_weather", "calculate"]:
+        return {
+            "action": "respond",
+            "result": execute_plugin_tool(tool, args),
+        }
+    
+    # Other tools continue normal flow (will go through approval)
+    return {"action": "continue"}
+```
+
+This ensures the hook only affects plugin tools, not system tool approval flow.
@@ -0,0 +1,587 @@
+# 插件工具注入示例
+
+本文档展示如何利用 PicoClaw 的 hook 系统实现外部插件工具注入，让 LLM 能调用由外部 hook 进程实现的工具。
+
+---
+
+## 核心原理
+
+通过 hook 系统的 `respond` action，外部 hook 可以：
+
+1. 在 `before_llm` 中注入工具**定义**，让 LLM 知道有这个工具可用
+2. 在 `before_tool` 中使用 `respond` action 直接返回工具**执行结果**，跳过 ToolRegistry
+
+这样，外部 hook 可以完全实现插件工具，无需在 PicoClaw 内部注册任何工具。
+
+---
+
+## 完整示例：天气查询插件
+
+下面是一个完整的 Python hook 示例，实现一个天气查询插件工具。
+
+### 1. Hook 脚本实现
+
+保存为 `/tmp/weather_plugin.py`：
+
+```python
+#!/usr/bin/env python3
+"""天气查询插件 hook 示例"""
+from __future__ import annotations
+
+import json
+import sys
+import signal
+from typing import Any
+
+# 模拟天气数据
+WEATHER_DATA = {
+    "北京": {"temp": 15, "weather": "晴", "humidity": 45},
+    "上海": {"temp": 18, "weather": "多云", "humidity": 60},
+    "广州": {"temp": 25, "weather": "晴", "humidity": 70},
+    "深圳": {"temp": 26, "weather": "多云", "humidity": 75},
+}
+
+
+def get_weather(city: str) -> dict:
+    """获取天气数据（模拟）"""
+    data = WEATHER_DATA.get(city)
+    if data:
+        return {
+            "for_llm": f"{city}天气：{data['weather']}，温度{data['temp']}°C，湿度{data['humidity']}%",
+            "for_user": "",
+            "silent": False,
+            "is_error": False,
+        }
+    return {
+        "for_llm": f"未找到城市 {city} 的天气数据",
+        "for_user": "",
+        "silent": False,
+        "is_error": True,
+    }
+
+
+def handle_hello(params: dict) -> dict:
+    return {"ok": True, "name": "weather-plugin"}
+
+
+def handle_before_llm(params: dict) -> dict:
+    """注入天气查询工具定义"""
+    tools = params.get("tools", [])
+    
+    # 添加天气查询工具
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "查询指定城市的天气信息",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "city": {
+                        "type": "string",
+                        "description": "城市名称，如：北京、上海、广州"
+                    }
+                },
+                "required": ["city"]
+            }
+        }
+    })
+    
+    return {
+        "action": "modify",
+        "request": {
+            "model": params.get("model"),
+            "messages": params.get("messages", []),
+            "tools": tools,
+            "options": params.get("options", {}),
+        }
+    }
+
+
+def handle_before_tool(params: dict) -> dict:
+    """处理工具调用，直接返回结果"""
+    tool = params.get("tool", "")
+    args = params.get("arguments", {})
+    
+    if tool == "get_weather":
+        city = args.get("city", "")
+        result = get_weather(city)
+        
+        # 使用 respond action 直接返回结果，跳过 ToolRegistry
+        return {
+            "action": "respond",
+            "result": result,
+        }
+    
+    # 其他工具继续正常流程
+    return {"action": "continue"}
+
+
+def handle_request(method: str, params: dict) -> dict:
+    if method == "hook.hello":
+        return handle_hello(params)
+    if method == "hook.before_llm":
+        return handle_before_llm(params)
+    if method == "hook.before_tool":
+        return handle_before_tool(params)
+    if method == "hook.after_llm":
+        return {"action": "continue"}
+    if method == "hook.after_tool":
+        return {"action": "continue"}
+    if method == "hook.approve_tool":
+        return {"approved": True}
+    raise KeyError(f"method not found: {method}")
+
+
+def send_response(message_id: int, result: Any | None = None, error: str | None = None) -> None:
+    payload: dict[str, Any] = {
+        "jsonrpc": "2.0",
+        "id": message_id,
+    }
+    if error is not None:
+        payload["error"] = {"code": -32000, "message": error}
+    else:
+        payload["result"] = result if result is not None else {}
+    
+    sys.stdout.write(json.dumps(payload, ensure_ascii=True) + "\n")
+    sys.stdout.flush()
+
+
+def main() -> int:
+    for raw_line in sys.stdin:
+        line = raw_line.strip()
+        if not line:
+            continue
+        
+        try:
+            message = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        
+        method = message.get("method")
+        message_id = message.get("id", 0)
+        params = message.get("params") or {}
+        
+        if not message_id:
+            continue
+        
+        try:
+            result = handle_request(str(method or ""), params)
+            send_response(int(message_id), result=result)
+        except KeyError as exc:
+            send_response(int(message_id), error=str(exc))
+        except Exception as exc:
+            send_response(int(message_id), error=f"unexpected error: {exc}")
+    
+    return 0
+
+
+if __name__ == "__main__":
+    signal.signal(signal.SIGINT, lambda *_: sys.exit(0))
+    signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
+    raise SystemExit(main())
+```
+
+### 2. 配置 PicoClaw
+
+在配置文件中添加 hook 配置：
+
+```json
+{
+  "hooks": {
+    "enabled": true,
+    "processes": {
+      "weather_plugin": {
+        "enabled": true,
+        "priority": 100,
+        "transport": "stdio",
+        "command": ["python3", "/tmp/weather_plugin.py"],
+        "intercept": ["before_llm", "before_tool"]
+      }
+    }
+  }
+}
+```
+
+### 3. 测试效果
+
+当用户问"北京今天天气怎么样？"时：
+
+1. PicoClaw 发送 `hook.before_llm`，hook 注入 `get_weather` 工具定义
+2. LLM 看到工具定义，决定调用 `get_weather(city="北京")`
+3. PicoClaw 发送 `hook.before_tool`，hook 使用 `respond` action 返回天气数据
+4. LLM 收到结果，回复用户"北京今天晴天，温度15°C"
+
+---
+
+## 流程图解
+
+```
+用户: "北京今天天气怎么样？"
+        ↓
+    PicoClaw
+        ↓
+    hook.before_llm
+        ↓ (注入 get_weather 工具定义)
+    LLM 请求
+        ↓
+    LLM 决定调用 get_weather(city="北京")
+        ↓
+    hook.before_tool
+        ↓ (respond action 返回天气数据)
+    直接返回结果给 LLM
+        ↓ (跳过 ToolRegistry)
+    LLM 回复: "北京今天晴天，温度15°C"
+```
+
+---
+
+## 关键点说明
+
+### `before_llm` 注入工具定义
+
+工具定义遵循 OpenAI function calling 格式：
+
+```json
+{
+  "type": "function",
+  "function": {
+    "name": "工具名称",
+    "description": "工具描述",
+    "parameters": {
+      "type": "object",
+      "properties": {
+        "参数名": {
+          "type": "string",
+          "description": "参数描述"
+        }
+      },
+      "required": ["必需参数列表"]
+    }
+  }
+}
+```
+
+### `before_tool` 使用 respond action
+
+`respond` action 的响应格式：
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "返回给 LLM 的内容",
+    "for_user": "可选，发送给用户的内容",
+    "silent": false,
+    "is_error": false,
+    "media": ["可选，媒体引用列表"],
+    "response_handled": false
+  }
+}
+```
+
+| 字段 | 说明 |
+|------|------|
+| `for_llm` | 必须，LLM 会看到这个内容 |
+| `for_user` | 可选，直接发送给用户 |
+| `silent` | 为 true 时不发送给用户 |
+| `is_error` | 为 true 时表示执行失败 |
+| `media` | 可选，媒体文件引用列表（如图片、文件） |
+| `response_handled` | 为 true 时表示已处理用户请求，轮次将结束 |
+
+---
+
+## 媒体文件处理
+
+`respond` action 支持返回媒体文件（图片、文件等）。有两种处理方式：
+
+### 1. 自动发送（`response_handled=true`）
+
+当 `response_handled=true` 时，媒体文件会自动发送给用户，轮次结束：
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "图片已发送给用户",
+    "for_user": "",
+    "media": ["media://abc123"],
+    "response_handled": true
+  }
+}
+```
+
+适用场景：
+- 图像生成插件直接返回结果
+- 文件下载插件发送文件给用户
+
+### 2. LLM 可见（`response_handled=false`）
+
+当 `response_handled=false` 时，媒体引用会传递给 LLM，LLM 可以在下一轮请求中看到内容：
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "图片已加载，路径：/tmp/image.png [file:/tmp/image.png]",
+    "media": ["media://abc123"]
+  }
+}
+```
+
+LLM 看到内容后，可以自主决定：
+- 使用 `send_file` 工具发送给用户
+- 分析图片内容并回复用户
+- 其他处理方式
+
+### 媒体引用格式
+
+媒体引用使用 `media://` 协议：
+
+```
+media://<store-id>
+```
+
+这些引用由 PicoClaw 的 MediaStore 管理，可以：
+- 通过 channel 发送给用户
+- 在 LLM vision 请求中转换为 base64
+
+### 替代方案：使用现有工具
+
+如果插件生成文件，可以返回文件路径让 LLM 调用 `send_file` 等工具：
+
+```json
+{
+  "action": "respond",
+  "result": {
+    "for_llm": "图片已生成，保存在 /tmp/generated_image.png。使用 send_file 工具发送给用户。",
+    "for_user": "",
+    "silent": false
+  }
+}
+```
+
+这种方式：
+- 更解耦，LLM 自主决策发送时机
+- 利用现有工具机制
+- 支持批量发送、延迟发送等场景
+
+---
+
+## 多工具注入示例
+
+可以同时注入多个工具：
+
+```python
+def handle_before_llm(params: dict) -> dict:
+    tools = params.get("tools", [])
+    
+    # 工具1：天气查询
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "查询城市天气",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "city": {"type": "string", "description": "城市名称"}
+                },
+                "required": ["city"]
+            }
+        }
+    })
+    
+    # 工具2：计算器
+    tools.append({
+        "type": "function",
+        "function": {
+            "name": "calculate",
+            "description": "执行数学计算",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "expression": {"type": "string", "description": "数学表达式"}
+                },
+                "required": ["expression"]
+            }
+        }
+    })
+    
+    return {
+        "action": "modify",
+        "request": {
+            "model": params.get("model"),
+            "messages": params.get("messages", []),
+            "tools": tools,
+            "options": params.get("options", {}),
+        }
+    }
+
+
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    args = params.get("arguments", {})
+    
+    if tool == "get_weather":
+        return {
+            "action": "respond",
+            "result": get_weather(args.get("city", "")),
+        }
+    
+    if tool == "calculate":
+        # 简单计算示例
+        try:
+            expr = args.get("expression", "")
+            result = eval(expr)  # 警告：对 LLM 提供的输入直接 eval 可能执行任意代码，实际使用时请改用受限的表达式解析器
+            return {
+                "action": "respond",
+                "result": {
+                    "for_llm": f"计算结果: {result}",
+                    "silent": False,
+                    "is_error": False,
+                },
+            }
+        except Exception as e:
+            return {
+                "action": "respond",
+                "result": {
+                    "for_llm": f"计算错误: {e}",
+                    "silent": False,
+                    "is_error": True,
+                },
+            }
+    
+    return {"action": "continue"}
+```
+
+---
+
+## 与内置工具共存
+
+注入的插件工具与 PicoClaw 内置工具共存：
+
+- 内置工具（如 `bash`、`read_file`）正常通过 ToolRegistry 执行
+- 插件工具通过 hook 的 `respond` action 返回结果
+- `handle_before_tool` 中只处理插件工具，其他工具返回 `continue`
+
+---
+
+## Go 进程内 Hook 示例
+
+如果需要在 Go 代码中实现插件工具注入：
+
+```go
+package myhooks
+
+import (
+    "context"
+    "github.com/sipeed/picoclaw/pkg/agent"
+    "github.com/sipeed/picoclaw/pkg/tools"
+)
+
+type WeatherPluginHook struct{}
+
+func (h *WeatherPluginHook) BeforeLLM(
+    ctx context.Context,
+    req *agent.LLMHookRequest,
+) (*agent.LLMHookRequest, agent.HookDecision, error) {
+    // 注入工具定义
+    req.Tools = append(req.Tools, agent.ToolDefinition{
+        Type: "function",
+        Function: agent.FunctionDefinition{
+            Name:        "get_weather",
+            Description: "查询城市天气",
+            Parameters: map[string]any{
+                "type": "object",
+                "properties": map[string]any{
+                    "city": map[string]any{
+                        "type":        "string",
+                        "description": "城市名称",
+                    },
+                },
+                "required": []string{"city"},
+            },
+        },
+    })
+    
+    return req, agent.HookDecision{Action: agent.HookActionContinue}, nil
+}
+
+func (h *WeatherPluginHook) BeforeTool(
+    ctx context.Context,
+    call *agent.ToolCallHookRequest,
+) (*agent.ToolCallHookRequest, agent.HookDecision, error) {
+    if call.Tool == "get_weather" {
+        city, _ := call.Arguments["city"].(string) // 使用 comma-ok 形式：缺少 "city" 或类型不符时得到空字符串，而不是 panic
+        
+        // 设置 HookResult，使用 respond action
+        next := call.Clone()
+        next.HookResult = &tools.ToolResult{
+            ForLLM:  getWeatherData(city),
+            Silent:  false,
+            IsError: false,
+        }
+        
+        return next, agent.HookDecision{Action: agent.HookActionRespond}, nil
+    }
+    
+    return call, agent.HookDecision{Action: agent.HookActionContinue}, nil
+}
+
+func getWeatherData(city string) string {
+    // 实现天气查询逻辑
+    return city + "天气：晴，温度20°C"
+}
+```
+
+---
+
+## 总结
+
+通过 hook 系统的 `respond` action，外部进程可以：
+
+1. **注入工具定义**：让 LLM 知道有新工具可用
+2. **提供工具实现**：直接返回执行结果，无需注册到 ToolRegistry
+3. **与内置工具共存**：不影响 PicoClaw 原有工具的正常运行
+
+这为插件开发提供了灵活、优雅的解决方案。
+
+---
+
+## 安全边界说明
+
+### 绕过审批检查
+
+**重要**：`respond` action 会绕过 `ApproveTool` 审批检查。
+
+这意味着：
+- `before_tool` hook 可以为**任何工具名称**返回 `respond`，包括敏感工具（如 `bash`）
+- 工具不会经过审批流程，直接返回 hook 提供的结果
+- 这是为了支持插件工具而设计，但也带来了安全风险
+
+### 安全建议
+
+1. **审查 hook 配置**：确保只有可信的 hook 进程被启用
+2. **限制 hook 权限**：在 hook 实现中添加自己的安全检查
+3. **优先使用 `deny_tool`**：对于拒绝执行，使用 `deny_tool` action 而非 `respond` 返回错误
+
+### 示例：hook 内置安全检查
+
+```python
+def handle_before_tool(params: dict) -> dict:
+    tool = params.get("tool", "")
+    args = params.get("arguments", {})
+    
+    # 安全检查：只处理插件工具
+    if tool in ["get_weather", "calculate"]:
+        return {
+            "action": "respond",
+            "result": execute_plugin_tool(tool, args),
+        }
+    
+    # 其他工具继续正常流程（会经过审批）
+    return {"action": "continue"}
+```
+
+这样可以确保 hook 只影响插件工具，不影响系统工具的审批流程。
@@ -0,0 +1,282 @@
+# Routing System
+
+> Back to [README](../README.md)
+
+In PicoClaw, the runtime "routing system" is not just one decision.
+It is the combined pipeline that decides:
+
+1. which agent handles an inbound message
+2. which session dimensions should isolate that conversation
+3. whether the turn should use the agent's primary model or a configured light model
+
+This document covers the runtime path in `pkg/routing` and its integration in `pkg/agent`.
+It does not describe the launcher's HTTP `ServeMux` routes or the frontend's TanStack Router files under `web/`.
+
+## Routing Layers
+
+| Layer | Files | Responsibility |
+| --- | --- | --- |
+| Agent dispatch | `pkg/routing/route.go`, `pkg/routing/agent_id.go` | Choose the target agent for the inbound message. |
+| Session policy selection | `pkg/routing/route.go` | Decide which dimensions should define session isolation for that routed turn. |
+| Model routing | `pkg/routing/router.go`, `pkg/routing/features.go`, `pkg/routing/classifier.go` | Choose between the primary model and a configured light model based on message complexity. |
+| Runtime integration | `pkg/agent/registry.go`, `pkg/agent/agent_message.go`, `pkg/agent/turn_coord.go` | Apply the route result, allocate session scope, and select model candidates before provider execution. |
+
+## End-To-End Flow
+
+The normal path for a user message is:
+
+```text
+InboundMessage
+  -> NormalizeInboundContext
+  -> RouteResolver.ResolveRoute(...)
+  -> session.AllocateRouteSession(...)
+  -> ensureSessionMetadata(...)
+  -> Router.SelectModel(...)
+  -> provider execution
+```
+
+The first half answers "who should handle this message and what session does it belong to".
+The second half answers "which model tier should that agent use for this turn".
+
+## Agent Dispatch
+
+`routing.RouteResolver` turns a normalized `bus.InboundContext` into a `ResolvedRoute`:
+
+```go
+type ResolvedRoute struct {
+    AgentID       string
+    Channel       string
+    AccountID     string
+    SessionPolicy SessionPolicy
+    MatchedBy     string
+}
+```
+
+`MatchedBy` is a debugging aid.
+Typical values are:
+
+- `default`
+- `dispatch.rule`
+- `dispatch.rule:<rule-name>`
+
+## Dispatch Input View
+
+Before matching rules, the resolver builds a normalized `dispatchView`.
+Each field is normalized to the exact shape expected by rule matching.
+
+| Selector field | Runtime shape |
+| --- | --- |
+| `channel` | lowercased channel name |
+| `account` | normalized account ID |
+| `space` | `<space_type>:<space_id>` |
+| `chat` | `<chat_type>:<chat_id>` |
+| `topic` | `topic:<topic_id>` |
+| `sender` | lowercased canonical sender ID |
+| `mentioned` | boolean copied from inbound context |
+
+This means dispatch rules must match the normalized shape, for example:
+
+```json
+{
+  "agents": {
+    "dispatch": {
+      "rules": [
+        {
+          "name": "support-group",
+          "agent": "support",
+          "when": {
+            "channel": "telegram",
+            "chat": "group:-100123"
+          }
+        },
+        {
+          "name": "slack-mentions",
+          "agent": "support",
+          "when": {
+            "channel": "slack",
+            "space": "workspace:t001",
+            "mentioned": true
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+## Dispatch Algorithm
+
+`ResolveRoute(...)` follows this sequence:
+
+1. Normalize `channel` and `account`.
+2. Clone `session.identity_links` from config.
+3. Build the normalized dispatch view.
+4. Scan `agents.dispatch.rules` in order.
+5. Skip rules with no constraints at all.
+6. Return the first rule whose selector fields all match exactly.
+7. If no rule matches, fall back to the default agent.
+
+Important consequences:
+
+- first match wins
+- there is no score or priority field beyond list order
+- invalid target agent IDs fall back to the default agent
+- sender matching can see canonical identities produced by `identity_links`
+
+## Default Agent Resolution
+
+If no dispatch rule wins, or if a rule points at an unknown agent, the resolver picks a default agent using this order:
+
+1. the agent marked `default: true`
+2. otherwise the first entry in `agents.list`
+3. otherwise implicit `main`
+
+Both agent IDs and account IDs are normalized through the helpers in `pkg/routing/agent_id.go`.
+
+## Session Policy Handoff
+
+Agent dispatch does not directly build a session key.
+Instead it emits a `SessionPolicy`:
+
+```go
+type SessionPolicy struct {
+    Dimensions    []string
+    IdentityLinks map[string][]string
+}
+```
+
+The dimensions come from:
+
+- global `session.dimensions`
+- or `dispatch_rule.session_dimensions` when the matching rule overrides them
+
+Only these dimension names survive normalization:
+
+- `space`
+- `chat`
+- `topic`
+- `sender`
+
+Invalid or duplicated entries are silently dropped.
+
+`session.AllocateRouteSession(...)` (package `pkg/session`) then turns that policy into:
+
+- a structured `SessionScope`
+- a canonical routed session key
+- legacy compatibility aliases
+
+So the routing package owns "what should isolate this conversation", while the session package owns "how that isolation becomes keys and durable storage".
+
+## Identity Links
+
+`session.identity_links` is shared between dispatch and session allocation.
+That is intentional: a sender canonicalized for routing should also map to the same session identity.
+
+Without that symmetry, the system could route two messages to the same agent but still fragment their history into different sessions.
+
+## Model Routing
+
+The second routing stage decides whether a turn can use a cheaper or faster light model.
+
+Config shape:
+
+```json
+{
+  "routing": {
+    "enabled": true,
+    "light_model": "gemini-2.0-flash",
+    "threshold": 0.35
+  }
+}
+```
+
+`pkg/routing.Router` evaluates the current turn's structural features and returns:
+
+- chosen model name
+- whether the light model was used
+- computed complexity score
+
+If the score is below the threshold, the light model wins.
+Otherwise the agent's primary model is used.
+At runtime this only matters when the agent actually has light-model candidates configured; otherwise execution stays on the primary candidate set.
+
+## Complexity Features
+
+`ExtractFeatures(...)` computes a language-agnostic feature vector:
+
+| Feature | Meaning |
+| --- | --- |
+| `TokenEstimate` | Approximate token count; CJK text is estimated more accurately than a flat rune split would allow. |
+| `CodeBlockCount` | Number of fenced code blocks in the current message. |
+| `RecentToolCalls` | Tool-call count across the last six history entries. |
+| `ConversationDepth` | Total history length. |
+| `HasAttachments` | Detects embedded media or common media URL/file extensions. |
+
+This is intentionally structural rather than keyword-based, so the router behaves the same across languages.
+
+## RuleClassifier Scoring
+
+The current classifier is `RuleClassifier`.
+It uses a weighted sum capped to `[0, 1]`.
+
+| Signal | Score |
+| --- | --- |
+| attachments present | `1.00` |
+| token estimate `> 200` | `0.35` |
+| token estimate `> 50` | `0.15` |
+| code block present | `0.40` |
+| recent tool calls `> 3` | `0.25` |
+| recent tool calls `1..3` | `0.10` |
+| conversation depth `> 10` | `0.10` |
+
+The default threshold is `0.35`.
+That makes the following behavior intentional:
+
+- trivial chat stays on the light model
+- code tasks usually jump to the heavy model immediately
+- attachments always force the heavy model
+- long, plain-text prompts (token estimate `> 200`) score at least `0.35`, which is not below the default threshold, so they use the heavy model
+
+## Runtime Integration
+
+Agent dispatch and model routing happen in different places:
+
+- `pkg/agent/registry.go` owns `RouteResolver`
+- `pkg/agent/agent_message.go` resolves the route and allocates session scope
+- `pkg/agent/turn_coord.go:selectCandidates` calls `agent.Router.SelectModel(...)`
+
+When the light model is selected, the agent loop swaps to `agent.LightCandidates`.
+When it is not selected, execution stays on the agent's primary provider candidate set.
+
+## Explicit Session Keys
+
+One nuance sits just outside `pkg/routing` but matters for the full routing story.
+
+After a route is allocated, `pkg/agent/agent_utils.go:resolveScopeKey` preserves an explicit incoming session key when the caller already supplied:
+
+- an opaque canonical key
+- a legacy `agent:...` key
+
+That makes manual system flows, tests, and compatibility paths deterministic even when the normal routed scope would have produced a different key.
+
+## What This Document Does Not Cover
+
+The repository also contains two unrelated route systems:
+
+- backend HTTP routes registered in `web/backend/api/router.go`
+- frontend file routes under `web/frontend/src/routes/`
+
+Those are launcher implementation details.
+They are separate from the runtime routing system described here.
+
+## Related Files
+
+- `pkg/routing/route.go`
+- `pkg/routing/router.go`
+- `pkg/routing/classifier.go`
+- `pkg/routing/features.go`
+- `pkg/routing/agent_id.go`
+- `pkg/session/allocator.go`
+- `pkg/agent/registry.go`
+- `pkg/agent/agent_message.go`
+- `pkg/agent/turn_coord.go`
@@ -0,0 +1,281 @@
+# 路由系统
+
+> 返回 [README](../README.md)
+
+在 PicoClaw 里，“路由系统”不是单一判断。
+它实际上是组合起来的一条运行时决策链，负责决定：
+
+1. 哪个 agent 来处理一条入站消息
+2. 这条消息应该落在哪种 session 隔离维度下
+3. 这一轮该使用 agent 的主模型，还是配置中的轻量模型
+
+本文覆盖 `pkg/routing` 及其在 `pkg/agent` 中的集成方式。
+它不讨论 `web/` 目录下 launcher 的 HTTP `ServeMux` 路由，也不讨论前端 TanStack Router 文件路由。
+
+## 路由分层
+
+| 层次 | 文件 | 作用 |
+| --- | --- | --- |
+| Agent 分发 | `pkg/routing/route.go`、`pkg/routing/agent_id.go` | 为入站消息选择目标 agent。 |
+| Session 策略选择 | `pkg/routing/route.go` | 决定该 turn 的会话隔离维度。 |
+| 模型路由 | `pkg/routing/router.go`、`pkg/routing/features.go`、`pkg/routing/classifier.go` | 根据消息复杂度在主模型和轻量模型之间做选择。 |
+| 运行时集成 | `pkg/agent/registry.go`、`pkg/agent/loop_message.go`、`pkg/agent/loop_turn.go` | 应用 route 结果、分配 session scope，并在真正调用 provider 前选出模型候选集。 |
+
+## 端到端流程
+
+普通用户消息的路径如下：
+
+```text
+InboundMessage
+  -> NormalizeInboundContext
+  -> RouteResolver.ResolveRoute(...)
+  -> session.AllocateRouteSession(...)
+  -> ensureSessionMetadata(...)
+  -> Router.SelectModel(...)
+  -> provider execution
+```
+
+前半段回答的是“谁来处理，以及属于哪段会话”。
+后半段回答的是“这个 agent 这一轮该走哪一档模型”。
+
+## Agent 分发
+
+`routing.RouteResolver` 会把归一化后的 `bus.InboundContext` 转成 `ResolvedRoute`：
+
+```go
+type ResolvedRoute struct {
+    AgentID       string
+    Channel       string
+    AccountID     string
+    SessionPolicy SessionPolicy
+    MatchedBy     string
+}
+```
+
+`MatchedBy` 主要用于日志和调试，常见值包括：
+
+- `default`
+- `dispatch.rule`
+- `dispatch.rule:<rule-name>`
+
+## Dispatch 输入视图
+
+真正做规则匹配前，resolver 会先构造一个归一化后的 `dispatchView`。
+每个字段都会变成规则匹配所期待的固定形状。
+
+| Selector 字段 | 运行时形状 |
+| --- | --- |
+| `channel` | 小写 channel 名称 |
+| `account` | 归一化后的 account ID |
+| `space` | `<space_type>:<space_id>` |
+| `chat` | `<chat_type>:<chat_id>` |
+| `topic` | `topic:<topic_id>` |
+| `sender` | 小写 canonical sender ID |
+| `mentioned` | 直接来自 inbound context 的布尔值 |
+
+这意味着 dispatch rule 必须写成归一化后的形状，例如：
+
+```json
+{
+  "agents": {
+    "dispatch": {
+      "rules": [
+        {
+          "name": "support-group",
+          "agent": "support",
+          "when": {
+            "channel": "telegram",
+            "chat": "group:-100123"
+          }
+        },
+        {
+          "name": "slack-mentions",
+          "agent": "support",
+          "when": {
+            "channel": "slack",
+            "space": "workspace:t001",
+            "mentioned": true
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+## Dispatch 算法
+
+`ResolveRoute(...)` 的流程是：
+
+1. 归一化 `channel` 和 `account`。
+2. 从配置复制 `session.identity_links`。
+3. 构建归一化后的 dispatch view。
+4. 按顺序扫描 `agents.dispatch.rules`。
+5. 没有任何约束条件的 rule 会被跳过。
+6. 第一个所有 selector 字段都精确匹配的 rule 胜出。
+7. 如果没有 rule 匹配，则回退到默认 agent。
+
+这带来几个重要结论：
+
+- 第一条命中的规则优先，没有额外 priority 字段
+- rule 顺序本身就是优先级
+- 指向无效 agent 的 rule 最终会回退到默认 agent
+- sender 匹配看到的是经过 `identity_links` 归一化后的身份
+
+## 默认 Agent 解析
+
+如果没有 dispatch rule 命中，或者 rule 指向了不存在的 agent，resolver 会按以下顺序选择默认 agent：
+
+1. `default: true` 的 agent
+2. 否则取 `agents.list` 的第一项
+3. 如果配置里没有 agent，则使用隐式 `main`
+
+Agent ID 和 Account ID 都会经过 `pkg/routing/agent_id.go` 中的归一化逻辑。
+
+## Session 策略交接
+
+Agent 分发本身不会直接生成 session key。
+它只会产出一个 `SessionPolicy`：
+
+```go
+type SessionPolicy struct {
+    Dimensions    []string
+    IdentityLinks map[string][]string
+}
+```
+
+维度来源有两种：
+
+- 全局 `session.dimensions`
+- 如果命中的 dispatch rule 指定了 `session_dimensions`，则用 rule 覆盖
+
+最终只有这些维度名会被保留下来：
+
+- `space`
+- `chat`
+- `topic`
+- `sender`
+
+非法项或重复项会被静默丢弃。
+
+随后 `session.AllocateRouteSession(...)`（位于 `pkg/session` 包）再把这份策略转成：
+
+- 结构化 `SessionScope`
+- canonical routed session key
+- legacy 兼容 alias
+
+所以可以把职责边界理解为：
+
+- `pkg/routing` 决定“这段对话应该按什么维度隔离”
+- `pkg/session` 决定“这些维度如何变成 key 和持久化状态”
+
+## Identity Links
+
+`session.identity_links` 会同时被 dispatch 和 session allocation 使用。
+这是刻意保持一致的设计：如果某个 sender 在路由阶段已经被规范化，那么 session 阶段也应该落到同一个身份上。
+
+否则就会出现“消息路由到了同一个 agent，但上下文仍被拆成多个 session”的问题。
+
+## 模型路由
+
+第二阶段路由决定这一轮能否使用更便宜或更快的轻量模型。
+
+配置形状如下：
+
+```json
+{
+  "routing": {
+    "enabled": true,
+    "light_model": "gemini-2.0-flash",
+    "threshold": 0.35
+  }
+}
+```
+
+`pkg/routing.Router` 会根据当前 turn 的结构特征，返回：
+
+- 选中的模型名
+- 是否使用了 light model
+- 复杂度分数
+
+当分数低于阈值时，走轻量模型；否则仍使用 agent 的主模型。
+但在运行时，只有当 agent 实际配置了 light-model candidates 时，这个判断才会产生效果；否则仍会停留在主模型候选集上。
+
+## 复杂度特征
+
+`ExtractFeatures(...)` 会计算一个与自然语言内容无关、偏结构化的特征向量：
+
+| 特征 | 含义 |
+| --- | --- |
+| `TokenEstimate` | 估算 token 数；对 CJK 文本比简单 rune 平分更准确。 |
+| `CodeBlockCount` | 当前消息中 fenced code block 的数量。 |
+| `RecentToolCalls` | 最近 6 条历史消息中的 tool call 总数。 |
+| `ConversationDepth` | 整体历史长度。 |
+| `HasAttachments` | 是否检测到嵌入媒体或常见媒体 URL / 文件扩展名。 |
+
+这样做的目的，是让模型路由不依赖关键词，从而在不同语言下都保持一致行为。
+
+## RuleClassifier 评分
+
+当前分类器是 `RuleClassifier`，使用加权求和并把结果截断到 `[0, 1]`。
+
+| 信号 | 分值 |
+| --- | --- |
+| 存在附件 | `1.00` |
+| token 估计 `> 200` | `0.35` |
+| token 估计 `> 50` | `0.15` |
+| 存在代码块 | `0.40` |
+| 最近 tool calls `> 3` | `0.25` |
+| 最近 tool calls `1..3` | `0.10` |
+| 会话深度 `> 10` | `0.10` |
+
+默认阈值是 `0.35`。
+这意味着以下行为是刻意设计出来的：
+
+- 很轻的闲聊仍走轻量模型
+- 编码类请求通常会立刻切到重模型
+- 带附件的请求一定走重模型
+- 很长的纯文本请求（token 估计 `> 200`）得分至少为 `0.35`，不低于默认阈值，因此也会走重模型
+
+## 运行时集成
+
+Agent 分发和模型路由发生在不同位置：
+
+- `pkg/agent/registry.go` 持有 `RouteResolver`
+- `pkg/agent/loop_message.go` 负责 resolve route 并分配 session scope
+- `pkg/agent/loop_turn.go:selectCandidates` 调用 `agent.Router.SelectModel(...)`
+
+当 light model 被选中时，agent loop 会切换到 `agent.LightCandidates`。
+如果没有被选中，则继续使用 agent 的主 provider 候选集。
+
+## 显式 Session Key
+
+还有一个不在 `pkg/routing` 内部、但对整体“路由语义”很重要的细节。
+
+在 route 分配完成后，`pkg/agent/loop_utils.go:resolveScopeKey` 会优先保留调用方显式传入的 session key，只要它属于以下格式之一：
+
+- 不透明 canonical key
+- legacy `agent:...` key
+
+这样一来，手工系统流、测试和兼容路径即使在正常路由 scope 会生成不同 key 的情况下，仍然能保持确定性。
+
+## 本文不覆盖的内容
+
+仓库里还存在两套和这里无关的“route”系统：
+
+- `web/backend/api/router.go` 注册的后端 HTTP 路由
+- `web/frontend/src/routes/` 下的前端文件路由
+
+它们属于 launcher 的实现细节，和本文描述的运行时路由系统是两回事。
+
+## 相关文件
+
+- `pkg/routing/route.go`
+- `pkg/routing/router.go`
+- `pkg/routing/classifier.go`
+- `pkg/routing/features.go`
+- `pkg/routing/agent_id.go`
+- `pkg/session/allocator.go`
+- `pkg/agent/registry.go`
+- `pkg/agent/loop_message.go`
+- `pkg/agent/loop_turn.go`
@@ -0,0 +1,255 @@
+# Session System
+
+> Back to [README](../README.md)
+
+This document describes the runtime session system used by PicoClaw to:
+
+- map inbound messages onto stable conversation scopes
+- persist message history and summaries
+- preserve compatibility with legacy `agent:...` session keys while the runtime uses opaque canonical keys
+
+This document covers the core runtime path in `pkg/session`, `pkg/memory`, and `pkg/agent`.
+It does not describe launcher login cookies or dashboard authentication sessions in `web/backend/middleware`.
+
+## Responsibilities
+
+The session system has four jobs:
+
+1. Decide which messages should share the same conversation context.
+2. Persist that context durably across turns and restarts.
+3. Expose a small `SessionStore` interface to the agent loop.
+4. Keep older session-key formats working during storage and routing migrations.
+
+## Main Components
+
+| Layer | Files | Responsibility |
+| --- | --- | --- |
+| Session contract | `pkg/session/session_store.go` | Defines the `SessionStore` interface used by the agent loop. |
+| Legacy backend | `pkg/session/manager.go` | Stores one JSON file per session. Still used as a fallback. |
+| Session adapter | `pkg/session/jsonl_backend.go` | Adapts `pkg/memory.Store` to `SessionStore`, including alias and scope metadata support. |
+| Durable storage | `pkg/memory/jsonl.go` | Append-only JSONL storage plus `.meta.json` sidecar metadata. |
+| Scope and key building | `pkg/session/scope.go`, `pkg/session/key.go`, `pkg/session/allocator.go` | Builds structured scopes, opaque canonical keys, and legacy aliases from routing results. |
+| Runtime integration | `pkg/agent/instance.go`, `pkg/agent/agent.go`, `pkg/agent/agent_message.go` | Initializes the store, allocates session scope, and persists metadata before turns run. |
+
+## Session Data Model
+
+The structured session identity is represented by `session.SessionScope`:
+
+| Field | Meaning |
+| --- | --- |
+| `Version` | Schema version. Current value is `ScopeVersionV1`. |
+| `AgentID` | Routed agent handling the turn. |
+| `Channel` | Normalized inbound channel name. |
+| `Account` | Normalized account or bot identifier. |
+| `Dimensions` | Ordered list of active partition dimensions such as `chat` or `sender`. |
+| `Values` | Concrete normalized values for each selected dimension. |
+
+Only four dimensions are currently recognized by the allocator:
+
+- `space`
+- `chat`
+- `topic`
+- `sender`
+
+The default config uses:
+
+```json
+{
+  "session": {
+    "dimensions": ["chat"]
+  }
+}
+```
+
+That means one shared conversation per chat unless a dispatch rule overrides it.
+
+## Canonical Keys And Legacy Aliases
+
+The runtime now prefers opaque canonical keys:
+
+```text
+sk_v1_<sha256>
+```
+
+These keys are built from a canonical scope signature in `pkg/session/key.go`.
+The goal is to make storage keys stable while decoupling them from any specific legacy text format.
+
+For compatibility, the allocator also emits legacy aliases such as:
+
+```text
+agent:main:direct:user123
+agent:main:slack:channel:c001
+agent:main:pico:direct:pico:session-123
+```
+
+These aliases matter because older sessions, tests, and some tools still refer to the legacy shape.
+The JSONL backend resolves aliases back to the canonical key before reads and writes.
+
+The agent loop also preserves explicit incoming session keys when the caller already supplied one of the recognized explicit formats:
+
+- opaque canonical key
+- legacy `agent:...` key
+
+That behavior lives in `pkg/agent/agent_utils.go:resolveScopeKey`.
+
+## Allocation Flow
+
+The end-to-end flow for a normal inbound message is:
+
+```text
+InboundMessage
+  -> RouteResolver.ResolveRoute(...)
+  -> session.AllocateRouteSession(...)
+  -> resolveScopeKey(...)
+  -> ensureSessionMetadata(...)
+  -> AgentLoop turn execution
+  -> SessionStore read/write operations
+```
+
+More concretely:
+
+1. `pkg/agent/agent_message.go` resolves the agent route from normalized inbound context.
+2. `session.AllocateRouteSession` converts the route's `SessionPolicy` plus inbound context into a structured `SessionScope`.
+3. The allocator builds:
+   - `SessionKey`: canonical routed session key
+   - `SessionAliases`: compatibility aliases for that routed scope
+   - `MainSessionKey`: agent-level main session key
+   - `MainAliases`: legacy alias for the main session
+4. `runAgentLoop` persists scope metadata and aliases through `ensureSessionMetadata`.
+5. During later reads or writes, `JSONLBackend.ResolveSessionKey` maps aliases back onto the canonical key.
+
+The main session key is separate from routed chat sessions.
+It is mainly used for agent-level or system-style flows that need one stable per-agent conversation, for example `processSystemMessage`.
+
+## Scope Construction Rules
+
+`pkg/session/allocator.go` builds scope values from normalized inbound context.
+Important rules:
+
+- `space` becomes `<space_type>:<space_id>`
+- `chat` becomes `<chat_type>:<chat_id>`
+- `topic` becomes `topic:<topic_id>`
+- `sender` is canonicalized through `session.identity_links` before being stored
+
+There are two special cases worth calling out.
+
+### Telegram forum isolation
+
+Telegram forum topics must stay isolated even when the configured dimensions only mention `chat`.
+To preserve that behavior, the allocator appends `/<topic_id>` to the `chat` value for Telegram forum messages unless `topic` is already an explicit dimension.
+
+Example:
+
+```text
+group:-1001234567890/42
+group:-1001234567890/99
+```
+
+Those produce different session keys.
+
+### Identity links
+
+`session.identity_links` lets multiple sender identifiers collapse into one canonical identity.
+Both dispatch matching and session allocation use that mapping so that the same person can keep one conversation even if their raw sender IDs differ across channels or accounts.
+
+## Storage Format
+
+The default runtime backend is `pkg/memory.JSONLStore`, wrapped by `session.JSONLBackend`.
+
+Each session uses two files:
+
+```text
+{sanitized_key}.jsonl
+{sanitized_key}.meta.json
+```
+
+The files store:
+
+- `.jsonl`: one `providers.Message` per line, append-only
+- `.meta.json`: summary, timestamps, line counts, logical truncation offset, scope, aliases
+
+`SessionMeta` currently includes:
+
+- `Key`
+- `Summary`
+- `Skip`
+- `Count`
+- `CreatedAt`
+- `UpdatedAt`
+- `Scope`
+- `Aliases`
+
+## Write And Crash Semantics
+
+The JSONL store is designed around append-first durability and stale-over-loss recovery:
+
+- `AddMessage` and `AddFullMessage` append one JSON line, `fsync`, then update metadata.
+- `TruncateHistory` is logical first: it only advances `meta.Skip`.
+- `Compact` physically rewrites the JSONL file to remove skipped lines.
+- `SetHistory` and `Compact` write metadata before rewriting JSONL so a crash may temporarily expose old data, but should not lose data.
+- Corrupt JSONL lines are skipped during reads instead of failing the entire session.
+
+`JSONLBackend.Save` maps onto `store.Compact(...)`.
+In other words, `Save` is no longer "flush dirty memory to disk"; it is now "reclaim dead lines after logical truncation".
+
+## Concurrency Model
+
+`pkg/memory.JSONLStore` uses a fixed 64-shard mutex array indexed by a hash of the session key.
+That gives per-session serialization without keeping an unbounded mutex map in memory.
+
+The legacy `SessionManager` uses a single in-memory map guarded by an RW mutex.
+
+Both backends satisfy the same `SessionStore` interface, which is why the agent loop does not need storage-specific code.
+
+## Compatibility And Migration
+
+`pkg/agent/instance.go:initSessionStore` prefers the JSONL backend.
+
+Startup sequence:
+
+1. Create `memory.NewJSONLStore(dir)`.
+2. Run `memory.MigrateFromJSON(...)` to import legacy `.json` sessions.
+3. Wrap the store with `session.NewJSONLBackend(store)`.
+4. If JSONL initialization or migration fails, fall back to `session.NewSessionManager(dir)`.
+
+This fallback is intentional: a partial migration would be worse than staying on the legacy store for one run.
+
+### Alias promotion
+
+When canonical metadata is first created, `EnsureSessionMetadata` may promote history from a non-empty legacy alias into the canonical session.
+That promotion only happens when the canonical session is still empty, so active canonical history is not overwritten.
+
+This is how the system preserves old histories such as:
+
+- legacy direct-message keys
+- older Pico direct-session keys
+
+while moving the runtime onto opaque canonical keys.
+
+## Other SessionStore Implementations
+
+`pkg/agent/subturn.go` defines an `ephemeralSessionStore`.
+It satisfies the same `SessionStore` interface, but keeps data in memory only and is destroyed when the sub-turn ends.
+
+That lets SubTurn reuse the same session-facing APIs without writing child-session history into the parent's durable storage.
+
+## Operational Consumers
+
+The session system is consumed by more than the agent loop:
+
+- `web/backend/api/session.go` reads JSONL metadata and legacy JSON sessions to expose session history in the launcher UI.
+- `pkg/agent/steering.go` can recover scope metadata for active steering flows.
+- tooling and tests can still refer to legacy aliases because alias resolution is handled below the agent loop.
+
+## Related Files
+
+- `pkg/session/session_store.go`
+- `pkg/session/manager.go`
+- `pkg/session/jsonl_backend.go`
+- `pkg/session/scope.go`
+- `pkg/session/key.go`
+- `pkg/session/allocator.go`
+- `pkg/memory/jsonl.go`
+- `pkg/agent/instance.go`
+- `pkg/agent/agent.go`
+- `pkg/agent/agent_message.go`
@@ -0,0 +1,254 @@
+# Session 系统
+
+> 返回 [README](../README.md)
+
+本文说明 PicoClaw 运行时的 Session 系统如何完成以下事情：
+
+- 把入站消息映射到稳定的会话作用域
+- 持久化消息历史与摘要
+- 在运行时使用不透明 canonical key 的同时，继续兼容旧的 `agent:...` session key
+
+本文覆盖 `pkg/session`、`pkg/memory` 和 `pkg/agent` 中的核心运行时链路。
+它不讨论 `web/backend/middleware` 中 launcher 登录 Cookie 或 dashboard 鉴权 session。
+
+## 职责
+
+Session 系统承担四件事：
+
+1. 决定哪些消息应该共享同一段上下文。
+2. 让这段上下文能跨 turn、跨进程重启持久存在。
+3. 向 agent loop 暴露一个足够小的 `SessionStore` 抽象。
+4. 在存储层和路由层迁移期间继续兼容旧 session key。
+
+## 主要组件
+
+| 层次 | 文件 | 作用 |
+| --- | --- | --- |
+| Session 抽象 | `pkg/session/session_store.go` | 定义 agent loop 依赖的 `SessionStore` 接口。 |
+| 旧后端 | `pkg/session/manager.go` | 每个 session 一个 JSON 文件的旧实现，仍作为回退方案保留。 |
+| Session 适配层 | `pkg/session/jsonl_backend.go` | 把 `pkg/memory.Store` 适配成 `SessionStore`，并支持 alias 与 scope metadata。 |
+| 持久化存储 | `pkg/memory/jsonl.go` | Append-only JSONL 存储与 `.meta.json` 元数据侧文件。 |
+| Scope / Key 构建 | `pkg/session/scope.go`、`pkg/session/key.go`、`pkg/session/allocator.go` | 从路由结果生成结构化 scope、不透明 canonical key 和 legacy alias。 |
+| 运行时集成 | `pkg/agent/instance.go`、`pkg/agent/loop.go`、`pkg/agent/loop_message.go` | 初始化存储、分配 session scope，并在 turn 执行前落 metadata。 |
+
+## Session 数据模型
+
+结构化的会话身份由 `session.SessionScope` 表示：
+
+| 字段 | 含义 |
+| --- | --- |
+| `Version` | Scope 模式版本，当前为 `ScopeVersionV1`。 |
+| `AgentID` | 处理该 turn 的路由 agent。 |
+| `Channel` | 归一化后的入站 channel 名称。 |
+| `Account` | 归一化后的 bot / account 标识。 |
+| `Dimensions` | 当前启用的隔离维度顺序，例如 `chat` 或 `sender`。 |
+| `Values` | 每个维度对应的具体归一化值。 |
+
+Allocator 当前只识别四个维度：
+
+- `space`
+- `chat`
+- `topic`
+- `sender`
+
+默认配置是：
+
+```json
+{
+  "session": {
+    "dimensions": ["chat"]
+  }
+}
+```
+
+也就是默认按 chat 共享上下文；如果 dispatch rule 覆盖了维度，则以 rule 为准。
+
+## Canonical Key 与 Legacy Alias
+
+运行时现在优先使用不透明 canonical key：
+
+```text
+sk_v1_<sha256>
+```
+
+它由 `pkg/session/key.go` 中的 scope signature 计算得到。
+这样可以让存储 key 稳定，同时不再把持久化格式和某一种旧文本 key 绑定死。
+
+为了兼容旧数据，allocator 还会生成 legacy alias，例如：
+
+```text
+agent:main:direct:user123
+agent:main:slack:channel:c001
+agent:main:pico:direct:pico:session-123
+```
+
+这些 alias 很重要，因为旧 session、部分测试以及某些工具仍然会引用这种格式。
+JSONL backend 会在读写前先把 alias 解析回 canonical key。
+
+此外，如果调用方已经显式传入了受支持的 session key，agent loop 会保留它，不强行改成新分配的 routed key。
+这条逻辑在 `pkg/agent/loop_utils.go:resolveScopeKey` 中：
+
+- 不透明 canonical key
+- legacy `agent:...` key
+
+都属于“显式 key”。
+
+## 分配流程
+
+普通入站消息的完整链路如下：
+
+```text
+InboundMessage
+  -> RouteResolver.ResolveRoute(...)
+  -> session.AllocateRouteSession(...)
+  -> resolveScopeKey(...)
+  -> ensureSessionMetadata(...)
+  -> AgentLoop turn 执行
+  -> SessionStore 读写
+```
+
+具体来说：
+
+1. `pkg/agent/loop_message.go` 先用归一化后的 inbound context 解析 agent route。
+2. `session.AllocateRouteSession` 把 route 的 `SessionPolicy` 和 inbound context 组合成结构化 `SessionScope`。
+3. Allocator 会生成：
+   - `SessionKey`：当前路由会话的 canonical key
+   - `SessionAliases`：该路由会话的兼容 alias
+   - `MainSessionKey`：agent 级主会话 key
+   - `MainAliases`：主会话对应的 legacy alias
+4. `runAgentLoop` 通过 `ensureSessionMetadata` 持久化 scope metadata 和 alias。
+5. 后续读写时，`JSONLBackend.ResolveSessionKey` 会先把 alias 映射回 canonical key。
+
+`MainSessionKey` 和普通聊天会话是分开的。
+它主要服务于 agent 级、系统级的上下文场景，比如 `processSystemMessage`。
+
+## Scope 构建规则
+
+`pkg/session/allocator.go` 会从归一化后的 inbound context 生成 scope 值。
+关键规则如下：
+
+- `space` 变成 `<space_type>:<space_id>`
+- `chat` 变成 `<chat_type>:<chat_id>`
+- `topic` 变成 `topic:<topic_id>`
+- `sender` 会先经过 `session.identity_links` 归一化再写入
+
+其中有两个需要单独记住的特殊规则。
+
+### Telegram forum 隔离
+
+Telegram forum topic 必须默认保持隔离，即使配置只写了 `chat` 维度。
+为此，如果消息来自 Telegram forum 且策略里没有显式包含 `topic`，allocator 会把 `/<topic_id>` 拼到 `chat` 值后面。
+
+例如：
+
+```text
+group:-1001234567890/42
+group:-1001234567890/99
+```
+
+这两者会得到不同的 session key。
+
+### Identity links
+
+`session.identity_links` 可以把多个 sender 标识折叠为一个 canonical identity。
+dispatch 匹配和 session 分配都会使用这套映射，因此同一个人即使跨 channel 或 account 使用不同原始 sender ID，也可以继续落到同一段上下文里。
+
+## 存储格式
+
+默认运行时后端是 `pkg/memory.JSONLStore`，外面包了一层 `session.JSONLBackend`。
+
+每个 session 使用两类文件：
+
+```text
+{sanitized_key}.jsonl
+{sanitized_key}.meta.json
+```
+
+各自保存：
+
+- `.jsonl`：一行一个 `providers.Message`，append-only
+- `.meta.json`：摘要、时间戳、行数、逻辑截断偏移、scope、aliases
+
+`SessionMeta` 当前包含：
+
+- `Key`
+- `Summary`
+- `Skip`
+- `Count`
+- `CreatedAt`
+- `UpdatedAt`
+- `Scope`
+- `Aliases`
+
+## 写入与崩溃语义
+
+JSONL store 的设计核心是“追加优先、宁可暂时读到旧数据也不要丢数据”：
+
+- `AddMessage` / `AddFullMessage` 先追加一行 JSON，再 `fsync`，最后更新 metadata。
+- `TruncateHistory` 先做逻辑截断，本质上只是推进 `meta.Skip`。
+- `Compact` 才会真正重写 JSONL 文件，把被跳过的旧行物理移除。
+- `SetHistory` 和 `Compact` 都会先写 metadata 再改写 JSONL；如果中途崩溃，最多短时间暴露旧数据，不应丢数据。
+- 读取 JSONL 时如果碰到损坏行，会跳过该行，而不是让整个 session 读取失败。
+
+`JSONLBackend.Save` 对应到底层的 `store.Compact(...)`。
+也就是说，`Save` 在新实现里不再是“把内存脏数据刷盘”，而是“在逻辑截断后回收无效行占用的磁盘空间”。
+
+## 并发模型
+
+`pkg/memory.JSONLStore` 使用固定 64 分片 mutex，按 session key 的 hash 做串行化。
+这样既能做到“按 session 串行”，又不会因为 session 数量增长而把 mutex map 做成无界结构。
+
+旧的 `SessionManager` 则是一个内存 map 加 RW mutex。
+
+这两个实现都满足同一个 `SessionStore` 接口，所以 agent loop 不需要写任何存储后端特化逻辑。
+
+## 兼容与迁移
+
+`pkg/agent/instance.go:initSessionStore` 会优先初始化 JSONL 后端。
+
+启动过程如下：
+
+1. 创建 `memory.NewJSONLStore(dir)`。
+2. 执行 `memory.MigrateFromJSON(...)`，把旧 `.json` session 迁入新格式。
+3. 用 `session.NewJSONLBackend(store)` 包装。
+4. 如果 JSONL 初始化或迁移失败，则回退到 `session.NewSessionManager(dir)`。
+
+这个回退是刻意设计的：做一半的迁移，比整轮继续使用旧后端更危险。
+
+### Alias 提升
+
+第一次为 canonical key 建 metadata 时，`EnsureSessionMetadata` 会尝试把某个非空 legacy alias 的历史提升到 canonical session。
+但这件事只会在 canonical session 仍然为空时发生，因此不会覆盖已经存在的 canonical 历史。
+
+这保证了系统在迁移到 opaque key 的同时，仍能保留旧历史，例如：
+
+- 旧的 direct-message key
+- 旧的 Pico direct-session key
+
+## 其他 SessionStore 实现
+
+`pkg/agent/subturn.go` 里定义了 `ephemeralSessionStore`。
+它同样实现 `SessionStore`，但只存在于内存里，在 sub-turn 结束时销毁。
+
+这样 SubTurn 就能复用相同的 session 接口，而不会把子任务历史写进父会话的持久存储。
+
+## 运行时消费者
+
+Session 系统不只被 agent loop 使用：
+
+- `web/backend/api/session.go` 会读取 JSONL metadata 和旧 JSON session，并把历史暴露给 launcher UI。
+- `pkg/agent/steering.go` 可以在 steering 场景下恢复 scope metadata。
+- 因为 alias 解析发生在 agent loop 之下，测试和工具仍然可以继续使用 legacy alias。
+
+## 相关文件
+
+- `pkg/session/session_store.go`
+- `pkg/session/manager.go`
+- `pkg/session/jsonl_backend.go`
+- `pkg/session/scope.go`
+- `pkg/session/key.go`
+- `pkg/session/allocator.go`
+- `pkg/memory/jsonl.go`
+- `pkg/agent/instance.go`
+- `pkg/agent/loop.go`
+- `pkg/agent/loop_message.go`
@@ -170,13 +170,19 @@ This is saved to the session via `AddFullMessage` and sent to the model, so it i

 ## Automatic bus drain

-When the agent loop (`Run()`) starts processing a message, it spawns a background goroutine that keeps consuming new inbound messages from the bus. These messages are automatically redirected into the steering queue via `Steer()`. This means:
+When the agent loop (`Run()`) starts, it reads inbound messages from a shared message bus. The routing logic determines how each message is handled:

- Users on any channel (Telegram, Discord, etc.) don't need to do anything special — their messages are automatically captured as steering when the agent is busy
- Audio messages are transcribed before being steered, so the agent receives text. If transcription fails, the original (non-transcribed) message is steered as-is
- Only messages that resolve to the **same steering scope** as the active turn are redirected. Messages for other chats/sessions are requeued onto the inbound bus so they can be processed normally
- `system` inbound messages are not treated as steering input
- When `processMessage` finishes, the drain goroutine is canceled and normal message consumption resumes
+1. **No active turn for the message's session** — the message is dispatched to a **worker goroutine** that processes the full turn (LLM calls, tool execution, steering drain)
+2. **An active turn already exists for the same session** — the message is enqueued directly into that session's **steering queue** via `enqueueSteeringMessage`. No background drain goroutine is needed
+3. **Non-routable message** (e.g. `system`) — processed synchronously in the main loop
+
+This design enables **parallel processing of messages from different sessions** while keeping same-session messages strictly sequential. Key implications:
+
+- Messages from different users/channels are processed **concurrently** (up to `max_parallel_turns`)
+- Messages from the same session are **serialized** — subsequent messages go to the steering queue
+- Users don't need to do anything special — their messages are automatically captured as steering when the agent is busy for their session
+- Audio messages are transcribed within the worker that processes the turn, so the agent receives text
+- `system` inbound messages are processed immediately and do not trigger steering

 ## Steering with media

@@ -112,13 +112,17 @@ When the parent task is forcefully aborted (e.g., user interrupts with `/stop`):

 ## Agent Loop Integration

-### Bus Draining During Processing
+### Message Routing and Steering

-When a message enters the `Run()` loop, the agent starts a `drainBusToSteering` goroutine before calling `processMessage`. This goroutine runs concurrently with the entire processing lifecycle and continuously consumes any new inbound messages from the bus, redirecting them into the **steering queue** instead of dropping them.
+When a message enters the `Run()` loop, the agent determines whether to start a new worker for it or enqueue it into an existing steering queue:

-This ensures that if a user sends a follow-up message while the agent is processing (including during SubTurn execution), the message is not lost — it will be picked up between tool call iterations via `dequeueSteeringMessages`.
+- If **no active turn** exists for the message's session key, the session is atomically reserved and a **worker goroutine** is spawned. The worker processes the full turn lifecycle: `processMessage` → tool execution → steering drain → `Continue` for queued messages.
+- If an **active turn already exists** for the same session, the message is enqueued directly into that session's steering queue. It will be picked up by the existing worker's steering drain loop.

-The drain goroutine stops automatically when `processMessage` returns (via a cancellable context).
+This ensures that:
+- Messages from **different sessions** are processed **in parallel** (up to `max_parallel_turns` concurrent workers)
+- Messages from the **same session** are strictly **serialized** — they go to the steering queue and are processed sequentially within the active turn
+- No background drain goroutine is needed; steering is handled by the worker itself after processing

 ### Pending Result Polling

@@ -129,7 +133,7 @@ The agent loop polls for async SubTurn results at two points per iteration:

 ### Turn State Tracking

-All active root turns are registered in `AgentLoop.activeTurnStates` (`sync.Map`, keyed by session key). This allows `HardAbort` and `/subagents` observability commands to find and operate on active turns.
+All active turns are registered in `AgentLoop.activeTurnStates` (`sync.Map`, keyed by session key). A reservation sentinel is stored atomically via `LoadOrStore` before the worker starts, then replaced with the real `*turnState` when `runTurn` registers. This prevents a TOCTOU race where multiple messages for the same session could spawn concurrent workers. The sentinel is cleaned up by the worker's deferred cleanup. Registering turns in this map allows `HardAbort` and `/subagents` observability commands to find and operate on active turns.

 ## Event Bus Integration

@@ -181,10 +185,10 @@ Creates a new spawner instance for the given AgentLoop. Pass the returned value
 ### Continue

 ```go
-func (al *AgentLoop) Continue(ctx context.Context, sessionKey string) error
+func (al *AgentLoop) Continue(ctx context.Context, sessionKey, channel, chatID string) (string, error)
 ```

-Resumes an idle agent turn by injecting any queued steering messages as a new LLM iteration. Used when the agent is waiting and a deferred steering message needs to be processed without a new inbound message arriving.
+Resumes an idle agent turn by dequeuing steering messages for the given session and running them through the agent loop. Returns the response string if processing occurred, or an empty string if no steering messages were pending. Uses session-aware active turn checking — it only blocks if a turn is active for the *same* session, not for unrelated sessions.

 ## Context Propagation

@@ -1,4 +1,4 @@
-> Retour au [README](../../../README.fr.md)
+> Retour au [README](../../project/README.fr.md)

 # DingTalk

@@ -8,9 +8,10 @@ DingTalk est la plateforme de communication d'entreprise d'Alibaba, très popula

 ```json
 {
-  "channels": {
+  "channel_list": {
    "dingtalk": {
      "enabled": true,
+      "type": "dingtalk",
      "client_id": "YOUR_CLIENT_ID",
      "client_secret": "YOUR_CLIENT_SECRET",
      "allow_from": []
@@ -1,4 +1,4 @@
-> [README](../../../README.ja.md) に戻る
+> [README](../../project/README.ja.md) に戻る

 # DingTalk

@@ -8,9 +8,10 @@ DingTalkはアリババの企業向けコミュニケーションプラットフ

 ```json
 {
-  "channels": {
+  "channel_list": {
    "dingtalk": {
      "enabled": true,
+      "type": "dingtalk",
      "client_id": "YOUR_CLIENT_ID",
      "client_secret": "YOUR_CLIENT_SECRET",
      "allow_from": []
@@ -8,9 +8,10 @@ DingTalk is Alibaba's enterprise communication platform, widely used in Chinese

 ```json
 {
-  "channels": {
+  "channel_list": {
    "dingtalk": {
      "enabled": true,
+      "type": "dingtalk",
      "client_id": "YOUR_CLIENT_ID",
      "client_secret": "YOUR_CLIENT_SECRET",
      "allow_from": []
@@ -1,4 +1,4 @@
-> Voltar ao [README](../../../README.pt-br.md)
+> Voltar ao [README](../../project/README.pt-br.md)

 # DingTalk

@@ -8,9 +8,10 @@ DingTalk é a plataforma de comunicação empresarial da Alibaba, amplamente uti

 ```json
 {
-  "channels": {
+  "channel_list": {
    "dingtalk": {
      "enabled": true,
+      "type": "dingtalk",
      "client_id": "YOUR_CLIENT_ID",
      "client_secret": "YOUR_CLIENT_SECRET",
      "allow_from": []
@@ -1,4 +1,4 @@
-> Quay lại [README](../../../README.vi.md)
+> Quay lại [README](../../project/README.vi.md)

 # DingTalk

@@ -8,9 +8,10 @@ DingTalk là nền tảng giao tiếp doanh nghiệp của Alibaba, được s

 ```json
 {
-  "channels": {
+  "channel_list": {
    "dingtalk": {
      "enabled": true,
+      "type": "dingtalk",
      "client_id": "YOUR_CLIENT_ID",
      "client_secret": "YOUR_CLIENT_SECRET",
      "allow_from": []
@@ -1,4 +1,4 @@
-> 返回 [README](../../../README.zh.md)
+> 返回 [README](../../project/README.zh.md)

 # 钉钉

@@ -8,9 +8,10 @@

 ```json
 {
-  "channels": {
+  "channel_list": {
    "dingtalk": {
      "enabled": true,
+      "type": "dingtalk",
      "client_id": "YOUR_CLIENT_ID",
      "client_secret": "YOUR_CLIENT_SECRET",
      "allow_from": []
--- a/Show More
+++ b/Show More