🐛 fix(skill): prevent SKILL frontmatter from rendering twice

2026-06-15 12:10:16 +00:00 · 2026-05-24 19:43:56 +08:00
4367 changed files with 54927 additions and 328770 deletions
@@ -51,7 +51,7 @@ export interface GlobalServerConfig {

 ### 3. Assemble Server Config (if new domain)

-In `apps/server/src/globalConfig/index.ts`:
+In `src/server/globalConfig/index.ts`:

 ```typescript
 import { <domain>Env } from '@/envs/<domain>';
@@ -97,7 +97,7 @@ AI_IMAGE_DEFAULT_IMAGE_NUM: z.coerce.number().min(1).max(20).optional(),
 // packages/types/src/serverConfig.ts
 image?: PartialDeep<UserImageConfig>;

-// apps/server/src/globalConfig/index.ts
+// src/server/globalConfig/index.ts
 image: cleanObject({ defaultImageNum: imageEnv.AI_IMAGE_DEFAULT_IMAGE_NUM }),

 // src/store/user/slices/common/action.ts
@@ -1,6 +1,6 @@
 ---
 name: agent-runtime-hooks
-description: 'Agent runtime lifecycle hooks. Use for before/after tool or step hooks, tool mocks, human intervention, sub-agent calls, context compression, evals, tracing, callAgent, or lifecycle events.'
+description: "Agent runtime lifecycle hooks for observing and intercepting agent execution. Use when adding hooks to agent operations, mocking tool calls, logging step events, handling human intervention, sub-agent calls, context compression, or building eval/tracing integrations. Triggers on 'hooks', 'beforeToolCall', 'afterToolCall', 'beforeStep', 'afterStep', 'onComplete', 'onError', 'tool mock', 'agent lifecycle', 'human intervention', 'callAgent', 'compact'."
 user-invocable: false
 ---

@@ -50,14 +50,14 @@ execAgent({ hooks })

 ## Key Files

-| File                                                            | Role                                                   |
-| --------------------------------------------------------------- | ------------------------------------------------------ |
-| `packages/agent-runtime/src/types/hooks.ts`                     | Type definitions (AgentHookType, all event interfaces) |
-| `apps/server/src/services/agentRuntime/hooks/types.ts`          | Server-side types (AgentHook, re-exports)              |
-| `apps/server/src/services/agentRuntime/hooks/HookDispatcher.ts` | Registration, dispatch, dispatchBeforeToolCall         |
-| `apps/server/src/modules/AgentRuntime/RuntimeExecutors.ts`      | Tool/Compact/HumanIntervention hook dispatch           |
-| `apps/server/src/services/agentRuntime/AgentRuntimeService.ts`  | Step hooks + HumanIntervention resume/reject           |
-| `apps/server/src/services/aiAgent/index.ts`                     | CallAgent hook dispatch                                |
+| File                                                       | Role                                                   |
+| ---------------------------------------------------------- | ------------------------------------------------------ |
+| `packages/agent-runtime/src/types/hooks.ts`                | Type definitions (AgentHookType, all event interfaces) |
+| `src/server/services/agentRuntime/hooks/types.ts`          | Server-side types (AgentHook, re-exports)              |
+| `src/server/services/agentRuntime/hooks/HookDispatcher.ts` | Registration, dispatch, dispatchBeforeToolCall         |
+| `src/server/modules/AgentRuntime/RuntimeExecutors.ts`      | Tool/Compact/HumanIntervention hook dispatch           |
+| `src/server/services/agentRuntime/AgentRuntimeService.ts`  | Step hooks + HumanIntervention resume/reject           |
+| `src/server/services/aiAgent/index.ts`                     | CallAgent hook dispatch                                |

 ## Registration Flow

@@ -1,6 +1,6 @@
 ---
 name: agent-signal
-description: 'Build or extend LobeHub Agent Signal pipelines. Use for signal sources, signal/action types, policies, middleware, workflow handoff, dedupe, scope behavior, or observability.'
+description: Build or extend LobeHub Agent Signal pipelines for background or quiet agent work driven by event sources, semantic signals, and action handlers. Use when adding a new Agent Signal source, signal or action type, policy, middleware handler, workflow handoff, dedupe or scope behavior, or observability around `src/server/services/agentSignal/**`, `packages/agent-signal`, or `packages/observability-otel/src/modules/agent-signal`.
 ---

 # Agent Signal
@@ -26,9 +26,9 @@ Agent Signal has one consistent shape:

 Read:

-- `apps/server/src/services/agentSignal/index.ts`
-- `apps/server/src/workflows/agentSignal/index.ts`
-- `apps/server/src/workflows/agentSignal/run.ts`
+- `src/server/services/agentSignal/index.ts`
+- `src/server/workflows/agentSignal/index.ts`
+- `src/server/workflows/agentSignal/run.ts`

 ## Core Model

@@ -48,11 +48,11 @@ Keep the boundaries strict:
 ## Implementation Workflow

 1. Decide whether the use case is synchronous or quiet background work.
-2. Define or reuse a source type in `apps/server/src/services/agentSignal/sourceTypes.ts`.
-3. Define or reuse signal and action types in `apps/server/src/services/agentSignal/policies/types.ts`.
+2. Define or reuse a source type in `src/server/services/agentSignal/sourceTypes.ts`.
+3. Define or reuse signal and action types in `src/server/services/agentSignal/policies/types.ts`.
 4. Implement handlers with `defineSourceHandler`, `defineSignalHandler`, or `defineActionHandler`.
 5. Bundle handlers with `defineAgentSignalHandlers(...)`.
-6. Register the policy in `apps/server/src/services/agentSignal/policies/index.ts` and pass it into the runtime factory if needed.
+6. Register the policy in `src/server/services/agentSignal/policies/index.ts` and pass it into the runtime factory if needed.
 7. Add or update ingress code that emits or enqueues the source event.
 8. Add observability and tests before considering the flow complete.

@@ -63,19 +63,19 @@ Keep the boundaries strict:
  `packages/agent-signal/src/base/builders.ts`
  `packages/agent-signal/src/base/types.ts`
 - Server-owned runtime and middleware:
-  `apps/server/src/services/agentSignal/runtime/AgentSignalRuntime.ts`
-  `apps/server/src/services/agentSignal/runtime/AgentSignalScheduler.ts`
-  `apps/server/src/services/agentSignal/runtime/middleware.ts`
-  `apps/server/src/services/agentSignal/runtime/context.ts`
+  `src/server/services/agentSignal/runtime/AgentSignalRuntime.ts`
+  `src/server/services/agentSignal/runtime/AgentSignalScheduler.ts`
+  `src/server/services/agentSignal/runtime/middleware.ts`
+  `src/server/services/agentSignal/runtime/context.ts`
 - Existing policy example:
-  `apps/server/src/services/agentSignal/policies/analyzeIntent/index.ts`
-  `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
-  `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
-  `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
-  `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
+  `src/server/services/agentSignal/policies/analyzeIntent/index.ts`
+  `src/server/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
+  `src/server/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
+  `src/server/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
+  `src/server/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
 - Observability:
-  `apps/server/src/services/agentSignal/observability/projector.ts`
-  `apps/server/src/services/agentSignal/observability/traceEvents.ts`
+  `src/server/services/agentSignal/observability/projector.ts`
+  `src/server/services/agentSignal/observability/traceEvents.ts`
  `packages/observability-otel/src/modules/agent-signal/index.ts`

 ## Implementation Rules
@@ -86,7 +86,7 @@ Keep the boundaries strict:
 - Use stable ids and idempotency keys when the same source can arrive more than once.
 - Preserve scope discipline. The runtime uses `scopeKey` to serialize related background work.
 - Prefer the dedicated shared package types and builders from `@lobechat/agent-signal` for normalized nodes and result contracts.
-- Add focused tests near the touched runtime, policy, or store module. Existing tests under `apps/server/src/services/agentSignal/**/__tests__` are the reference pattern.
+- Add focused tests near the touched runtime, policy, or store module. Existing tests under `src/server/services/agentSignal/**/__tests__` are the reference pattern.

 ## References

@@ -32,9 +32,9 @@ source node

 Read:

-- `apps/server/src/services/agentSignal/index.ts`
-- `apps/server/src/services/agentSignal/sources/index.ts`
-- `apps/server/src/services/agentSignal/runtime/AgentSignalScheduler.ts`
+- `src/server/services/agentSignal/index.ts`
+- `src/server/services/agentSignal/sources/index.ts`
+- `src/server/services/agentSignal/runtime/AgentSignalScheduler.ts`

 ## Package Boundaries

@@ -56,7 +56,7 @@ Read:
 - `packages/agent-signal/src/types/events.ts`
 - `packages/agent-signal/src/types/builtin.ts`

-### `apps/server/src/services/agentSignal`
+### `src/server/services/agentSignal`

 Treat this as the server-owned implementation layer.

@@ -89,11 +89,11 @@ Examples:

 Define source payloads in:

-- `apps/server/src/services/agentSignal/sourceTypes.ts`
+- `src/server/services/agentSignal/sourceTypes.ts`

 Build normalized sources in:

-- `apps/server/src/services/agentSignal/sources/buildSource.ts`
+- `src/server/services/agentSignal/sources/buildSource.ts`
 - `packages/agent-signal/src/base/builders.ts`

 ### Signal
@@ -109,7 +109,7 @@ Examples from `analyzeIntent`:

 Define server-owned signal types in:

-- `apps/server/src/services/agentSignal/policies/types.ts`
+- `src/server/services/agentSignal/policies/types.ts`

 ### Action

@@ -157,9 +157,9 @@ When a user asks for "the procedure", document the flow above and point to the e

 Read:

-- `apps/server/src/services/agentSignal/sources/index.ts`
-- `apps/server/src/services/agentSignal/runtime/context.ts`
-- `apps/server/src/services/agentSignal/constants.ts`
+- `src/server/services/agentSignal/sources/index.ts`
+- `src/server/services/agentSignal/runtime/context.ts`
+- `src/server/services/agentSignal/constants.ts`

 Use `enqueueAgentSignalSourceEvent(...)` when the work should stay quiet and out-of-band. That path:

@@ -172,8 +172,8 @@ This is the preferred path when the UI request should finish immediately and the

 Read:

-- `apps/server/src/workflows/agentSignal/index.ts`
-- `apps/server/src/workflows/agentSignal/run.ts`
+- `src/server/workflows/agentSignal/index.ts`
+- `src/server/workflows/agentSignal/run.ts`

 ## Existing Example: `analyzeIntent`

@@ -192,8 +192,8 @@ agent.user.message

 Read:

-- `apps/server/src/services/agentSignal/policies/analyzeIntent/index.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/index.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
@@ -2,7 +2,7 @@

 ## Fluent Registration API

-Use the middleware helpers in `apps/server/src/services/agentSignal/runtime/middleware.ts`.
+Use the middleware helpers in `src/server/services/agentSignal/runtime/middleware.ts`.

 They provide:

@@ -32,7 +32,7 @@ The context gives you:

 Read:

-- `apps/server/src/services/agentSignal/runtime/context.ts`
+- `src/server/services/agentSignal/runtime/context.ts`

 ## Return Contracts

@@ -48,7 +48,7 @@ Return one of these shapes:
 Read:

 - `packages/agent-signal/src/base/types.ts`
-- `apps/server/src/services/agentSignal/runtime/AgentSignalScheduler.ts`
+- `src/server/services/agentSignal/runtime/AgentSignalScheduler.ts`

 ## Policy Composition Pattern

@@ -72,8 +72,8 @@ That bundle is later passed into the runtime via:

 Read:

-- `apps/server/src/services/agentSignal/policies/index.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/index.ts`
+- `src/server/services/agentSignal/policies/index.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/index.ts`

 ## Source Handler Pattern

@@ -81,7 +81,7 @@ Use a source handler when you are interpreting a producer event into semantic si

 Reference:

-- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`

 Pattern:

@@ -114,8 +114,8 @@ Use a signal handler when one semantic state should branch into more semantic st

 References:

-- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`

 Pattern:

@@ -148,7 +148,7 @@ Use an action handler when the runtime should do actual work.

 Reference:

-- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`

 Pattern:

@@ -186,9 +186,9 @@ Keep these rules:
 Use this split:

 - external event payloads:
-  `apps/server/src/services/agentSignal/sourceTypes.ts`
+  `src/server/services/agentSignal/sourceTypes.ts`
 - policy-owned signal and action payloads:
-  `apps/server/src/services/agentSignal/policies/types.ts`
+  `src/server/services/agentSignal/policies/types.ts`
 - normalized shared node contracts:
  `packages/agent-signal/src/base/types.ts`

@@ -216,10 +216,10 @@ Prefer focused tests near the touched code.

 Useful references:

-- `apps/server/src/services/agentSignal/runtime/__tests__/AgentSignalRuntime.test.ts`
-- `apps/server/src/services/agentSignal/__tests__/index.integration.test.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/__tests__/*`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/__tests__/*`
+- `src/server/services/agentSignal/runtime/__tests__/AgentSignalRuntime.test.ts`
+- `src/server/services/agentSignal/__tests__/index.integration.test.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/__tests__/*`
+- `src/server/services/agentSignal/policies/analyzeIntent/actions/__tests__/*`

 Test at the smallest level that proves the behavior:

@@ -24,9 +24,9 @@ After runtime execution, the service projects one compact observability model fr

 Read:

-- `apps/server/src/services/agentSignal/observability/projector.ts`
-- `apps/server/src/services/agentSignal/observability/traceEvents.ts`
-- `apps/server/src/services/agentSignal/observability/store.ts`
+- `src/server/services/agentSignal/observability/projector.ts`
+- `src/server/services/agentSignal/observability/traceEvents.ts`
+- `src/server/services/agentSignal/observability/store.ts`

 Projection outputs:

@@ -58,7 +58,7 @@ Workflow-triggered runs do not naturally pass through the normal foreground runt

 Read:

-- `apps/server/src/workflows/agentSignal/run.ts`
+- `src/server/workflows/agentSignal/run.ts`

 Use that path when:

@@ -77,8 +77,8 @@ Check:

 Read:

-- `apps/server/src/services/agentSignal/index.ts`
-- `apps/server/src/services/agentSignal/sources/index.ts`
+- `src/server/services/agentSignal/index.ts`
+- `src/server/services/agentSignal/sources/index.ts`

 ### The signal exists but no action runs

@@ -98,8 +98,8 @@ Check:

 Reference:

-- `apps/server/src/services/agentSignal/policies/actionIdempotency.ts`
-- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
+- `src/server/services/agentSignal/policies/actionIdempotency.ts`
+- `src/server/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`

 ### Background runs are hard to discover

@@ -1,212 +0,0 @@
----
-name: agent-testing
-description: >
-  Agentic end-to-end testing for LobeHub: backend verification via the CLI,
-  frontend verification via agent-browser (Electron), full-stack verification in
-  the browser, and bot-channel verification via osascript. Local-first today,
-  designed to extend to cloud automation. Triggers on 'cli test', 'test with cli',
-  'verify with cli', 'backend test with cli', 'local test', 'test in electron',
-  'test desktop', 'test bot', 'bot test', 'test in discord', 'test in telegram',
-  'test in slack', 'test in wechat', 'test in weixin', 'test in lark', 'test in feishu',
-  'test in qq', 'manual test', 'osascript', 'test report', or any local
-  end-to-end verification task.
----
-
-# Agent Testing (Agentic End-to-End Verification)
-
-One skill for all agentic end-to-end testing — local-first today, designed to
-also run as full cloud automation. Every test session follows the same
-four-step contract:
-
-```
-Step 0: Env + Auth  →  Step 1: Pick surface  →  Step 2: Run  →  Step 3: Structured report
-```
-
-## Step 0 — Environment setup + auth check (mandatory)
-
-Step 0 is about getting the environment ready: **dependencies are healthy**
-and **auth is green**. A test run that dies halfway on a missing dependency or
-a login wall wastes the whole session — clear both gates BEFORE writing a
-single test step.
-
-### 0.1 Dependencies are installed — root AND standalone apps
-
-The root pnpm workspace does **NOT** cover every app: `pnpm-workspace.yaml`
-lists `packages/**`, `e2e`, `apps/server`, and only `apps/desktop/src/main` —
-**`apps/desktop` and `apps/cli` are standalone**, each keeping its own
-`node_modules` with its own links into `packages/`. A root install does not
-refresh them, so install in every app the test will touch:
-
-```bash
-pnpm install                      # root workspace
-cd apps/desktop && pnpm install   # Electron surface
-cd apps/cli && pnpm install       # CLI surface
-```
-
-Symptom of a stale standalone install: the build/launch fails to resolve a
-recently added workspace package — `Rolldown failed to resolve import
-"@lobechat/<pkg>"` (Electron) or `Cannot find module '@lobechat/<pkg>'` (CLI).
-
-### 0.2 Run scripts from the repo root
-
-All paths in this skill (`./.agents/skills/agent-testing/...`) are
-repo-root-relative, and background commands inherit the current working
-directory — a script launched while `cwd` is `apps/desktop` fails with
-`No such file or directory`. Verify `pwd` is the repo root before launching
-long-running scripts.
-
-### 0.3 Auth is green
-
-**Auth is the gate for all automated testing.**
-
-```bash
-./.agents/skills/agent-testing/scripts/setup-auth.sh status
-```
-
-| Surface  | Mechanism                                         | One-key path                   | Standard check                 |
-| -------- | ------------------------------------------------- | ------------------------------ | ------------------------------ |
-| CLI      | OIDC Device Code Flow (`apps/cli/.lobehub-dev`)   | `setup-auth.sh cli`            | `setup-auth.sh status`         |
-| Web      | better-auth cookie injection into `agent-browser` | `pbpaste \| setup-auth.sh web` | `setup-auth.sh web-verify`     |
-| Electron | App's own persistent login state                  | Log in once in the app         | `app-probe.sh auth`            |
-| Bot      | Native apps already logged in                     | —                              | per-platform screenshot        |
-
-Login-state checks are standardized — do NOT hand-roll `window.__LOBE_STORES`
-eval snippets; use `scripts/app-probe.sh auth` (returns `{ isSignedIn, userId }`,
-works for Electron CDP and web sessions via `AB_TARGET`).
-
-If `status` is not all green, fix auth first (the steps that need a human must be
-requested from the user explicitly). Full background and failure modes:
-[references/auth.md](./references/auth.md).
-
-## Step 1 — Pick the surface by change scope
-
-| Change scope                                            | Default surface                      | Why                                                               | Guide                              |
-| ------------------------------------------------------- | ------------------------------------ | ----------------------------------------------------------------- | ---------------------------------- |
-| **Backend** (TRPC router / service / model / migration) | **CLI**                              | Fastest loop, text-assertable output, zero UI flakiness           | [cli/index.md](./cli/index.md)     |
-| **Pure frontend** (components, store, styles, UX)       | **Electron** (agent-browser + CDP)   | Primary product shape; `__LOBE_STORES` state introspection        | [ui/electron.md](./ui/electron.md) |
-| **Full-stack** (new API + UI consuming it)              | **Web** (browser + local dev server) | One surface where network requests and UI are observable together | [ui/web.md](./ui/web.md)           |
-| **Bot channels** (Discord / WeChat / Lark / …)          | Native app via osascript / bridge    | Only way to exercise the real channel end-to-end                  | `bot/<platform>/index.md`          |
-
-Escalate, don't duplicate: verify a backend change with the CLI first; only add
-a UI pass when the change actually affects the UI.
-
-### Environment support (local macOS vs cloud Linux)
-
-The decisive constraint per surface is **how evidence (screenshots) is
-captured**: CDP-based capture (`agent-browser screenshot`) renders from the
-browser engine and needs no real display; OS-level capture (`screencapture`,
-osascript) is macOS-only.
-
-| Surface  | macOS (local) | Linux / cloud (headless)                                  | Screenshot mechanism                                   |
-| -------- | ------------- | --------------------------------------------------------- | ------------------------------------------------------ |
-| CLI      | ✅            | ✅                                                        | n/a — text output                                      |
-| Web      | ✅            | ✅ headless Chromium works natively                       | CDP — no display needed                                |
-| Electron | ✅            | ⚠️ runs, but needs a display server: wrap with `xvfb-run` | CDP works under Xvfb; `capture-app-window.sh` does NOT |
-| Bot      | ✅            | ❌ osascript + native apps are macOS-only                 | macOS `screencapture` only                             |
-
-When a test must stay cloud-portable, prefer CDP-based evidence over
-OS-level capture wherever both exist.
-
-### Bot platforms
-
-| Platform      | Guide                                            | Quick switcher        |
-| ------------- | ------------------------------------------------ | --------------------- |
-| Discord       | [bot/discord/index.md](./bot/discord/index.md)   | `Cmd+K`               |
-| Slack         | [bot/slack/index.md](./bot/slack/index.md)       | `Cmd+K`               |
-| Telegram      | [bot/telegram/index.md](./bot/telegram/index.md) | `Cmd+F`               |
-| WeChat / 微信 | [bot/wechat/index.md](./bot/wechat/index.md)     | `Cmd+F`               |
-| Lark / 飞书   | [bot/lark/index.md](./bot/lark/index.md)         | `Cmd+K`               |
-| QQ            | [bot/qq/index.md](./bot/qq/index.md)             | `Cmd+F`               |
-| iMessage      | [bot/imessage/index.md](./bot/imessage/index.md) | bridge (no osascript) |
-
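-Each osascript-driven platform folder contains an `index.md` (activation,
-navigation, send-message, verification snippets) and a `test-<platform>-bot.sh`
-script sharing the interface below (iMessage is the exception: it is
-bridge-based and ships `test-imessage-bridge.sh` with its own arguments):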
-
-```bash
-./.agents/skills/agent-testing/bot/<platform>/test-<platform>-bot.sh <channel_or_contact> <message> [wait_seconds] [screenshot_path]
-```
-
-New to osascript automation? Read
-[references/osascript.md](./references/osascript.md) first — it is a general
-macOS-automation asset (activate, type, paste, screenshot, accessibility reads,
-gotchas), not bot-specific.
-
-## Step 2 — Run
-
-Surface guides above carry the detailed workflows. Shared infrastructure:
-
-| Need                                 | Where                                                                |
-| ------------------------------------ | -------------------------------------------------------------------- |
-| Start / restart the local dev server | [references/dev-server.md](./references/dev-server.md)               |
-| `agent-browser` command reference    | [references/agent-browser.md](./references/agent-browser.md)         |
-| osascript patterns (general macOS)   | [references/osascript.md](./references/osascript.md)                 |
-| Agent gateway probing                | [references/agent-gateway.md](./references/agent-gateway.md)         |
-| Screen recording                     | [references/record-app-screen.md](./references/record-app-screen.md) |
-
-### Scripts
-
-All under `.agents/skills/agent-testing/scripts/`:
-
-| Script                    | Usage                                                                          |
-| ------------------------- | ------------------------------------------------------------------------------ |
-| `setup-auth.sh`           | One-stop auth setup & status check (`status` / `cli` / `web` / `web-verify`)   |
-| `app-probe.sh`            | LobeHub app probes: `auth` / `route` / `ops` / `goto <path>` / `errors`        |
-| `record-gif.sh`           | Frame-sequence → GIF for time-based behavior (streaming, timers, animations)   |
-| `report-init.sh`          | Scaffold a structured test report (Step 3)                                     |
-| `electron-dev.sh`         | Manage Electron dev env (start/stop/status/restart, CDP 9222)                  |
-| `capture-app-window.sh`   | Screenshot a specific app window (general; used by bot tests)                  |
-| `record-app-screen.sh`    | Record app screen (video + periodic screenshots)                               |
-| `record-electron-demo.sh` | Record Electron app demo with ffmpeg                                           |
-| `agent-gateway/`          | Gateway probe / dump / analyze tools                                           |
-
-`app-probe.sh` is the LobeHub-specific fast path into app state — auth check,
-current route, running operations, and `goto <path>` quick navigation
-(`/agent/<agentId>/<topicId>`, `/task/<taskId>`, `/settings`, …) so a test can
-jump straight to the state under test instead of clicking through the UI. See
-[ui/electron.md](./ui/electron.md#lobehub-probes--quick-navigation) for usage.
-
-## Step 3 — Structured report (mandatory deliverable)
-
-Every automated test session ends with a structured, evidence-backed report —
-not a chat-only summary. Scaffold it up front and fill it as you test:
-
-```bash
-DIR=$(./.agents/skills/agent-testing/scripts/report-init.sh my-feature "Verify my feature")
-# ... test, saving screenshots / CLI transcripts into $DIR/assets/ ...
-# fill $DIR/report.md (case table, embedded evidence, verdict) and $DIR/result.json
-```
-
-Reports live in `.records/reports/<timestamp>-<slug>/` (gitignored): `report.md`
-(human-readable, with embedded screenshots), `result.json` (machine-readable
-pass/fail + score), `assets/` (evidence). Format spec and evidence rules:
-[references/report.md](./references/report.md).
-
-Two hard rules worth front-loading:
-
-- **Report language = the user's conversation language.** Write the ENTIRE
-  `report.md` (headings included) in the language the user is conversing in —
-  no mixed English. `result.json` keys/status values stay English.
-- **Time-based behavior needs a GIF, not a screenshot.** If a case asserts
-  change over time (streaming output, a ticking timer, loading states,
-  animations), record it with `scripts/record-gif.sh` and embed the GIF —
-  a static screenshot cannot prove the behavior.
-
-## Directory map
-
-```
-agent-testing/
-├── SKILL.md            # this router
-├── cli/index.md        # backend verification via the LobeHub CLI
-├── ui/electron.md      # pure-frontend verification in the desktop app
-├── ui/web.md           # full-stack verification in the browser
-├── bot/<platform>/     # bot-channel verification (osascript / bridge)
-├── references/         # shared knowledge: auth, dev-server, agent-browser, osascript, report
-└── scripts/            # setup-auth, report-init, electron-dev, capture, recording, gateway
-```
-
-## Gotchas
-
-- agent-browser: see [references/agent-browser.md](./references/agent-browser.md#gotchas)
-- Electron: see [ui/electron.md](./ui/electron.md#electron-gotchas)
-- osascript: see [references/osascript.md](./references/osascript.md#gotchas)
@@ -1,232 +0,0 @@
-# iMessage Desktop bridge regression test
-
-The iMessage channel is different from the other bot platforms: there is **no
-native app to drive with osascript**. Instead the Desktop app runs a local
-**BlueBubbles bridge** — a small HTTP server in the Electron main process that
-registers a webhook on a local [BlueBubbles](https://bluebubbles.app/) server,
-receives iMessage events, and forwards them to LobeHub Cloud.
-
-So the test surface is three layers:
-
-1. **Electron main IPC** — `imessageBridge.*` handlers (`getStatus`,
-   `testConfig`, `upsertConfig`, `removeConfig`, `start`, `stop`)
-2. **Local bridge HTTP server** — `http://127.0.0.1:<port>/webhooks/bluebubbles/<appId>?secret=<secret>`
-3. **BlueBubbles REST API** — `http://127.0.0.1:1234/api/v1/*` (webhook + server/info)
-
-## Prerequisites
-
-- A running **BlueBubbles server** (macOS, default `http://127.0.0.1:1234`) with
-  a known password. Sanity check:
-  ```bash
-  curl -sS -m4 -o /dev/null -w '%{http_code}\n' \
-    "http://127.0.0.1:1234/api/v1/server/info?password=<PW>" # expect 200
-  ```
-- **Electron dev running with CDP**: `./.agents/skills/agent-testing/scripts/electron-dev.sh start`
-- The **iMessage Desktop branch** checked out (the `imessageBridge` IPC group
-  and `@lobechat/chat-adapter-imessage` must be compiled into the main bundle).
-  Run `pnpm install --ignore-scripts` at the repo root **and** in `apps/desktop/`
-  after switching branches — the new workspace package must be linked or the
-  main build fails to resolve `@lobechat/chat-adapter-imessage`.
-
-## Fast path: automated script
-
-```bash
-./.agents/skills/agent-testing/bot/imessage/test-imessage-bridge.sh '<bluebubbles_password>' [bb_url] [cdp_port]
-```
-
-Asserts the whole flow and self-cleans (unique `applicationId` per run, removes
-its bridge config + BlueBubbles webhook on exit). Exit 0 = all green. It covers:
-
-- BlueBubbles reachable + password valid; Electron CDP reachable; IPC available
-- `testConfig` happy path → success
-- `testConfig` wrong password → rejected; unreachable URL → rejected
-- `upsertConfig` **first-time save → success** (Bug #1 regression guard, below)
-- `getStatus` → `running:true`, config persisted, password redacted (`blueBubblesPasswordSet`)
-- BlueBubbles webhook actually registered for the appId
-- Local bridge HTTP server: wrong secret → 401; valid secret → past auth
-
-The password is passed as argv (visible in `ps`) — local dev only, don't use a
-real secret on a shared machine.
-
-## Layer 1 — IPC probes (no UI)
-
-The renderer exposes the main-process handlers via `window.electronAPI.invoke`.
-This is the quickest way to exercise the bridge without clicking:
-
-```bash
-# baseline
-agent-browser --cdp 9222 eval \
-  "(async()=>JSON.stringify(await window.electronAPI.invoke('imessageBridge.getStatus',{})))()"
-
-# test a connection (note: password as a JS string)
-agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
-(async function () {
-  try {
-    var r = await window.electronAPI.invoke('imessageBridge.testConfig', {
-      applicationId: 'probe',
-      blueBubblesServerUrl: 'http://127.0.0.1:1234',
-      blueBubblesPassword: 'PASTE_PW',
-      enabled: true,
-      webhookSecret: 'probe-secret',
-    });
-    return JSON.stringify(r);            // { success: true }
-  } catch (e) { return 'ERR: ' + (e.message || e); }
-})()
-EVALEOF
-```
-
-`upsertConfig` persists to the Electron store, starts the local HTTP server, and
-registers the BlueBubbles webhook. `removeConfig` + `stop` reverse it.
-
-## Layer 2 — full UI flow (agent-browser)
-
-The bridge settings only render in Desktop (`isDesktop` guard) under the agent's
-**Channel → iMessage** screen. The platform tile only appears as a real (non
-"Coming Soon") entry once the server registers `imessage` **and** the frontend
-drops it from `COMING_SOON_PLATFORMS` (`src/routes/(main)/agent/channel/const.ts`).
-
-```bash
-agent-browser --cdp 9222 open "http://localhost:5173/agent/<aid>/channel"
-agent-browser --cdp 9222 wait --load networkidle && agent-browser --cdp 9222 wait 1500
-
-# confirm the remote backend lists imessage (it must be registered + deployed)
-agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
-(async function(){
-  var url='lobe-backend://lobe/trpc/lambda/agentBotProvider.listPlatforms?input='+encodeURIComponent('{"json":null,"meta":{"values":["undefined"],"v":1}}');
-  var d=await (await fetch(url,{credentials:'include'})).json();
-  var p=d.result?.data?.json||d;
-  return JSON.stringify(p.map(function(x){return x.id;}));
-})()
-EVALEOF
-
-# click the iMessage tile, then fill the form by ref
-agent-browser --cdp 9222 eval "(()=>{var b=[...document.querySelectorAll('aside button')].find(x=>/imessage/i.test(x.textContent));b&&b.click();})()"
-agent-browser --cdp 9222 wait 1500
-agent-browser --cdp 9222 snapshot -i | grep -iE "127.0.0.1:1234|Application ID|Webhook Secret|Test BlueBubbles|Save Bridge"
-```
-
-Field refs (from the snapshot): Application ID, Webhook Secret, BlueBubbles
-Server URL (`placeholder="http://127.0.0.1:1234"`), and a **nested** textbox right
-under the URL one is the BlueBubbles Password. Fill with `fill` (real input
-events — `eval`-setting React inputs won't fire onChange), click **Test
-BlueBubbles**, then **Save Bridge**. Read the antd toast immediately (it
-auto-dismisses):
-
-```bash
-agent-browser --cdp 9222 eval \
-  "JSON.stringify([...new Set([...document.querySelectorAll('.ant-message-custom-content')].map(n=>n.textContent.trim()))])"
-# Test  → "BlueBubbles connection passed"
-# Save  → "iMessage Desktop bridge saved"
-```
-
-Verify the end state via BlueBubbles + IPC:
-
-```bash
-curl -sS "http://127.0.0.1:1234/api/v1/webhook?password=<PW>" # webhook for the appId present
-agent-browser --cdp 9222 eval "(async()=>JSON.stringify(await window.electronAPI.invoke('imessageBridge.getStatus',{})))()"
-# running:true, serverUrl: http://127.0.0.1:33270, configs[].blueBubblesPasswordSet:true
-```
-
-Cleanup: `removeConfig` + `stop` via IPC, then `DELETE /api/v1/webhook/<id>` on
-BlueBubbles.
-
-## Outbound send test (desktop → BlueBubbles → iMessage)
-
-Verifies the leg the bridge uses to _reply_: `BlueBubblesApiClient.sendText`
-→ `POST /api/v1/message/text`. Run the helper against your own number:
-
-```bash
-./.agents/skills/agent-testing/bot/imessage/send-imessage-test.sh '<bb_password>' '+<E164>' # e.g. +15551234567
-```
-
-**Gotcha that bites everyone:** with `method=apple-script` and a _new_
-conversation, the HTTP POST often **times out** even though the message is
-sent. Never judge success by the HTTP response. Instead poll
-`POST /api/v1/message/query` and read the matching `isFromMe:true` row's
-`error` field:
-
-- `error: 0` (or null) → sent OK
-- non-zero `error` → real send failure
-
-The script does exactly this: fires the send, ignores the timeout, then matches
-its marker text in the message store and asserts `error == 0`.
-
-Two more notes:
-
-- Use a full E.164 handle (`iMessage;-;+<countrycode><number>`) or an Apple ID
-  email. Looking the chat up by guid afterwards may 404 if BB filed the message
-  under a differently-formatted guid — that's a lookup quirk, not a send failure.
-- Sending to _your own_ number round-trips: BB records both the outgoing
-  (`isFromMe:true`) and an incoming copy (`isFromMe:false`).
-
-## Inbound e2e test (iMessage → cloud agent → reply)
-
-Full inbound chain: a message arrives → BlueBubbles fires its `new-message`
-webhook → local bridge (`:33270`) → `forwardWebhook` POSTs to
-`<remote>/api/agent/webhooks/imessage/<appId>?secret=…` → cloud agent → reply
-flows back via Device Gateway → BB `sendText`.
-
-Prerequisites:
-
-- A cloud bot provider for the same `applicationId` exists and is **connected**
-  (Save Configuration + the device gateway connected — a _disconnected_ gateway
-  yields `DEVICE_NOT_FOUND` on connect and blocks the reply leg).
-- The `imessage` Labs toggle is on (otherwise the channel is gated to "Coming
-  Soon"), and `webhookSecret` matches on both ends (auto-generated on save).
-
-Two ways to drive it:
-
-1. **Second device / Apple ID (recommended).** Have _another_ Apple ID message
-   the BB-hosted number (e.g. "please reply pong"). The bot replies; you see it
-   on the other device. **No loop risk** — the reply goes to the other party,
-   not back to itself.
-2. **Send to your own number (quick, loop-aware).** `sendText` to the hosted
-   number; the loopback _incoming_ copy (`isFromMe:false`) triggers the bot.
-   Watch the reply land in `message/query` as an `isFromMe:true` row.
-
-**Loop guard — why a self-send doesn't spin forever:** the Chat SDK adapter
-drops any `isFromMe` message before dispatch
-(`packages/chat-adapter-imessage/src/adapter.ts`: `if (message.isFromMe) return`).
-The bot's own reply (`isFromMe:true`) is never re-processed, so in the normal
-case (someone else → bot → reply to them) there is no loop. The self-send case
-is a **test-only edge**: the bot's reply also round-trips to your number, and
-only the adapter's `isFromMe` check stops a second pass. Keep the prompt
-conversational (so the bot doesn't keep finding something to answer), and
-**turn the `imessage` lab off / remove the config when done** — never leave a
-self-send bot running unattended.
-
-Watch the chain live:
-
-```bash
-tail -f /tmp/electron-dev.log | grep -iE "imessage|bridge|forward|Message API"
-# the agent reply shows up as an isFromMe:true row with the bot's text:
-curl -sS -X POST "http://127.0.0.1:1234/api/v1/message/query?password=<PW>" \
-  -H 'Content-Type: application/json' -d '{"limit":5,"sort":"DESC"}'
-```
-
-`startTyping` will log a Private-API error unless BlueBubbles has the Private
-API helper set up (needs a jailbroken / SIP-disabled Mac) — it's logged and
-ignored; text replies still work.
-
-## Known bugs / gotchas
-
-- **Bug #1 — first-time save (fixed; guarded by the script).** BlueBubbles'
-  `GET /api/v1/webhook?url=<unregistered>` returns **HTTP 500**
-  (`Cannot read properties of null (reading 'events')`). The bridge must list
-  **all** webhooks and match client-side, never pass the `?url=` filter. If you
-  see `upsertConfig` fail with "An unhandled error has occurred!" originating in
-  `listWebhooks`, this regressed.
-- **Save leaves a half-state on webhook failure.** `upsertConfig` writes the
-  config + starts the HTTP server _before_ registering the webhook, so a webhook
-  failure still reports `running:true` with the config persisted but no
-  BlueBubbles webhook. Always assert the BlueBubbles webhook list, not just IPC
-  status.
-- **Unknown appId / forward failure → 500.** Posting to the local bridge for an
-  unknown appId, or when no cloud bot is bound, returns 500 (BlueBubbles retries
-  on 5xx). Auth (wrong secret → 401) is enforced before that.
-- **Backend deploy lag.** Desktop dev proxies tRPC through `lobe-backend://` to
-  the _remote_ server. iMessage only appears in `listPlatforms` once the server
-  registration is deployed there, regardless of local branch.
-- **Restart to load main-process fixes.** Editing `imessageBridgeSrv.ts` /
-  `@lobechat/chat-adapter-imessage` needs `electron-dev.sh restart` — main isn't
-  hot-replaced. On restart, enabled configs auto-register their webhook again.
@@ -1,81 +0,0 @@
-#!/usr/bin/env bash
-#
-# send-imessage-test.sh — Verify the outbound leg: desktop → BlueBubbles → iMessage
-#
-# Sends one real iMessage via the same REST call the Desktop bridge uses
-# (`POST /api/v1/message/text`, which BlueBubblesApiClient.sendText wraps) and
-# confirms it actually went out.
-#
-# KEY GOTCHA: with method=apple-script and a NEW conversation, the HTTP request
-# often TIMES OUT even though the message is sent. Do NOT treat the timeout as a
-# failure — instead poll `POST /api/v1/message/query` and check the message's
-# `error` field (0 = sent OK). This script does that for you.
-#
-# This sends a REAL message, so it has side effects. Target your own number.
-#
-# Usage:
-#   ./send-imessage-test.sh <bb_password> <target_e164> [message] [bb_url]
-#
-# Example (send to your own phone, E.164 with country code):
-#   ./send-imessage-test.sh 'my-bb-pass' '+15551234567'
-#
-set -euo pipefail
-
-BB_PASS="${1:?Usage: $0 <bb_password> <target_e164(+countrycode)> [message] [bb_url]}"
-TARGET="${2:?Need a target handle in E.164, e.g. +15551234567 (or an Apple ID email)}"
-MARKER="lobe-imsg-test-$(date +%s)" # step 2 locates the sent row by this marker, so it must be in EVERY message
-MESSAGE="[${MARKER}] ${3:-desktop bridge → BlueBubbles → iMessage outbound check}" # custom text is prefixed too
-BB_URL="${4:-http://127.0.0.1:1234}"
-
-CHAT_GUID="iMessage;-;${TARGET}"
-
-echo "[send-test] target=${TARGET}  marker=${MARKER}"
-
-# 1) Fire the send. apple-script on a new chat may hang the HTTP response, so we
-#    cap it short and ignore a timeout — step 2 is the source of truth.
-python3 - "$BB_PASS" "$BB_URL" "$CHAT_GUID" "$MESSAGE" <<'PY' || true
-import json,sys,urllib.request,urllib.parse,uuid
-pw,base,guid,msg=sys.argv[1:5]
-url=base+"/api/v1/message/text?password="+urllib.parse.quote(pw)
-body={"chatGuid":guid,"message":msg,"method":"apple-script","tempGuid":str(uuid.uuid4())}
-req=urllib.request.Request(url,data=json.dumps(body).encode("utf-8"),
-    headers={"Content-Type":"application/json"},method="POST")
-try:
-    r=urllib.request.urlopen(req,timeout=8)
-    print("[send-test] HTTP",r.status,"(immediate response)")
-except urllib.error.HTTPError as e:
-    print("[send-test] HTTP",e.code,e.read().decode()[:200])
-except Exception as e:
-    print("[send-test] HTTP request returned no body (likely apple-script delay):",type(e).__name__)
-PY
-
-# 2) Source of truth: find our marker in the message store and read its error.
-echo "[send-test] verifying via message/query (the HTTP timeout above is expected)…"
-sleep 3
-python3 - "$BB_PASS" "$BB_URL" "$MARKER" <<'PY'
-import json,sys,time,urllib.request,urllib.parse
-pw,base,marker=sys.argv[1:4]
-url=base+"/api/v1/message/query?password="+urllib.parse.quote(pw)
-def query():
-    body={"limit":15,"offset":0,"with":["chats"],"sort":"DESC"}
-    req=urllib.request.Request(url,data=json.dumps(body).encode(),
-        headers={"Content-Type":"application/json"},method="POST")
-    return json.load(urllib.request.urlopen(req,timeout=12)).get("data") or []
-hit=None
-for _ in range(5):
-    for m in query():
-        if marker in (m.get("text") or "") and m.get("isFromMe"):
-            hit=m; break
-    if hit: break
-    time.sleep(2)
-if not hit:
-    print("[send-test] ✗ outbound message not found in BB store — send likely failed")
-    sys.exit(1)
-err=hit.get("error")
-if err in (0,None):
-    print("[send-test] ✓ outbound message sent (fromMe=True, error=%s)"%err)
-    print("[send-test]   → confirm it arrived in the Messages app on the target device")
-else:
-    print("[send-test] ✗ BlueBubbles reported send error=%s"%err)
-    sys.exit(1)
-PY
@@ -1,187 +0,0 @@
-#!/usr/bin/env bash
-#
-# test-imessage-bridge.sh — Regression test for the iMessage Desktop bridge
-#
-# Drives the Electron main-process `imessageBridge.*` IPC handlers plus the
-# local bridge HTTP server and the BlueBubbles server, asserting the full
-# connect/configure flow. Use it to regression-test PR work on the iMessage
-# channel (BlueBubbles bridge) without clicking through the UI every time.
-#
-# Prerequisites:
-#   1. BlueBubbles server running and reachable (default http://127.0.0.1:1234)
-#   2. Electron dev running with CDP — `electron-dev.sh start`
-#   3. `agent-browser` on PATH, connected to the same CDP port
-#
-# Usage:
-#   ./test-imessage-bridge.sh <bluebubbles_password> [bb_url] [cdp_port]
-#
-# Example:
-#   ./test-imessage-bridge.sh 'my-bb-password'
-#   ./test-imessage-bridge.sh 'my-bb-password' http://127.0.0.1:1234 9222
-#
-# Notes:
-#   - The password is passed as an argv, so it is visible in `ps`. This is a
-#     local dev tool; do not run it on shared machines with a real secret.
-#   - It uses a unique applicationId per run (imsg-regression-$$) and cleans up
-#     its own bridge config + BlueBubbles webhook on exit, so it is safe to
-#     re-run and does not disturb real configs.
-set -euo pipefail
-
-BB_PASS="${1:?Usage: $0 <bluebubbles_password> [bb_url] [cdp_port]}"
-BB_URL="${2:-http://127.0.0.1:1234}"
-CDP_PORT="${3:-9222}"
-
-APP_ID="imsg-regression-$$"
-SECRET="regression-secret-$$"
-
-PASS=0
-FAIL=0
-
-# ── Output helpers ───────────────────────────────────────────────────
-ok()   { printf '  ✓ %s\n' "$1"; PASS=$((PASS + 1)); }          # record + print a passed assertion
-bad()  { printf '  ✗ %s — %s\n' "$1" "$2"; FAIL=$((FAIL + 1)); } # record + print a failure ($2 = detail)
-note() { printf '[imsg-test] %s\n' "$1"; }                       # informational line, not counted
-
-# ── BlueBubbles REST helpers ─────────────────────────────────────────
-bb_get_webhooks() { # list all BlueBubbles webhooks; -G + --data-urlencode keeps &, #, % or spaces in the password intact
-  curl -sS -m 8 -G --data-urlencode "password=${BB_PASS}" "${BB_URL}/api/v1/webhook"
-}
-
-# Delete every webhook whose URL mentions our APP_ID (cleanup is idempotent; APP_ID is passed to python as argv, never spliced into code).
-bb_cleanup_webhooks() {
-  local ids id
-  ids=$(bb_get_webhooks | python3 -c '
-import json,sys
-try: d=json.load(sys.stdin)
-except Exception: sys.exit(0)
-for w in (d.get("data") or []):
-    if sys.argv[1] in (w.get("url") or ""): print(w["id"])
-' "$APP_ID" 2>/dev/null || true)
-  for id in $ids; do
-    curl -sS -m 8 -G -X DELETE --data-urlencode "password=${BB_PASS}" "${BB_URL}/api/v1/webhook/${id}" >/dev/null 2>&1 || true
-  done
-}
-
-# ── IPC helper (drives the Electron renderer's electronAPI bridge) ───
-# Runs a JS snippet that returns a string token; prints the raw token.
-# The BlueBubbles password is base64-injected (atob) so special chars in the
-# secret never need shell/JS quoting.
-ipc_eval() {
-  local js="$1" # GNU base64 wraps at 76 columns; strip newlines so the payload stays a single argument
-  agent-browser --cdp "$CDP_PORT" eval -b "$(printf '%s' "$js" | base64 | tr -d '\n')" 2>/dev/null
-}
-
-PASS_B64=$(printf '%s' "$BB_PASS" | base64 | tr -d '\n')
-
-# Emit an inline JS object literal for the bridge config. $1 overrides the
-# password expression (defaults to atob of the real password); pass a JS string
-# literal like "'wrong'" to test the rejection path.
-ipc_config_js() {
-  local pwexpr="${1:-atob('${PASS_B64}')}"
-  printf "{applicationId:'%s',blueBubblesServerUrl:'%s',blueBubblesPassword:%s,enabled:true,webhookSecret:'%s'}" \
-    "$APP_ID" "$BB_URL" "$pwexpr" "$SECRET"
-}
-
-# ── Preflight ────────────────────────────────────────────────────────
-note "BlueBubbles: ${BB_URL}   CDP: ${CDP_PORT}   appId: ${APP_ID}"
-
-code=$(curl -sS -m 6 -o /dev/null -w '%{http_code}' -G \
-  --data-urlencode "password=${BB_PASS}" "${BB_URL}/api/v1/server/info" || true) # -w prints 000 itself when no response arrives
-if [ "$code" = "200" ]; then ok "BlueBubbles reachable + password valid"; else
-  bad "BlueBubbles preflight" "HTTP ${code:-000} (is BlueBubbles running on ${BB_URL}?)"
-  echo "Aborting — fix BlueBubbles first."; exit 1
-fi
-
-if ! curl -sf --max-time 3 "http://localhost:${CDP_PORT}/json/version" >/dev/null 2>&1; then
-  bad "Electron CDP preflight" "CDP ${CDP_PORT} unreachable — run electron-dev.sh start"
-  echo "Aborting."; exit 1
-fi
-ok "Electron CDP reachable"
-
-# Bridge must expose the IPC group (built from this branch's code).
-probe=$(ipc_eval "(async()=>{try{var s=await window.electronAPI.invoke('imessageBridge.getStatus',{});return 'OK:'+JSON.stringify(s);}catch(e){return 'ERR:'+(e.message||e);}})()")
-case "$probe" in
-  *OK:*) ok "imessageBridge IPC available" ;;
-  *) bad "imessageBridge IPC" "got: $probe (is the iMessage Desktop branch checked out?)"; echo "Aborting."; exit 1 ;;
-esac
-
-# Start clean, and repeat the same cleanup on EVERY exit: under `set -e` a failing step would otherwise skip the end-of-script cleanup.
-cleanup() { ipc_eval "(async()=>{try{await window.electronAPI.invoke('imessageBridge.removeConfig',{applicationId:'${APP_ID}'});}catch(e){}return 'done';})()" >/dev/null || true; bb_cleanup_webhooks || true; }
-trap cleanup EXIT; cleanup
-
-# ── testConfig: happy path ───────────────────────────────────────────
-r=$(ipc_eval "(async()=>{try{var c=$(ipc_config_js);var x=await window.electronAPI.invoke('imessageBridge.testConfig',c);return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
-case "$r" in
-  *OK:*success*true*) ok "testConfig with valid password → success" ;;
-  *) bad "testConfig (valid)" "got: $r" ;;
-esac
-
-# ── testConfig: wrong password rejects ───────────────────────────────
-r=$(ipc_eval "(async()=>{try{var c=$(ipc_config_js "'definitely-wrong-password'");var x=await window.electronAPI.invoke('imessageBridge.testConfig',c);return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
-case "$r" in
-  *ERR:*) ok "testConfig with wrong password → rejected" ;;
-  *) bad "testConfig (wrong password)" "expected rejection, got: $r" ;;
-esac
-
-# ── testConfig: unreachable URL rejects ──────────────────────────────
-r=$(ipc_eval "(async()=>{try{var x=await window.electronAPI.invoke('imessageBridge.testConfig',{applicationId:'${APP_ID}',blueBubblesServerUrl:'http://127.0.0.1:65530',blueBubblesPassword:atob('${PASS_B64}'),enabled:true,webhookSecret:'${SECRET}'});return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
-case "$r" in
-  *ERR:*) ok "testConfig with unreachable URL → rejected" ;;
-  *) bad "testConfig (unreachable)" "expected rejection, got: $r" ;;
-esac
-
-# ── upsertConfig: FIRST-TIME registration (Bug #1 regression guard) ──
-# BlueBubbles' GET /webhook?url=<unregistered> returns HTTP 500. The bridge
-# must list ALL webhooks and match client-side, otherwise this first save
-# fails. This assertion guards that fix.
-r=$(ipc_eval "(async()=>{try{var c=$(ipc_config_js);var x=await window.electronAPI.invoke('imessageBridge.upsertConfig',c);return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
-case "$r" in
-  *OK:*success*true*) ok "upsertConfig first-time save → success (Bug #1 guard)" ;;
-  *) bad "upsertConfig (first-time)" "got: $r" ;;
-esac
-
-# ── getStatus: bridge running + config persisted ─────────────────────
-# Return a quote-free token so grep isn't tripped up by agent-browser's
-# JSON-string escaping of the eval result.
-r=$(ipc_eval "(async()=>{var s=await window.electronAPI.invoke('imessageBridge.getStatus',{});var c=(s.configs||[]).find(function(x){return x.applicationId==='${APP_ID}';});return 'RUN='+(s.running?'Y':'N')+' CFG='+(c?'Y':'N')+' PW='+((c&&c.blueBubblesPasswordSet)?'Y':'N');})()")
-echo "$r" | grep -q 'RUN=Y' && ok "bridge running" || bad "bridge running" "got: $r"
-echo "$r" | grep -q 'CFG=Y' && ok "config persisted" || bad "config persisted" "got: $r"
-echo "$r" | grep -q 'PW=Y'  && ok "password stored (redacted in status)" || bad "password stored" "got: $r"
-
-# ── BlueBubbles webhook actually registered ──────────────────────────
-if grep -q "${APP_ID}" <<<"$(bb_get_webhooks || true)"; then # no `curl | grep -q` pipe: under pipefail an early grep exit can SIGPIPE curl and fail the check
-  ok "BlueBubbles webhook registered for appId"
-else
-  bad "BlueBubbles webhook" "no webhook URL containing ${APP_ID}"
-fi
-
-# ── Local bridge HTTP server: secret enforcement ─────────────────────
-BRIDGE_URL=$(ipc_eval "(async()=>{var s=await window.electronAPI.invoke('imessageBridge.getStatus',{});return s.serverUrl||'';})()" | tr -d '"')
-if [ -n "$BRIDGE_URL" ]; then
-  # wrong secret → 401 (curl -w already prints 000 when no response arrives, hence `|| true` rather than `|| echo 000`)
-  code=$(curl -sS -m 6 -o /dev/null -w '%{http_code}' -X POST -H 'Content-Type: application/json' \
-    "${BRIDGE_URL}/webhooks/bluebubbles/${APP_ID}?secret=WRONG" -d '{"type":"new-message","data":{"guid":"x"}}' || true)
-  [ "$code" = "401" ] && ok "local bridge rejects wrong secret (401)" || bad "local bridge wrong secret" "expected 401, got ${code:-000}"
-
-  # right secret → passes auth (reaches forward; without a bound cloud bot it
-  # returns 5xx — that's fine, we're only asserting auth + routing here)
-  code=$(curl -sS -m 6 -o /dev/null -w '%{http_code}' -X POST -H 'Content-Type: application/json' \
-    "${BRIDGE_URL}/webhooks/bluebubbles/${APP_ID}?secret=${SECRET}" -d '{"type":"new-message","data":{"guid":"x","text":"hi"}}' || true)
-  case "${code:-000}" in
-    401) bad "local bridge valid secret" "got 401 with correct secret" ;;
-    000) bad "local bridge valid secret" "no HTTP response — bridge unreachable, auth was never exercised" ;;
-    *)   ok "local bridge accepts valid secret (HTTP $code, past auth)" ;;
-  esac
-else
-  bad "local bridge URL" "getStatus returned no serverUrl"
-fi
-
-# ── Cleanup ──────────────────────────────────────────────────────────
-ipc_eval "(async()=>{try{await window.electronAPI.invoke('imessageBridge.removeConfig',{applicationId:'${APP_ID}'});await window.electronAPI.invoke('imessageBridge.stop',{});}catch(e){}return 'cleaned';})()" >/dev/null
-bb_cleanup_webhooks
-note "cleaned up config + BlueBubbles webhook for ${APP_ID}"
-
-# ── Summary ──────────────────────────────────────────────────────────
-echo ""
-echo "[imsg-test] PASS=${PASS}  FAIL=${FAIL}"
-[ "$FAIL" -eq 0 ] || exit 1
@@ -1,142 +0,0 @@
-# CLI Backend Verification
-
-Default surface for verifying **backend changes** (TRPC routers, services,
-models, migrations) end-to-end: fastest loop, text-assertable output, zero UI
-flakiness.
-
-## When to use
-
-- Verifying TRPC router / service / model changes end-to-end
-- Testing new API fields or response structure changes
-- Validating CLI command output after backend modifications
-- Debugging data flow issues between server and CLI
-
-## Prerequisites
-
-| Requirement  | Details                                                                           |
-| ------------ | --------------------------------------------------------------------------------- |
-| Dev server   | `localhost:3010` — see [../references/dev-server.md](../references/dev-server.md) |
-| CLI source   | `apps/cli/` — runs from source, no rebuild; standalone `node_modules` — run `pnpm install` inside `apps/cli/` (root install does not cover it) |
-| CLI dev mode | `LOBEHUB_CLI_HOME=.lobehub-dev` for isolated credentials                          |
-| Auth         | Device Code Flow login — see [../references/auth.md](../references/auth.md)       |
-
-All CLI dev commands run from `apps/cli/`. Subsequent examples use `$CLI`:
-
-```bash
-CLI="LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts"
-```
-
-## Workflow
-
-### Step 1 — Server up?
-
-See [../references/dev-server.md](../references/dev-server.md) for the health
-check, start, and restart commands. Server-side code changes require a restart.
-
-### Step 2 — Auth ready?
-
-```bash
-./.agents/skills/agent-testing/scripts/setup-auth.sh status
-```
-
-If the CLI is not logged in, **the user must run the login themselves**
-(interactive browser authorization):
-
-```bash
-cd apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server http://localhost:3010
-```
-
-Credentials persist in `apps/cli/.lobehub-dev/`. Details:
-[../references/auth.md](../references/auth.md).
-
-### Step 3 — Test with CLI commands
-
-CLI runs from source, so CLI-side code changes take effect immediately without
-rebuilding:
-
-```bash
-cd apps/cli
-$CLI <command>
-```
-
-Capture output for the report as you go (e.g. `$CLI task list | tee "$DIR/assets/task-list.txt"`).
-
-### Step 4 — Clean up test data
-
-```bash
-$CLI task delete <id> -y
-$CLI agent delete <id> -y
-```
-
-### Step 5 — Report
-
-Finish with a structured report —
-[../references/report.md](../references/report.md). CLI evidence = exact
-command + trimmed output.
-
-## Common testing patterns
-
-### Task system
-
-```bash
-$CLI task list
-$CLI task create -n "Root Task" -i "Test instruction"
-$CLI task create -n "Child Task" -i "Sub instruction" --parent T-1
-$CLI task view T-1
-$CLI task tree T-1
-$CLI task edit T-1 --status running
-$CLI task comment T-1 -m "Test comment"
-$CLI task delete T-1 -y
-```
-
-### Agent system
-
-```bash
-$CLI agent list
-$CLI agent view <agent-id>
-$CLI agent run <agent-id> -m "Test prompt"
-```
-
-### Document & knowledge base
-
-```bash
-$CLI doc list
-$CLI doc create -t "Test Doc" -c "Content here"
-$CLI doc view <doc-id>
-$CLI kb list
-$CLI kb tree <kb-id>
-```
-
-### Model & provider
-
-```bash
-$CLI model list
-$CLI provider list
-$CLI provider test <provider-id>
-```
-
-## Dev-test cycle
-
-```
-1. Make code changes (service/model/router/type)
-         |
-2. Run unit tests (fast feedback)
-   bunx vitest run --silent='passed-only' '<test-file>'
-         |
-3. Restart dev server (if server-side changes — see dev-server.md)
-         |
-4. CLI verification (end-to-end)
-   $CLI <command>
-         |
-5. Clean up test data + write the report
-```
-
-## Troubleshooting
-
-| Issue                       | Solution                                        |
-| --------------------------- | ----------------------------------------------- |
-| `No authentication found`   | Run `login --server http://localhost:3010`      |
-| `UNAUTHORIZED` on API calls | Token expired; re-run login                     |
-| `ECONNREFUSED`              | Dev server not running — see dev-server.md      |
-| CLI shows old data/behavior | Server needs restart to pick up code changes    |
-| Login opens wrong server    | Must use `--server` flag (env var doesn't work) |
@@ -1,257 +0,0 @@
-# agent-browser CLI Reference
-
-Generic reference for the `agent-browser` CLI — automate Chromium-based apps (Electron, Chrome, web) via Chrome DevTools Protocol. LobeHub-specific patterns live in [../ui/electron.md](../ui/electron.md) and [../ui/web.md](../ui/web.md); authentication recipes live in [auth.md](./auth.md).
-
-Use `agent-browser` to automate Chromium-based apps via Chrome DevTools Protocol.
-
-Install via `npm i -g agent-browser`, `brew install agent-browser`, or `cargo install agent-browser`. Run `agent-browser install` to download Chrome. Run `agent-browser upgrade` to update.
-
-## Core Workflow
-
-Every browser automation follows this pattern:
-
-1. **Navigate**: `agent-browser open <url>`
-2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
-3. **Interact**: Use refs to click, fill, select
-4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
-
-```bash
-agent-browser open https://example.com/form
-agent-browser snapshot -i
-# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
-
-agent-browser fill @e1 "user@example.com"
-agent-browser fill @e2 "password123"
-agent-browser click @e3
-agent-browser wait --load networkidle
-agent-browser snapshot -i # Check result
-```
-
-## Command Chaining
-
-```bash
-# Chain open + wait + snapshot in one call
-agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
-```
-
-Use `&&` when you don't need to read intermediate output. Run commands separately when you need to parse output first (e.g., snapshot to discover refs, then interact).
-
-## Essential Commands
-
-```bash
-# Navigation
-agent-browser open <url>              # Navigate (aliases: goto, navigate)
-agent-browser close                   # Close browser
-agent-browser close --all             # Close all active sessions
-
-# Snapshot
-agent-browser snapshot -i             # Interactive elements with refs (recommended)
-agent-browser snapshot -s "#selector" # Scope to CSS selector
-
-# Interaction (use @refs from snapshot)
-agent-browser click @e1               # Click element
-agent-browser click @e1 --new-tab     # Click and open in new tab
-agent-browser fill @e2 "text"         # Clear and type text
-agent-browser type @e2 "text"         # Type without clearing
-agent-browser select @e1 "option"     # Select dropdown option
-agent-browser check @e1               # Check checkbox
-agent-browser press Enter             # Press key
-agent-browser keyboard type "text"    # Type at current focus (no selector)
-agent-browser keyboard inserttext "text"  # Insert without key events
-agent-browser scroll down 500         # Scroll page
-agent-browser scroll down 500 --selector "div.content"  # Scroll within container
-
-# Get information
-agent-browser get text @e1            # Get element text
-agent-browser get url                 # Get current URL
-agent-browser get title               # Get page title
-agent-browser get cdp-url             # Get CDP WebSocket URL
-
-# Wait
-agent-browser wait @e1                # Wait for element
-agent-browser wait --load networkidle # Wait for network idle
-agent-browser wait --url "**/page"    # Wait for URL pattern
-agent-browser wait 2000               # Wait milliseconds
-agent-browser wait --text "Welcome"   # Wait for text to appear
-agent-browser wait --fn "!document.body.innerText.includes('Loading...')"  # Wait for text to disappear
-agent-browser wait "#spinner" --state hidden  # Wait for element to disappear
-
-# Downloads
-agent-browser download @e1 ./file.pdf          # Click element to trigger download
-agent-browser wait --download ./output.zip     # Wait for any download to complete
-
-# Network
-agent-browser network requests                 # Inspect tracked requests
-agent-browser network requests --type xhr,fetch  # Filter by resource type
-agent-browser network requests --method POST   # Filter by HTTP method
-agent-browser network route "**/api/*" --abort # Block matching requests
-agent-browser network har start                # Start HAR recording
-agent-browser network har stop ./capture.har   # Stop and save HAR file
-
-# Viewport & Device Emulation
-agent-browser set viewport 1920 1080          # Set viewport size (default: 1280x720)
-agent-browser set viewport 1920 1080 2        # 2x retina
-agent-browser set device "iPhone 14"          # Emulate device (viewport + user agent)
-
-# Capture
-agent-browser screenshot              # Screenshot to temp dir
-agent-browser screenshot --full       # Full page screenshot
-agent-browser screenshot --annotate   # Annotated screenshot with numbered element labels
-agent-browser pdf output.pdf          # Save as PDF
-
-# Clipboard
-agent-browser clipboard read          # Read text from clipboard
-agent-browser clipboard write "text"  # Write text to clipboard
-agent-browser clipboard copy          # Copy current selection
-agent-browser clipboard paste         # Paste from clipboard
-
-# Dialogs (alert, confirm, prompt, beforeunload)
-agent-browser dialog accept           # Accept dialog
-agent-browser dialog accept "input"   # Accept prompt dialog with text
-agent-browser dialog dismiss          # Dismiss/cancel dialog
-agent-browser dialog status           # Check if dialog is open
-
-# Diff (compare page states)
-agent-browser diff snapshot                        # Compare current vs last snapshot
-agent-browser diff screenshot --baseline before.png  # Visual pixel diff
-agent-browser diff url <url1> <url2>               # Compare two pages
-
-# Streaming
-agent-browser stream enable           # Start WebSocket streaming
-agent-browser stream status           # Inspect streaming state
-agent-browser stream disable          # Stop streaming
-```
-
-## Batch Execution
-
-```bash
-echo '[
-  ["open", "https://example.com"],
-  ["snapshot", "-i"],
-  ["click", "@e1"],
-  ["screenshot", "result.png"]
-]' | agent-browser batch --json
-```
-
-## Authentication
-
-```bash
-# Option 1: Auth vault (credentials stored encrypted)
-echo "$PASSWORD" | agent-browser auth save myapp --url https://app.example.com/login --username user --password-stdin
-agent-browser auth login myapp
-
-# Option 2: Session name (auto-save/restore cookies + localStorage)
-agent-browser --session-name myapp open https://app.example.com/login
-agent-browser close                                                       # State auto-saved
-agent-browser --session-name myapp open https://app.example.com/dashboard # Auto-restored
-
-# Option 3: Persistent profile
-agent-browser --profile ~/.myapp open https://app.example.com/login
-
-# Option 4: State file
-agent-browser state save auth.json
-agent-browser state load auth.json
-```
-
-### LobeHub dev server — inject better-auth cookie
-
-`agent-browser --headed` on macOS can create an off-screen Chromium window, blocking manual login. For a local LobeHub dev server (e.g. `localhost:3010`), copy the `better-auth.session_token` cookie out of a **Network request** in the user's own Chrome DevTools and load it via `state load`. See [auth.md](./auth.md) for the full recipe.
-
-## Semantic Locators (Alternative to Refs)
-
-```bash
-agent-browser find text "Sign In" click
-agent-browser find label "Email" fill "user@test.com"
-agent-browser find role button click --name "Submit"
-agent-browser find placeholder "Search" type "query"
-agent-browser find testid "submit-btn" click
-```
-
-## JavaScript Evaluation (eval)
-
-```bash
-# Simple expressions
-agent-browser eval 'document.title'
-
-# Complex JS: use --stdin with heredoc (RECOMMENDED)
-agent-browser eval --stdin << 'EVALEOF'
-JSON.stringify(
-  Array.from(document.querySelectorAll("img"))
-    .filter(i => !i.alt)
-    .map(i => ({ src: i.src.split("/").pop(), width: i.width }))
-)
-EVALEOF
-
-# Base64 encoding (avoids all shell escaping issues)
-agent-browser eval -b "$(echo -n 'document.title' | base64)"
-```
-
-## Ref Lifecycle
-
-Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after clicking links/buttons that navigate, form submissions, or dynamic content loading.
-
-## Annotated Screenshots (Vision Mode)
-
-```bash
-agent-browser screenshot --annotate
-# Output includes the image path and a legend:
-#   [1] @e1 button "Submit"
-#   [2] @e2 link "Home"
-agent-browser click @e2 # Click using ref from annotated screenshot
-```
-
-## Parallel Sessions
-
-```bash
-agent-browser --session site1 open https://site-a.com
-agent-browser --session site2 open https://site-b.com
-agent-browser session list
-```
-
-## Connect to Existing Chrome
-
-```bash
-agent-browser --auto-connect snapshot # Auto-discover running Chrome
-agent-browser --cdp 9222 snapshot     # Explicit CDP port
-```
-
-## iOS Simulator (Mobile Safari)
-
-```bash
-agent-browser device list
-agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
-agent-browser -p ios snapshot -i
-agent-browser -p ios tap @e1
-agent-browser -p ios swipe up
-agent-browser -p ios screenshot mobile.png
-agent-browser -p ios close
-```
-
-## Observability Dashboard
-
-```bash
-agent-browser dashboard install
-agent-browser dashboard start # Background server on port 4848
-agent-browser dashboard stop
-```
-
-## Cloud Providers
-
-Use `-p <provider>` to run against cloud browsers: `agentcore`, `browserbase`, `browserless`, `browseruse`, `kernel`.
-
-## Browser Engine Selection
-
-```bash
-agent-browser --engine lightpanda open example.com # 10x faster, 10x less memory
-```
-
-## Gotchas
-
-- **Daemon can get stuck** — if commands hang, `agent-browser close --all` or `pkill -f agent-browser` to reset
-- **HMR invalidates everything** — after code changes, refs break. Re-snapshot or restart
-- **`snapshot -i` doesn't find contenteditable** — use `snapshot -i -C` for rich text editors
-- **`fill` doesn't work on contenteditable** — use `type` for chat inputs
-- **Screenshots go to `~/.agent-browser/tmp/screenshots/`** — read them with the `Read` tool
-- **Dialogs block all commands** — if commands time out, check `agent-browser dialog status`
-- **Default timeout is 25s** — override with `AGENT_BROWSER_DEFAULT_TIMEOUT` (ms) or use explicit waits
-- **Shell quoting corrupts eval** — use `eval --stdin <<'EVALEOF'` for complex JS
@@ -1,123 +0,0 @@
-# Auth Setup for Local Agent Testing
-
-**Auth is the gate for all automated testing.** Prepare and verify it before
-writing any test step. The one-stop entry point is:
-
-```bash
-SCRIPT=".agents/skills/agent-testing/scripts/setup-auth.sh"
-
-$SCRIPT status        # check server + CLI + web auth readiness
-$SCRIPT cli           # interactive CLI device-code login (must be run by the user)
-pbpaste | $SCRIPT web # inject a copied Cookie header into the agent-browser session
-$SCRIPT web-verify    # live-check that the agent-browser session is authenticated
-```
-
-`SERVER_URL` defaults to `http://localhost:3010` (this repo's `dev:next` port).
-Override it when testing against another server (e.g. `SERVER_URL=http://localhost:3011`
-in the cloud repo).
-
-## Per-surface overview
-
-| Surface  | Mechanism                                | Persistence                                                       | Human interaction                               |
-| -------- | ---------------------------------------- | ----------------------------------------------------------------- | ----------------------------------------------- |
-| CLI      | OIDC Device Code Flow                    | `apps/cli/.lobehub-dev/settings.json`                             | Yes — browser authorization, every token expiry |
-| Web      | better-auth cookie injection             | `~/.lobehub-agent-testing/web-state.json` + agent-browser session | Copy the Cookie header once per token rotation  |
-| Electron | App's own login state                    | Electron user-data dir                                            | Log in once manually in the app                 |
-| Bot      | Native apps (Discord/WeChat/…) logged in | Each app's own session                                            | Once per app                                    |
-
-## CLI — Device Code Flow
-
-Credentials are isolated from the user's real CLI config via
-`LOBEHUB_CLI_HOME=.lobehub-dev` (kept inside `apps/cli/`, gitignored).
-
-Login requires interactive browser authorization, so **the user must run it
-themselves** (e.g. via the `!` prefix in Claude Code):
-
-```bash
-cd apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server http://localhost:3010
-```
-
-- The `--server` flag is required — an env var does NOT work and login will hit
-  the wrong server without it.
-- Check state without logging in: `setup-auth.sh status` (verifies
-  `settings.json` exists and `serverUrl` matches).
-- `UNAUTHORIZED` on API calls means the token expired — re-run login.
-
-## Web — better-auth cookie injection (agent-browser)
-
-`agent-browser --headed` on macOS often creates the Chromium window off-screen —
-the user can't see or interact with it, so manual login inside the agent-browser
-session fails. Instead, copy the **better-auth session cookie** out of the
-user's own logged-in Chrome and inject it as a Playwright-style state file.
-
-Do **not** use this on production URLs — only local dev. Treat the cookie as a
-secret: don't paste it into shared logs, PRs, or commit it anywhere.
-
-### One-key path
-
-1. Ask the user to copy the Cookie header **from a Network request, NOT
-   `document.cookie`** (`document.cookie` cannot see HttpOnly cookies, which is
-   exactly where better-auth puts its session):
-   - Open the logged-in tab (`http://localhost:<port>/…`) in Chrome.
-   - `Cmd+Option+I` → **Network** tab → refresh → click any same-origin request.
-   - Under **Request Headers**, right-click the `Cookie:` line → **Copy value**.
-2. Inject and verify in one shot:
-
-```bash
-pbpaste | ./.agents/skills/agent-testing/scripts/setup-auth.sh web
-```
-
-The script filters the header down to the better-auth cookies
-(`better-auth.session_token`, `better-auth.state`), builds the Playwright
-`storageState` JSON, loads it into the `agent-browser` session (default name
-`lobehub-dev`), opens `SERVER_URL`, and asserts the URL is not `/signin`.
-
-### Using the authenticated session
-
-```bash
-agent-browser --session lobehub-dev open "http://localhost:3010/"
-agent-browser --session lobehub-dev snapshot -i | head -20
-# Look for the user's avatar/name in the sidebar, or absence of the signin form.
-```
-
-### Notes
-
-- `storageState` doesn't enforce the HttpOnly flag on load — the script stores
-  cookies with `httpOnly: false`, which is fine for local dev and sidesteps a
-  CDP-context quirk where HttpOnly cookies sometimes fail to attach.
-- The state file is kept at `~/.lobehub-agent-testing/web-state.json` so
-  `setup-auth.sh status` can report web-auth readiness across sessions.
-
-### Common failure modes
-
-| Symptom                                       | Cause                                                                     | Fix                                               |
-| --------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------- |
-| Still redirects to `/signin` after injection  | User pasted from `document.cookie` → missed HttpOnly session              | Re-pull from Network request Headers, not console |
-| Script reports `no better-auth cookies found` | Separator wrong, or user pasted URL-decoded value                         | Keep the raw `Cookie:` header as-is               |
-| Login works briefly then expires              | `better-auth.session_token` rotated (user logged out / signed in again)   | Re-copy and re-inject                             |
-| Domain mismatch                               | Cookie domain must be `localhost` literally, no leading dot for local dev | —                                                 |
-
-## Electron
-
-The desktop app keeps its own persistent login state in its user-data
-directory — log in once manually inside the app and it survives restarts of
-`electron-dev.sh`. No injection needed. The standard check (do NOT hand-roll a
-store eval) once Electron is up with CDP:
-
-```bash
-./.agents/skills/agent-testing/scripts/app-probe.sh auth
-# → {"ok":true,"isSignedIn":true,"userId":"user_xxx"}
-```
-
-`setup-auth.sh status` runs this probe automatically when CDP 9222 is
-reachable.
-
-## Scope
-
-These recipes only cover **local dev** authentication. They do not:
-
-- Work for production — production cookies are `Secure; HttpOnly; Domain=.lobehub.com`
-  and must be delivered over HTTPS.
-- Replace real OAuth flows — tests that must exercise the login UI itself need a
-  real Chromium with `--remote-debugging-port` or a bot account.
-- Flow cookies back to the user's Chrome — injection is one-way.
@@ -1,55 +0,0 @@
-# Local Dev Server
-
-Single source of truth for starting / restarting the backend that all test
-surfaces (CLI, Electron, Web) hit.
-
-## Ports & modes
-
-| Command             | What it runs                                              | Port                              |
-| ------------------- | --------------------------------------------------------- | --------------------------------- |
-| `pnpm run dev:next` | Next.js backend (API + auth)                              | `3010`                            |
-| `bun run dev`       | Full-stack (Next.js + Vite SPA, via `devStartupSequence`) | `3010` (API) + SPA                |
-| `bun run dev:spa`   | Vite SPA only, proxies API to `3010`                      | `9876` (prints a Debug Proxy URL) |
-
-In the **cloud repo** (where this repo is the `lobehub/` submodule) the dev
-server conventionally runs on `3011` — set `SERVER_URL=http://localhost:3011`
-for the scripts in this skill when testing there.
-
-## Health check
-
-```bash
-curl -s -o /dev/null -w '%{http_code}' http://localhost:3010/
-```
-
-## Start / restart
-
-```bash
-# Start (from repo root)
-pnpm run dev:next
-
-# Restart — required to pick up server-side code changes
-lsof -ti:3010 | xargs kill
-pnpm run dev:next
-```
-
-## When a server restart is needed
-
-Next.js hot-reload may not pick up changes in workspace packages — restart when
-in doubt.
-
-| Change location                                 | Restart? |
-| ----------------------------------------------- | -------- |
-| `apps/server/src/` (routers, services, modules) | Yes      |
-| `src/server/` (agent-hono, workflows-hono)      | Yes      |
-| `packages/database/` (models)                   | Yes      |
-| `packages/types/`                               | Yes      |
-| `packages/prompts/`                             | Yes      |
-| `apps/cli/` (CLI runs from source)              | No       |
-
-## Troubleshooting
-
-| Issue                     | Solution                                                |
-| ------------------------- | ------------------------------------------------------- |
-| `ECONNREFUSED`            | Server not running — start it                           |
-| `EADDRINUSE` on the port  | Already running — `lsof -ti:<port> \| xargs kill` first |
-| Stale data / old behavior | Server needs a restart to pick up code changes          |
@@ -1,124 +0,0 @@
-# Structured Test Reports
-
-Every automated test session ends with a structured, evidence-backed report.
-A chat-only summary is not an acceptable deliverable: the report is what the
-user (or a reviewer, or a later agent) audits without replaying the session.
-
-## Location & layout
-
-Reports live under `.records/reports/` (gitignored, like all `.records/`
-output):
-
-```
-.records/reports/<YYYYMMDD-HHMMSS>-<slug>/
-├── report.md      # human-readable report (embedded screenshots, case table, verdict)
-├── result.json    # machine-readable results (pass/fail counts, score)
-└── assets/        # evidence: screenshots, HAR files, CLI transcripts
-```
-
-## Workflow
-
-1. **Scaffold up front** — before running the first test step:
-
-   ```bash
-   DIR=$(./.agents/skills/agent-testing/scripts/report-init.sh <slug> "<title>")
-   ```
-
-   The script creates the directory, pre-fills branch / commit / date in both
-   files, and prints the directory path.
-
-2. **Collect evidence as you test** — every asserted behavior gets one evidence
-   item in `$DIR/assets/`:
-   - UI (static state): `agent-browser screenshot` or `capture-app-window.sh`,
-     then **verify the screenshot with the Read tool before citing it** —
-     never cite an image you haven't looked at.
-   - UI (time-based behavior): **screenshot vs GIF is a judgment you must
-     make per case.** If the assertion is about change over time — streaming
-     output, a ticking timer, loading/progress states, animations,
-     appear/disappear transitions — a static screenshot cannot prove it.
-     Record a frame sequence and synthesize a GIF:
-
-     ```bash
-     # start recording (background), trigger the behavior, wait for it to finish
-     ../scripts/record-gif.sh "$DIR/assets/case2-streaming.gif" 12 2 &
-     GIF_PID=$!
-     # ... drive the scenario ...
-     wait $GIF_PID
-     ```
-
-     Embed it like an image: `![case 2](assets/case2-streaming.gif)`. Verify
-     at least the first/last frames visually (Read the GIF) before citing.
-   - CLI: exact command + trimmed output (`$CLI task list | tee "$DIR/assets/task-list.txt"`).
-   - Network: `agent-browser network requests` dumps or HAR files.
-
-3. **Fill `report.md` as you go** — don't reconstruct from memory at the end.
-
-4. **Set the verdict** in both `report.md` and `result.json`, then link the
-   report directory in your final answer to the user.
-
-## Report language (hard rule)
-
-**`report.md` MUST be written in the language the user is conversing in** —
-the whole file, headings included. If the conversation is in Chinese, the
-report is in Chinese; do not mix English prose into it. The scaffold's English
-headings are placeholders — translate them when filling. Exceptions that stay
-as-is: code/commands, identifiers, log excerpts, and `result.json` (its keys
-and status values are machine-read and stay English; the `title` and case
-`name` fields follow the user's language).
-
-## report.md sections
-
-| Section         | Content                                                                            |
-| --------------- | ---------------------------------------------------------------------------------- |
-| **Scope**       | What changed / what is being verified; branch + commit                             |
-| **Environment** | Server URL, surfaces used (cli / electron / web / bot), relevant versions          |
-| **Cases**       | Table: `# \| case \| surface \| steps \| expected \| actual \| status \| evidence` |
-| **Evidence**    | Embedded screenshots/GIFs (`![case 1](assets/case1.png)`), fenced CLI transcripts  |
-| **Verdict**     | Pass/fail/blocked counts, optional 0–100 score, open issues / follow-ups           |
-
-Status values: `pass` / `fail` / `blocked` (couldn't run — e.g. auth or env
-missing; a blocked case is not a pass).
-
-## result.json schema
-
-```json
-{
-  "branch": "feat/task-tree",
-  "cases": [
-    {
-      "id": "1",
-      "name": "task tree returns nested children",
-      "surface": "cli",
-      "status": "pass",
-      "evidence": ["assets/task-tree.txt"]
-    }
-  ],
-  "commit": "abc1234",
-  "createdAt": "2026-06-11T15:30:00+08:00",
-  "summary": {
-    "total": 1,
-    "passed": 1,
-    "failed": 0,
-    "blocked": 0,
-    "score": 100,
-    "verdict": "pass"
-  },
-  "surfaces": ["cli"],
-  "title": "Verify task tree API"
-}
-```
-
-`score` is optional — use it when the verdict has a subjective component (UI
-polish, copy quality); omit it for purely binary runs. `verdict` is the single
-word the user reads first: `pass`, `fail`, or `partial`.
-
-## Rules
-
-- **No evidence, no claim** — every `pass`/`fail` in the case table must link
-  at least one asset.
-- **Screenshots must be visually verified** with the Read tool before being
-  cited.
-- **Report failures faithfully** — a failing case with clear evidence is a good
-  report; a vague green one is not.
-- If coverage was cut (cases skipped, surfaces not exercised), say so in the
-  Verdict section — silent truncation reads as "covered everything".
@@ -1,243 +0,0 @@
-// Analyzer for probe-events dumps. Reads a JSON file produced by `run.ts dump`
-// and prints a layered breakdown:
-//
-//   1. STREAM EVENTS — every non-chunk WS/SSE event in receipt order
-//   2. CHUNKS SUMMARY — collapsed per-step chunk counts (otherwise floods)
-//   3. ACTION CALLS — replaceMessages / refreshMessages / MARK:* with stack
-//   4. CORRELATION — calls ↔ nearest stream event within ±300ms
-//   5. PER-KEY ASSISTANT GROWTH — for each messagesMap key, when the trailing
-//      message's cLen / rLen actually moves (this is what reveals
-//      "chunks arrived but the message never grew" regressions)
-//   6. ROLLBACKS — msgN / childN / cLen / rLen drops in the active-topic timeline
-//
-// Usage:
-//   bun run .agents/skills/agent-testing/scripts/agent-gateway/analyze-events.ts <dump.json>
-
-import { readFileSync } from 'node:fs';
-
-import type {
-  ProbeActionCall,
-  ProbeDump,
-  ProbeMessageSummary,
-  ProbeStreamEvent,
-  ProbeTimelineSample,
-} from './types';
-
-const file = process.argv[2];
-if (!file) {
-  console.error('usage: bun run analyze-events.ts <dump.json>');
-  process.exit(1);
-}
-
-const raw = readFileSync(file, 'utf8');
-// agent-browser eval --stdin wraps return values in quotes when the value is
-// a string — so the JSON file may be double-encoded depending on how it was
-// captured. Handle both.
-const parsedOnce = JSON.parse(raw) as ProbeDump | string;
-const dump: ProbeDump = typeof parsedOnce === 'string' ? JSON.parse(parsedOnce) : parsedOnce;
-
-const { streamEvents = [], actionCalls = [], timeline = [] } = dump;
-
-const pad = (v: unknown, n: number) => String(v).padStart(n);
-
-// ── META ───────────────────────────────────────────────────────────
-console.log('=== META ===');
-console.log(`  events:    ${streamEvents.length}`);
-console.log(`  calls:     ${actionCalls.length}`);
-console.log(`  timeline:  ${timeline.length}`);
-
-// ── 1. STREAM EVENTS (non-chunk) ───────────────────────────────────
-const nonChunkEvents = streamEvents.filter((e) => e.type !== 'stream_chunk');
-const chunkEvents = streamEvents.filter((e) => e.type === 'stream_chunk');
-
-console.log(
-  `\n=== STREAM EVENTS (${nonChunkEvents.length} non-chunk + ${chunkEvents.length} chunks elided) ===`,
-);
-for (const e of nonChunkEvents) {
-  const dataStr = e.dataKeys?.length ? ` [${e.dataKeys.join(',')}]` : '';
-  const data = e.data as Record<string, unknown> | undefined;
-  const uiHint = data?.uiMessagesPreview
-    ? ` uiPreview=${JSON.stringify(data.uiMessagesPreview)}`
-    : data?.uiMessagesTotal
-      ? ` uiTotal=${data.uiMessagesTotal}`
-      : '';
-  const phaseHint = data?.phase ? ` phase=${data.phase}` : '';
-  const extra = e.serverType ? ` serverType=${e.serverType}` : '';
-  console.log(
-    `  t=${pad(e.t, 7)}  [${(e.transport ?? '?').padEnd(3)}]  step=${pad(e.stepIndex ?? '-', 2)}  ` +
-      `type=${(e.type ?? '').padEnd(22)}  op=${e.opIdTail ?? '-'}${phaseHint}${uiHint}${extra}${dataStr}`,
-  );
-}
-
-// ── 2. CHUNK SUMMARY ───────────────────────────────────────────────
-console.log('\n=== CHUNKS SUMMARY (per step / chunkType) ===');
-const chunkBuckets = new Map<string, { count: number; firstT: number; lastT: number }>();
-for (const c of chunkEvents) {
-  const data = c.data as Record<string, unknown> | undefined;
-  const ct = (data?.chunkType as string | undefined) ?? '?';
-  const key = `step=${c.stepIndex ?? '-'}  chunkType=${ct.padEnd(8)}  op=${c.opIdTail}`;
-  const slot = chunkBuckets.get(key);
-  if (slot) {
-    slot.count += 1;
-    slot.lastT = c.t;
-  } else {
-    chunkBuckets.set(key, { count: 1, firstT: c.t, lastT: c.t });
-  }
-}
-for (const [k, v] of chunkBuckets) {
-  console.log(`  ${k}  count=${pad(v.count, 4)}  t=${pad(v.firstT, 7)}..${pad(v.lastT, 7)}`);
-}
-
-// ── 3. ACTION CALLS ───────────────────────────────────────────────
-console.log('\n=== ACTION CALLS (replace/refresh/MARK) ===');
-for (const c of actionCalls) {
-  if (c.name?.startsWith('MARK:')) {
-    console.log(`  t=${pad(c.t, 7)}  ${c.name}`);
-    continue;
-  }
-  const snapshot = (c.args as any)?.snapshot as
-    | Array<{ id: string; role: string; cLen: number; rLen: number }>
-    | undefined;
-  const snapStr = snapshot?.length
-    ? '  snapshot=' + snapshot.map((m) => `${m.id}:${m.role}/c${m.cLen}/r${m.rLen}`).join(' | ')
-    : '';
-  const summary =
-    c.name === 'replaceMessages'
-      ? `count=${c.args?.count} action=${(c.args?.params as any)?.action ?? '-'}${snapStr}`
-      : c.name === 'refreshMessages'
-        ? `ctx=${JSON.stringify(c.args?.context)}`
-        : c.error
-          ? `error=${c.error}`
-          : '';
-  console.log(`  t=${pad(c.t, 7)}  ${c.name.padEnd(20)} ${summary}`);
-  if (c.stack) {
-    const frames = c.stack
-      .split(' ← ')
-      .filter((f) => !!f && !f.includes('Object.<anonymous>'))
-      .slice(0, 3);
-    for (const f of frames) console.log(`             ↳ ${f}`);
-  }
-}
-
-// ── 4. CORRELATION ────────────────────────────────────────────────
-function nearestEventForCall(
-  call: ProbeActionCall,
-  windowMs = 300,
-): { event: ProbeStreamEvent; delta: number } | null {
-  let best: ProbeStreamEvent | null = null;
-  let bestDelta = Infinity;
-  for (const e of streamEvents) {
-    const d = Math.abs(e.t - call.t);
-    if (d < bestDelta && d <= windowMs) {
-      bestDelta = d;
-      best = e;
-    }
-  }
-  return best ? { event: best, delta: bestDelta } : null;
-}
-
-console.log('\n=== CORRELATION (replace/refresh ↔ nearest event within ±300ms) ===');
-for (const c of actionCalls) {
-  if (c.name !== 'refreshMessages' && c.name !== 'replaceMessages') continue;
-  const hit = nearestEventForCall(c);
-  if (hit) {
-    const phase = (hit.event.data as Record<string, unknown> | undefined)?.phase;
-    console.log(
-      `  t=${pad(c.t, 7)}  ${c.name.padEnd(16)} ← Δ${pad(hit.delta, 4)}ms ${hit.event.type}` +
-        (phase ? ` phase=${phase}` : ''),
-    );
-  } else {
-    console.log(`  t=${pad(c.t, 7)}  ${c.name.padEnd(16)} ← (no event nearby — external trigger)`);
-  }
-}
-
-// ── 5. PER-KEY ASSISTANT GROWTH ───────────────────────────────────
-// For each messagesMap key, take the trailing message (presumably the assistant
-// reply — the role is not checked) and print a row whenever its id, cLen, rLen,
-// or the key's message count changes. If the timeline shows chunks arriving but
-// cLen never moves, that's the signature of "dispatch queue blocked / messageId mismatch".
-console.log('\n=== PER-KEY ASSISTANT GROWTH ===');
-const keysEverSeen = new Set<string>();
-for (const s of timeline) for (const k of Object.keys(s.byKey ?? {})) keysEverSeen.add(k);
-
-for (const key of keysEverSeen) {
-  console.log(`\n  key=${key}`);
-  let lastSig: string | null = null;
-  for (const s of timeline) {
-    const slot = s.byKey?.[key];
-    if (!slot) continue;
-    const last = slot.msgs.at(-1) as ProbeMessageSummary | undefined;
-    if (!last) continue;
-    const sig = `${last.id}|c${last.cLen}|r${last.rLen}|n${slot.n}`;
-    if (sig === lastSig) continue;
-    lastSig = sig;
-    console.log(
-      `    t=${pad(s.t, 7)}  msgN=${pad(slot.n, 3)}  ` +
-        `lastAssistant=${last.id}  cLen=${pad(last.cLen, 5)}  rLen=${pad(last.rLen, 5)}` +
-        `  runOps=${s.runOps}`,
-    );
-  }
-}
-
-// ── 6. ROLLBACKS (active-topic msgN / childN / role drops) ─────────
-console.log('\n=== ROLLBACKS (active-topic msgN / childN / role drops) ===');
-let prev: ProbeTimelineSample | null = null;
-const rollbacks: Array<{ t: number; topic: string | null; drops: string[] }> = [];
-
-const flatten = (s: ProbeTimelineSample) => {
-  if (!s.activeTopic) return [];
-  return Object.entries(s.byKey ?? {})
-    .filter(([k]) => k.includes(s.activeTopic!))
-    .flatMap(([, v]) => v.msgs);
-};
-
-for (const s of timeline) {
-  if (s.err) {
-    prev = null;
-    continue;
-  }
-  if (!prev || prev.activeTopic !== s.activeTopic) {
-    prev = s;
-    continue;
-  }
-  const prevMsgs = flatten(prev);
-  const curMsgs = flatten(s);
-  const drops: string[] = [];
-
-  if (curMsgs.length < prevMsgs.length) drops.push(`msgN ${prevMsgs.length}→${curMsgs.length}`);
-
-  let prevChild = 0;
-  let curChild = 0;
-  for (const m of prevMsgs) prevChild += m.chN ?? 0;
-  for (const m of curMsgs) curChild += m.chN ?? 0;
-  if (curChild < prevChild) drops.push(`childN ${prevChild}→${curChild}`);
-
-  const prevById = new Map(prevMsgs.map((m) => [m.id, m]));
-  for (const m of curMsgs) {
-    const pr = prevById.get(m.id);
-    if (!pr) continue;
-    if (m.cLen < pr.cLen) drops.push(`cLen[${m.id}] ${pr.cLen}→${m.cLen}`);
-    if (m.rLen < pr.rLen) drops.push(`rLen[${m.id}] ${pr.rLen}→${m.rLen}`);
-  }
-
-  if (drops.length) rollbacks.push({ t: s.t, topic: s.activeTopic, drops });
-  prev = s;
-}
-
-if (rollbacks.length === 0) {
-  console.log('  (none)');
-} else {
-  for (const r of rollbacks) {
-    const nearEvent = streamEvents
-      .filter((e) => Math.abs(e.t - r.t) <= 300)
-      .map((e) => `${e.type}${(e.data as any)?.phase ? ':' + (e.data as any).phase : ''}`);
-    const nearCall = actionCalls
-      .filter((c) => Math.abs(c.t - r.t) <= 300 && !c.name?.startsWith('MARK:'))
-      .map((c) => c.name);
-    console.log(
-      `  t=${pad(r.t, 7)}  topic=${r.topic}  ${r.drops.join(' | ')}` +
-        (nearEvent.length ? `  near-event:[${nearEvent.join(',')}]` : '') +
-        (nearCall.length ? `  near-call:[${nearCall.join(',')}]` : ''),
-    );
-  }
-}
@@ -1,37 +0,0 @@
-// Stops the events-probe timeline timer and stashes the full capture as a
-// JSON string on `window.__PROBE_LAST_DUMP_JSON`. `run.ts` wraps the bundle
-// in an IIFE that returns that global, which `agent-browser eval` prints to
-// stdout — the runner then persists it under `.agent-gateway/`.
-
-import type { ProbeDump } from './types';
-
-declare global {
-  interface Window {
-    __PROBE_LAST_DUMP_JSON?: string;
-  }
-}
-
-const w = window;
-
-if (w.__PROBE_TIMELINE_TIMER) {
-  clearInterval(w.__PROBE_TIMELINE_TIMER);
-  w.__PROBE_TIMELINE_TIMER = null;
-}
-
-const mutations = w.__PROBE_MUTATIONS ?? [];
-
-const dump: ProbeDump & { mutations: typeof mutations } = {
-  meta: {
-    t0: w.__PROBE_T0 ?? 0,
-    collectedAt: Date.now(),
-    sampleCount: (w.__PROBE_MSG_TIMELINE ?? []).length,
-    eventCount: (w.__PROBE_STREAM_EVENTS ?? []).length,
-    callCount: (w.__PROBE_ACTION_CALLS ?? []).length,
-  },
-  streamEvents: w.__PROBE_STREAM_EVENTS ?? [],
-  actionCalls: w.__PROBE_ACTION_CALLS ?? [],
-  timeline: w.__PROBE_MSG_TIMELINE ?? [],
-  mutations,
-};
-
-w.__PROBE_LAST_DUMP_JSON = JSON.stringify(dump);
@@ -1,637 +0,0 @@
-// LobeHub gateway raw-event-stream probe.
-//
-// Gateway-mode chats subscribe via WebSocket — NOT via the `/api/agent/stream`
-// SSE endpoint (that one belongs to the direct/client durable-agent runtime).
-// `AgentStreamClient` (`packages/agent-gateway-client/src/client.ts`) opens
-// `new WebSocket('wss://.../ws?operationId=...')`, then parses JSON frames in
-// its `onmessage` handler and re-emits `agent_event.event` objects to the
-// chat store.
-//
-// To capture the RAW gateway events before the store touches them, we wrap
-// `window.WebSocket` so that for any socket whose URL contains `operationId=`
-// we intercept the `onmessage` handler / `addEventListener('message')` and
-// log every `agent_event` frame.
-//
-// We *also* keep the `window.fetch` hook for `/api/agent/stream` so this
-// probe still works for direct-mode runs — but gateway-mode events come
-// through the WebSocket path.
-//
-// Buffers (read via `dump`):
-//   __PROBE_STREAM_EVENTS  — raw events parsed off the wire
-//   __PROBE_ACTION_CALLS   — replaceMessages / refreshMessages calls (best-effort)
-//   __PROBE_MSG_TIMELINE   — 200ms snapshots of every messagesMap key
-
-import type {
-  ProbeActionCall,
-  ProbeMessageSummary,
-  ProbeStreamEvent,
-  ProbeTimelineSample,
-} from './types';
-
-// Bundled by esbuild as an IIFE. Top-level code runs once on injection.
-
-const w = window;
-
-// ── Buffers ─────────────────────────────────────────────────────────
-
-declare global {
-  interface Window {
-    __PROBE_MUTATIONS?: Array<{
-      t: number;
-      key: string;
-      n: number;
-      last?: { id: string; role: string; cLen: number; rLen: number; updatedAt?: unknown };
-      prevLast?: { id: string; role: string; cLen: number; rLen: number };
-      delta?: string;
-    }>;
-    __PROBE_STORE_UNSUB?: () => void;
-  }
-}
-
-const events: ProbeStreamEvent[] = (w.__PROBE_STREAM_EVENTS ??= []);
-const calls: ProbeActionCall[] = (w.__PROBE_ACTION_CALLS ??= []);
-const timeline: ProbeTimelineSample[] = (w.__PROBE_MSG_TIMELINE ??= []);
-const mutations = (w.__PROBE_MUTATIONS ??= []);
-events.length = 0;
-calls.length = 0;
-timeline.length = 0;
-mutations.length = 0;
-
-const t0 = Date.now();
-w.__PROBE_T0 = t0;
-const now = (): number => Date.now() - t0;
-
-// ── Helpers ─────────────────────────────────────────────────────────
-
-function summarizeData(data: unknown): Record<string, unknown> | unknown {
-  if (!data || typeof data !== 'object') return data;
-  const src = data as Record<string, unknown>;
-  const out: Record<string, unknown> = {};
-  for (const k of Object.keys(src)) {
-    const v = src[k];
-    if (v == null) {
-      out[k] = v;
-    } else if (Array.isArray(v)) {
-      out[k] = `Array(${v.length})`;
-      if (k === 'uiMessages') {
-        out.uiMessagesPreview = v.slice(0, 5).map((m: any) => ({
-          id: (m.id ?? '').slice(-8),
-          role: m.role,
-          cLen: (m.content ?? '').length,
-          children: (m.children ?? []).length,
-          tools: (m.tools ?? []).length,
-          reasoning: (m.reasoning?.content ?? '').length,
-        }));
-        out.uiMessagesTotal = v.length;
-      }
-    } else if (typeof v === 'object') {
-      const obj = v as Record<string, unknown>;
-      out[k] =
-        'Object{' +
-        Object.keys(obj)
-          .slice(0, 6)
-          .map((kk) => kk + (typeof obj[kk] === 'string' ? `=${(obj[kk] as string).length}ch` : ''))
-          .join(',') +
-        '}';
-    } else if (typeof v === 'string') {
-      out[k] = v.length > 100 ? v.slice(0, 100) + `…(${v.length})` : v;
-    } else {
-      out[k] = v;
-    }
-  }
-  return out;
-}
-
-function summarizeMessages(msgs: any[]): ProbeMessageSummary[] {
-  return (msgs ?? []).slice(0, 80).map((m) => ({
-    id: (m.id ?? '').slice(-8),
-    role: m.role,
-    cLen: (m.content ?? '').length,
-    rLen: (m.reasoning?.content ?? '').length,
-    tools: (m.tools ?? []).length,
-    chN: (m.children ?? []).length,
-  }));
-}
-
-function shortStack(): string {
-  const raw = new Error('probe-stack').stack ?? '';
-  return raw
-    .split('\n')
-    .slice(3)
-    .filter((l) => !l.includes('probe-events') && !l.includes('node_modules'))
-    .map((l) => l.trim().replace(/^at\s+/, ''))
-    .slice(0, 6)
-    .join(' ← ');
-}
-
-function recordAgentEvent(args: {
-  transport: 'ws' | 'sse';
-  opId: string | null;
-  agentEvent: any;
-  eventId?: string | null;
-  rawLen?: number;
-}): void {
-  const { transport, opId, agentEvent, eventId, rawLen } = args;
-  if (!agentEvent || typeof agentEvent !== 'object') return;
-  events.push({
-    t: now(),
-    transport,
-    opIdTail: (opId ?? '').slice(-10),
-    eventId: eventId ?? null,
-    type: agentEvent.type,
-    stepIndex: agentEvent.stepIndex,
-    dataKeys: agentEvent.data ? Object.keys(agentEvent.data) : [],
-    data: summarizeData(agentEvent.data) as Record<string, unknown>,
-    rawLen,
-  });
-}
-
-// ── 1. Patch window.WebSocket for gateway WS events ────────────────
-
-if (!w.__PROBE_ORIG_WEBSOCKET) w.__PROBE_ORIG_WEBSOCKET = w.WebSocket;
-const OrigWS = w.__PROBE_ORIG_WEBSOCKET;
-
-function extractOpIdFromWsUrl(url: string | URL): string | null {
-  const m = String(url ?? '').match(/operationId=([^&]+)/);
-  return m ? decodeURIComponent(m[1]) : null;
-}
-
-function isGatewayWs(url: string | URL): boolean {
-  return String(url ?? '').includes('operationId=');
-}
-
-function handleWsFrame(rawData: unknown, opId: string | null): void {
-  const rawLen = typeof rawData === 'string' ? rawData.length : -1;
-  let parsed: any;
-  try {
-    parsed = typeof rawData === 'string' ? JSON.parse(rawData) : null;
-  } catch {
-    events.push({
-      t: now(),
-      transport: 'ws',
-      opIdTail: (opId ?? '').slice(-10),
-      type: '_PARSE_ERROR_',
-      raw: typeof rawData === 'string' && rawData.length < 400 ? rawData : '(non-string or large)',
-    });
-    return;
-  }
-  if (!parsed) return;
-
-  if (parsed.type === 'agent_event') {
-    recordAgentEvent({
-      transport: 'ws',
-      opId,
-      agentEvent: parsed.event,
-      eventId: parsed.id,
-      rawLen,
-    });
-  } else {
-    events.push({
-      t: now(),
-      transport: 'ws',
-      opIdTail: (opId ?? '').slice(-10),
-      type: '_SERVER_MSG_',
-      serverType: parsed.type,
-      rawLen,
-    });
-  }
-}
-
-// Wrap the constructor. Instance `constructor` will still reflect OrigWS
-// (we share prototypes), so use the `_WS_OPEN_` sentinel events to confirm
-// the patch is firing.
-function PatchedWebSocket(this: WebSocket, url: string | URL, protocols?: string | string[]) {
-  const ws: WebSocket = protocols == null ? new OrigWS(url) : new OrigWS(url, protocols);
-  const opId = extractOpIdFromWsUrl(url);
-  if (!isGatewayWs(url)) return ws;
-
-  events.push({
-    t: now(),
-    transport: 'ws',
-    opIdTail: (opId ?? '').slice(-10),
-    type: '_WS_OPEN_',
-    url: String(url),
-  });
-
-  // One observer listener that always fires, regardless of how the consumer
-  // (AgentStreamClient uses `ws.onmessage = …`) subscribes.
-  ws.addEventListener('message', (e) => {
-    try {
-      handleWsFrame((e as MessageEvent).data, opId);
-    } catch {
-      /* swallow */
-    }
-  });
-
-  ws.addEventListener('close', () => {
-    events.push({
-      t: now(),
-      transport: 'ws',
-      opIdTail: (opId ?? '').slice(-10),
-      type: '_WS_CLOSE_',
-    });
-  });
-
-  return ws;
-}
-
-// Preserve prototype + static fields so `instanceof WebSocket` and
-// `WebSocket.OPEN` constants still work.
-(PatchedWebSocket as unknown as { prototype: WebSocket }).prototype = OrigWS.prototype;
-for (const k of Object.keys(OrigWS) as Array<keyof typeof OrigWS>) {
-  try {
-    (PatchedWebSocket as any)[k] = (OrigWS as any)[k];
-  } catch {
-    /* readonly */
-  }
-}
-(['CONNECTING', 'OPEN', 'CLOSING', 'CLOSED'] as const).forEach((k) => {
-  (PatchedWebSocket as any)[k] = (OrigWS as any)[k];
-});
-w.WebSocket = PatchedWebSocket as unknown as typeof WebSocket;
-
-// ── 2. Patch window.fetch for `/api/agent/stream` (direct-mode SSE) ─
-
-if (!w.__PROBE_ORIG_FETCH) w.__PROBE_ORIG_FETCH = w.fetch.bind(w);
-const origFetch = w.__PROBE_ORIG_FETCH;
-
-function isAgentStreamUrl(input: RequestInfo | URL): boolean {
-  let url = '';
-  if (typeof input === 'string') url = input;
-  else if (input instanceof URL) url = input.toString();
-  else if (input && typeof (input as Request).url === 'string') url = (input as Request).url;
-  return url.includes('/api/agent/stream');
-}
-
-function extractOpIdFromHttpUrl(input: RequestInfo | URL): string | null {
-  const url = typeof input === 'string' ? input : (input as Request | URL).toString();
-  const m = url.match(/operationId=([^&]+)/);
-  return m ? decodeURIComponent(m[1]) : null;
-}
-
-function pushFromSSEFrame(rawFrame: string, opId: string | null): void {
-  const lines = rawFrame.split('\n');
-  let dataJson = '';
-  let evtName = 'message';
-  for (const line of lines) {
-    if (line.startsWith('event:')) evtName = line.slice(6).trim();
-    else if (line.startsWith('data:')) dataJson += line.slice(5).trim();
-  }
-  if (!dataJson) return;
-  let parsed: any;
-  try {
-    parsed = JSON.parse(dataJson);
-  } catch {
-    events.push({
-      t: now(),
-      transport: 'sse',
-      opIdTail: (opId ?? '').slice(-10),
-      type: '_PARSE_ERROR_',
-      sseEvent: evtName,
-      raw: dataJson.length > 400 ? dataJson.slice(0, 400) + '…' : dataJson,
-    });
-    return;
-  }
-  recordAgentEvent({
-    transport: 'sse',
-    opId,
-    agentEvent: parsed,
-    eventId: null,
-    rawLen: dataJson.length,
-  });
-}
-
-async function teeAndDrain(response: Response, opId: string | null): Promise<Response> {
-  if (!response.body) return response;
-  const [a, b] = response.body.tee();
-
-  void (async () => {
-    const reader = b.getReader();
-    const decoder = new TextDecoder();
-    let buf = '';
-    try {
-      while (true) {
-        const { value, done } = await reader.read();
-        if (done) break;
-        buf += decoder.decode(value, { stream: true });
-        let idx: number;
-
-        while ((idx = buf.indexOf('\n\n')) !== -1) {
-          const frame = buf.slice(0, idx);
-          buf = buf.slice(idx + 2);
-          if (frame.trim()) pushFromSSEFrame(frame, opId);
-        }
-      }
-      if (buf.trim()) pushFromSSEFrame(buf, opId);
-    } catch (e: any) {
-      events.push({
-        t: now(),
-        transport: 'sse',
-        opIdTail: (opId ?? '').slice(-10),
-        type: '_TEE_ERROR_',
-        message: String(e?.message ?? e),
-      });
-    }
-  })();
-
-  return new Response(a, {
-    headers: response.headers,
-    status: response.status,
-    statusText: response.statusText,
-  });
-}
-
-w.fetch = async function patchedFetch(input: RequestInfo | URL, init?: RequestInit) {
-  const response = await origFetch(input as any, init);
-  if (!isAgentStreamUrl(input)) return response;
-  const opId = extractOpIdFromHttpUrl(input);
-  const url =
-    typeof input === 'string'
-      ? input.split('?')[0]
-      : (input as Request | URL).toString().split('?')[0];
-  events.push({
-    t: now(),
-    transport: 'sse',
-    opIdTail: (opId ?? '').slice(-10),
-    type: '_CONNECTED_',
-    url,
-    status: response.status,
-  });
-  return teeAndDrain(response, opId);
-} as typeof fetch;
-
-// ── 3. Wrap store actions (best-effort for "who called replace") ────
-
-// Side-global stash for the original chat-store actions. Re-installs ALWAYS
-// rewrap from the originals so updates to the probe body take effect
-// without a page reload — using only a `__probeWrapped` flag on the chat
-// state object would freeze the first-installed wrapper across re-installs.
-declare global {
-  interface Window {
-    __PROBE_ORIG_REFRESH_MESSAGES?: any;
-    __PROBE_ORIG_REPLACE_MESSAGES?: any;
-  }
-}
-
-try {
-  const chat = w.__LOBE_STORES?.chat?.();
-  if (chat) {
-    // First-time install: cache the originals. Re-install: restore from
-    // the cached originals before wrapping again.
-    if (!w.__PROBE_ORIG_REFRESH_MESSAGES) w.__PROBE_ORIG_REFRESH_MESSAGES = chat.refreshMessages;
-    if (!w.__PROBE_ORIG_REPLACE_MESSAGES) w.__PROBE_ORIG_REPLACE_MESSAGES = chat.replaceMessages;
-    const origRefresh = w.__PROBE_ORIG_REFRESH_MESSAGES;
-    const origReplace = w.__PROBE_ORIG_REPLACE_MESSAGES;
-    chat.refreshMessages = origRefresh;
-    chat.replaceMessages = origReplace;
-
-    chat.refreshMessages = async function probeRefresh(this: unknown, ...args: any[]) {
-      calls.push({
-        t: now(),
-        name: 'refreshMessages',
-        args: { context: args[0] ?? null },
-        stack: shortStack(),
-      });
-      return origRefresh.apply(this, args);
-    };
-    chat.replaceMessages = function probeReplace(this: unknown, ...args: any[]) {
-      const msgs = (args[0] as any[]) ?? [];
-      const snapshot = msgs.slice(-2).map((m) => ({
-        id: (m.id ?? '').slice(-8),
-        role: m.role,
-        cLen: (m.content ?? '').length,
-        rLen: (m.reasoning?.content ?? '').length,
-        updatedAt: m.updatedAt,
-      }));
-      calls.push({
-        t: now(),
-        name: 'replaceMessages',
-        args: { count: msgs.length, params: args[1] ?? null, snapshot } as any,
-        stack: shortStack(),
-      });
-
-      // Pair the call with a mutation row so the analyzer can build a
-      // single ordered timeline across replaceMessages + dispatchMessage.
-      const stackTop = shortStack().split(' ← ')[0]?.slice(0, 80);
-      const last = msgs.at(-1);
-      const lastSum = last
-        ? {
-            id: (last.id ?? '').slice(-8),
-            role: last.role,
-            cLen: (last.content ?? '').length,
-            rLen: (last.reasoning?.content ?? '').length,
-            updatedAt: last.updatedAt,
-          }
-        : undefined;
-      const params: any = args[1] ?? {};
-      const ctxKey = params.context
-        ? `main_${params.context.agentId ?? '?'}_${
-            params.context.topicId ? 'tpc_' + params.context.topicId : 'new'
-          }`.replace('main_tpc_', 'main_') // crude key inference
-        : '(no-ctx)';
-      mutations.push({
-        t: now(),
-        key: ctxKey,
-        n: msgs.length,
-        last: lastSum,
-        delta: `replaceMessages(action=${params.action ?? '-'})  src=${stackTop ?? '-'}`,
-      });
-
-      return origReplace.apply(this, args);
-    };
-  }
-} catch (e: any) {
-  calls.push({ t: now(), name: '_WRAP_ERROR_', error: String(e?.message ?? e) });
-}
-
-// ── 3.5. Mutation log — wrap the TWO ChatStore writers (replaceMessages,
-// internal_dispatchMessage) to record EVERY dbMessagesMap[key] reference
-// change with a one-line "before/after last assistant message" delta. This
-// reveals dispatchMessage-driven collapses that the replaceMessages wrap
-// alone cannot see.
-
-declare global {
-  interface Window {
-    __PROBE_ORIG_DISPATCH_MESSAGE?: any;
-  }
-}
-
-try {
-  const chat = w.__LOBE_STORES?.chat?.();
-  if (chat?.internal_dispatchMessage) {
-    if (!w.__PROBE_ORIG_DISPATCH_MESSAGE)
-      w.__PROBE_ORIG_DISPATCH_MESSAGE = chat.internal_dispatchMessage;
-    const origDispatch = w.__PROBE_ORIG_DISPATCH_MESSAGE;
-    chat.internal_dispatchMessage = origDispatch;
-
-    chat.internal_dispatchMessage = function probeDispatch(this: unknown, payload: any, ctx?: any) {
-      // Snapshot BEFORE — read the would-be target key + last message.
-      const before = (() => {
-        try {
-          const state = w.__LOBE_STORES?.chat?.();
-          if (!state) return null;
-          // Replicate state.internal_getConversationContext logic enough to
-          // resolve a key — but most callers pass operationId on ctx, and
-          // operationId-keyed lookup needs store internals. Easiest: snapshot
-          // ALL keys' last-assistant cLen and compare BEFORE vs AFTER below.
-          const map = state.dbMessagesMap ?? {};
-          const out: Record<string, any> = {};
-          for (const k of Object.keys(map)) {
-            const last = (map[k] ?? []).at(-1);
-            out[k] = last
-              ? {
-                  id: (last.id ?? '').slice(-8),
-                  cLen: (last.content ?? '').length,
-                  rLen: (last.reasoning?.content ?? '').length,
-                  n: map[k].length,
-                }
-              : { n: 0 };
-          }
-          return out;
-        } catch {
-          return null;
-        }
-      })();
-
-      const result = origDispatch.apply(this, [payload, ctx]);
-
-      // Snapshot AFTER — find which key(s) actually changed.
-      try {
-        const state = w.__LOBE_STORES?.chat?.();
-        if (state && before) {
-          const map = state.dbMessagesMap ?? {};
-          for (const k of Object.keys(map)) {
-            const last = (map[k] ?? []).at(-1);
-            const beforeSnap = before[k];
-            const afterSnap = last
-              ? {
-                  id: (last.id ?? '').slice(-8),
-                  cLen: (last.content ?? '').length,
-                  rLen: (last.reasoning?.content ?? '').length,
-                  n: map[k].length,
-                }
-              : { n: 0 };
-            const changed =
-              !beforeSnap ||
-              beforeSnap.n !== afterSnap.n ||
-              beforeSnap.id !== (afterSnap as any).id ||
-              beforeSnap.cLen !== (afterSnap as any).cLen ||
-              beforeSnap.rLen !== (afterSnap as any).rLen;
-            if (!changed) continue;
-            let delta = '';
-            if (beforeSnap?.id !== undefined && beforeSnap.id !== (afterSnap as any).id)
-              delta += `id:${beforeSnap.id}→${(afterSnap as any).id};`;
-            if (
-              beforeSnap?.cLen !== undefined &&
-              (afterSnap as any).cLen !== undefined &&
-              (afterSnap as any).cLen < beforeSnap.cLen
-            )
-              delta += `cLen↓${beforeSnap.cLen}→${(afterSnap as any).cLen};`;
-            if (
-              beforeSnap?.rLen !== undefined &&
-              (afterSnap as any).rLen !== undefined &&
-              (afterSnap as any).rLen < beforeSnap.rLen
-            )
-              delta += `rLen↓${beforeSnap.rLen}→${(afterSnap as any).rLen};`;
-            if (beforeSnap?.n !== undefined && afterSnap.n < beforeSnap.n)
-              delta += `n↓${beforeSnap.n}→${afterSnap.n};`;
-            mutations.push({
-              t: now(),
-              key: k,
-              n: afterSnap.n,
-              last: (afterSnap as any).id ? (afterSnap as any) : undefined,
-              prevLast: beforeSnap?.id ? beforeSnap : undefined,
-              delta: delta || `dispatch:${payload?.type}`,
-            });
-          }
-        }
-      } catch (e: any) {
-        mutations.push({
-          t: now(),
-          key: '_DISPATCH_PROBE_ERROR_',
-          n: -1,
-          delta: String(e?.message ?? e),
-        });
-      }
-      return result;
-    };
-  }
-} catch (e: any) {
-  calls.push({ t: now(), name: '_DISPATCH_WRAP_ERROR_', error: String(e?.message ?? e) });
-}
-
-// ── 4. Periodic per-key timeline snapshots ─────────────────────────
-
-function captureTimeline(): void {
-  try {
-    const c = w.__LOBE_STORES?.chat?.();
-    if (!c) return;
-    const msgsMap = (c.messagesMap ?? {}) as Record<string, any[]>;
-    const dbMap = (c.dbMessagesMap ?? {}) as Record<string, any[]>;
-    const byKey: ProbeTimelineSample['byKey'] = {};
-    for (const k of Object.keys(msgsMap)) {
-      const display = msgsMap[k] ?? [];
-      const db = dbMap[k] ?? [];
-      if (display.length === 0 && db.length === 0) continue;
-      byKey[k] = {
-        n: display.length,
-        dbN: db.length,
-        msgs: summarizeMessages(display),
-      };
-    }
-    const ops = Object.values((c.operations ?? {}) as Record<string, any>);
-    timeline.push({
-      t: now(),
-      activeTopic: ((c.activeTopicId as string | null) ?? '').slice(-10) || null,
-      keys: Object.keys(byKey),
-      byKey,
-      runOps: ops.filter((o: any) => o.status === 'running').length,
-    });
-  } catch (e: any) {
-    timeline.push({
-      t: now(),
-      activeTopic: null,
-      keys: [],
-      byKey: {},
-      runOps: 0,
-      err: e?.message ?? String(e),
-    });
-  }
-}
-captureTimeline();
-if (w.__PROBE_TIMELINE_TIMER) clearInterval(w.__PROBE_TIMELINE_TIMER);
-w.__PROBE_TIMELINE_TIMER = setInterval(captureTimeline, 200);
-
-// ── 5. Tab-switch helpers ──────────────────────────────────────────
-
-function listTopBarTabs(): HTMLElement[] {
-  return Array.from(
-    document.querySelectorAll<HTMLElement>(
-      '[data-insp-path*="TabItem.tsx"][data-contextmenu-trigger]',
-    ),
-  ).filter((t) => t.getBoundingClientRect().top < 30);
-}
-
-w.__listTabs = () =>
-  listTopBarTabs().map((t, i) => ({
-    i,
-    key: t.getAttribute('data-contextmenu-trigger'),
-    active: t.getAttribute('data-active') === 'true',
-    title: (t.innerText ?? '').slice(0, 60),
-  }));
-
-w.__clickTabByKey = (key: string) => {
-  const tab = listTopBarTabs().find((t) => t.getAttribute('data-contextmenu-trigger') === key);
-  if (!tab) return 'not found: ' + key;
-  if (tab.getAttribute('data-active') === 'true') return 'already active: ' + key;
-  tab.click();
-  return 'clicked key=' + key;
-};
-
-w.__PROBE_EVENT = (name: string) => {
-  calls.push({ t: now(), name: 'MARK:' + name });
-};
-
-// `run.ts` wraps the bundle in an IIFE and appends a `return <confirmation>`
-// after the bundle body — agent-browser then prints the confirmation back to
-// the operator. Nothing to do here at the end of the module body.
@@ -1,211 +0,0 @@
-// CLI for the agent-gateway probe.
-//
-// Bundles the TS probes with esbuild, pipes them into `agent-browser eval`,
-// and persists dumps under `.agent-gateway/` (gitignored) for later use as
-// streaming-replay test fixtures.
-//
-// Commands:
-//   bun run .agents/skills/agent-testing/scripts/agent-gateway/run.ts install
-//       Bundle probe-events.ts and inject into the CDP-attached browser.
-//       Re-installing clears all buffers and re-patches WebSocket / fetch.
-//
-//   bun run .agents/skills/agent-testing/scripts/agent-gateway/run.ts dump [name]
-//       Stop the timeline timer, fetch the capture as JSON, write it to
-//       `.agent-gateway/<name>-<YYYYMMDD-HHmmss>.json`. `name` defaults to
-//       `dump`. Prints the absolute path written.
-//
-//   bun run .agents/skills/agent-testing/scripts/agent-gateway/run.ts analyze [path]
-//       Run analyze-events.ts on the dump. `path` defaults to the most
-//       recently modified file in `.agent-gateway/`.
-//
-// Optional flags:
-//   --cdp <port>     CDP port (default 9222)
-//   --browser <bin>  agent-browser binary (default 'agent-browser')
-
-import { spawn } from 'node:child_process';
-import { mkdirSync, readdirSync, statSync, writeFileSync } from 'node:fs';
-import path from 'node:path';
-import { fileURLToPath } from 'node:url';
-
-const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url));
-// .agents/skills/agent-testing/scripts/agent-gateway/ → 5 levels up
-const PROJECT_ROOT = path.resolve(SCRIPT_DIR, '../../../../..');
-const DUMP_DIR = path.join(PROJECT_ROOT, '.agent-gateway');
-
-interface Flags {
-  browser: string;
-  cdp: string;
-  positional: string[];
-}
-
-function parseFlags(argv: string[]): Flags {
-  const out: Flags = { cdp: '9222', browser: 'agent-browser', positional: [] };
-  for (let i = 0; i < argv.length; i++) {
-    const a = argv[i];
-    if (a === '--cdp') out.cdp = argv[++i] ?? out.cdp;
-    else if (a === '--browser') out.browser = argv[++i] ?? out.browser;
-    else out.positional.push(a);
-  }
-  return out;
-}
-
-async function bundle(entry: string): Promise<string> {
-  // Bun.build is built into the Bun runtime — no external dep needed.
-  const r = await Bun.build({
-    entrypoints: [path.join(SCRIPT_DIR, entry)],
-    target: 'browser',
-    format: 'esm',
-    minify: false,
-  });
-  if (!r.success) {
-    const msgs = r.logs.map((l) => `${l.level}: ${l.message}`).join('\n');
-    throw new Error(`bundle failed for ${entry}:\n${msgs}`);
-  }
-  return await r.outputs[0].text();
-}
-
-function wrapIife(body: string, returnExpr: string): string {
-  // Wrap as an IIFE that swallows the bundled top-level (top-level `const`
-  // declarations get scoped to the IIFE, so re-injection doesn't conflict)
-  // and returns the configured expression — which `agent-browser eval`
-  // captures and prints to stdout.
-  return `(() => {\n${body}\n;return ${returnExpr};\n})()`;
-}
-
-function runAgentBrowserEval(flags: Flags, script: string): Promise<string> {
-  return new Promise((resolveP, rejectP) => {
-    const child = spawn(flags.browser, ['--cdp', flags.cdp, 'eval', '--stdin'], {
-      stdio: ['pipe', 'pipe', 'inherit'],
-    });
-    let stdout = '';
-    child.stdout.on('data', (chunk: Buffer) => {
-      stdout += chunk.toString('utf8');
-    });
-    child.on('error', rejectP);
-    child.on('close', (code) => {
-      if (code === 0) resolveP(stdout);
-      else rejectP(new Error(`agent-browser exited ${code}`));
-    });
-    child.stdin.write(script);
-    child.stdin.end();
-  });
-}
-
-// agent-browser prints eval results as JSON (string values are quoted).
-function unquoteAgentBrowserResult(raw: string): string {
-  const trimmed = raw.trim();
-  if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
-    try {
-      return JSON.parse(trimmed) as string;
-    } catch {
-      /* fall through */
-    }
-  }
-  return trimmed;
-}
-
-function isoStamp(): string {
-  const d = new Date();
-  const yyyy = d.getFullYear();
-  const mm = String(d.getMonth() + 1).padStart(2, '0');
-  const dd = String(d.getDate()).padStart(2, '0');
-  const hh = String(d.getHours()).padStart(2, '0');
-  const mi = String(d.getMinutes()).padStart(2, '0');
-  const ss = String(d.getSeconds()).padStart(2, '0');
-  return `${yyyy}${mm}${dd}-${hh}${mi}${ss}`;
-}
-
-function ensureDumpDir(): void {
-  mkdirSync(DUMP_DIR, { recursive: true });
-}
-
-function latestDump(): string | null {
-  ensureDumpDir();
-  const entries = readdirSync(DUMP_DIR)
-    .filter((f) => f.endsWith('.json'))
-    .map((f) => ({ f, mtime: statSync(path.join(DUMP_DIR, f)).mtimeMs }))
-    .sort((a, b) => b.mtime - a.mtime);
-  return entries[0] ? path.join(DUMP_DIR, entries[0].f) : null;
-}
-
-// ── Commands ────────────────────────────────────────────────────────
-
-async function cmdInstall(flags: Flags): Promise<void> {
-  const body = await bundle('probe-events.ts');
-  const installMsg = JSON.stringify(
-    'events probe installed: WebSocket+fetch interception. ' +
-      'WS captures operationId= sockets (gateway), fetch captures /api/agent/stream (direct).',
-  );
-  const script = wrapIife(body, installMsg);
-  const out = await runAgentBrowserEval(flags, script);
-  console.log(unquoteAgentBrowserResult(out));
-}
-
-async function cmdDump(flags: Flags): Promise<void> {
-  const name = flags.positional[1] ?? 'dump';
-  const body = await bundle('probe-dump.ts');
-  const script = wrapIife(body, 'window.__PROBE_LAST_DUMP_JSON');
-  const raw = await runAgentBrowserEval(flags, script);
-  const json = unquoteAgentBrowserResult(raw);
-  ensureDumpDir();
-  const filename = `${name}-${isoStamp()}.json`;
-  const dumpPath = path.join(DUMP_DIR, filename);
-  writeFileSync(dumpPath, json, 'utf8');
-  // Validate by parsing the meta header so we error early on bad capture
-  try {
-    const parsed = JSON.parse(json) as {
-      meta?: { eventCount?: number; callCount?: number; sampleCount?: number };
-    };
-    const meta = parsed.meta ?? {};
-    console.log(
-      `wrote ${dumpPath}  (${json.length} bytes  events=${meta.eventCount ?? '?'}  ` +
-        `calls=${meta.callCount ?? '?'}  samples=${meta.sampleCount ?? '?'})`,
-    );
-  } catch {
-    console.log(`wrote ${dumpPath}  (${json.length} bytes — JSON.parse failed; see file)`);
-  }
-}
-
-async function cmdAnalyze(flags: Flags): Promise<void> {
-  const target = flags.positional[1] ?? latestDump();
-  if (!target) {
-    console.error('no dump file found. run `dump` first or pass a path.');
-    process.exit(1);
-  }
-  const child = spawn('bun', ['run', path.join(SCRIPT_DIR, 'analyze-events.ts'), target], {
-    stdio: 'inherit',
-  });
-  await new Promise<void>((resolveP, rejectP) => {
-    child.on('error', rejectP);
-    child.on('close', (code) => (code === 0 ? resolveP() : rejectP(new Error(`exit ${code}`))));
-  });
-}
-
-// ── Entry point ─────────────────────────────────────────────────────
-
-const flags = parseFlags(process.argv.slice(2));
-const cmd = flags.positional[0];
-
-const usage = `usage:
-  bun run run.ts install [--cdp 9222]
-  bun run run.ts dump [name] [--cdp 9222]
-  bun run run.ts analyze [path]
-`;
-
-if (!cmd) {
-  console.error(usage);
-  process.exit(1);
-}
-
-try {
-  if (cmd === 'install') await cmdInstall(flags);
-  else if (cmd === 'dump') await cmdDump(flags);
-  else if (cmd === 'analyze') await cmdAnalyze(flags);
-  else {
-    console.error(`unknown command: ${cmd}\n\n${usage}`);
-    process.exit(1);
-  }
-} catch (e: any) {
-  console.error(e?.stack ?? e);
-  process.exit(1);
-}
@@ -1,113 +0,0 @@
-// Shared types between the in-browser probe and the Node-side analyzer.
-// Kept tiny on purpose — anything the analyzer can re-derive is left off.
-
-export interface ProbeStreamEvent {
-  /** Summarized payload — long strings truncated, arrays printed as Array(N) */
-  data?: Record<string, unknown>;
-  /** Keys present on the event's `data` payload — useful at a glance */
-  dataKeys?: string[];
-  /** ServerMessage.id — gateway WS frames carry an event-id we may resume from */
-  eventId?: string | null;
-  message?: string;
-  /** Last 10 chars of the operationId (full id is excessively long) */
-  opIdTail: string;
-  raw?: string;
-  /** Raw frame byte length, when applicable */
-  rawLen?: number;
-  /** For non-agent_event server frames (auth_success, heartbeat_ack, …) */
-  serverType?: string;
-  sseEvent?: string;
-  status?: number;
-  stepIndex?: number;
-  /** Milliseconds since the probe's t0 (install time). */
-  t: number;
-  /** 'ws' for gateway WebSocket frames, 'sse' for direct /api/agent/stream */
-  transport: 'ws' | 'sse';
-  /** Either the AgentStreamEvent.type, or a probe sentinel like `_WS_OPEN_` */
-  type: string;
-  url?: string;
-}
-
-export interface ProbeActionCall {
-  args?: {
-    count?: number;
-    context?: unknown;
-    params?: unknown;
-  };
-  error?: string;
-  /** `replaceMessages` / `refreshMessages` / `MARK:<label>` / `_WRAP_ERROR_` */
-  name: string;
-  stack?: string;
-  t: number;
-}
-
-export interface ProbeMessageSummary {
-  /** children.length */
-  chN: number;
-  /** content.length */
-  cLen: number;
-  /** Last 8 chars of the message id */
-  id: string;
-  /** reasoning.content.length */
-  rLen: number;
-  role: string;
-  /** tools.length */
-  tools: number;
-}
-
-export interface ProbeTimelineSample {
-  /** Last 10 chars of activeTopicId, or null */
-  activeTopic: string | null;
-  /** Per-key breakdown: display count, db count, message summaries */
-  byKey: Record<
-    string,
-    {
-      n: number;
-      dbN: number;
-      msgs: ProbeMessageSummary[];
-    }
-  >;
-  err?: string;
-  /** All messagesMap keys that have content at this moment */
-  keys: string[];
-  /** Number of operations in 'running' status */
-  runOps: number;
-  t: number;
-}
-
-export interface ProbeDumpMeta {
-  callCount: number;
-  /** Date.now() at dump call */
-  collectedAt: number;
-  eventCount: number;
-  sampleCount: number;
-  /** Date.now() at probe install */
-  t0: number;
-}
-
-export interface ProbeDump {
-  actionCalls: ProbeActionCall[];
-  meta: ProbeDumpMeta;
-  streamEvents: ProbeStreamEvent[];
-  timeline: ProbeTimelineSample[];
-}
-
-/**
- * Globals the probe attaches to `window`. Keeps `as any` casts at the boundary
- * instead of sprinkling them through the probe body.
- */
-declare global {
-  interface Window {
-    __clickTabByKey?: (key: string) => string;
-    __listTabs?: () => Array<{ i: number; key: string | null; active: boolean; title: string }>;
-    __LOBE_STORES?: Record<string, () => any>;
-    __PROBE_ACTION_CALLS?: ProbeActionCall[];
-    __PROBE_EVENT?: (label: string) => void;
-    __PROBE_MSG_TIMELINE?: ProbeTimelineSample[];
-    __PROBE_ORIG_FETCH?: typeof fetch;
-    __PROBE_ORIG_WEBSOCKET?: typeof WebSocket;
-    __PROBE_STREAM_EVENTS?: ProbeStreamEvent[];
-    __PROBE_T0?: number;
-    __PROBE_TIMELINE_TIMER?: ReturnType<typeof setInterval> | null;
-  }
-}
@@ -1,95 +0,0 @@
-#!/usr/bin/env bash
-# app-probe.sh — standardized probes for a running LobeHub app (Electron via
-# CDP, or a web agent-browser session). Use these instead of hand-rolling
-# `window.__LOBE_STORES` eval snippets — especially the auth check.
-#
-# Usage:
-#   app-probe.sh auth              # { isSignedIn, userId } from the user store
-#   app-probe.sh route             # current SPA route
-#   app-probe.sh ops               # running chat operations (type / status / startTime)
-#   app-probe.sh goto <path>       # navigate the SPA to a route (full reload), e.g. goto /agent/agt_xxx
-#   app-probe.sh errors-install    # install a console.error interceptor
-#   app-probe.sh errors            # dump errors captured since errors-install
-#
-# Target selection (default: Electron over CDP 9222):
-#   AB_TARGET="--cdp 9222"             # Electron (default; CDP_PORT also honored)
-#   AB_TARGET="--session lobehub-dev"  # web agent-browser session
-#
-# Common routes (desktop SPA): /  /agent/<agentId>  /agent/<agentId>/<topicId>
-#   /task  /task/<taskId>  /page  /settings  /community
-
-set -euo pipefail
-
-AB_TARGET="${AB_TARGET:---cdp ${CDP_PORT:-9222}}"
-
-run_eval() {
-  # shellcheck disable=SC2086
-  agent-browser $AB_TARGET eval --stdin
-}
-
-case "${1:-}" in
-  auth)
-    run_eval << 'EVALEOF'
-(function () {
-  var stores = window.__LOBE_STORES;
-  if (!stores || !stores.user) return JSON.stringify({ ok: false, reason: 'no user store — app not loaded yet?' });
-  var u = stores.user();
-  return JSON.stringify({ ok: !!u.isSignedIn, isSignedIn: !!u.isSignedIn, userId: (u.user && u.user.id) || null });
-})()
-EVALEOF
-    ;;
-  route)
-    run_eval << 'EVALEOF'
-location.pathname + location.search + location.hash
-EVALEOF
-    ;;
-  ops)
-    run_eval << 'EVALEOF'
-(function () {
-  var stores = window.__LOBE_STORES;
-  if (!stores || !stores.chat) return JSON.stringify({ ok: false, reason: 'no chat store — open a conversation first' });
-  var ops = Object.values(stores.chat().operations || {});
-  var running = ops.filter(function (o) { return o.status === 'running'; });
-  return JSON.stringify({
-    ok: true,
-    running: running.map(function (o) { return { startTime: o.metadata && o.metadata.startTime, type: o.type }; }),
-    runningCount: running.length,
-    total: ops.length,
-  });
-})()
-EVALEOF
-    ;;
-  goto)
-    TARGET_PATH="${2:?Usage: app-probe.sh goto <path>}"
-    # shellcheck disable=SC2086
-    agent-browser $AB_TARGET eval "location.href = '$TARGET_PATH'" > /dev/null
-    sleep 2
-    bash "${BASH_SOURCE[0]}" route
-    ;;
-  errors-install)
-    run_eval << 'EVALEOF'
-(function () {
-  window.__CAPTURED_ERRORS = [];
-  var orig = console.error;
-  console.error = function () {
-    var msg = Array.from(arguments).map(function (a) {
-      if (a instanceof Error) return a.message;
-      return typeof a === 'object' ? JSON.stringify(a) : String(a);
-    }).join(' ');
-    window.__CAPTURED_ERRORS.push(msg);
-    orig.apply(console, arguments);
-  };
-  return 'installed';
-})()
-EVALEOF
-    ;;
-  errors)
-    run_eval << 'EVALEOF'
-JSON.stringify(window.__CAPTURED_ERRORS || 'interceptor not installed — run errors-install first')
-EVALEOF
-    ;;
-  *)
-    echo "Usage: $0 {auth|route|ops|goto <path>|errors-install|errors}" >&2
-    exit 2
-    ;;
-esac
@@ -1,61 +0,0 @@
-#!/usr/bin/env bash
-# record-gif.sh — capture a frame sequence via agent-browser (CDP) and
-# synthesize a GIF for embedding in a test report.
-#
-# Use this whenever the asserted behavior is about CHANGE OVER TIME —
-# streaming output, a ticking timer, loading states, animations. A static
-# screenshot cannot prove those; a GIF can. Cloud-portable: frames come from
-# CDP rendering, no OS-level screen capture.
-#
-# Usage:
-#   record-gif.sh <output.gif> <duration_seconds> [fps]
-#
-#   AB_TARGET="--cdp 9222"             # Electron (default; CDP_PORT honored)
-#   AB_TARGET="--session lobehub-dev"  # web agent-browser session
-#   GIF_WIDTH=960                      # output width (px), default 960
-#
-# Requires ffmpeg (`brew install ffmpeg`). Effective fps is capped by
-# screenshot latency (~0.3-0.5s per frame); 1-2 fps is the realistic range.
-#
-# Example — record a 12s run and embed it in the report:
-#   ./record-gif.sh "$DIR/assets/case2-tray-running.gif" 12 2 &
-#   GIF_PID=$!
-#   # ... trigger the streaming behavior ...
-#   wait $GIF_PID
-
-set -euo pipefail
-
-OUT="${1:?Usage: record-gif.sh <output.gif> <duration_seconds> [fps]}"
-DUR="${2:?Usage: record-gif.sh <output.gif> <duration_seconds> [fps]}"
-FPS="${3:-2}"
-AB_TARGET="${AB_TARGET:---cdp ${CDP_PORT:-9222}}"
-GIF_WIDTH="${GIF_WIDTH:-960}"
-
-command -v ffmpeg > /dev/null || {
-  echo "ffmpeg not found — install with: brew install ffmpeg" >&2
-  exit 1
-}
-
-TMP=$(mktemp -d)
-trap 'rm -rf "$TMP"' EXIT
-
-FRAMES=$((DUR * FPS))
-INTERVAL=$(python3 -c "print(1 / $FPS)")
-
-for i in $(seq -f '%04g' 1 "$FRAMES"); do
-  # shellcheck disable=SC2086
-  agent-browser $AB_TARGET screenshot "$TMP/frame-$i.png" > /dev/null 2>&1 || true
-  sleep "$INTERVAL"
-done
-
-CAPTURED=$(find "$TMP" -name 'frame-*.png' | wc -l | tr -d ' ')
-[ "$CAPTURED" -gt 0 ] || {
-  echo "no frames captured — is the app reachable via $AB_TARGET?" >&2
-  exit 1
-}
-
-ffmpeg -y -loglevel error -framerate "$FPS" -pattern_type glob -i "$TMP/frame-*.png" \
-  -vf "fps=$FPS,scale=$GIF_WIDTH:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse" \
-  "$OUT"
-
-echo "$OUT ($CAPTURED frames @ ${FPS}fps)"
@@ -1,74 +0,0 @@
-#!/usr/bin/env bash
-# report-init.sh — scaffold a structured test report under .records/reports/.
-#
-# Format spec and evidence rules: ../references/report.md
-#
-# Usage:
-#   report-init.sh <slug> [title]
-#
-# Prints the report directory path (capture it: DIR=$(report-init.sh my-test)).
-
-set -euo pipefail
-
-SLUG="${1:?Usage: report-init.sh <slug> [title]}"
-TITLE="${2:-$SLUG}"
-
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
-TS="$(date +%Y%m%d-%H%M%S)"
-DIR="$REPO_ROOT/.records/reports/$TS-$SLUG"
-mkdir -p "$DIR/assets"
-
-BRANCH=$(git -C "$REPO_ROOT" branch --show-current 2> /dev/null || echo "unknown")
-COMMIT=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2> /dev/null || echo "unknown")
-DATE_HUMAN=$(date '+%Y-%m-%d %H:%M')
-DATE_ISO=$(date '+%Y-%m-%dT%H:%M:%S%z')
-
-cat > "$DIR/report.md" << EOF
-# Test Report: $TITLE
-
-## Scope
-
-<!-- What changed / what is being verified -->
-
- Branch: \`$BRANCH\`
- Commit: \`$COMMIT\`
- Date: $DATE_HUMAN
-
-## Environment
-
- Server: <!-- e.g. http://localhost:3010 -->
- Surfaces: <!-- cli / electron / web / bot:<platform> -->
-
-## Cases
-
-| # | Case | Surface | Steps | Expected | Actual | Status | Evidence |
-| - | ---- | ------- | ----- | -------- | ------ | ------ | -------- |
-| 1 |      |         |       |          |        |        |          |
-
-## Evidence
-
-<!-- Embed screenshots: ![case 1](assets/case1.png) -->
-<!-- CLI transcripts in fenced blocks, with the exact command -->
-
-## Verdict
-
- Passed: 0 / 0
- Failed: 0
- Blocked: 0
- Score (optional): —
- Open issues / follow-ups:
-EOF
-
-cat > "$DIR/result.json" << EOF
-{
-  "title": "$TITLE",
-  "createdAt": "$DATE_ISO",
-  "branch": "$BRANCH",
-  "commit": "$COMMIT",
-  "surfaces": [],
-  "cases": [],
-  "summary": { "total": 0, "passed": 0, "failed": 0, "blocked": 0, "verdict": "pending" }
-}
-EOF
-
-echo "$DIR"
@@ -1,174 +0,0 @@
-#!/usr/bin/env bash
-# setup-auth.sh — one-stop auth setup & check for local agent testing.
-#
-# Auth is the gate for all automated testing: prepare it BEFORE writing any
-# test step. Background and failure modes: ../references/auth.md
-#
-# Usage:
-#   setup-auth.sh status        # check server + CLI + web auth readiness
-#   setup-auth.sh cli           # interactive CLI device-code login (run by a human)
-#   setup-auth.sh web           # stdin = Cookie header -> inject into agent-browser session
-#   setup-auth.sh web-verify    # live-check the agent-browser session is authenticated
-#
-# Env:
-#   SERVER_URL  (default http://localhost:3010)   dev server under test
-#   SESSION     (default lobehub-dev)             agent-browser session name
-#   AUTH_DIR    (default ~/.lobehub-agent-testing) where web state is persisted
-
-set -euo pipefail
-
-SERVER_URL="${SERVER_URL:-http://localhost:3010}"
-SESSION="${SESSION:-lobehub-dev}"
-AUTH_DIR="${AUTH_DIR:-$HOME/.lobehub-agent-testing}"
-STATE_FILE="$AUTH_DIR/web-state.json"
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
-CLI_HOME="$REPO_ROOT/apps/cli/.lobehub-dev"
-
-ok()   { printf '  \033[32m✔\033[0m %s\n' "$1"; }
-bad()  { printf '  \033[31m✘\033[0m %s\n' "$1"; }
-note() { printf '      %s\n' "$1"; }
-
-check_server() {
-  local code
-  code=$(curl -s -o /dev/null -w '%{http_code}' "$SERVER_URL/" 2> /dev/null || true)
-  if [[ "$code" =~ ^[23] ]]; then
-    ok "dev server reachable at $SERVER_URL"
-  else
-    bad "dev server NOT reachable at $SERVER_URL (http_code='$code')"
-    note "start it: pnpm run dev:next  (see references/dev-server.md)"
-    return 1
-  fi
-}
-
-check_cli() {
-  if [[ -f "$CLI_HOME/settings.json" ]] && grep -q "$SERVER_URL" "$CLI_HOME/settings.json"; then
-    ok "CLI logged in to $SERVER_URL (creds: apps/cli/.lobehub-dev)"
-  else
-    bad "CLI not logged in to $SERVER_URL"
-    note "ask the user to run:"
-    note "cd apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server $SERVER_URL"
-    return 1
-  fi
-}
-
-check_web() {
-  if [[ -f "$STATE_FILE" ]]; then
-    ok "web auth state saved ($STATE_FILE)"
-    note "live-verify: $0 web-verify"
-  else
-    bad "no web auth state for agent-browser"
-    note "copy the Cookie header from Chrome DevTools (Network tab), then:"
-    note "pbpaste | $0 web   (see references/auth.md)"
-    return 1
-  fi
-}
-
-check_electron() {
-  local cdp_port="${CDP_PORT:-9222}"
-  if ! curl -s -o /dev/null --max-time 2 "http://localhost:$cdp_port/json/version" 2> /dev/null; then
-    note "electron: not running (CDP $cdp_port unreachable) — start with electron-dev.sh; check skipped"
-    return 0
-  fi
-  local probe result
-  probe="$(dirname "${BASH_SOURCE[0]}")/app-probe.sh"
-  result=$(bash "$probe" auth 2> /dev/null || true)
-  # agent-browser eval returns the JSON string with escaped quotes — normalize.
-  result="${result//\\/}"
-  if [[ "$result" == *'"isSignedIn":true'* ]]; then
-    ok "electron app signed in ($result)"
-  else
-    bad "electron app NOT signed in ($result)"
-    note "log in once manually inside the app (state persists across restarts)"
-    return 1
-  fi
-}
-
-cmd_status() {
-  echo "agent-testing auth status (SERVER_URL=$SERVER_URL):"
-  local rc=0
-  check_server || rc=1
-  check_cli || rc=1
-  check_web || rc=1
-  check_electron || rc=1
-  if [[ $rc -eq 0 ]]; then
-    echo "all green — safe to start automated testing."
-  else
-    echo "auth NOT ready — fix the ✘ items before writing any test step."
-  fi
-  return $rc
-}
-
-cmd_cli() {
-  echo "Starting CLI device-code login against $SERVER_URL ..."
-  echo "(opens a browser authorization — must be run by a human in a terminal)"
-  cd "$REPO_ROOT/apps/cli"
-  LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server "$SERVER_URL"
-}
-
-# Build a Playwright storageState file from a raw Cookie header on stdin,
-# keeping only the better-auth cookies. See references/auth.md for why the
-# header must come from a Network request (HttpOnly) and why httpOnly=false.
-cmd_web() {
-  mkdir -p "$AUTH_DIR"
-  python3 - "$STATE_FILE" << 'PY'
-import json, sys, time
-
-raw = sys.stdin.read().strip()
-if raw.lower().startswith("cookie:"):
-    raw = raw.split(":", 1)[1].strip()
-
-WANTED = {"better-auth.session_token", "better-auth.state"}
-exp = int(time.time()) + 30 * 24 * 3600  # 30 days
-
-cookies = []
-for pair in raw.split("; "):
-    if "=" not in pair:
-        continue
-    name, _, value = pair.partition("=")
-    if name not in WANTED:
-        continue
-    cookies.append({
-        "name": name,
-        "value": value,
-        "domain": "localhost",
-        "path": "/",
-        "expires": exp,
-        "httpOnly": False,
-        "secure": False,
-        "sameSite": "Lax",
-    })
-
-if not cookies:
-    sys.stderr.write("no better-auth cookies found in input — paste the raw Cookie header from a Network request\n")
-    sys.exit(1)
-
-with open(sys.argv[1], "w") as f:
-    json.dump({"cookies": cookies, "origins": []}, f, indent=2)
-print(f"wrote {len(cookies)} cookie(s) to {sys.argv[1]}")
-PY
-  agent-browser --session "$SESSION" state load "$STATE_FILE"
-  cmd_web_verify
-}
-
-cmd_web_verify() {
-  agent-browser --session "$SESSION" open "$SERVER_URL/" > /dev/null
-  local url
-  url=$(agent-browser --session "$SESSION" get url)
-  if [[ "$url" == *"/signin"* || "$url" == *"/login"* ]]; then
-    bad "agent-browser session '$SESSION' NOT authenticated (landed on $url)"
-    note "re-copy the Cookie header and re-run: pbpaste | $0 web"
-    return 1
-  fi
-  ok "agent-browser session '$SESSION' authenticated (at $url)"
-}
-
-case "${1:-status}" in
-  status) cmd_status ;;
-  cli) cmd_cli ;;
-  web) cmd_web ;;
-  web-verify) cmd_web_verify ;;
-  *)
-    echo "Usage: $0 {status|cli|web|web-verify}" >&2
-    exit 2
-    ;;
-esac
@@ -1,154 +0,0 @@
-# Electron (LobeHub Desktop) UI Testing
-
-Default surface for verifying **pure frontend changes** (components, store logic, styles, interactions) in the primary product shape. Drives the Electron renderer over CDP with `agent-browser` — see [../references/agent-browser.md](../references/agent-browser.md) for the full command reference.
-
-**Auth**: the Electron app keeps its own persistent login state — log in once manually in the app; sessions survive restarts. Run `../scripts/setup-auth.sh status` before testing (see [../references/auth.md](../references/auth.md)).
-
-**Linux / headless (cloud)**: Electron itself runs on Linux, but it has no true headless mode — it needs a display server. In a headless environment wrap the launch with `xvfb-run` (virtual framebuffer). Everything CDP-based keeps working under Xvfb: the `agent-browser --cdp 9222` connection, snapshots, eval, and `agent-browser screenshot` (captured from the renderer via CDP, not the OS screen). What does NOT work on Linux: `capture-app-window.sh` (macOS `screencapture`), osascript, and the ffmpeg recording scripts in their current form.
-
-### Setup / Teardown
-
-Use the `electron-dev.sh` script to manage the Electron dev environment. It handles process lifecycle, waits for SPA readiness, and reliably kills all child processes (main + helpers + vite).
-
-```bash
-SCRIPT=".agents/skills/agent-testing/scripts/electron-dev.sh"
-
-# Start Electron dev with CDP (idempotent — skips if already running)
-$SCRIPT start
-
-# Check if Electron is running and CDP is reachable
-$SCRIPT status
-
-# Kill all Electron-related processes (main + helper + vite)
-$SCRIPT stop
-
-# Force fresh restart
-$SCRIPT restart
-```
-
-After `start` succeeds, connect with: `agent-browser --cdp 9222 snapshot -i`
-
-**Always run `$SCRIPT stop` when done testing** — `pkill -f "Electron"` alone won't catch all helper processes.
-
-#### Environment Variables
-
-| Variable          | Default                 | Description                              |
-| ----------------- | ----------------------- | ---------------------------------------- |
-| `CDP_PORT`        | `9222`                  | Chrome DevTools Protocol port            |
-| `ELECTRON_LOG`    | `/tmp/electron-dev.log` | Electron process log                     |
-| `ELECTRON_WAIT_S` | `60`                    | Max seconds to wait for Electron process |
-| `RENDERER_WAIT_S` | `60`                    | Max seconds to wait for SPA to load      |
-
-### LobeHub Probes & Quick Navigation
-
-`scripts/app-probe.sh` is the standard fast path into app state — **use it
-instead of hand-rolling `__LOBE_STORES` eval snippets** for these common needs:
-
-```bash
-PROBE=".agents/skills/agent-testing/scripts/app-probe.sh"
-
-$PROBE auth              # login check (Step 0.3) → { isSignedIn, userId }
-$PROBE route             # current SPA route
-$PROBE ops               # running chat operations (type / startTime)
-$PROBE goto /settings    # jump the SPA straight to a route (full reload)
-$PROBE errors-install    # install console.error interceptor
-$PROBE errors            # dump captured errors
-```
-
-`goto` lets a test enter the state under test directly instead of clicking
-through the UI. Common desktop routes:
-
-| Route                         | Where it lands                       |
-| ----------------------------- | ------------------------------------ |
-| `/`                           | Home (has a chat input)              |
-| `/agent/<agentId>`            | Agent conversation (latest topic)    |
-| `/agent/<agentId>/<topicId>`  | Specific topic in a conversation     |
-| `/task` · `/task/<taskId>`    | Task list / task detail              |
-| `/page`                       | Documents (文稿)                     |
-| `/settings`                   | Settings                             |
-| `/community`                  | Discover / community                 |
-
-Targets default to Electron (`--cdp 9222`); set `AB_TARGET="--session <name>"`
-for web sessions. For deeper or one-off state inspection, fall back to raw
-eval below.
-
-### LobeHub-Specific Patterns
-
-#### Access Zustand Store State
-
-```bash
-agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
-(function() {
-  var chat = window.__LOBE_STORES.chat();
-  var ops = Object.values(chat.operations);
-  return JSON.stringify({
-    ops: ops.map(function(o) { return { type: o.type, status: o.status }; }),
-    activeAgent: chat.activeAgentId,
-    activeTopic: chat.activeTopicId,
-  });
-})()
-EVALEOF
-```
-
-#### Find and Use the Chat Input
-
-```bash
-# The chat input is contenteditable — must use -C flag
-agent-browser --cdp 9222 snapshot -i -C 2>&1 | grep "editable"
-
-agent-browser --cdp 9222 click @e48
-agent-browser --cdp 9222 type @e48 "Hello world"
-agent-browser --cdp 9222 press Enter
-```
-
-#### Wait for Agent to Complete
-
-```bash
-agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
-(function() {
-  var chat = window.__LOBE_STORES.chat();
-  var ops = Object.values(chat.operations);
-  var running = ops.filter(function(o) { return o.status === 'running'; });
-  return running.length === 0 ? 'done' : 'running: ' + running.length;
-})()
-EVALEOF
-```
-
-#### Install Error Interceptor
-
-```bash
-agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
-(function() {
-  window.__CAPTURED_ERRORS = [];
-  var orig = console.error;
-  console.error = function() {
-    var msg = Array.from(arguments).map(function(a) {
-      if (a instanceof Error) return a.message;
-      return typeof a === 'object' ? JSON.stringify(a) : String(a);
-    }).join(' ');
-    window.__CAPTURED_ERRORS.push(msg);
-    orig.apply(console, arguments);
-  };
-  return 'installed';
-})()
-EVALEOF
-
-# Later, check captured errors:
-agent-browser --cdp 9222 eval "JSON.stringify(window.__CAPTURED_ERRORS)"
-```
-
-## Electron Gotchas
-
-- **Always use `electron-dev.sh stop` to clean up** — `pkill -f "Electron"` only kills the main process; helper processes (GPU, renderer, network) survive. The script finds and kills all of them via PID matching against the project's electron binary path.
-- **`npx electron-vite dev` must run from `apps/desktop/`** — running from project root fails silently. The `electron-dev.sh` script handles this automatically.
-- **Dev build auto-opens DevTools, which hijacks the CDP target** — `agent-browser --cdp 9222` may attach to the DevTools page (`devtools://…`) instead of the app (`app://renderer/`). Symptom: `get url` returns a `devtools://` URL. Fix: close the DevTools target and reconnect:
-
-  ```bash
-  DT_ID=$(curl -s http://localhost:9222/json/list | python3 -c "import json,sys; ts=json.load(sys.stdin); print(next(t['id'] for t in ts if t['type']=='page' and t['url'].startswith('devtools://')))")
-  curl -s "http://localhost:9222/json/close/$DT_ID" > /dev/null
-  agent-browser close --all && agent-browser --cdp 9222 get url   # expect app://renderer/
-  ```
-
-- **Don't resize the Electron window after load** — resizing triggers full SPA reload
-- **Store is at `window.__LOBE_STORES`** not `window.__ZUSTAND_STORES__`
-- **Streaming / ticking UI needs GIF evidence** — see `scripts/record-gif.sh`; a static screenshot cannot prove time-based behavior.
@@ -1,69 +0,0 @@
-# Web (Full-Stack) Testing
-
-Default surface for **full-stack changes** — a new/changed API plus the UI that
-consumes it. The browser is the one surface where network requests and UI state
-are observable together, so you can assert both sides of the contract in a
-single run.
-
-For pure-frontend changes prefer [electron.md](./electron.md); for
-backend-only changes prefer [../cli/index.md](../cli/index.md).
-
-## Prerequisites
-
-- Local dev server running — [../references/dev-server.md](../references/dev-server.md)
-- Web auth injected into agent-browser — [../references/auth.md](../references/auth.md):
-
-```bash
-pbpaste | ./.agents/skills/agent-testing/scripts/setup-auth.sh web # after copying the Cookie header
-```
-
-## Option A — agent-browser with injected auth (recommended)
-
-```bash
-SESSION=lobehub-dev
-
-agent-browser --session $SESSION open "http://localhost:3010/"
-agent-browser --session $SESSION snapshot -i
-# interact via refs — full command reference: ../references/agent-browser.md
-```
-
-### Watch the API while driving the UI
-
-```bash
-# After triggering the UI action under test:
-agent-browser --session $SESSION network requests --type xhr,fetch
-agent-browser --session $SESSION network requests --method POST
-
-# Record a full HAR for the report
-agent-browser --session $SESSION network har start
-# ... drive the scenario ...
-agent-browser --session $SESSION network har stop ./capture.har
-```
-
-Assert both layers: the request/response shape (network) and the rendered
-result (snapshot/screenshot). Both belong in the report as evidence.
-
-## Option B — real Chrome with remote debugging
-
-For flows that need a real, visible browser (e.g. exercising the login UI
-itself):
-
-```bash
-/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
-  --remote-debugging-port=9222 \
-  --user-data-dir=/tmp/chrome-test-profile \
-  "<URL>" &
-sleep 5
-agent-browser --cdp 9222 snapshot -i
-
-# Or auto-discover running Chrome with remote debugging
-agent-browser --auto-connect snapshot -i
-```
-
-## Option C — Debug Proxy (local frontend, production backend)
-
-`bun run dev:spa` prints a **Debug Proxy** URL
-(`https://app.lobehub.com/_dangerous_local_dev_proxy?debug-host=…`) that loads
-your local Vite SPA inside the online environment — HMR against real server
-config. Useful for verifying frontend behavior against production data, **not**
-for testing backend changes (the backend is production, not your branch).
@@ -1,6 +1,6 @@
 ---
 name: agent-tracing
-description: 'Agent tracing CLI for execution snapshots. Use for agent-tracing, traces, snapshots, LLM call inspection, context engine data, agent step analysis, or execution debugging.'
+description: "Agent tracing CLI for inspecting agent execution snapshots. Use when the user mentions 'agent-tracing', 'trace', or 'snapshot', or wants to debug agent execution, inspect LLM calls, view context engine data, or analyze agent steps. Triggers on agent debugging, trace inspection, or execution analysis tasks."
 user-invocable: false
 ---

@@ -216,6 +216,6 @@ When using `--messages`, the output shows three sections (if context engine data

 ## Integration Points

-- **Recording**: `apps/server/src/services/agentRuntime/AgentRuntimeService.ts` — in the `executeStep()` method, after building `stepPresentationData`, writes partial snapshot in dev mode
-- **Context engine capture**: `apps/server/src/modules/AgentRuntime/RuntimeExecutors.ts` — in `call_llm` executor, after `serverMessagesEngine()` returns, calls `ctx.tracingContextEngine(input, output)`. `AgentRuntimeService.executeStep` buffers it per step and passes it to `traceRecorder.appendStep` as the typed `contextEngine` field (kept off the `events` array to stay out of Redis state).
+- **Recording**: `src/server/services/agentRuntime/AgentRuntimeService.ts` — in the `executeStep()` method, after building `stepPresentationData`, writes partial snapshot in dev mode
+- **Context engine capture**: `src/server/modules/AgentRuntime/RuntimeExecutors.ts` — in `call_llm` executor, after `serverMessagesEngine()` returns, calls `ctx.tracingContextEngine(input, output)`. `AgentRuntimeService.executeStep` buffers it per step and passes it to `traceRecorder.appendStep` as the typed `contextEngine` field (kept off the `events` array to stay out of Redis state).
 - **Store**: `FileSnapshotStore` reads/writes to `.agent-tracing/` relative to `process.cwd()`
@@ -1,6 +1,6 @@
 ---
 name: builtin-tool
-description: 'Build LobeHub builtin tool packages. Use when adding agent-callable tools, manifests, executors, runtimes, inspectors, renders, placeholders, streaming, interventions, portals, or tool registries.'
+description: Build a new builtin tool package under `packages/builtin-tool-<name>/`. Use when adding a new agent-callable toolset, designing its API surface (manifest / ApiName / Params / State), implementing the Executor + ExecutionRuntime, building the Inspector / Render / Placeholder / Streaming / Intervention / Portal UI, or wiring a tool into the central registries (`packages/builtin-tools/src/{index,identifiers,inspectors,renders,placeholders,streamings,interventions,portals}.ts` and `src/store/tool/slices/builtin/executors/index.ts`). Triggers on "new builtin tool", "add a tool", "tool inspector", "tool render", "tool placeholder", "tool streaming", "tool intervention", "BuiltinToolManifest", "BaseExecutor", "ExecutionRuntime".
 ---

 # Builtin Tool Authoring Guide
@@ -23,7 +23,7 @@ A builtin tool is a package the agent runtime can call. It ships **five faces**:
 | ------------------------------------------------------------------------------------ | --------------------------------------------- |
 | Where do files live? What does each face do? Wiring?                                 | [architecture.md](references/architecture.md) |
 | How do I name the tool, design APIs, write the manifest, executor, ExecutionRuntime? | [tool-design.md](references/tool-design.md)   |
-| How do I build Inspector / Render / Placeholder / Streaming / Intervention / Portal? | [ui/](references/ui/README.md)                |
+| How do I build Inspector / Render / Placeholder / Streaming / Intervention / Portal? | [ui.md](references/ui.md)                     |

 ---

@@ -271,7 +271,7 @@ Lists in the same file you may need to touch:

 - `defaultToolIds` — added to the agent's tool list by default
 - `alwaysOnToolIds` — forced on regardless of user selection (use sparingly)
-- `runtimeManagedToolIds` — enable state controlled by runtime, not user UI; **must mirror the rules map** in `apps/server/src/modules/Mecha/AgentToolsEngine/index.ts` and `src/helpers/toolEngineering/index.ts`
+- `runtimeManagedToolIds` — enable state controlled by runtime, not user UI; **must mirror the rules map** in `src/server/modules/Mecha/AgentToolsEngine/index.ts` and `src/helpers/toolEngineering/index.ts`

 ---

@@ -2,7 +2,7 @@

 This doc covers everything that **isn't UI**: the tool's identifier, API surface, manifest, types, system prompt, ExecutionRuntime, and the executor that wires it into the frontend.

-For UI surfaces (Inspector / Render / Placeholder / Streaming / Intervention / Portal), see [ui/](ui/README.md).
+For UI surfaces (Inspector / Render / Placeholder / Streaming / Intervention / Portal), see [ui.md](ui.md).
 For where files live and how registries work, see [architecture.md](architecture.md).

 ---
@@ -156,7 +156,7 @@ export const TaskManifest: BuiltinToolManifest = {
  executors: ['client', 'server'],

  /* Default human intervention policy for all APIs that don't specify one.
-     Pair with an Intervention component (see ui/intervention.md). */
+     Pair with an Intervention component (see ui.md). */
  humanIntervention: 'never' | 'always' | { /* extended config */ },
 }
 ```
@@ -0,0 +1,744 @@
+# Tool UI Surfaces
+
+A builtin tool can ship up to **six client-side surfaces**, each with a different role in the chat UI. Only `Inspector` is required; the other five are added on demand and registered in their own central files.
+
+| Surface      | Required? | When the chat shows it                                                | Registered in                                 |
+| ------------ | --------- | --------------------------------------------------------------------- | --------------------------------------------- |
+| Inspector    | ✅ Always | Header strip of every tool call (one-line chip)                       | `packages/builtin-tools/src/inspectors.ts`    |
+| Render       | Optional  | Rich result card below the header, after the call returns             | `packages/builtin-tools/src/renders.ts`       |
+| Placeholder  | Optional  | Skeleton between "args streaming complete" and "result arrives"       | `packages/builtin-tools/src/placeholders.ts`  |
+| Streaming    | Optional  | Live output during execution (e.g. command stdout)                    | `packages/builtin-tools/src/streamings.ts`    |
+| Intervention | Optional  | Approval / edit-before-run dialog (when `humanIntervention` triggers) | `packages/builtin-tools/src/interventions.ts` |
+| Portal       | Optional  | Full-screen detail view (right-side or modal)                         | `packages/builtin-tools/src/portals.ts`       |
+
+The two reference tools to read end-to-end:
+
+- **`builtin-tool-web-browsing/src/client/`** — Inspector + Render + Placeholder + Portal (no Intervention/Streaming).
+- **`builtin-tool-local-system/src/client/`** — all six surfaces, including `components/` for shared building blocks.
+
+---
+
+## Tool Render 设计原则（中文草案）
+
+这些原则用于判断一个 builtin tool 的 Inspector / Render / Placeholder / Streaming / Intervention / Portal 应该做什么，以及做到什么程度。
+
+1. **先保证折叠态可读。** 每个 API 都必须有 Inspector；用户不展开也应该能看懂 “正在做什么 / 对什么做 / 当前结果是什么”。Inspector 不应该只展示函数名和原始参数。
+2. **Inspector 是一句话，不是详情页。** 优先表达动作、关键对象、数量、状态，例如 “分析图片 3 张”“搜索 12 个结果”“读取 config.json”。长文本、列表和结构化结果放到 Render 或 Portal。
+3. **Inspector 要覆盖执行生命周期。** `args` 还在 streaming、工具执行中、执行完成、执行失败时都应该有稳定展示；必要时同时读取 `args`、`partialArgs` 和 `pluginState`，避免出现空白、跳变或只显示半截参数。
+4. **文案要随状态切换时态。** 同一个动作在 loading 与 completed 两个阶段必须用不同的措辞：执行中用现在进行时（“正在创建任务 / Creating task / 正在搜索”），执行完成后切到完成态（“已创建任务 / Task created / 已找到 N 条”）。Inspector chip 会一直留在聊天记录里 —— 如果一直挂着 “正在 xxx”，几小时后回看历史时会读起来像还在跑。约定的 i18n 形式是 `<api>.loading` / `<api>.completed` 一对键（见 `lobe-agent.apiName.callSubAgent.{loading,completed}` 与 `lobe-claude-code.task.{create,list,update,get}.{loading,completed}`），渲染时按 `isArgumentsStreaming || isLoading` 决定取哪一个。只读 / 查询类（“查看任务” 这种本来就是名词性的）可以共用一个键。
+5. **只有结构化结果才需要 Render。** 如果工具结果只是自然语言总结，通常不需要 Render；如果结果包含列表、媒体、文件、表格、代码、diff、地图、时间线、权限请求等结构，就应该提供 Render。
+6. **Render 要帮助用户检查结果，而不是复述参数。** Render 的主体应该围绕工具产物组织：可预览、可比较、可筛选、可定位。参数只作为上下文辅助出现，不要把 Render 做成一块更大的 args dump。
+7. **参数和结果要一起参与渲染。** 好的 Tool UI 通常同时用 `args` 解释意图，用 `pluginState` 展示真实执行结果；但 `pluginState` 只放结果域数据，不要反向塞入可以从 `args` 推导出的内容。
+8. **慢操作要有 Placeholder。** 如果工具通常需要等待网络、文件系统、模型或外部进程，Placeholder 应该先占住最终 Render 的版式，让用户知道即将看到什么，而不是只显示一个泛化 loading。
+9. **Streaming 只用于连续产物。** 搜索列表、日志、长文本、文件分析、分阶段计划适合 Streaming；一次性小结果不需要强行做 Streaming。Streaming UI 要能渐进追加，并且完成后自然过渡到最终 Render。
+10. **有风险的动作必须 Intervention。** 写文件、删除、发送、安装、执行命令、外部可见操作、权限敏感操作，都应该在执行前给出可理解的确认界面；确认文案要说明影响范围，而不是只问 “是否继续”。
+11. **错误、空态和截断都是正式状态。** Render 不能在失败、无结果、超长结果时退化成空白。错误要说明发生在哪一步；空态要告诉用户没有产物；超长内容要明确 “展示前 N 项 / 还有 N 项”。
+12. **信息密度要克制。** 默认展示最有判断价值的部分：标题、来源、状态、摘要、少量关键字段。大对象、长列表、原文、调试数据放进可展开区域或 Portal，避免把聊天流撑成后台管理页。
+13. **视觉上融入聊天流。** Tool UI 应该使用 `@lobehub/ui` / base-ui、`Flexbox`、`createStaticStyles` 和 `cssVar.*`，遵循现有间距、圆角、颜色、字号；不要为单个工具发明一套独立视觉语言。
+14. **Devtools fixture 是验收入口。** 新增或修改 Tool UI 时，应在 `/devtools` 里准备覆盖典型态、loading/streaming、空态、错误态、长内容态的 fixture；一个 API 如果在真实聊天里会出现，就不应该在 devtools 中缺席。
+15. **先做用户会看的 UI，再做调试 UI。** Raw JSON、trace、schema、内部 id 可以存在，但应默认收起或放到调试区；主界面先回答用户最关心的问题：工具做了什么，结果值不值得信任，下一步能做什么。
+
+---
+
+## 0. Shared Style Rules
+
+These apply across every surface.
+
+### 0.1 Use `'use client'` at the top of every component file
+
+Tool surfaces are leaves in the chat tree and must not block server rendering.
+
+### 0.2 Prefer `createStaticStyles + cssVar.*`
+
+Zero-runtime CSS-in-JS — the styles compile once and read CSS variables at runtime.
+
+```tsx
+import { createStaticStyles, cssVar } from 'antd-style';
+
+const styles = createStaticStyles(({ css, cssVar }) => ({
+  chip: css`
+    padding-block: 2px;
+    padding-inline: 8px;
+    border-radius: 999px;
+    color: ${cssVar.colorText};
+    background: ${cssVar.colorFillTertiary};
+  `,
+}));
+```
+
+Fall back to `createStyles + token` only when you need runtime token computation (rare). Inline `style={{ color: cssVar.colorTextSecondary }}` is fine for one-off dynamic values.
+
+### 0.3 Use `@lobehub/ui`, not raw `antd`
+
+`Block`, `Text`, `Flexbox`, `Highlighter`, `Alert`, `Tooltip`, `Skeleton` all come from `@lobehub/ui`. Modals come from `@lobehub/ui/base-ui` (`createModal`, `useModalContext`, `confirmModal`) — see the **modal** skill.
+
+Note: `@lobehub/ui`'s `<Text type='secondary'>` renders a lighter shade than `colorTextSecondary`. If you need that exact token color, write `<Text style={{ color: cssVar.colorTextSecondary }}>`.
+
+### 0.4 Always `memo` and set `displayName`
+
+```tsx
+export const SearchInspector = memo<BuiltinInspectorProps<SearchQuery, UniformSearchResponse>>(
+  ({ args /* … */ }) => {
+    /* … */
+  },
+);
+SearchInspector.displayName = 'SearchInspector';
+export default SearchInspector;
+```
+
+### 0.5 Always type with `BuiltinXProps<Args, State>` generics
+
+Don't widen to `any`. The `Args` generic is the API's JSON Schema params; the `State` generic is the executor's `state` field. The two should match `<Name>Params` and `<Name>State` from `types.ts`.
+
+### 0.6 Pull strings from `t('plugin')`
+
+```tsx
+const { t } = useTranslation('plugin');
+t('builtins.<identifier>.apiName.<api>');
+```
+
+Every Inspector should default to `t('builtins.<identifier>.apiName.<api>')` so it shows something while args stream in.
+
+### 0.7 Read store state from `@/store/chat`, not props
+
+Tool surfaces sometimes need cross-cutting state (loading, streaming buffer). Read it inside the component via Zustand selectors, not from props — props only carry args/state/messageId.
+
+---
+
+## 1. Inspector — Header Chip (required)
+
+**Lifecycle:** Inspector renders for **every phase** of a tool call: while args are streaming in, while the executor is running, and after results come back. It's the only surface that's always visible.
+
+**Goal:** keep it to a single line. Show what's happening with as much context as is currently available.
+
+### Props (`BuiltinInspectorProps<Args, State>`)
+
+```ts
+interface BuiltinInspectorProps<Arguments = any, State = any> {
+  apiName: string;
+  args: Arguments; // final args (only after the assistant stops streaming)
+  identifier: string;
+  isArgumentsStreaming?: boolean; // args still arriving
+  isLoading?: boolean; // args complete, executor running
+  partialArgs?: Arguments; // partial JSON during streaming
+  pluginState?: State; // executor's `state` after success
+  result?: { content: string | null; error?: any };
+}
+```
+
+### State machine
+
+| Phase                               | What's available                                           | What to show                                               |
+| ----------------------------------- | ---------------------------------------------------------- | ---------------------------------------------------------- |
+| Args streaming, no useful field yet | `isArgumentsStreaming === true`, `partialArgs.X` undefined | Just the API title with `shinyTextStyles.shinyText`        |
+| Args streaming, key field arrived   | `partialArgs.X` populated                                  | Title + key field chip, still pulse-animated               |
+| Args complete, executor running     | `args` populated, `isLoading === true`                     | Same as above, still pulse-animated                        |
+| Result arrived                      | `pluginState` populated, `isLoading === false`             | Title + chips + result summary (count, identifier, status) |
+
+### Canonical example — Search
+
+`packages/builtin-tool-web-browsing/src/client/Inspector/Search/index.tsx`:
+
+```tsx
+'use client';
+
+import type { BuiltinInspectorProps, SearchQuery, UniformSearchResponse } from '@lobechat/types';
+import { Text } from '@lobehub/ui';
+import { cssVar, cx } from 'antd-style';
+import { memo } from 'react';
+import { useTranslation } from 'react-i18next';
+
+import { highlightTextStyles, inspectorTextStyles, shinyTextStyles } from '@/styles';
+
+export const SearchInspector = memo<BuiltinInspectorProps<SearchQuery, UniformSearchResponse>>(
+  ({ args, partialArgs, isArgumentsStreaming, isLoading, pluginState }) => {
+    const { t } = useTranslation('plugin');
+
+    const query = args?.query || partialArgs?.query || '';
+    const resultCount = pluginState?.results?.length ?? 0;
+    const hasResults = resultCount > 0;
+
+    if (isArgumentsStreaming && !query) {
+      return (
+        <div className={cx(inspectorTextStyles.root, shinyTextStyles.shinyText)}>
+          <span>{t('builtins.lobe-web-browsing.apiName.search')}</span>
+        </div>
+      );
+    }
+
+    return (
+      <div
+        className={cx(
+          inspectorTextStyles.root,
+          (isArgumentsStreaming || isLoading) && shinyTextStyles.shinyText,
+        )}
+      >
+        <span>{t('builtins.lobe-web-browsing.apiName.search')}:&nbsp;</span>
+        {query && <span className={highlightTextStyles.primary}>{query}</span>}
+        {!isLoading &&
+          !isArgumentsStreaming &&
+          pluginState?.results &&
+          (hasResults ? (
+            <span style={{ marginInlineStart: 4 }}>({resultCount})</span>
+          ) : (
+            <Text as="span" color={cssVar.colorTextDescription} fontSize={12}>
+              ({t('builtins.lobe-web-browsing.inspector.noResults')})
+            </Text>
+          ))}
+      </div>
+    );
+  },
+);
+SearchInspector.displayName = 'SearchInspector';
+export default SearchInspector;
+```
+
+### Inspector rules
+
+- Wrap the whole row with `inspectorTextStyles.root` (provides correct flex / line-height baseline).
+- Pulse with `shinyTextStyles.shinyText` whenever `isArgumentsStreaming || isLoading`.
+- Show the i18n title first so the row is non-empty during the earliest streaming phase.
+- Read both `args?.X` and `partialArgs?.X` together — `args` is final, `partialArgs` is in-stream.
+- Use chips/tags for distinct facets (identifier, name, parent, status, count). Each chip should clip with `text-overflow: ellipsis` and have a `max-width` so long values don't blow out the chat bubble.
+- Append `pluginState`-derived suffixes only **after** loading finishes — count or "(no results)" should not appear while still searching.
+- **Switch copy by phase.** If the verb implies an ongoing action ("Creating", "Searching", "Listing"), define `<api>.loading` and `<api>.completed` keys and select via `isArgumentsStreaming || isLoading ? loadingKey : completedKey`. Inspector chips persist in chat history — leaving "Creating task" frozen on a finished call reads as if the tool is still running. Read-only labels that are already noun-form ("View task") can keep a single key. See `CallSubAgentInspector` for the canonical two-key pattern.
+
+### Inspector registry — `client/Inspector/index.ts`
+
+```ts
+import type { BuiltinInspector } from '@lobechat/types';
+
+import { TaskApiName } from '../../types';
+import { CreateTaskInspector } from './CreateTask';
+import { ListTasksInspector } from './ListTasks';
+/* … */
+
+export const TaskInspectors: Record<string, BuiltinInspector> = {
+  [TaskApiName.createTask]: CreateTaskInspector as BuiltinInspector,
+  [TaskApiName.listTasks]: ListTasksInspector as BuiltinInspector,
+  /* one entry per ApiName */
+};
+
+export { CreateTaskInspector } from './CreateTask';
+export { ListTasksInspector } from './ListTasks';
+/* re-export each */
+```
+
+---
+
+## 2. Render — Rich Result Card (optional)
+
+**Lifecycle:** rendered **once the result arrives** (after Placeholder/Streaming hand off). Sits below the Inspector header.
+
+**Skip if** the API is read-only or the result is just text — the framework already shows the executor's `content` string. Add a Render only when there's a structured artifact worth seeing: a card, a chart, a diff, a list of files.
+
+### Props (`BuiltinRenderProps<Args, State, Content>`)
+
+```ts
+interface BuiltinRenderProps<Arguments = any, State = any, Content = any> {
+  apiName?: string;
+  args: Arguments; // final params from the LLM
+  content: Content; // executor's content string (or parsed)
+  identifier?: string;
+  messageId: string; // for store lookups
+  pluginError?: any; // from BuiltinToolResult.error
+  pluginState?: State; // executor's state
+  toolCallId?: string;
+}
+```
+
+### Two patterns
+
+**Pattern A — Single-file Render** (web-browsing CrawlSinglePage):
+
+```tsx
+// client/Render/CrawlSinglePage.tsx
+import type { BuiltinRenderProps, CrawlPluginState, CrawlSinglePageQuery } from '@lobechat/types';
+import { memo } from 'react';
+
+import PageContent from './PageContent';
+
+const CrawlSinglePage = memo<BuiltinRenderProps<CrawlSinglePageQuery, CrawlPluginState>>(
+  ({ messageId, pluginState, args }) => (
+    <PageContent messageId={messageId} results={pluginState?.results} urls={[args?.url]} />
+  ),
+);
+export default CrawlSinglePage;
+```
+
+**Pattern B — Folder with subcomponents** (web-browsing Search):
+
+```
+client/Render/Search/
+├── index.tsx           # composes the subcomponents, handles error states
+├── ConfigForm.tsx      # appears when pluginError.type === 'PluginSettingsInvalid'
+├── SearchQuery.tsx     # editable query header
+└── SearchResult.tsx    # result list
+```
+
+Use Pattern B when the Render has internal state (editing mode, expanded items), error variants, or is large enough to benefit from splitting.
+
+### Error handling in Render
+
+Renders are the canonical place to surface `pluginError` because the chat doesn't auto-render typed errors:
+
+```tsx
+if (pluginError) {
+  if (pluginError?.type === 'PluginSettingsInvalid') {
+    return <ConfigForm id={messageId} provider={pluginError.body?.provider} />;
+  }
+  return (
+    <Alert
+      title={pluginError?.message}
+      type="error"
+      extra={<Highlighter language="json">{JSON.stringify(pluginError.body, null, 2)}</Highlighter>}
+    />
+  );
+}
+```
+
+### Render rules
+
+- **Return `null`** if there's nothing useful to draw yet (avoids empty cards while streaming).
+- Use `pluginState` for server-truth (ids, counts, server-assigned status) and `args` for what the LLM asked. **Combine — neither alone is enough.**
+- For lists, summarize with a header line and show top N items with a "+N more" tail rather than rendering everything.
+- For modals from a Render, use `@lobehub/ui/base-ui` (`createModal`, `useModalContext`, `confirmModal`) — see the **modal** skill.
+
+### Render registry — `client/Render/index.ts`
+
+```ts
+import type { BuiltinRender } from '@lobechat/types';
+
+import { TaskApiName } from '../../types';
+import CreateTaskRender from './CreateTask';
+import RunTasksRender from './RunTasks';
+
+export const TaskRenders: Record<string, BuiltinRender> = {
+  [TaskApiName.createTask]: CreateTaskRender as BuiltinRender,
+  [TaskApiName.runTasks]: RunTasksRender as BuiltinRender,
+  /* only the APIs with rich result UI — others fall back to text content */
+};
+
+export { default as CreateTaskRender } from './CreateTask';
+export { default as RunTasksRender } from './RunTasks';
+```
+
+### Render display control (rare)
+
+If the Render should hide for certain results (e.g. ClaudeCode's TodoWrite hides when the agent is mid-stream), add a `RenderDisplayControl` to `packages/builtin-tools/src/displayControls.ts`. See `ClaudeCodeRenderDisplayControls` for the pattern.
+
+---
+
+## 3. Placeholder — Skeleton Between Args and Result (optional)
+
+**Lifecycle:** rendered when the args have finished streaming but the executor hasn't returned yet. Disappears when `pluginState` arrives. Bridges the moment of perceived lag.
+
+**Add for** APIs with noticeable execution time: web search, network crawl, file list, large grep. **Skip for** instant ops (status flips, calculator).
+
+### Props (`BuiltinPlaceholderProps<Args>`)
+
+```ts
+interface BuiltinPlaceholderProps<T extends Record<string, any> = any> {
+  apiName: string;
+  args?: T;
+  identifier: string;
+}
+```
+
+No `pluginState` — Placeholder lives entirely in the "executing" gap.
+
+### Canonical example — Search Placeholder
+
+`packages/builtin-tool-web-browsing/src/client/Placeholder/Search.tsx`:
+
+```tsx
+import type { BuiltinPlaceholderProps, SearchQuery } from '@lobechat/types';
+import { Flexbox, Icon, Skeleton } from '@lobehub/ui';
+import { createStaticStyles, cx } from 'antd-style';
+import { SearchIcon } from 'lucide-react';
+import { memo } from 'react';
+
+import { useIsMobile } from '@/hooks/useIsMobile';
+import { shinyTextStyles } from '@/styles';
+
+const styles = createStaticStyles(({ css, cssVar }) => ({
+  query: cx(
+    css`
+      padding: 4px 8px;
+      border-radius: 8px;
+      font-size: 12px;
+      color: ${cssVar.colorTextSecondary};
+      &:hover {
+        background: ${cssVar.colorFillTertiary};
+      }
+    `,
+    shinyTextStyles.shinyText,
+  ),
+}));
+
+export const Search = memo<BuiltinPlaceholderProps<SearchQuery>>(({ args }) => {
+  const { query } = args || {};
+  const isMobile = useIsMobile();
+
+  return (
+    <Flexbox gap={8}>
+      <Flexbox horizontal={!isMobile} gap={isMobile ? 8 : 40}>
+        <Flexbox horizontal align="center" className={styles.query} gap={8}>
+          <Icon icon={SearchIcon} />
+          {query ? query : <Skeleton.Block active style={{ height: 20, width: 40 }} />}
+        </Flexbox>
+        <Skeleton.Block active style={{ height: 20, width: 40 }} />
+      </Flexbox>
+      <Flexbox horizontal gap={12}>
+        {[1, 2, 3, 4, 5].map((id) => (
+          <Skeleton.Button active key={id} style={{ borderRadius: 8, height: 80, width: 160 }} />
+        ))}
+      </Flexbox>
+    </Flexbox>
+  );
+});
+```
+
+### Placeholder rules
+
+- **Mirror the eventual Render's layout.** When the result arrives the Placeholder unmounts and the Render mounts; if they share dimensions, the chat doesn't jump.
+- Use `Skeleton.Block` / `Skeleton.Button` from `@lobehub/ui` for placeholder shapes.
+- Embed any args you have (e.g. the query text) — context helps the user know what's loading.
+- Pulse with `shinyTextStyles.shinyText` if the Placeholder includes literal text.
+
+### Placeholder registry — `client/Placeholder/index.ts`
+
+```ts
+import { WebBrowsingApiName } from '../../types';
+import CrawlMultiPages from './CrawlMultiPages';
+import CrawlSinglePage from './CrawlSinglePage';
+import { Search } from './Search';
+
+export const WebBrowsingPlaceholders = {
+  [WebBrowsingApiName.crawlMultiPages]: CrawlMultiPages,
+  [WebBrowsingApiName.crawlSinglePage]: CrawlSinglePage,
+  [WebBrowsingApiName.search]: Search,
+};
+
+export { CrawlMultiPages, CrawlSinglePage, Search };
+```
+
+---
+
+## 4. Streaming — Live Output During Execution (optional)
+
+**Lifecycle:** rendered **while the executor is still running** for APIs that emit incremental output. The component is responsible for fetching the in-flight stream from the chat store and rendering it.
+
+**Add for** long-running ops with continuous output: shell command execution (stdout/stderr), file write progress, code interpreter cells.
+
+### Props (`BuiltinStreamingProps<Args>`)
+
+```ts
+interface BuiltinStreamingProps<Arguments = any> {
+  apiName: string;
+  args: Arguments;
+  identifier: string;
+  messageId: string; // use to fetch the streaming buffer from store
+  toolCallId: string;
+}
+```
+
+Note there's **no `state` or `result` prop** — the Streaming component is for the in-flight phase. It pulls the live buffer from the store itself (typically via a selector such as `chatToolSelectors.streamingBuffer(messageId, toolCallId)`, or similar).
+
+### Canonical example — RunCommandStreaming
+
+`packages/builtin-tool-local-system/src/client/Streaming/RunCommand/index.tsx`:
+
+```tsx
+'use client';
+
+import type { BuiltinStreamingProps } from '@lobechat/types';
+import { Highlighter } from '@lobehub/ui';
+import { memo } from 'react';
+
+interface RunCommandParams {
+  command?: string;
+  description?: string;
+  timeout?: number;
+}
+
+export const RunCommandStreaming = memo<BuiltinStreamingProps<RunCommandParams>>(({ args }) => {
+  const { command } = args || {};
+  if (!command) return null;
+
+  return (
+    <Highlighter
+      animated
+      wrap
+      language="sh"
+      showLanguage={false}
+      style={{ padding: '4px 8px' }}
+      variant="outlined"
+    >
+      {command}
+    </Highlighter>
+  );
+});
+RunCommandStreaming.displayName = 'RunCommandStreaming';
+```
+
+For real-time output beyond just the command (stderr/stdout streaming), pull from the chat store:
+
+```tsx
+const buffer = useChatStore((state) =>
+  chatToolSelectors.streamingBuffer(messageId, toolCallId)(state),
+);
+```
+
+### Streaming rules
+
+- Render `null` until you have something to display (avoids flash).
+- For terminal-style output, use `Highlighter` with `animated` to show a typing-like effect.
+- The Streaming component must **unmount cleanly** when execution ends — typically the framework swaps it out for the Render automatically.
+
+### Streaming registry — `client/Streaming/index.ts`
+
+```ts
+import { LocalSystemApiName } from '../..';
+import { RunCommandStreaming } from './RunCommand';
+import { WriteFileStreaming } from './WriteFile';
+
+export const LocalSystemStreamings = {
+  [LocalSystemApiName.runCommand]: RunCommandStreaming,
+  [LocalSystemApiName.writeLocalFile]: WriteFileStreaming,
+};
+```
+
+---
+
+## 5. Intervention — Approval / Edit-Before-Run (optional)
+
+**Lifecycle:** rendered **before the executor runs** for APIs whose manifest sets `humanIntervention`. The user sees a preview of the args, can edit them, then approves or skips/cancels.
+
+**Add for** destructive or sensitive ops: shell commands, file writes, file moves, payments, message broadcasts.
+
+### Props (`BuiltinInterventionProps<Args>`)
+
+```ts
+interface BuiltinInterventionProps<Arguments = any> {
+  apiName?: string;
+  args: Arguments;
+  identifier?: string;
+  interactionMode?: 'approval' | 'custom';
+  messageId: string;
+
+  /** Called when the user edits the args; the approve action awaits this. */
+  onArgsChange?: (args: Arguments) => void | Promise<void>;
+
+  /** Called on approve / skip / cancel. */
+  onInteractionAction?: (
+    action:
+      | { type: 'submit'; payload: Record<string, unknown> }
+      | { type: 'skip'; payload?: Record<string, unknown>; reason?: string }
+      | { type: 'cancel'; payload?: Record<string, unknown> },
+  ) => Promise<void>;
+
+  /** Register a callback to flush pending saves before approval. Returns cleanup. */
+  registerBeforeApprove?: (id: string, callback: () => void | Promise<void>) => () => void;
+}
+```
+
+### Canonical example — RunCommand Intervention
+
+`packages/builtin-tool-local-system/src/client/Intervention/RunCommand/index.tsx`:
+
+```tsx
+import type { RunCommandParams } from '@lobechat/electron-client-ipc';
+import type { BuiltinInterventionProps } from '@lobechat/types';
+import { Flexbox, Highlighter, Text } from '@lobehub/ui';
+import { memo } from 'react';
+
+const RunCommand = memo<BuiltinInterventionProps<RunCommandParams>>(({ args }) => {
+  const { description, command, timeout } = args;
+  return (
+    <Flexbox gap={8}>
+      <Flexbox horizontal justify="space-between">
+        {description && <Text>{description}</Text>}
+        {timeout && (
+          <Text style={{ fontSize: 12 }} type="secondary">
+            timeout: {formatTimeout(timeout)}
+          </Text>
+        )}
+      </Flexbox>
+      {command && (
+        <Highlighter wrap language="sh" showLanguage={false} variant="outlined">
+          {command}
+        </Highlighter>
+      )}
+    </Flexbox>
+  );
+});
+export default RunCommand;
+```
+
+### Intervention rules
+
+- **Show a preview, not a form by default.** Editing UI is opt-in via `onArgsChange` and is usually inline (click to edit a code block, etc.).
+- For args with debounced edit state (text fields), use `registerBeforeApprove(id, flushFn)` so the approve action waits for the debounce to flush. Always return the cleanup function.
+- Call `onInteractionAction({ type: 'submit', payload })` when the user approves; `'skip'` if they skip with a reason; `'cancel'` if they cancel the whole turn.
+- Add a corresponding `interventionAudit.ts` in the package root if the tool needs scope/path validation before approval (see `local-system/src/interventionAudit.ts`).
+
+### Intervention registry — `client/Intervention/index.ts`
+
+```ts
+import { LocalSystemApiName } from '../..';
+import EditLocalFile from './EditLocalFile';
+import RunCommand from './RunCommand';
+import WriteFile from './WriteFile';
+/* … */
+
+export const LocalSystemInterventions = {
+  [LocalSystemApiName.editLocalFile]: EditLocalFile,
+  [LocalSystemApiName.runCommand]: RunCommand,
+  [LocalSystemApiName.writeLocalFile]: WriteFile,
+  /* one entry per API that needs approval */
+};
+```
+
+---
+
+## 6. Portal — Full-Screen Detail View (optional)
+
+**Lifecycle:** rendered when the user opens the tool message in a side panel or full-screen modal. One Portal per **tool**, not per API — the Portal switches on `apiName` internally.
+
+**Add for** tools whose results deserve a deep-dive view: search results with editable filters, page content with reader mode, code interpreter sessions.
+
+### Props (`BuiltinPortalProps<Args, State>`)
+
+```ts
+interface BuiltinPortalProps<Arguments = Record<string, any>, State = any> {
+  apiName?: string;
+  arguments: Arguments;
+  identifier: string;
+  messageId: string;
+  state: State;
+}
+```
+
+### Canonical example — Web-Browsing Portal
+
+`packages/builtin-tool-web-browsing/src/client/Portal/index.tsx`:
+
+```tsx
+import type { BuiltinPortalProps, CrawlPluginState, SearchQuery } from '@lobechat/types';
+import { memo } from 'react';
+
+import { WebBrowsingApiName } from '../../types';
+import PageContent from './PageContent';
+import PageContents from './PageContents';
+import Search from './Search';
+
+const Portal = memo<BuiltinPortalProps>(({ arguments: args, messageId, state, apiName }) => {
+  switch (apiName) {
+    case WebBrowsingApiName.search:
+      return <Search messageId={messageId} query={args as SearchQuery} response={state} />;
+
+    case WebBrowsingApiName.crawlSinglePage: {
+      const result = (state as CrawlPluginState).results.find((r) => r.originalUrl === args.url);
+      return <PageContent messageId={messageId} result={result} />;
+    }
+
+    case WebBrowsingApiName.crawlMultiPages:
+      return (
+        <PageContents
+          messageId={messageId}
+          results={(state as CrawlPluginState).results}
+          urls={args.urls}
+        />
+      );
+  }
+  return null;
+});
+export default Portal;
+```
+
+### Portal rules
+
+- One Portal per tool — the file is the routing layer, subcomponents implement each API's view.
+- Portals can read the chat store directly to detect "still streaming" and render a Skeleton internally (see `Search/index.tsx:20-46`).
+- Layout assumes more space than the Render — use `Flexbox` with `height={'100%'}` and structure for a side panel viewport.
+
+### Portal registry — `packages/builtin-tools/src/portals.ts`
+
+```ts
+import { WebBrowsingManifest, WebBrowsingPortal } from '@lobechat/builtin-tool-web-browsing/client';
+import { type BuiltinPortal } from '@lobechat/types';
+
+export const BuiltinToolsPortals: Record<string, BuiltinPortal> = {
+  [WebBrowsingManifest.identifier]: WebBrowsingPortal as BuiltinPortal,
+};
+```
+
+---
+
+## 7. `client/components/` — Shared Subcomponents
+
+Cross-cutting building blocks used by multiple surfaces live here, not duplicated in each surface folder.
+
+Examples from `web-browsing/src/client/components/`:
+
+- `CategoryAvatar.tsx` — search category icon
+- `EngineAvatar.tsx` — search engine logo (used in Inspector chip + Render list + Portal header)
+- `SearchBar.tsx` — editable query bar (used in Render and Portal)
+
+Examples from `local-system/src/client/components/`:
+
+- `FileItem.tsx` — single file row (used in ListFiles Render, SearchFiles Render, MoveLocalFiles Render)
+- `FilePathDisplay.tsx` — path with truncation (used everywhere)
+
+### Rules
+
+- Live under `client/components/`, exported via `client/components/index.ts`.
+- Re-export from `client/index.ts` only if other packages need them; otherwise keep internal.
+- Keep them dumb — props in, JSX out, no store reads. The store reads belong in the surface that composes them.
+
+---
+
+## 8. `client/index.ts` — Package Public API
+
+Re-exports everything the registries need plus useful types/manifest:
+
+```ts
+// Inspector — required
+export { TaskInspectors } from './Inspector';
+
+// Render — only if any API has one
+export { TaskRenders, CreateTaskRender, RunTasksRender } from './Render';
+
+// Placeholder / Streaming / Intervention — only if used
+export { LocalSystemListFilesPlaceholder, LocalSystemSearchFilesPlaceholder } from './Placeholder';
+export { LocalSystemStreamings } from './Streaming';
+export { LocalSystemInterventions } from './Intervention';
+
+// Portal — single export per tool
+export { default as WebBrowsingPortal } from './Portal';
+
+// Reusable components if other packages need them
+export { CategoryAvatar, EngineAvatar, SearchBar } from './components';
+
+// Re-export manifest, identifier, types for convenience
+export { TaskManifest, TaskIdentifier } from '../manifest';
+export * from '../types';
+```
+
+---
+
+## 9. Diagnostic Quick-Lookup
+
+| Symptom                                         | Surface to check                                                                                                  |
+| ----------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| No header at all on the tool call               | Inspector missing from `client/Inspector/index.ts` registry                                                       |
+| Header shows the API name but no chips          | Inspector missing `args?.X \|\| partialArgs?.X` fallback                                                          |
+| Header doesn't pulse during loading             | Missing `shinyTextStyles.shinyText` on `isArgumentsStreaming \|\| isLoading`                                      |
+| Empty result card under header                  | Render returned `<div />` instead of `null` when no data                                                          |
+| Layout jump when result arrives                 | Placeholder dimensions don't match Render dimensions                                                              |
+| Approval dialog never appears                   | Manifest missing `humanIntervention`, or Intervention not in registry                                             |
+| Approval click doesn't wait for inline edit     | Missing `registerBeforeApprove(id, flushFn)`                                                                      |
+| Portal opens but blank                          | Switch in `Portal/index.tsx` doesn't cover the apiName                                                            |
+| Strings show as `builtins.lobe-foo.apiName.bar` | Missing i18n key in `src/locales/default/plugin.ts` (or not seeded in dev locale files)                           |
+| Wrong color shade on `<Text type="secondary">`  | `type='secondary'` is lighter than `colorTextSecondary` — pass via `style={{ color: cssVar.colorTextSecondary }}` |
@@ -1,36 +0,0 @@
-# Tool UI Surfaces
-
-A builtin tool can ship up to **six client-side surfaces**, each with a different role in the chat UI. Only `Inspector` is required; the other five are added on demand and registered in their own central files.
-
-| Surface      | Required? | When the chat shows it                                                | Registered in                                 |
-| ------------ | --------- | --------------------------------------------------------------------- | --------------------------------------------- |
-| Inspector    | ✅ Always | Header strip of every tool call (one-line chip)                       | `packages/builtin-tools/src/inspectors.ts`    |
-| Render       | Optional  | Rich result card below the header, after the call returns             | `packages/builtin-tools/src/renders.ts`       |
-| Placeholder  | Optional  | Skeleton between "args streaming complete" and "result arrives"       | `packages/builtin-tools/src/placeholders.ts`  |
-| Streaming    | Optional  | Live output during execution (e.g. command stdout)                    | `packages/builtin-tools/src/streamings.ts`    |
-| Intervention | Optional  | Approval / edit-before-run dialog (when `humanIntervention` triggers) | `packages/builtin-tools/src/interventions.ts` |
-| Portal       | Optional  | Full-screen detail view (right-side or modal)                         | `packages/builtin-tools/src/portals.ts`       |
-
-The two reference tools to read end-to-end:
-
- **`builtin-tool-web-browsing/src/client/`** — Inspector + Render + Placeholder + Portal (no Intervention/Streaming).
- **`builtin-tool-local-system/src/client/`** — all six surfaces, including `components/` for shared building blocks.
-
---
-
-## Files in this folder
-
-Read **principles** and **shared-rules** first — they apply to every surface. Then jump to the surface you're building.
-
-| File                               | What it covers                                                          |
-| ---------------------------------- | ----------------------------------------------------------------------- |
-| [principles.md](principles.md)     | Design principles — when each surface exists and how far to take it     |
-| [shared-rules.md](shared-rules.md) | Cross-surface rules: component skeleton, styling, single-layer surfaces |
-| [inspector.md](inspector.md)       | Inspector — header chip (required)                                      |
-| [render.md](render.md)             | Render — rich result card                                               |
-| [placeholder.md](placeholder.md)   | Placeholder — skeleton between args and result                          |
-| [streaming.md](streaming.md)       | Streaming — live output during execution                                |
-| [intervention.md](intervention.md) | Intervention — approval / edit-before-run                               |
-| [portal.md](portal.md)             | Portal — full-screen detail view                                        |
-| [composition.md](composition.md)   | Shared subcomponents (`client/components/`) + package public API        |
-| [diagnostics.md](diagnostics.md)   | Symptom → surface quick-lookup                                          |
@@ -1,51 +0,0 @@
-# Composition — Shared Components & Package API
-
-## `client/components/` — Shared Subcomponents
-
-Cross-cutting building blocks used by multiple surfaces live here, not duplicated in each surface folder.
-
-Examples from `web-browsing/src/client/components/`:
-
- `CategoryAvatar.tsx` — search category icon
- `EngineAvatar.tsx` — search engine logo (used in Inspector chip + Render list + Portal header)
- `SearchBar.tsx` — editable query bar (used in Render and Portal)
-
-Examples from `local-system/src/client/components/`:
-
- `FileItem.tsx` — single file row (used in ListFiles Render, SearchFiles Render, MoveLocalFiles Render)
- `FilePathDisplay.tsx` — path with truncation (used everywhere)
-
-### Rules
-
- Live under `client/components/`, exported via `client/components/index.ts`.
- Re-export from `client/index.ts` only if other packages need them; otherwise keep internal.
- Keep them dumb — props in, JSX out, no store reads. The store reads belong in the surface that composes them.
-
---
-
-## `client/index.ts` — Package Public API
-
-Re-exports everything the registries need plus useful types/manifest:
-
-```ts
-// Inspector — required
-export { TaskInspectors } from './Inspector';
-
-// Render — only if any API has one
-export { TaskRenders, CreateTaskRender, RunTasksRender } from './Render';
-
-// Placeholder / Streaming / Intervention — only if used
-export { LocalSystemListFilesPlaceholder, LocalSystemSearchFilesPlaceholder } from './Placeholder';
-export { LocalSystemStreamings } from './Streaming';
-export { LocalSystemInterventions } from './Intervention';
-
-// Portal — single export per tool
-export { default as WebBrowsingPortal } from './Portal';
-
-// Reusable components if other packages need them
-export { CategoryAvatar, EngineAvatar, SearchBar } from './components';
-
-// Re-export manifest, identifier, types for convenience
-export { TaskManifest, TaskIdentifier } from '../manifest';
-export * from '../types';
-```
@@ -1,15 +0,0 @@
-# Diagnostic Quick-Lookup
-
-| Symptom                                         | Surface to check                                                                                                                      |
-| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
-| No header at all on the tool call               | Inspector missing from `client/Inspector/index.ts` registry                                                                           |
-| Header shows the API name but no chips          | Inspector missing `args?.X \|\| partialArgs?.X` fallback                                                                              |
-| Header doesn't pulse during loading             | Missing `shinyTextStyles.shinyText` on `isArgumentsStreaming \|\| isLoading`                                                          |
-| Empty result card under header                  | Render returned `<div />` instead of `null` when no data                                                                              |
-| Render looks "complex" / card-in-card           | Filled container (`colorFillQuaternary`) wrapping more filled boxes — flatten to single-layer, see [shared-rules.md](shared-rules.md) |
-| Layout jump when result arrives                 | Placeholder dimensions don't match Render dimensions                                                                                  |
-| Approval dialog never appears                   | Manifest missing `humanIntervention`, or Intervention not in registry                                                                 |
-| Approval click doesn't wait for inline edit     | Missing `registerBeforeApprove(id, flushFn)`                                                                                          |
-| Portal opens but blank                          | Switch in `Portal/index.tsx` doesn't cover the apiName                                                                                |
-| Strings show as `builtins.lobe-foo.apiName.bar` | Missing i18n key in `src/locales/default/plugin.ts` (or not seeded in dev locale files)                                               |
-| Wrong color shade on `<Text type="secondary">`  | `type='secondary'` is lighter than `colorTextSecondary` — pass via `style={{ color: cssVar.colorTextSecondary }}`                     |
@@ -1,118 +0,0 @@
-# Inspector — Header Chip (required)
-
-**Lifecycle:** Inspector renders for **every phase** of a tool call: while args are streaming in, while the executor is running, and after results come back. It's the only surface that's always visible.
-
-**Goal:** keep it to a single line. Show what's happening with as much context as is currently available.
-
-## Props (`BuiltinInspectorProps<Args, State>`)
-
-```ts
-interface BuiltinInspectorProps<Arguments = any, State = any> {
-  apiName: string;
-  args: Arguments; // final args (only after the assistant stops streaming)
-  identifier: string;
-  isArgumentsStreaming?: boolean; // args still arriving
-  isLoading?: boolean; // args complete, executor running
-  partialArgs?: Arguments; // partial JSON during streaming
-  pluginState?: State; // executor's `state` after success
-  result?: { content: string | null; error?: any };
-}
-```
-
-## State machine
-
-| Phase                               | What's available                                           | What to show                                               |
-| ----------------------------------- | ---------------------------------------------------------- | ---------------------------------------------------------- |
-| Args streaming, no useful field yet | `isArgumentsStreaming === true`, `partialArgs.X` undefined | Just the API title with `shinyTextStyles.shinyText`        |
-| Args streaming, key field arrived   | `partialArgs.X` populated                                  | Title + key field chip, still pulse-animated               |
-| Args complete, executor running     | `args` populated, `isLoading === true`                     | Same as above, still pulse-animated                        |
-| Result arrived                      | `pluginState` populated, `isLoading === false`             | Title + chips + result summary (count, identifier, status) |
-
-## Canonical example — Search
-
-`packages/builtin-tool-web-browsing/src/client/Inspector/Search/index.tsx`:
-
-```tsx
-'use client';
-
-import type { BuiltinInspectorProps, SearchQuery, UniformSearchResponse } from '@lobechat/types';
-import { Text } from '@lobehub/ui';
-import { cssVar, cx } from 'antd-style';
-import { memo } from 'react';
-import { useTranslation } from 'react-i18next';
-
-import { highlightTextStyles, inspectorTextStyles, shinyTextStyles } from '@/styles';
-
-export const SearchInspector = memo<BuiltinInspectorProps<SearchQuery, UniformSearchResponse>>(
-  ({ args, partialArgs, isArgumentsStreaming, isLoading, pluginState }) => {
-    const { t } = useTranslation('plugin');
-
-    const query = args?.query || partialArgs?.query || '';
-    const resultCount = pluginState?.results?.length ?? 0;
-    const hasResults = resultCount > 0;
-
-    if (isArgumentsStreaming && !query) {
-      return (
-        <div className={cx(inspectorTextStyles.root, shinyTextStyles.shinyText)}>
-          <span>{t('builtins.lobe-web-browsing.apiName.search')}</span>
-        </div>
-      );
-    }
-
-    return (
-      <div
-        className={cx(
-          inspectorTextStyles.root,
-          (isArgumentsStreaming || isLoading) && shinyTextStyles.shinyText,
-        )}
-      >
-        <span>{t('builtins.lobe-web-browsing.apiName.search')}:&nbsp;</span>
-        {query && <span className={highlightTextStyles.primary}>{query}</span>}
-        {!isLoading &&
-          !isArgumentsStreaming &&
-          pluginState?.results &&
-          (hasResults ? (
-            <span style={{ marginInlineStart: 4 }}>({resultCount})</span>
-          ) : (
-            <Text as="span" color={cssVar.colorTextDescription} fontSize={12}>
-              ({t('builtins.lobe-web-browsing.inspector.noResults')})
-            </Text>
-          ))}
-      </div>
-    );
-  },
-);
-SearchInspector.displayName = 'SearchInspector';
-export default SearchInspector;
-```
-
-## Inspector rules
-
- Wrap the whole row with `inspectorTextStyles.root` (provides correct flex / line-height baseline).
- Pulse with `shinyTextStyles.shinyText` whenever `isArgumentsStreaming || isLoading`.
- Show the i18n title first so the row is non-empty during the earliest streaming phase.
- Read both `args?.X` and `partialArgs?.X` together — `args` is final, `partialArgs` is in-stream.
- Use chips/tags for distinct facets (identifier, name, parent, status, count). Each chip should clip with `text-overflow: ellipsis` and have a `max-width` so long values don't blow out the chat bubble.
- Append `pluginState`-derived suffixes only **after** loading finishes — count or "(no results)" should not appear while still searching.
- **Switch copy by phase.** If the verb implies an ongoing action ("Creating", "Searching", "Listing"), define `<api>.loading` and `<api>.completed` keys and select via `isArgumentsStreaming || isLoading ? loadingKey : completedKey`. Inspector chips persist in chat history — leaving "Creating task" frozen on a finished call reads as if the tool is still running. Read-only labels that are already noun-form ("View task") can keep a single key. See `CallSubAgentInspector` for the canonical two-key pattern.
-
-## Inspector registry — `client/Inspector/index.ts`
-
-```ts
-import type { BuiltinInspector } from '@lobechat/types';
-
-import { TaskApiName } from '../../types';
-import { CreateTaskInspector } from './CreateTask';
-import { ListTasksInspector } from './ListTasks';
-/* … */
-
-export const TaskInspectors: Record<string, BuiltinInspector> = {
-  [TaskApiName.createTask]: CreateTaskInspector as BuiltinInspector,
-  [TaskApiName.listTasks]: ListTasksInspector as BuiltinInspector,
-  /* one entry per ApiName */
-};
-
-export { CreateTaskInspector } from './CreateTask';
-export { ListTasksInspector } from './ListTasks';
-/* re-export each */
-```
@@ -1,88 +0,0 @@
-# Intervention — Approval / Edit-Before-Run (optional)
-
-**Lifecycle:** rendered **before the executor runs** for APIs whose manifest sets `humanIntervention`. The user sees a preview of the args, can edit them, then approves or skips/cancels.
-
-**Add for** destructive or sensitive ops: shell commands, file writes, file moves, payments, message broadcasts.
-
-## Props (`BuiltinInterventionProps<Args>`)
-
-```ts
-interface BuiltinInterventionProps<Arguments = any> {
-  apiName?: string;
-  args: Arguments;
-  identifier?: string;
-  interactionMode?: 'approval' | 'custom';
-  messageId: string;
-
-  /** Called when the user edits the args; the approve action awaits this. */
-  onArgsChange?: (args: Arguments) => void | Promise<void>;
-
-  /** Called on approve / skip / cancel. */
-  onInteractionAction?: (
-    action:
-      | { type: 'submit'; payload: Record<string, unknown> }
-      | { type: 'skip'; payload?: Record<string, unknown>; reason?: string }
-      | { type: 'cancel'; payload?: Record<string, unknown> },
-  ) => Promise<void>;
-
-  /** Register a callback to flush pending saves before approval. Returns cleanup. */
-  registerBeforeApprove?: (id: string, callback: () => void | Promise<void>) => () => void;
-}
-```
-
-## Canonical example — RunCommand Intervention
-
-`packages/builtin-tool-local-system/src/client/Intervention/RunCommand/index.tsx`:
-
-```tsx
-import type { RunCommandParams } from '@lobechat/electron-client-ipc';
-import type { BuiltinInterventionProps } from '@lobechat/types';
-import { Flexbox, Highlighter, Text } from '@lobehub/ui';
-import { memo } from 'react';
-
-const RunCommand = memo<BuiltinInterventionProps<RunCommandParams>>(({ args }) => {
-  const { description, command, timeout } = args;
-  return (
-    <Flexbox gap={8}>
-      <Flexbox horizontal justify="space-between">
-        {description && <Text>{description}</Text>}
-        {timeout && (
-          <Text style={{ fontSize: 12 }} type="secondary">
-            timeout: {formatTimeout(timeout)}
-          </Text>
-        )}
-      </Flexbox>
-      {command && (
-        <Highlighter wrap language="sh" showLanguage={false} variant="outlined">
-          {command}
-        </Highlighter>
-      )}
-    </Flexbox>
-  );
-});
-export default RunCommand;
-```
-
-## Intervention rules
-
- **Show a preview, not a form by default.** Editing UI is opt-in via `onArgsChange` and is usually inline (click to edit a code block, etc.).
- For args with debounced edit state (text fields), use `registerBeforeApprove(id, flushFn)` so the approve action waits for the debounce to flush. Always return the cleanup function.
- Call `onInteractionAction({ type: 'submit', payload })` when the user approves; `'skip'` if they skip with a reason; `'cancel'` if they cancel the whole turn.
- Add a corresponding `interventionAudit.ts` in the package root if the tool needs scope/path validation before approval (see `local-system/src/interventionAudit.ts`).
-
-## Intervention registry — `client/Intervention/index.ts`
-
-```ts
-import { LocalSystemApiName } from '../..';
-import EditLocalFile from './EditLocalFile';
-import RunCommand from './RunCommand';
-import WriteFile from './WriteFile';
-/* … */
-
-export const LocalSystemInterventions = {
-  [LocalSystemApiName.editLocalFile]: EditLocalFile,
-  [LocalSystemApiName.runCommand]: RunCommand,
-  [LocalSystemApiName.writeLocalFile]: WriteFile,
-  /* one entry per API that needs approval */
-};
-```
@@ -1,93 +0,0 @@
-# Placeholder — Skeleton Between Args and Result (optional)
-
-**Lifecycle:** rendered when the args have finished streaming but the executor hasn't returned yet. Disappears when `pluginState` arrives. Bridges the moment of perceived lag.
-
-**Add for** APIs with noticeable execution time: web search, network crawl, file list, large grep. **Skip for** instant ops (status flips, calculator).
-
-## Props (`BuiltinPlaceholderProps<Args>`)
-
-```ts
-interface BuiltinPlaceholderProps<T extends Record<string, any> = any> {
-  apiName: string;
-  args?: T;
-  identifier: string;
-}
-```
-
-No `pluginState` — Placeholder lives entirely in the "executing" gap.
-
-## Canonical example — Search Placeholder
-
-`packages/builtin-tool-web-browsing/src/client/Placeholder/Search.tsx`:
-
-```tsx
-import type { BuiltinPlaceholderProps, SearchQuery } from '@lobechat/types';
-import { Flexbox, Icon, Skeleton } from '@lobehub/ui';
-import { createStaticStyles, cx } from 'antd-style';
-import { SearchIcon } from 'lucide-react';
-import { memo } from 'react';
-
-import { useIsMobile } from '@/hooks/useIsMobile';
-import { shinyTextStyles } from '@/styles';
-
-const styles = createStaticStyles(({ css, cssVar }) => ({
-  query: cx(
-    css`
-      padding: 4px 8px;
-      border-radius: 8px;
-      font-size: 12px;
-      color: ${cssVar.colorTextSecondary};
-      &:hover {
-        background: ${cssVar.colorFillTertiary};
-      }
-    `,
-    shinyTextStyles.shinyText,
-  ),
-}));
-
-export const Search = memo<BuiltinPlaceholderProps<SearchQuery>>(({ args }) => {
-  const { query } = args || {};
-  const isMobile = useIsMobile();
-
-  return (
-    <Flexbox gap={8}>
-      <Flexbox horizontal={!isMobile} gap={isMobile ? 8 : 40}>
-        <Flexbox horizontal align="center" className={styles.query} gap={8}>
-          <Icon icon={SearchIcon} />
-          {query ? query : <Skeleton.Block active style={{ height: 20, width: 40 }} />}
-        </Flexbox>
-        <Skeleton.Block active style={{ height: 20, width: 40 }} />
-      </Flexbox>
-      <Flexbox horizontal gap={12}>
-        {[1, 2, 3, 4, 5].map((id) => (
-          <Skeleton.Button active key={id} style={{ borderRadius: 8, height: 80, width: 160 }} />
-        ))}
-      </Flexbox>
-    </Flexbox>
-  );
-});
-```
-
-## Placeholder rules
-
- **Mirror the eventual Render's layout.** When the result arrives the Placeholder unmounts and the Render mounts; if they share dimensions, the chat doesn't jump.
- Use `Skeleton.Block` / `Skeleton.Button` from `@lobehub/ui` for placeholder shapes.
- Embed any args you have (e.g. the query text) — context helps the user know what's loading.
- Pulse with `shinyTextStyles.shinyText` if the Placeholder includes literal text.
-
-## Placeholder registry — `client/Placeholder/index.ts`
-
-```ts
-import { WebBrowsingApiName } from '../../types';
-import CrawlMultiPages from './CrawlMultiPages';
-import CrawlSinglePage from './CrawlSinglePage';
-import { Search } from './Search';
-
-export const WebBrowsingPlaceholders = {
-  [WebBrowsingApiName.crawlMultiPages]: CrawlMultiPages,
-  [WebBrowsingApiName.crawlSinglePage]: CrawlSinglePage,
-  [WebBrowsingApiName.search]: Search,
-};
-
-export { CrawlMultiPages, CrawlSinglePage, Search };
-```
@@ -1,71 +0,0 @@
-# Portal — Full-Screen Detail View (optional)
-
-**Lifecycle:** rendered when the user opens the tool message in a side panel or full-screen modal. One Portal per **tool**, not per API — the Portal switches on `apiName` internally.
-
-**Add for** tools whose results deserve a deep-dive view: search results with editable filters, page content with reader mode, code interpreter sessions.
-
-## Props (`BuiltinPortalProps<Args, State>`)
-
-```ts
-interface BuiltinPortalProps<Arguments = Record<string, any>, State = any> {
-  apiName?: string;
-  arguments: Arguments;
-  identifier: string;
-  messageId: string;
-  state: State;
-}
-```
-
-## Canonical example — Web-Browsing Portal
-
-`packages/builtin-tool-web-browsing/src/client/Portal/index.tsx`:
-
-```tsx
-import type { BuiltinPortalProps, CrawlPluginState, SearchQuery } from '@lobechat/types';
-import { memo } from 'react';
-
-import { WebBrowsingApiName } from '../../types';
-import PageContent from './PageContent';
-import PageContents from './PageContents';
-import Search from './Search';
-
-const Portal = memo<BuiltinPortalProps>(({ arguments: args, messageId, state, apiName }) => {
-  switch (apiName) {
-    case WebBrowsingApiName.search:
-      return <Search messageId={messageId} query={args as SearchQuery} response={state} />;
-
-    case WebBrowsingApiName.crawlSinglePage: {
-      const result = (state as CrawlPluginState).results.find((r) => r.originalUrl === args.url);
-      return <PageContent messageId={messageId} result={result} />;
-    }
-
-    case WebBrowsingApiName.crawlMultiPages:
-      return (
-        <PageContents
-          messageId={messageId}
-          results={(state as CrawlPluginState).results}
-          urls={args.urls}
-        />
-      );
-  }
-  return null;
-});
-export default Portal;
-```
-
-## Portal rules
-
- One Portal per tool — the file is the routing layer, subcomponents implement each API's view.
- Portals can read the chat store directly to detect "still streaming" and render a Skeleton internally (see `Search/index.tsx:20-46`).
- Layout assumes more space than the Render — use `Flexbox` with `height={'100%'}` and structure for a side panel viewport.
-
-## Portal registry — `packages/builtin-tools/src/portals.ts`
-
-```ts
-import { WebBrowsingManifest, WebBrowsingPortal } from '@lobechat/builtin-tool-web-browsing/client';
-import { type BuiltinPortal } from '@lobechat/types';
-
-export const BuiltinToolsPortals: Record<string, BuiltinPortal> = {
-  [WebBrowsingManifest.identifier]: WebBrowsingPortal as BuiltinPortal,
-};
-```
@@ -1,19 +0,0 @@
-# Tool Render 设计原则（中文草案）
-
-这些原则用于判断一个 builtin tool 的 Inspector / Render / Placeholder / Streaming / Intervention / Portal 应该做什么，以及做到什么程度。
-
-1. **先保证折叠态可读。** 每个 API 都必须有 Inspector；用户不展开也应该能看懂 “正在做什么 / 对什么做 / 当前结果是什么”。Inspector 不应该只展示函数名和原始参数。
-2. **Inspector 是一句话，不是详情页。** 优先表达动作、关键对象、数量、状态，例如 “分析图片 3 张”“搜索 12 个结果”“读取 config.json”。长文本、列表和结构化结果放到 Render 或 Portal。
-3. **Inspector 要覆盖执行生命周期。** `args` 还在 streaming、工具执行中、执行完成、执行失败时都应该有稳定展示；必要时同时读取 `args`、`partialArgs` 和 `pluginState`，避免出现空白、跳变或只显示半截参数。
-4. **文案要随状态切换时态。** 同一个动作在 loading 与 completed 两个阶段必须用不同的措辞：执行中用现在进行时（“正在创建任务 / Creating task / 正在搜索”），执行完成后切到完成态（“已创建任务 / Task created / 已找到 N 条”）。Inspector chip 会一直留在聊天记录里 —— 如果一直挂着 “正在 xxx”，几小时后回看历史时会读起来像还在跑。约定的 i18n 形式是 `<api>.loading` / `<api>.completed` 一对键（见 `lobe-agent.apiName.callSubAgent.{loading,completed}` 与 `lobe-claude-code.task.{create,list,update,get}.{loading,completed}`），渲染时按 `isArgumentsStreaming || isLoading` 决定取哪一个。只读 / 查询类（“查看任务” 这种本来就是名词性的）可以共用一个键。
-5. **只有结构化结果才需要 Render。** 如果工具结果只是自然语言总结，通常不需要 Render；如果结果包含列表、媒体、文件、表格、代码、diff、地图、时间线、权限请求等结构，就应该提供 Render。
-6. **Render 要帮助用户检查结果，而不是复述参数。** Render 的主体应该围绕工具产物组织：可预览、可比较、可筛选、可定位。参数只作为上下文辅助出现，不要把 Render 做成一块更大的 args dump。
-7. **参数和结果要一起参与渲染。** 好的 Tool UI 通常同时用 `args` 解释意图，用 `pluginState` 展示真实执行结果；但 `pluginState` 只放结果域数据，不要反向塞入可以从 `args` 推导出的内容。
-8. **慢操作要有 Placeholder。** 如果工具通常需要等待网络、文件系统、模型或外部进程，Placeholder 应该先占住最终 Render 的版式，让用户知道即将看到什么，而不是只显示一个泛化 loading。
-9. **Streaming 只用于连续产物。** 搜索列表、日志、长文本、文件分析、分阶段计划适合 Streaming；一次性小结果不需要强行做 Streaming。Streaming UI 要能渐进追加，并且完成后自然过渡到最终 Render。
-10. **有风险的动作必须 Intervention。** 写文件、删除、发送、安装、执行命令、外部可见操作、权限敏感操作，都应该在执行前给出可理解的确认界面；确认文案要说明影响范围，而不是只问 “是否继续”。
-11. **错误、空态和截断都是正式状态。** Render 不能在失败、无结果、超长结果时退化成空白。错误要说明发生在哪一步；空态要告诉用户没有产物；超长内容要明确 “展示前 N 项 / 还有 N 项”。
-12. **信息密度要克制。** 默认展示最有判断价值的部分：标题、来源、状态、摘要、少量关键字段。大对象、长列表、原文、调试数据放进可展开区域或 Portal，避免把聊天流撑成后台管理页。
-13. **视觉上融入聊天流。** Tool UI 应该使用 `@lobehub/ui` / base-ui、`Flexbox`、`createStaticStyles` 和 `cssVar.*`，遵循现有间距、圆角、颜色、字号；不要为单个工具发明一套独立视觉语言。具体的样式约定见 [shared-rules.md](shared-rules.md)。
-14. **Devtools fixture 是验收入口。** 新增或修改 Tool UI 时，应在 `/devtools` 里准备覆盖典型态、loading/streaming、空态、错误态、长内容态的 fixture；一个 API 如果在真实聊天里会出现，就不应该在 devtools 中缺席。
-15. **先做用户会看的 UI，再做调试 UI。** Raw JSON、trace、schema、内部 id 可以存在，但应默认收起或放到调试区；主界面先回答用户最关心的问题：工具做了什么，结果值不值得信任，下一步能做什么。
@@ -1,101 +0,0 @@
-# Render — Rich Result Card (optional)
-
-**Lifecycle:** rendered **once the result arrives** (after Placeholder/Streaming hand off). Sits below the Inspector header.
-
-**Skip if** the API is read-only or the result is just text — the framework already shows the executor's `content` string. Add a Render only when there's a structured artifact worth seeing: a card, a chart, a diff, a list of files.
-
-## Props (`BuiltinRenderProps<Args, State, Content>`)
-
-```ts
-interface BuiltinRenderProps<Arguments = any, State = any, Content = any> {
-  apiName?: string;
-  args: Arguments; // final params from the LLM
-  content: Content; // executor's content string (or parsed)
-  identifier?: string;
-  messageId: string; // for store lookups
-  pluginError?: any; // from BuiltinToolResult.error
-  pluginState?: State; // executor's state
-  toolCallId?: string;
-}
-```
-
-## Two patterns
-
-**Pattern A — Single-file Render** (web-browsing CrawlSinglePage):
-
-```tsx
-// client/Render/CrawlSinglePage.tsx
-import type { BuiltinRenderProps, CrawlPluginState, CrawlSinglePageQuery } from '@lobechat/types';
-import { memo } from 'react';
-
-import PageContent from './PageContent';
-
-const CrawlSinglePage = memo<BuiltinRenderProps<CrawlSinglePageQuery, CrawlPluginState>>(
-  ({ messageId, pluginState, args }) => (
-    <PageContent messageId={messageId} results={pluginState?.results} urls={[args?.url]} />
-  ),
-);
-export default CrawlSinglePage;
-```
-
-**Pattern B — Folder with subcomponents** (web-browsing Search):
-
-```
-client/Render/Search/
-├── index.tsx           # composes the subcomponents, handles error states
-├── ConfigForm.tsx      # appears when pluginError.type === 'PluginSettingsInvalid'
-├── SearchQuery.tsx     # editable query header
-└── SearchResult.tsx    # result list
-```
-
-Use Pattern B when the Render has internal state (editing mode, expanded items), error variants, or is large enough to benefit from splitting.
-
-## Error handling in Render
-
-Renders are the canonical place to surface `pluginError` because the chat doesn't auto-render typed errors:
-
-```tsx
-if (pluginError) {
-  if (pluginError?.type === 'PluginSettingsInvalid') {
-    return <ConfigForm id={messageId} provider={pluginError.body?.provider} />;
-  }
-  return (
-    <Alert
-      title={pluginError?.message}
-      type="error"
-      extra={<Highlighter language="json">{JSON.stringify(pluginError.body, null, 2)}</Highlighter>}
-    />
-  );
-}
-```
-
-## Render rules
-
- **Return `null`** if there's nothing useful to draw yet (avoids empty cards during stream).
- Use `pluginState` for server-truth (ids, counts, server-assigned status) and `args` for what the LLM asked. **Combine — neither alone is enough.**
- For lists, summarize with a header line and show top N items with a "+N more" tail rather than rendering everything.
- **Keep the Render single-layer** — the tool card is already your surface, so don't open with your own filled container and then nest more filled boxes inside it. See [shared-rules.md](shared-rules.md) → "Stay single-layer".
- For modals from a Render, use `@lobehub/ui/base-ui` (`createModal`, `useModalContext`, `confirmModal`) — see the **modal** skill.
-
-## Render registry — `client/Render/index.ts`
-
-```ts
-import type { BuiltinRender } from '@lobechat/types';
-
-import { TaskApiName } from '../../types';
-import CreateTaskRender from './CreateTask';
-import RunTasksRender from './RunTasks';
-
-export const TaskRenders: Record<string, BuiltinRender> = {
-  [TaskApiName.createTask]: CreateTaskRender as BuiltinRender,
-  [TaskApiName.runTasks]: RunTasksRender as BuiltinRender,
-  /* only the APIs with rich result UI — others fall back to text content */
-};
-
-export { default as CreateTaskRender } from './CreateTask';
-export { default as RunTasksRender } from './RunTasks';
-```
-
-## Render display control (rare)
-
-If the Render should hide for certain results (e.g. ClaudeCode's TodoWrite hides when the agent is mid-stream), add a `RenderDisplayControl` to `packages/builtin-tools/src/displayControls.ts`. See `ClaudeCodeRenderDisplayControls` for the pattern.
@@ -1,89 +0,0 @@
-# Shared Style Rules
-
-These apply across every surface.
-
-## The component skeleton
-
-Every surface file is the same shape, so internalize it once instead of re-deriving it per rule. The skeleton below bakes in five mechanical conventions — copy it and fill the body:
-
-```tsx
-'use client'; // (a) leaves of the chat tree must not block server rendering
-
-import type { BuiltinInspectorProps, SearchQuery, UniformSearchResponse } from '@lobechat/types';
-import { memo } from 'react';
-import { useTranslation } from 'react-i18next';
-
-// (b) type with BuiltinXProps<Args, State> — never widen to `any`.
-//     Args = the JSON Schema params, State = the executor's `state` field;
-//     they should match <Name>Params / <Name>State from types.ts.
-export const SearchInspector = memo<BuiltinInspectorProps<SearchQuery, UniformSearchResponse>>(
-  ({ args, pluginState }) => {
-    const { t } = useTranslation('plugin'); // (c) all strings from the `plugin` namespace
-
-    // (d) cross-cutting state (loading, streaming buffer) comes from the store,
-    //     not props — props only carry args/state/messageId.
-    // const buffer = useChatStore((s) => chatToolSelectors.streamingBuffer(messageId)(s));
-
-    return <span>{t('builtins.<identifier>.apiName.search')}</span>;
-  },
-);
-SearchInspector.displayName = 'SearchInspector'; // (e) always memo + displayName
-export default SearchInspector;
-```
-
- **(c)** Default an Inspector to `t('builtins.<identifier>.apiName.<api>')` so the row is non-empty while args stream in.
- **(d)** Read the store via Zustand selectors inside the component; see [streaming.md](streaming.md) for the buffer selector.
-
-## Styling: `createStaticStyles + cssVar.*`, `@lobehub/ui` over `antd`
-
-Zero-runtime CSS-in-JS — styles compile once and read CSS variables at runtime:
-
-```tsx
-import { createStaticStyles, cssVar } from 'antd-style';
-
-const styles = createStaticStyles(({ css, cssVar }) => ({
-  chip: css`
-    padding-block: 2px;
-    padding-inline: 8px;
-    border-radius: 999px;
-    color: ${cssVar.colorText};
-    background: ${cssVar.colorFillTertiary};
-  `,
-}));
-```
-
- Fall back to `createStyles + token` only when you need runtime token computation (rare). Inline `style={{ color: cssVar.colorTextSecondary }}` is fine for one-off dynamic values.
- Components come from `@lobehub/ui` (`Block`, `Text`, `Flexbox`, `Highlighter`, `Alert`, `Tooltip`, `Skeleton`), not raw `antd`. Modals come from `@lobehub/ui/base-ui` (`createModal`, `useModalContext`, `confirmModal`) — see the **modal** skill.
- Note: `<Text type='secondary'>` is a lighter shade than `colorTextSecondary`. For that exact token color, write `<Text style={{ color: cssVar.colorTextSecondary }}>`.
-
-## Stay single-layer — don't nest filled cards
-
-The framework already wraps every Render / Intervention in a tool card, so that card **is** your surface. A Render that opens with its own `background: ${cssVar.colorFillQuaternary}` container is already one card deep; put another filled box inside it (`colorBgContainer` / `colorFillTertiary`) and you get the card-in-card look that reads as "complex" — two or three stacked fills for what is really a flat list of fields.
-
- **The outermost wrapper carries no fill.** Use a flat container with only `padding-block: 4px` for breathing room; let the tool card provide the card. (See `Agent/index.tsx`'s `container`.)
- **At most one filled box, and only to delineate real content** — a Markdown preview, a diff, a code/result block. Labels, key–value fields, question/answer text, chips: render flat on the surface, separated by spacing or a hairline divider (`height: 1px; background: ${cssVar.colorFillSecondary}`), not by wrapping each in its own box.
- **A box on a flat surface needs a visible fill.** Once the outer fill is gone, an inner `colorBgContainer` box can vanish against the tool card (same color). Use `colorFillTertiary` for the one content box so it still reads as delineated.
- Don't wrap a single value in a box just to give it padding — that's the redundant-nesting smell (a `detailCard` around a `value` box around one string).
-
-```tsx
-// ❌ card-in-card: filled container wrapping a filled preview box
-container: css`
-  padding: 12px;
-  background: ${cssVar.colorFillQuaternary};
-`,
-previewBox: css`
-  background: ${cssVar.colorBgContainer};
-`,
-
-// ✅ single-layer: flat container, one visible content box
-container: css`
-  padding-block: 4px;
-`,
-previewBox: css`
-  background: ${cssVar.colorFillTertiary};
-`,
-```
-
-For the common "icon + file/title header, then one content box" shape, reuse `ToolResultCard` from `@lobechat/shared-tool-ui/components` instead of rebuilding it — it's already single-layer (flat wrapper, one `colorFillTertiary` content box) and is what CC `Read` / `Grep` / `Glob` / `Write` / `WebSearch` / `WebFetch` render through.
-
-The exception is a deliberate **panel** pattern — an `<Block variant="outlined">` with a header bar + list rows (CC `TodoWrite` / `Task`). There the single outlined block is the panel and the header fill is a header bar, not a nested card. One structured panel is fine; stacked decorative fills are not.
@@ -1,83 +0,0 @@
-# Streaming — Live Output During Execution (optional)
-
-**Lifecycle:** rendered **while the executor is still running** for APIs that emit incremental output. The component is responsible for fetching the in-flight stream from the chat store and rendering it.
-
-**Add for** long-running ops with continuous output: shell command execution (stdout/stderr), file write progress, code interpreter cells.
-
-## Props (`BuiltinStreamingProps<Args>`)
-
-```ts
-interface BuiltinStreamingProps<Arguments = any> {
-  apiName: string;
-  args: Arguments;
-  identifier: string;
-  messageId: string; // use to fetch the streaming buffer from store
-  toolCallId: string;
-}
-```
-
-Note there's **no `state` or `result` prop** — the Streaming component is for the in-flight phase. It pulls the live buffer from the store itself (typically via `chatToolSelectors.streamingContent(messageId)` or similar).
-
-## Canonical example — RunCommandStreaming
-
-`packages/builtin-tool-local-system/src/client/Streaming/RunCommand/index.tsx`:
-
-```tsx
-'use client';
-
-import type { BuiltinStreamingProps } from '@lobechat/types';
-import { Highlighter } from '@lobehub/ui';
-import { memo } from 'react';
-
-interface RunCommandParams {
-  command?: string;
-  description?: string;
-  timeout?: number;
-}
-
-export const RunCommandStreaming = memo<BuiltinStreamingProps<RunCommandParams>>(({ args }) => {
-  const { command } = args || {};
-  if (!command) return null;
-
-  return (
-    <Highlighter
-      animated
-      wrap
-      language="sh"
-      showLanguage={false}
-      style={{ padding: '4px 8px' }}
-      variant="outlined"
-    >
-      {command}
-    </Highlighter>
-  );
-});
-RunCommandStreaming.displayName = 'RunCommandStreaming';
-```
-
-For real-time output beyond just the command (stderr/stdout streaming), pull from the chat store:
-
-```tsx
-const buffer = useChatStore((state) =>
-  chatToolSelectors.streamingBuffer(messageId, toolCallId)(state),
-);
-```
-
-## Streaming rules
-
- Render `null` until you have something to display (avoids flash).
- For terminal-style output, use `Highlighter` with `animated` to show typing-like effect.
- The Streaming component must **unmount cleanly** when execution ends — typically the framework swaps it out for the Render automatically.
-
-## Streaming registry — `client/Streaming/index.ts`
-
-```ts
-import { LocalSystemApiName } from '../..';
-import { RunCommandStreaming } from './RunCommand';
-import { WriteFileStreaming } from './WriteFile';
-
-export const LocalSystemStreamings = {
-  [LocalSystemApiName.runCommand]: RunCommandStreaming,
-  [LocalSystemApiName.writeLocalFile]: WriteFileStreaming,
-};
-```
@@ -1,6 +1,13 @@
 ---
 name: chat-sdk
-description: 'Build multi-platform chat bots with the chat SDK. Use for Slack, Teams, Google Chat, Discord, GitHub, Linear bots, webhooks, mentions, slash commands, cards, modals, or streaming responses.'
+description: >
+  Build multi-platform chat bots with Chat SDK (`chat` npm package). Use when developers want to
+  (1) Build a Slack, Teams, Google Chat, Discord, GitHub, or Linear bot,
+  (2) Use the Chat SDK to handle mentions, messages, reactions, slash commands, cards, modals, or streaming,
+  (3) Set up webhook handlers for chat platforms,
+  (4) Send interactive cards or stream AI responses to chat platforms.
+  Triggers on "chat sdk", "chat bot", "slack bot", "teams bot", "discord bot", "@chat-adapter",
+  building bots that work across multiple chat platforms.
 user-invocable: false
 ---

@@ -0,0 +1,218 @@
+---
+name: cli-backend-testing
+description: >
+  CLI + Backend integration testing workflow. Use when verifying backend API changes
+  (TRPC routers, services, models) via the LobeHub CLI against a local dev server.
+  Triggers on 'cli test', 'test with cli', 'verify with cli', 'local cli test',
+  'backend test with cli', or when needing to validate server-side changes end-to-end.
+---
+
+# CLI + Backend Integration Testing
+
+Standard workflow for verifying backend changes using the LobeHub CLI (`lh`) against a local dev server.
+
+## When to Use
+
+- Verifying TRPC router / service / model changes end-to-end
+- Testing new API fields or response structure changes
+- Validating CLI command output after backend modifications
+- Debugging data flow issues between server and CLI
+
+## Prerequisites
+
+| Requirement  | Details                                                       |
+| ------------ | ------------------------------------------------------------- |
+| Dev server   | `localhost:3011` (Next.js)                                    |
+| CLI source   | `lobehub/apps/cli/`                                           |
+| CLI dev mode | Uses `LOBEHUB_CLI_HOME=.lobehub-dev` for isolated credentials |
+| Auth         | Device Code Flow login to local server                        |
+
+## Quick Reference
+
+All CLI dev commands run from `lobehub/apps/cli/`:
+
+```bash
+# Shorthand for all commands below (`env` is needed: a VAR=value prefix stored in a variable is not treated as an assignment)
+CLI="env LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts"
+```
+
+## Workflow
+
+### Step 1: Ensure Dev Server is Running
+
+Check if the dev server is already running:
+
+```bash
+curl -s -o /dev/null -w '%{http_code}' http://localhost:3011/ 2> /dev/null
+```
+
+- **If reachable** (prints a real HTTP status such as `200` or `307`): server is running. Skip to Step 2.
+- **If unreachable** (prints `000`, curl could not connect): start the server:
+
+```bash
+# From cloud repo root
+pnpm run dev:next
+```
+
+To **restart** (pick up server-side code changes):
+
+```bash
+lsof -ti:3011 | xargs kill
+pnpm run dev:next
+```
+
+**Important:** Server-side code changes in the submodule (`lobehub/src/server/`, `lobehub/packages/`) require a server restart. Next.js hot-reload may not pick up changes in submodule packages.
+
+### Step 2: Check CLI Authentication
+
+Check if dev credentials already exist:
+
+```bash
+cat lobehub/apps/cli/.lobehub-dev/settings.json 2> /dev/null
+```
+
+- **If file exists and contains `"serverUrl": "http://localhost:3011"`**: already authenticated. Skip to Step 3.
+- **If file missing or points to wrong server**: login is needed. Ask the user to run:
+
+```bash
+! cd lobehub/apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server http://localhost:3011
+```
+
+> Login requires interactive browser authorization (OIDC Device Code Flow), so the user must run it themselves via the `!` prefix. After login, credentials are saved to `lobehub/apps/cli/.lobehub-dev/` and persist across sessions.
+
+### Step 3: Test with CLI Commands
+
+CLI runs from source (`bun src/index.ts`), so CLI-side code changes take effect immediately without rebuilding.
+
+```bash
+cd lobehub/apps/cli
+LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts <command>
+```
+
+### Step 4: Clean Up Test Data
+
+Delete any test data created during verification:
+
+```bash
+LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts task delete <id> -y
+LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts agent delete <id> -y
+```
+
+## Common Testing Patterns
+
+### Task System
+
+```bash
+# List tasks
+$CLI task list
+
+# Create test data with nesting
+$CLI task create -n "Root Task" -i "Test instruction"
+$CLI task create -n "Child Task" -i "Sub instruction" --parent T-1
+
+# View task detail (tests getTaskDetail service)
+$CLI task view T-1
+
+# View task tree
+$CLI task tree T-1
+
+# Test lifecycle
+$CLI task edit T-1 --status running
+$CLI task comment T-1 -m "Test comment"
+
+# Clean up
+$CLI task delete T-1 -y
+```
+
+### Agent System
+
+```bash
+# List agents
+$CLI agent list
+
+# View agent detail
+$CLI agent view <agent-id>
+
+# Run agent (tests agent execution pipeline)
+$CLI agent run <agent-id> -m "Test prompt"
+```
+
+### Document & Knowledge Base
+
+```bash
+# List documents
+$CLI doc list
+
+# Create and view
+$CLI doc create -t "Test Doc" -c "Content here"
+$CLI doc view <doc-id>
+
+# Knowledge base
+$CLI kb list
+$CLI kb tree <kb-id>
+```
+
+### Model & Provider
+
+```bash
+# List models and providers
+$CLI model list
+$CLI provider list
+
+# Test provider connectivity
+$CLI provider test <provider-id>
+```
+
+## Dev-Test Cycle
+
+The standard cycle for backend development:
+
+```
+1. Make code changes (service/model/router/type)
+         |
+2. Run unit tests (fast feedback)
+   bunx vitest run --silent='passed-only' '<test-file>'
+         |
+3. Restart dev server (if server-side changes)
+   lsof -ti:3011 | xargs kill && pnpm run dev:next
+         |
+4. CLI verification (end-to-end)
+   LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts <command>
+         |
+5. Clean up test data
+```
+
+### When Server Restart is Needed
+
+| Change Location                           | Restart? |
+| ----------------------------------------- | -------- |
+| `lobehub/src/server/` (routers, services) | Yes      |
+| `lobehub/packages/database/` (models)     | Yes      |
+| `lobehub/packages/types/`                 | Yes      |
+| `lobehub/packages/prompts/`               | Yes      |
+| `lobehub/apps/cli/` (CLI code)            | No       |
+| `src/` (cloud overrides)                  | Yes      |
+
+### When Server Restart is NOT Needed
+
+CLI runs from source via `bun src/index.ts`, so any changes to `lobehub/apps/cli/src/` take effect immediately on next command invocation.
+
+## Troubleshooting
+
+| Issue                       | Solution                                                              |
+| --------------------------- | --------------------------------------------------------------------- |
+| `No authentication found`   | Run `login --server http://localhost:3011`                            |
+| `UNAUTHORIZED` on API calls | Token expired; re-run login                                           |
+| `ECONNREFUSED`              | Dev server not running; start with `pnpm run dev:next`                |
+| CLI shows old data/behavior | Server needs restart to pick up code changes                          |
+| `EADDRINUSE` on port 3011   | Server already running; kill with `lsof -ti:3011 \| xargs kill`       |
+| Login opens wrong server    | Must use `--server http://localhost:3011` flag (env var doesn't work) |
+
+## Credential Isolation
+
+| Mode       | Credential Dir                   | Server            |
+| ---------- | -------------------------------- | ----------------- |
+| Dev        | `lobehub/apps/cli/.lobehub-dev/` | `localhost:3011`  |
+| Production | `~/.lobehub/`                    | `app.lobehub.com` |
+
+The two environments are completely isolated. Dev mode credentials are gitignored.
@@ -111,7 +111,7 @@ Generate video from text prompt. This is an async operation.
 **Source**: `apps/cli/src/commands/generate/video.ts`

 ```bash
-lh gen video "A cat playing piano" -m < model > -p < provider > [options]
+lh gen video "A cat playing piano" -m <model> -p <provider> [options]
 ```

 | Option                      | Description              | Required |
@@ -259,13 +259,13 @@ Image and video generation use an async task pattern:
     UUID from the `async_tasks` table, not `gen_xxx`
   - Returns `{ status, error, generation }` (generation includes asset URLs on success)
   - Before querying, calls `checkTimeoutTasks` which marks tasks as `error` if they have been
-     `pending` or `processing` for more than \~5 minutes (`ASYNC_TASK_TIMEOUT = 298s`)
+     `pending` or `processing` for more than ~5 minutes (`ASYNC_TASK_TIMEOUT = 298s`)

 **Server routes**:

- `apps/server/src/routers/lambda/image/index.ts` — image creation (uses `authedProcedure` + `serverDatabase`)
- `apps/server/src/routers/lambda/video/index.ts` — video creation (uses `authedProcedure` + `serverDatabase`)
- `apps/server/src/routers/lambda/generation.ts` — status checking
+- `src/server/routers/lambda/image/index.ts` — image creation (uses `authedProcedure` + `serverDatabase`)
+- `src/server/routers/lambda/video/index.ts` — video creation (uses `authedProcedure` + `serverDatabase`)
+- `src/server/routers/lambda/generation.ts` — status checking
 - `packages/database/src/models/asyncTask.ts` — `AsyncTaskModel` including `checkTimeoutTasks`

 **Note**: Image/video routes do NOT use the `keyVaults` middleware — they read API keys from the database via `initModelRuntimeFromDB` or `createAsyncCaller`.
@@ -1,6 +1,6 @@
 ---
-name: data-fetching-architecture
-description: 'LobeHub data-fetching pipeline guide. Use for service layer, Zustand store, SWR, lambdaClient, useClientDataSWR, useFetchXxx hooks, or migrating useEffect fetches.'
+name: data-fetching
+description: Data fetching architecture guide using Service layer + Zustand Store + SWR. Use when implementing data fetching, creating services, working with store hooks, or migrating from useEffect. Triggers on data loading, API calls, service creation, or store data fetching tasks.
 user-invocable: false
 ---

@@ -1,71 +1,11 @@
 ---
 name: db-migrations
-description: 'Use for Drizzle migrations: schema/table/column changes, migration generation or regeneration, sequence conflicts after rebase, idempotent SQL review, or migration renames.'
+description: 'Use when generating or regenerating Drizzle migration files, changing database schema tables or columns, resolving migration sequence conflicts after rebase, reviewing migration SQL for idempotent patterns, or renaming migration files.'
 user-invocable: false
 ---

 # Database Migrations Guide

-## Development-stage schema changes
-
-Schema changes churn during feature development. When the schema changes before the migration has shipped, do not hand-edit the existing migration SQL to chase the new schema shape. Delete the draft migration artifacts added by this branch (SQL file, matching snapshot, and matching journal entry), then run the generator again and re-apply the normal migration review steps below.
-
-For example, if this branch's draft migration is `0110_add_verify_tables_and_ai_infra_id`:
-
-```bash
-# 1. Delete the draft SQL and its snapshot
-rm packages/database/migrations/0110_add_verify_tables_and_ai_infra_id.sql
-rm packages/database/migrations/meta/0110_snapshot.json
-
-# 2. Remove the matching 0110 entry from the journal's "entries" array
-#    packages/database/migrations/meta/_journal.json
-
-# 3. Regenerate from the current schema
-bun run db:generate
-```
-
-This keeps the generated SQL, snapshot, and journal aligned with the actual schema. Manual SQL edits are reserved for review-time hardening such as idempotent clauses, custom extension SQL, and meaningful filename/tag updates.
-
-Before release, if a feature branch accumulated multiple development-only migrations, consolidate them into one migration when possible. Production does not need to replay every intermediate draft shape, and fewer migrations reduce deploy-time risk.
-
-For example, if this branch added `0110`, `0111`, and `0112`, delete all three drafts and regenerate a single migration:
-
-```bash
-# 1. Delete every draft SQL and snapshot this branch added
-rm packages/database/migrations/011{0,1,2}_*.sql
-rm packages/database/migrations/meta/011{0,1,2}_snapshot.json
-
-# 2. Remove the 0110/0111/0112 entries from the journal's "entries" array
-#    packages/database/migrations/meta/_journal.json
-
-# 3. Regenerate one migration covering the full schema delta
-bun run db:generate
-```
-
-Do not make a migration compatible with earlier development-only versions of the same branch. While the migration has not shipped, there is no production history to preserve. Fix local/dev databases directly with whatever SQL is simplest (drop the draft table, rename a column, delete draft rows), then regenerate the branch migration from the current schema.
-
-For example, if an earlier draft on this branch created `signup_attempt_id` and you have since renamed it to `user_signup_log_id`, do not add a compatibility `ALTER ... RENAME` to the migration. Just fix the dev DB directly (see the `access-pg` skill for the `bun -e` + `pg` pattern), then regenerate:
-
-```bash
-# Fix the dev DB to match the new schema (simplest SQL wins)
-set -a && source .env && set +a && bun -e '
-import pg from "pg";
-const client = new pg.Client({ connectionString: process.env.DATABASE_URL });
-await client.connect();
-await client.query("ALTER TABLE user_signup_logs DROP COLUMN signup_attempt_id");
-await client.end();
-'
-
-# Regenerate so the migration reflects only the final shape
-bun run db:generate
-```
-
-After a migration has reached production or the target default branch, treat it as immutable: add a follow-up migration instead of rewriting it.
-
-## Rebase conflicts
-
-When a rebase conflicts in migration files, keep the upstream/default-branch migrations and remove all migrations introduced by the current feature branch. Complete the rebase, then regenerate this branch's migration from the rebased schema. This avoids merging two independent snapshots or hand-splicing journal entries.
-
 ## Step 1: Generate Migrations

 ```bash
@@ -1,6 +1,6 @@
 ---
 name: debug-package
-description: 'LobeHub debug package and log namespace guide. Use when adding debug() logging, choosing lobe-* namespaces, troubleshooting DEBUG output, localStorage.debug, or log format specifiers.'
+description: "Guide for the `debug` npm package and LobeHub log namespaces (lobe-server:*, lobe-desktop:*, lobe-client:*, lobe-*-router:*). Use whenever adding a `debug(...)` logger, picking a namespace for new server/desktop/client/router code, troubleshooting why DEBUG=lobe-* logs don't show up, or when the user asks to 'add logging', 'add a logger', 'instrument this', 'trace this call', 'why isn't my log printing', or mentions `debug(`, `DEBUG=`, `localStorage.debug`, or log format specifiers like %O / %o / %s / %d in a LobeHub codebase."
 user-invocable: false
 ---

@@ -57,7 +57,7 @@ process.env.DEBUG = 'lobe-*';
 ## Example

 ```typescript
-// apps/server/src/routers/edge/market/index.ts
+// src/server/routers/edge/market/index.ts
 import debug from 'debug';

 const log = debug('lobe-edge-router:market');
@@ -1,6 +1,6 @@
 ---
 name: docs-changelog
-description: 'Write website changelog pages under docs/changelog/*.mdx. Use for EN/ZH product update posts, changelog posts, update-log copy, or docs changelog edits; not GitHub Release notes.'
+description: 'Writing guide for website changelog pages under docs/changelog/*.mdx. Use when creating or editing product update posts in EN/ZH. Not for GitHub Release notes.'
 ---

 # Docs Changelog Writing Guide
@@ -1,29 +1,21 @@
 ---
 name: drizzle
-description: 'LobeHub Drizzle ORM schema and query style. Use for pgTable schemas, indexes, joins, inferred types, db.select/db.query, schema fields, foreign keys, junction tables, or postgres query patterns.'
+description: "Drizzle ORM schema authoring and query style for LobeHub (postgres, strict mode). Use when editing anything under `src/database/schemas/`, defining `pgTable` columns/indexes/junction tables, spreading `...timestamps`, generating `createInsertSchema`/`$inferSelect`/`$inferInsert` types, writing `db.select().from(...).leftJoin(...)` queries, or deciding when to split a relational `with:` into two queries. Triggers on `pgTable`, `db.select`, `db.query`, `eq()`/`and()`/`inArray()`, `uniqueIndex`, `primaryKey`, `references({ onDelete })`, 'add a column', 'new table', 'foreign key', 'junction table', 'schema field'. For migration files specifically, see the `db-migrations` skill."
 user-invocable: false
 ---

 # Drizzle ORM Schema Style Guide

-> **Adding a Model or Repository?** Ship a sibling test in the same PR — every new
-> file under `packages/database/src/models/**` or `src/repositories/**` needs a
-> matching `__tests__/<name>.test.ts`. See the **testing** skill
-> (`.agents/skills/testing/references/db-model-test.md`) for the `getTestDB()`
-> integration pattern, user-isolation tests, the BM25 `describe.skipIf(!isServerDB)`
-> guard, and schema gotchas. CI's coverage patch gate won't reliably catch a brand-new
-> untested file, so this is on you.
-
 ## Configuration

 - Config: `drizzle.config.ts`
- Schemas: `packages/database/src/schemas/`
- Migrations: `packages/database/migrations/`
+- Schemas: `src/database/schemas/`
+- Migrations: `src/database/migrations/`
 - Dialect: `postgresql` with `strict: true`

 ## Helper Functions

-Location: `packages/database/src/schemas/_helpers.ts`
+Location: `src/database/schemas/_helpers.ts`

 - `timestamptz(name)`: Timestamp with timezone
 - `createdAt()`, `updatedAt()`, `accessedAt()`: Standard timestamp columns
@@ -33,42 +25,16 @@ Location: `packages/database/src/schemas/_helpers.ts`

 - **Tables**: Plural snake_case (`users`, `session_groups`)
 - **Columns**: snake_case (`user_id`, `created_at`)
- **New tables**: Check nearby existing tables before naming a new one. Preserve
-  the established noun family and suffix. For example, if the user-scoped table
-  is `user_xxx_logs`, the workspace-scoped counterpart should be
-  `workspace_xxx_logs`, not `workspace_xxx_records` or another new synonym.
-
-```typescript
-// ✅ Good: follows the existing user/workspace table family.
-export const userSignupLogs = pgTable('user_signup_logs', { ... });
-export const workspaceSignupLogs = pgTable('workspace_signup_logs', { ... });
-
-// ❌ Bad: introduces a new suffix for the same concept.
-export const workspaceSignupRecords = pgTable('workspace_signup_records', { ... });
-```

 ## Column Definitions

 ### Primary Keys

-Do not use auto-incrementing primary keys (`serial`, `bigserial`, generated
-identity columns). They create sequence-state problems during cross-database
-migrations, restores, and data copy jobs. Prefer text IDs from application
-generators (`idGenerator`, `createNanoId`) or `uuid` for internal tables.
-
-Keep `$defaultFn(...)` when a table normally owns ID generation. Callers can
-still pass an explicit `id`; the default only runs when the insert omits it. Do
-not remove the default just because one flow needs to supply a request-scoped ID.
-
 ```typescript
-// ✅ Good: app-generated text ID; explicit inserts can still override it.
 id: text('id')
  .primaryKey()
  .$defaultFn(() => idGenerator('agents'))
  .notNull(),
-
-// ❌ Bad: sequence state is fragile across DB migrations and restores.
-id: serial('id').primaryKey(),
 ```

 ID prefixes make entity types distinguishable. For internal tables, use `uuid`.
@@ -87,80 +53,6 @@ userId: text('user_id')
 ...timestamps,  // Spread from _helpers.ts
 ```

-### Optional and Undefined Values
-
-Do not introduce artificial sentinel strings for missing values, such as
-`unknown`, unless the domain already has that explicit state and existing code
-uses it consistently. Prefer nullable columns, optional TypeScript fields, or a
-separate concrete status enum when the value is genuinely absent.
-
-```typescript
-// ✅ Good: absent until the final stage writes a real decision.
-export type UserSignupLogFinalDecision = 'allow' | 'block' | 'error';
-
-finalDecision: varchar('final_decision', { length: 32 }).$type<UserSignupLogFinalDecision>(),
-
-// ❌ Bad: invents a new state that callers now need to handle everywhere.
-export type UserSignupLogFinalDecision = 'allow' | 'block' | 'error' | 'unknown';
-
-finalDecision: varchar('final_decision', { length: 32 })
-  .$type<UserSignupLogFinalDecision>()
-  .notNull()
-  .default('unknown');
-```
-
-### Field Descriptions
-
-For columns whose meaning is not obvious from the name alone, add JSDoc on the
-schema field. Include a concrete example when it clarifies the stored value or
-the lifecycle moment that writes it. This is especially important for external
-IDs, lifecycle statuses, denormalized snapshots, JSONB signals, and fields whose
-name could mean either a request ID or a persisted row ID.
-
-```typescript
-// ✅ Good: explain the table's business object first, then only document
-// non-obvious lifecycle or risk-control fields.
-/**
- * User signup logs - one row per signup flow, collecting stage-level
- * risk-control decisions before and after the auth provider creates a user.
- */
-export const userSignupLogs = pgTable('user_signup_logs', {
-  /** Final signup outcome reason, for example user_created, llm_block, or guard_error */
-  finalReason: text('final_reason'),
-
-  /** Aggregated risk level derived from stage decisions, for example block -> high */
-  riskLevel: varchar('risk_level', { length: 16 }).$type<UserSignupLogRiskLevel>(),
-
-  /** Ordered stage-level decisions and metadata grouped by signup review stage */
-  stageResults: jsonb('stage_results').$type<UserSignupLogStageResults>(),
-});
-
-// ❌ Bad: comments restate obvious column names without adding domain meaning.
-/** User email */
-email: text('email'),
-```
-
-### JSONB Types
-
-Avoid `Record<string, unknown>` or similarly loose JSONB types for schema
-columns. Define a concrete interface that describes the expected JSON shape, even
-when most properties are optional. This keeps callers, migrations, and review
-queries aligned on the same data contract.
-
-```typescript
-interface UserSignupLogMetadata {
-  payloadPath?: string;
-  requestPath?: string;
-}
-
-metadata: jsonb('metadata').$type<UserSignupLogMetadata>(),
-```
-
-```typescript
-// ❌ Bad: hides the contract and makes downstream access untyped.
-metadata: jsonb('metadata').$type<Record<string, unknown>>(),
-```
-
 ### Indexes

 ```typescript
@@ -282,78 +174,6 @@ const rows = await this.db
  .groupBy(agentEvalDatasets.id);
 ```

-### Raw SQL and Advanced Queries
-
-Prefer Drizzle builders whenever the query reads clearly with `select`,
-`insert().select()`, `update().from()`, joins, CTEs, and `groupBy` — this keeps
-table/column references tied to schema, so changes surface as TypeScript errors.
-Within a builder, expression-level `sql<T>` is fine for features lacking a helper
-(JSON path, casts, aggregates, `CASE`, `NOW()`). Row locks are clauses, not
-expressions — use `.for('update')`, never raw `FOR UPDATE`.
-
-Use `COALESCE` only when null-handling is part of required DB semantics (nullable
-JSONB append/merge, "keep first non-null"). Don't scatter
-`COALESCE(excluded.col, current.col)` across ordinary upsert scalars just to avoid
-an update object — build `set` from defined values only, and hide any remaining
-SQL behind named helpers (`appendJsonbArray`, `mergeJsonbObject`, `keepFirstValue`)
-so the method reads as business intent, not SQL plumbing.
-
-```typescript
-// ✅ Scalars included only when present; SQL hidden behind a named helper.
-const updateValues = compactUndefined({
-  email: record.email ?? undefined,
-  ip: record.ip ?? undefined,
-});
-await db.insert(userSignupLogs).values(values).onConflictDoUpdate({
-  set: { ...updateValues, stageResults: appendStageResult(stage, result), updatedAt: now },
-  target: userSignupLogs.id,
-});
-
-// ❌ Every scalar becomes SQL plumbing.
-set: {
-  email: sql`COALESCE(excluded.email, ${userSignupLogs.email})`,
-  ip: sql`COALESCE(excluded.ip, ${userSignupLogs.ip})`,
-}
-```
-
-When refactoring raw SQL:
-
- Preserve query shape on latency-sensitive paths. If raw SQL is one roundtrip,
-  don't split it into multiple depth-based queries just to drop `execute`.
- Use `$with(...)` + `insert().select()` / `update().from()` for multi-step
-  single-roundtrip writes Drizzle can express.
- Don't rely on `execute<MyRow>(sql...)` for safety — it types rows but doesn't keep
-  selected columns in sync with schema changes.
- If only a PostgreSQL feature Drizzle can't express works, keep the raw SQL and
-  tighten it: schema refs in interpolations, explicit user scope, a narrow row
-  interface, and regression tests.
-
-Recursive CTEs are the canonical "keep raw" case — there's no clean `WITH RECURSIVE`
-builder, and a rewrite would add depth-based roundtrips:
-
-```typescript
-interface TaskTreeRow {
-  id: string;
-  parent_task_id: string | null;
-}
-
-// execute<T> acceptable: no clean WITH RECURSIVE builder. Keep schema refs in the
-// interpolations and scope every leg to the user.
-const { rows } = await db.execute<TaskTreeRow>(sql`
-  WITH RECURSIVE task_tree AS (
-    SELECT ${tasks.id}, ${tasks.parentTaskId}
-    FROM ${tasks}
-    WHERE ${tasks.id} = ${rootTaskId} AND ${tasks.createdByUserId} = ${userId}
-    UNION ALL
-    SELECT ${tasks.id}, ${tasks.parentTaskId}
-    FROM ${tasks}
-    JOIN task_tree ON ${tasks.parentTaskId} = task_tree.id
-    WHERE ${tasks.createdByUserId} = ${userId}
-  )
-  SELECT * FROM task_tree
-`);
-```
-
 ### One-to-Many (Separate Queries)

 When you need a parent record with its children, use two queries instead of relational `with:`:
@@ -1,6 +1,6 @@
 ---
 name: heterogeneous-agent
-description: 'Implement or debug LobeHub heterogeneous agents. Use for Claude Code/Codex adapters, external CLI agents, event mapping, IPC, persistence, tool-call chains, sessions, traces, or adapter bugs.'
+description: Guide for implementing and debugging LobeHub heterogeneous agent integrations such as Claude Code, Codex, and future external CLI agents. Use when working on adapter event mapping, Electron IPC transport, renderer persistence, tool-call chaining, subagent threads, resume/session handling, or regressions like mixed multi-tool messages, broken step boundaries, stuck tool loading, and orphan tool messages. Triggers on 'heterogeneous agent', 'hetero agent', '异构 agent', 'claude code adapter', 'codex adapter', 'external agent CLI', '孤立 tool 消息', 'raw Codex trace', or adapter/executor bugs.
 ---

 # Heterogeneous Agent Development
@@ -241,6 +241,6 @@ When the bug comes from a real trace, distill it into the closest existing test
 3. Add or update the narrowest failing test near the broken layer.
 4. Fix the smallest layer that can explain the symptom.
 5. Re-run focused tests.
-6. Only then do an Electron smoke test with the `agent-testing` skill if UI confirmation is still needed.
+6. Only then do an Electron smoke test with the `local-testing` skill if UI confirmation is still needed.

 Do not start with a broad Electron repro if a raw trace or adapter test can prove the fault zone faster.
@@ -1,6 +1,6 @@
 ---
 name: hotkey
-description: 'Add or edit LobeHub keyboard shortcuts. Use for HotkeyEnum, HOTKEYS_REGISTRATION, combineKeys, useHotkeyById, tooltip hotkeys, shortcut scope, conflicts, or Cmd/Ctrl key combos.'
+description: "Adding or editing keyboard shortcuts in LobeHub. Use when registering a new hotkey, changing a key combo, scoping a shortcut to chat vs global, or wiring a hotkey hook + tooltip. Covers the 5-step flow: add to `HotkeyEnum` in `src/types/hotkey.ts`, register in `HOTKEYS_REGISTRATION` (`src/const/hotkeys.ts`) with `combineKeys([Key.Mod, …])`, add i18n in `src/locales/default/hotkey.ts`, expose via `useHotkeyById` in `src/hooks/useHotkeys/`, and render `<Tooltip hotkey={…}>`. Triggers on `HotkeyEnum`, `HOTKEYS_REGISTRATION`, `useHotkeyById`, `combineKeys`, `Key.Mod`/`Key.Shift`, 'add a hotkey', 'add a shortcut', '加快捷键', '快捷键', 'Cmd+K', 'keyboard shortcut', 'hotkey scope', 'hotkey conflict'."
 user-invocable: false
 ---

@@ -1,6 +1,6 @@
 ---
 name: i18n
-description: 'LobeHub i18n with react-i18next. Use for user-facing strings, locale keys, namespaces, useTranslation, t(), interpolation, zh-CN/en-US previews, hardcoded UI copy, or pnpm i18n.'
+description: "LobeHub internationalization with react-i18next. Use when adding any user-facing string in `.tsx`/`.ts` files, creating or renaming a key under `src/locales/default/{namespace}.ts`, deciding the `{feature}.{context}.{action}` flat-key pattern, wiring a new namespace into `src/locales/default/index.ts`, or translating zh-CN/en-US JSON for dev preview. Triggers on `useTranslation`, `t('foo.bar')`, `i18next.t`, `{{variable}}` interpolation, hardcoded UI strings (zh or en) that should be extracted, 'add i18n', '加 i18n key', '翻译', 'locale key', 'namespace', 'pnpm i18n'."
 user-invocable: false
 ---

@@ -1,6 +1,6 @@
 ---
 name: linear
-description: 'Linear issue management. Use for LOBE-xxx issues, Linear links, PRs referencing Linear, retrieving issues, updating status, completion comments, or sub-issue trees.'
+description: "Linear issue management. Use when the user mentions LOBE-xxx issue IDs (e.g. LOBE-4540), says 'linear' / 'linear issue' / 'link linear', or when creating PRs that reference Linear issues. Covers retrieving issues, updating status, adding completion comments, and creating sub-issue trees."
 user-invocable: false
 ---

@@ -0,0 +1,520 @@
+---
+name: local-testing
+description: >
+  Local app and bot testing. Uses agent-browser CLI for Electron/web app UI testing,
+  and osascript (AppleScript) for controlling native macOS apps (WeChat, Discord, Telegram, Slack, Lark/飞书, QQ)
+  to test bots. Triggers on 'local test', 'test in electron', 'test desktop', 'test bot',
+  'bot test', 'test in discord', 'test in telegram', 'test in slack', 'test in weixin',
+  'test in wechat', 'test in lark', 'test in feishu', 'test in qq',
+  'manual test', 'osascript', or UI/bot verification tasks.
+---
+
+# Local App & Bot Testing
+
+Two approaches for local testing on macOS:
+
+| Approach                    | Tool                | Best For                                             |
+| --------------------------- | ------------------- | ---------------------------------------------------- |
+| **agent-browser + CDP**     | `agent-browser` CLI | Electron apps, web apps (DOM access, JS eval)        |
+| **osascript (AppleScript)** | `osascript -e`      | Native macOS apps (WeChat, Discord, Telegram, Slack) |
+
+---
+
+# Part 1: agent-browser (Electron / Web Apps)
+
+Use `agent-browser` to automate Chromium-based apps via Chrome DevTools Protocol.
+
+Install via `npm i -g agent-browser`, `brew install agent-browser`, or `cargo install agent-browser`. Run `agent-browser install` to download Chrome. Run `agent-browser upgrade` to update.
+
+## Core Workflow
+
+Every browser automation follows this pattern:
+
+1. **Navigate**: `agent-browser open <url>`
+2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
+3. **Interact**: Use refs to click, fill, select
+4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
+
+```bash
+agent-browser open https://example.com/form
+agent-browser snapshot -i
+# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
+
+agent-browser fill @e1 "user@example.com"
+agent-browser fill @e2 "password123"
+agent-browser click @e3
+agent-browser wait --load networkidle
+agent-browser snapshot -i # Check result
+```
+
+## Command Chaining
+
+```bash
+# Chain open + wait + snapshot in one call
+agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
+```
+
+Use `&&` when you don't need to read intermediate output. Run commands separately when you need to parse output first (e.g., snapshot to discover refs, then interact).
+
+## Essential Commands
+
+```bash
+# Navigation
+agent-browser open <url>              # Navigate (aliases: goto, navigate)
+agent-browser close                   # Close browser
+agent-browser close --all             # Close all active sessions
+
+# Snapshot
+agent-browser snapshot -i             # Interactive elements with refs (recommended)
+agent-browser snapshot -s "#selector" # Scope to CSS selector
+
+# Interaction (use @refs from snapshot)
+agent-browser click @e1               # Click element
+agent-browser click @e1 --new-tab     # Click and open in new tab
+agent-browser fill @e2 "text"         # Clear and type text
+agent-browser type @e2 "text"         # Type without clearing
+agent-browser select @e1 "option"     # Select dropdown option
+agent-browser check @e1               # Check checkbox
+agent-browser press Enter             # Press key
+agent-browser keyboard type "text"    # Type at current focus (no selector)
+agent-browser keyboard inserttext "text"  # Insert without key events
+agent-browser scroll down 500         # Scroll page
+agent-browser scroll down 500 --selector "div.content"  # Scroll within container
+
+# Get information
+agent-browser get text @e1            # Get element text
+agent-browser get url                 # Get current URL
+agent-browser get title               # Get page title
+agent-browser get cdp-url             # Get CDP WebSocket URL
+
+# Wait
+agent-browser wait @e1                # Wait for element
+agent-browser wait --load networkidle # Wait for network idle
+agent-browser wait --url "**/page"    # Wait for URL pattern
+agent-browser wait 2000               # Wait milliseconds
+agent-browser wait --text "Welcome"   # Wait for text to appear
+agent-browser wait --fn "!document.body.innerText.includes('Loading...')"  # Wait for text to disappear
+agent-browser wait "#spinner" --state hidden  # Wait for element to disappear
+
+# Downloads
+agent-browser download @e1 ./file.pdf          # Click element to trigger download
+agent-browser wait --download ./output.zip     # Wait for any download to complete
+
+# Network
+agent-browser network requests                 # Inspect tracked requests
+agent-browser network requests --type xhr,fetch  # Filter by resource type
+agent-browser network requests --method POST   # Filter by HTTP method
+agent-browser network route "**/api/*" --abort # Block matching requests
+agent-browser network har start                # Start HAR recording
+agent-browser network har stop ./capture.har   # Stop and save HAR file
+
+# Viewport & Device Emulation
+agent-browser set viewport 1920 1080          # Set viewport size (default: 1280x720)
+agent-browser set viewport 1920 1080 2        # 2x retina
+agent-browser set device "iPhone 14"          # Emulate device (viewport + user agent)
+
+# Capture
+agent-browser screenshot              # Screenshot to temp dir
+agent-browser screenshot --full       # Full page screenshot
+agent-browser screenshot --annotate   # Annotated screenshot with numbered element labels
+agent-browser pdf output.pdf          # Save as PDF
+
+# Clipboard
+agent-browser clipboard read          # Read text from clipboard
+agent-browser clipboard write "text"  # Write text to clipboard
+agent-browser clipboard copy          # Copy current selection
+agent-browser clipboard paste         # Paste from clipboard
+
+# Dialogs (alert, confirm, prompt, beforeunload)
+agent-browser dialog accept           # Accept dialog
+agent-browser dialog accept "input"   # Accept prompt dialog with text
+agent-browser dialog dismiss          # Dismiss/cancel dialog
+agent-browser dialog status           # Check if dialog is open
+
+# Diff (compare page states)
+agent-browser diff snapshot                        # Compare current vs last snapshot
+agent-browser diff screenshot --baseline before.png  # Visual pixel diff
+agent-browser diff url <url1> <url2>               # Compare two pages
+
+# Streaming
+agent-browser stream enable           # Start WebSocket streaming
+agent-browser stream status           # Inspect streaming state
+agent-browser stream disable          # Stop streaming
+```
+
+## Batch Execution
+
+```bash
+echo '[
+  ["open", "https://example.com"],
+  ["snapshot", "-i"],
+  ["click", "@e1"],
+  ["screenshot", "result.png"]
+]' | agent-browser batch --json
+```
+
+## Authentication
+
+```bash
+# Option 1: Auth vault (credentials stored encrypted)
+echo "$PASSWORD" | agent-browser auth save myapp --url https://app.example.com/login --username user --password-stdin
+agent-browser auth login myapp
+
+# Option 2: Session name (auto-save/restore cookies + localStorage)
+agent-browser --session-name myapp open https://app.example.com/login
+agent-browser close                                                       # State auto-saved
+agent-browser --session-name myapp open https://app.example.com/dashboard # Auto-restored
+
+# Option 3: Persistent profile
+agent-browser --profile ~/.myapp open https://app.example.com/login
+
+# Option 4: State file
+agent-browser state save auth.json
+agent-browser state load auth.json
+```
+
+### LobeHub dev server — inject better-auth cookie
+
+`agent-browser --headed` on macOS can create an off-screen Chromium window, blocking manual login. For a local LobeHub dev server (e.g. `localhost:3011`), copy the `better-auth.session_token` cookie out of a **Network request** in the user's own Chrome DevTools and load it via `state load`. See [references/agent-browser-login.md](./references/agent-browser-login.md) for the full recipe.
+
+## Semantic Locators (Alternative to Refs)
+
+```bash
+agent-browser find text "Sign In" click
+agent-browser find label "Email" fill "user@test.com"
+agent-browser find role button click --name "Submit"
+agent-browser find placeholder "Search" type "query"
+agent-browser find testid "submit-btn" click
+```
+
+## JavaScript Evaluation (eval)
+
+```bash
+# Simple expressions
+agent-browser eval 'document.title'
+
+# Complex JS: use --stdin with heredoc (RECOMMENDED)
+agent-browser eval --stdin << 'EVALEOF'
+JSON.stringify(
+  Array.from(document.querySelectorAll("img"))
+    .filter(i => !i.alt)
+    .map(i => ({ src: i.src.split("/").pop(), width: i.width }))
+)
+EVALEOF
+
+# Base64 encoding (avoids all shell escaping issues)
+agent-browser eval -b "$(echo -n 'document.title' | base64)"
+```
+
+## Ref Lifecycle
+
+Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after clicking links/buttons that navigate, form submissions, or dynamic content loading.
+
+## Annotated Screenshots (Vision Mode)
+
+```bash
+agent-browser screenshot --annotate
+# Output includes the image path and a legend:
+#   [1] @e1 button "Submit"
+#   [2] @e2 link "Home"
+agent-browser click @e2 # Click using ref from annotated screenshot
+```
+
+## Parallel Sessions
+
+```bash
+agent-browser --session site1 open https://site-a.com
+agent-browser --session site2 open https://site-b.com
+agent-browser session list
+```
+
+## Connect to Existing Chrome
+
+```bash
+agent-browser --auto-connect snapshot # Auto-discover running Chrome
+agent-browser --cdp 9222 snapshot     # Explicit CDP port
+```
+
+## iOS Simulator (Mobile Safari)
+
+```bash
+agent-browser device list
+agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
+agent-browser -p ios snapshot -i
+agent-browser -p ios tap @e1
+agent-browser -p ios swipe up
+agent-browser -p ios screenshot mobile.png
+agent-browser -p ios close
+```
+
+## Observability Dashboard
+
+```bash
+agent-browser dashboard install
+agent-browser dashboard start # Background server on port 4848
+agent-browser dashboard stop
+```
+
+## Cloud Providers
+
+Use `-p <provider>` to run against cloud browsers: `agentcore`, `browserbase`, `browserless`, `browseruse`, `kernel`.
+
+## Browser Engine Selection
+
+```bash
+agent-browser --engine lightpanda open example.com # 10x faster, 10x less memory
+```
+
+## Electron (LobeHub Desktop)
+
+### Setup / Teardown
+
+Use the `electron-dev.sh` script to manage the Electron dev environment. It handles process lifecycle, waits for SPA readiness, and reliably kills all child processes (main + helpers + vite).
+
+```bash
+SCRIPT=".agents/skills/local-testing/scripts/electron-dev.sh"
+
+# Start Electron dev with CDP (idempotent — skips if already running)
+$SCRIPT start
+
+# Check if Electron is running and CDP is reachable
+$SCRIPT status
+
+# Kill all Electron-related processes (main + helper + vite)
+$SCRIPT stop
+
+# Force fresh restart
+$SCRIPT restart
+```
+
+After `start` succeeds, connect with: `agent-browser --cdp 9222 snapshot -i`
+
+**Always run `$SCRIPT stop` when done testing** — `pkill -f "Electron"` alone won't catch all helper processes.
+
+#### Environment Variables
+
+| Variable          | Default                 | Description                              |
+| ----------------- | ----------------------- | ---------------------------------------- |
+| `CDP_PORT`        | `9222`                  | Chrome DevTools Protocol port            |
+| `ELECTRON_LOG`    | `/tmp/electron-dev.log` | Electron process log                     |
+| `ELECTRON_WAIT_S` | `60`                    | Max seconds to wait for Electron process |
+| `RENDERER_WAIT_S` | `60`                    | Max seconds to wait for SPA to load      |
+
+### LobeHub-Specific Patterns
+
+#### Access Zustand Store State
+
+```bash
+agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
+(function() {
+  var chat = window.__LOBE_STORES.chat();
+  var ops = Object.values(chat.operations);
+  return JSON.stringify({
+    ops: ops.map(function(o) { return { type: o.type, status: o.status }; }),
+    activeAgent: chat.activeAgentId,
+    activeTopic: chat.activeTopicId,
+  });
+})()
+EVALEOF
+```
+
+#### Find and Use the Chat Input
+
+```bash
+# The chat input is contenteditable — must use -C flag
+agent-browser --cdp 9222 snapshot -i -C 2>&1 | grep "editable"
+
+agent-browser --cdp 9222 click @e48
+agent-browser --cdp 9222 type @e48 "Hello world"
+agent-browser --cdp 9222 press Enter
+```
+
+#### Wait for Agent to Complete
+
+```bash
+agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
+(function() {
+  var chat = window.__LOBE_STORES.chat();
+  var ops = Object.values(chat.operations);
+  var running = ops.filter(function(o) { return o.status === 'running'; });
+  return running.length === 0 ? 'done' : 'running: ' + running.length;
+})()
+EVALEOF
+```
+
+#### Install Error Interceptor
+
+```bash
+agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
+(function() {
+  window.__CAPTURED_ERRORS = [];
+  var orig = console.error;
+  console.error = function() {
+    var msg = Array.from(arguments).map(function(a) {
+      if (a instanceof Error) return a.message;
+      return typeof a === 'object' ? JSON.stringify(a) : String(a);
+    }).join(' ');
+    window.__CAPTURED_ERRORS.push(msg);
+    orig.apply(console, arguments);
+  };
+  return 'installed';
+})()
+EVALEOF
+
+# Later, check captured errors:
+agent-browser --cdp 9222 eval "JSON.stringify(window.__CAPTURED_ERRORS)"
+```
+
+## Chrome / Web Apps
+
+```bash
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
+  --remote-debugging-port=9222 \
+  --user-data-dir=/tmp/chrome-test-profile \
+  "<URL>" &
+sleep 5
+agent-browser --cdp 9222 snapshot -i
+
+# Or auto-discover running Chrome with remote debugging
+agent-browser --auto-connect snapshot -i
+```
+
+---
+
+# Part 2: osascript (Native macOS App Bot Testing)
+
+Use AppleScript via `osascript` to control native macOS desktop apps for bot testing. Works with any app that supports macOS Accessibility — no CDP or Chromium needed.
+
+The pattern is the same for every platform:
+
+1. **Activate** the app (`tell application "X" to activate`)
+2. **Navigate** to a channel/chat (Quick Switcher `Cmd+K` or Search `Cmd+F`)
+3. **Send** a message (clipboard paste `Cmd+V` + Enter)
+4. **Wait** for the bot response
+5. **Screenshot** for verification (`screencapture` + `Read` tool)
+
+## Per-Platform References
+
+Pick the file for your target platform — each contains activation, navigation, send-message, and verification snippets specific to that app:
+
+| Platform      | Reference                                          | Quick switcher |
+| ------------- | -------------------------------------------------- | -------------- |
+| Discord       | [references/discord.md](./references/discord.md)   | `Cmd+K`        |
+| Slack         | [references/slack.md](./references/slack.md)       | `Cmd+K`        |
+| Telegram      | [references/telegram.md](./references/telegram.md) | `Cmd+F`        |
+| WeChat / 微信 | [references/wechat.md](./references/wechat.md)     | `Cmd+F`        |
+| Lark / 飞书   | [references/lark.md](./references/lark.md)         | `Cmd+K`        |
+| QQ            | [references/qq.md](./references/qq.md)             | `Cmd+F`        |
+
+For **shared osascript patterns** (activate, type, paste, screenshot, read accessibility, common workflow template, gotchas), see [references/osascript-common.md](./references/osascript-common.md). Read this first if you're new to osascript automation.
+
+---
+
+# Scripts
+
+Ready-to-use scripts in `.agents/skills/local-testing/scripts/`:
+
+| Script                    | Usage                                               |
+| ------------------------- | --------------------------------------------------- |
+| `electron-dev.sh`         | Manage Electron dev env (start/stop/status/restart) |
+| `capture-app-window.sh`   | Capture screenshot of a specific app window         |
+| `record-electron-demo.sh` | Record Electron app demo with ffmpeg                |
+| `record-app-screen.sh`    | Record app screen (video + screenshots, start/stop) |
+| `test-discord-bot.sh`     | Send message to Discord bot via osascript           |
+| `test-slack-bot.sh`       | Send message to Slack bot via osascript             |
+| `test-telegram-bot.sh`    | Send message to Telegram bot via osascript          |
+| `test-wechat-bot.sh`      | Send message to WeChat bot via osascript            |
+| `test-lark-bot.sh`        | Send message to Lark / 飞书 bot via osascript       |
+| `test-qq-bot.sh`          | Send message to QQ bot via osascript                |
+
+### Window Screenshot Utility
+
+`capture-app-window.sh` captures a screenshot of a specific app window using `screencapture -l <windowID>`. It uses Swift + CGWindowList to find the window by process name, so screenshots work correctly even when the window is on an external monitor or behind other windows.
+
+```bash
+# Standalone usage
+./.agents/skills/local-testing/scripts/capture-app-window.sh "Discord" /tmp/discord.png
+./.agents/skills/local-testing/scripts/capture-app-window.sh "Slack" /tmp/slack.png
+./.agents/skills/local-testing/scripts/capture-app-window.sh "WeChat" /tmp/wechat.png
+```
+
+All bot test scripts use this utility automatically for their screenshots.
+
+### Bot Test Scripts
+
+All bot test scripts share the same interface:
+
+```bash
+./.agents/skills/local-testing/scripts/test-<platform>-bot.sh <channel_or_contact> <message> [wait_seconds] [screenshot_path]
+```
+
+Examples:
+
+```bash
+# Discord — test a bot in #bot-testing channel
+./.agents/skills/local-testing/scripts/test-discord-bot.sh "bot-testing" "!ping"
+./.agents/skills/local-testing/scripts/test-discord-bot.sh "bot-testing" "/ask Tell me a joke" 30
+
+# Slack — test a bot in #bot-testing channel
+./.agents/skills/local-testing/scripts/test-slack-bot.sh "bot-testing" "@mybot hello"
+./.agents/skills/local-testing/scripts/test-slack-bot.sh "bot-testing" "/ask What is 2+2?" 20
+
+# Telegram — test a bot by username
+./.agents/skills/local-testing/scripts/test-telegram-bot.sh "MyTestBot" "/start"
+./.agents/skills/local-testing/scripts/test-telegram-bot.sh "GPTBot" "Hello" 60
+
+# WeChat — test a bot or send to a contact
+./.agents/skills/local-testing/scripts/test-wechat-bot.sh "文件传输助手" "test message" 5
+./.agents/skills/local-testing/scripts/test-wechat-bot.sh "MyBot" "Tell me a joke" 30
+
+# Lark/飞书 — test a bot in a group chat
+./.agents/skills/local-testing/scripts/test-lark-bot.sh "bot-testing" "@MyBot hello"
+./.agents/skills/local-testing/scripts/test-lark-bot.sh "bot-testing" "Help me with this" 30
+
+# QQ — test a bot in a group or direct chat
+./.agents/skills/local-testing/scripts/test-qq-bot.sh "bot-testing" "Hello bot" 15
+./.agents/skills/local-testing/scripts/test-qq-bot.sh "MyBot" "/help" 10
+```
+
+Each script: activates the app, navigates to the channel/contact, pastes the message via clipboard, sends, waits, and takes a screenshot. Use the `Read` tool on the screenshot for visual verification.
+
+---
+
+# Screen Recording
+
+Record automated demos using `record-app-screen.sh` (start/stop lifecycle, CDP screenshots + ffmpeg assembly). See [references/record-app-screen.md](references/record-app-screen.md) for full documentation.
+
+```bash
+./.agents/skills/local-testing/scripts/electron-dev.sh start
+./.agents/skills/local-testing/scripts/record-app-screen.sh start my-demo
+# ... run automation ...
+./.agents/skills/local-testing/scripts/record-app-screen.sh stop
+```
+
+Outputs to `.records/` directory (gitignored): `<name>.mp4` (video) + `<name>/` (screenshots every 3s).
+
+---
+
+# Gotchas
+
+### agent-browser
+
+- **Daemon can get stuck** — if commands hang, run `agent-browser close --all` or `pkill -f agent-browser` to reset
+- **HMR invalidates everything** — after code changes, refs break. Re-snapshot or restart
+- **`snapshot -i` doesn't find contenteditable** — use `snapshot -i -C` for rich text editors
+- **`fill` doesn't work on contenteditable** — use `type` for chat inputs
+- **Screenshots go to `~/.agent-browser/tmp/screenshots/`** — read them with the `Read` tool
+- **Dialogs block all commands** — if commands time out, check `agent-browser dialog status`
+- **Default timeout is 25s** — override with `AGENT_BROWSER_DEFAULT_TIMEOUT` (ms) or use explicit waits
+- **Shell quoting corrupts eval** — use `eval --stdin <<'EVALEOF'` for complex JS
+
+### Electron-specific
+
+- **Always use `electron-dev.sh stop` to clean up** — `pkill -f "Electron"` only kills the main process; helper processes (GPU, renderer, network) survive. The script finds and kills all of them via PID matching against the project's electron binary path.
+- **`npx electron-vite dev` must run from `apps/desktop/`** — running from project root fails silently. The `electron-dev.sh` script handles this automatically.
+- **Don't resize the Electron window after load** — resizing triggers full SPA reload
+- **Store is at `window.__LOBE_STORES`**, not `window.__ZUSTAND_STORES__`
+
+### osascript
+
+See [references/osascript-common.md](./references/osascript-common.md#gotchas) for the full osascript gotchas list (accessibility permissions, `keystroke` non-ASCII issues, locale-specific app names, rate limiting, etc.).
@@ -0,0 +1,110 @@
+# Log `agent-browser` into a local LobeHub dev server
+
+`agent-browser --headed` on macOS often creates the Chromium window off-screen — the user can't see or interact with it, so manual login inside the agent-browser session fails. Instead of sharing the user's real Chrome profile, copy the **better-auth session cookie** out of a request in DevTools and inject it into the agent-browser session as a Playwright-style state file.
+
+## When to use
+
+- You need `agent-browser` to reach an authenticated page on `http://localhost:<port>` (e.g. `localhost:3011`).
+- The user already has a logged-in tab of the same dev server in their own Chrome.
+- Spawning a headed Chromium to let the user log in manually is unreliable (window off-screen, no interaction).
+
+Do **not** use this on production URLs — only local dev. Treat the cookie as a secret: don't paste it into shared logs or PRs, and don't commit it anywhere.
+
+## Step 1 — Ask the user to copy the cookie from a Network request, NOT `document.cookie`
+
+`document.cookie` does not return HttpOnly cookies, and better-auth stores its session in exactly such a cookie. Instruct the user:
+
+1. Open the logged-in tab (`http://localhost:<port>/…`) in their own Chrome.
+2. `Cmd+Option+I` → **Network** tab.
+3. Refresh, click any same-origin request (e.g. the top-level document request).
+4. In the right pane under **Request Headers**, right-click the `Cookie:` line → **Copy value** (or copy the entire header).
+5. Paste the string into chat.
+
+You only need the better-auth pieces. Everything else (Clerk, `LOBE_LOCALE`, HMR hash, theme vars) is noise; it can stay in the pasted string, because the Step 2 script keeps only the better-auth cookies. The minimum viable set is:
+
+```
+better-auth.session_token=<value>; better-auth.state=<value>
+```
+
+## Step 2 — Build a Playwright-style state file
+
+`agent-browser state load` expects Playwright's `storageState` format: a JSON with a `cookies` array and an `origins` array.
+
+```bash
+cat > /tmp/mkstate.py << 'PY'
+import json, sys, time
+
+# Read the Cookie header from stdin (allows optional "Cookie: " prefix).
+raw = sys.stdin.read().strip()
+if raw.lower().startswith("cookie:"):
+    raw = raw.split(":", 1)[1].strip()
+
+# Keep only better-auth cookies. Extend this set if the app genuinely needs more.
+WANTED = {"better-auth.session_token", "better-auth.state"}
+
+cookies = []
+exp = int(time.time()) + 30 * 24 * 3600  # 30 days
+for pair in raw.split("; "):
+    if "=" not in pair:
+        continue
+    name, _, value = pair.partition("=")
+    if name not in WANTED:
+        continue
+    cookies.append({
+        "name": name,
+        "value": value,
+        "domain": "localhost",
+        "path": "/",
+        "expires": exp,
+        "httpOnly": False,
+        "secure": False,
+        "sameSite": "Lax",
+    })
+
+if not cookies:
+    sys.stderr.write("no better-auth cookies found in input\n")
+    sys.exit(1)
+
+print(json.dumps({"cookies": cookies, "origins": []}, indent=2))
+PY
+
+# Feed the copied Cookie header in via env var or heredoc.
+printf '%s' "$COOKIE_HEADER" | python3 /tmp/mkstate.py > /tmp/state.json
+```
+
+**Note on `httpOnly`**: the real cookie in the user's browser is HttpOnly, but `storageState` doesn't enforce the flag on load — it just attaches the value. Storing with `httpOnly: false` is fine for local dev and sidesteps a CDP-context quirk where HttpOnly cookies sometimes fail to attach.
+
+## Step 3 — Load state and navigate
+
+```bash
+SESSION="my-test" # any stable session name
+
+agent-browser --session "$SESSION" state load /tmp/state.json
+agent-browser --session "$SESSION" open "http://localhost:3011/"
+agent-browser --session "$SESSION" get url
+# Expect NOT /signin?callbackUrl=… — if you still see signin, cookie didn't apply.
+```
+
+## Step 4 — Verify
+
+```bash
+agent-browser --session "$SESSION" snapshot -i | head -20
+# Look for the user's avatar/name in the sidebar, or absence of the signin form.
+```
+
+## Common failure modes
+
+| Symptom                                         | Cause                                                                   | Fix                                                  |
+| ----------------------------------------------- | ----------------------------------------------------------------------- | ---------------------------------------------------- |
+| Still redirects to `/signin` after `state load` | User pasted from `document.cookie` → missed HttpOnly session            | Re-pull from Network request Headers, not console    |
+| `state load` reports 0 cookies                  | Separator wrong, or user pasted URL-decoded value                       | Keep the raw `Cookie:` header as-is; split on `"; "` |
+| Login works briefly then expires                | `better-auth.session_token` rotated (user logged out / signed in again) | Re-copy and re-load                                  |
+| Domain mismatch                                 | Cookie `domain` is not literally `localhost` (e.g. has a leading dot)   | Set `domain: "localhost"` literally, no leading dot  |
+
+## Scope
+
+Only covers authenticating an **agent-browser** session into a **local** LobeHub dev server. It does not:
+
+- Work for production — production cookies are `Secure; HttpOnly; Domain=.lobehub.com` and must be delivered over HTTPS.
+- Replace real OAuth flows — tests that must exercise the login UI need a real Chromium with `--remote-debugging-port` or a bot account.
+- Flow cookies back to the user's Chrome — injection is one-way (into agent-browser only).
@@ -19,13 +19,13 @@ works for any LobeHub streaming session.

 ```bash
 # 1. Start Electron with CDP
-./.agents/skills/agent-testing/scripts/electron-dev.sh start
+./.agents/skills/local-testing/scripts/electron-dev.sh start

 # 2. Navigate to a chat, switch runtime to Cloud Sandbox (gateway mode)

 # 3. Install the probe + helpers
 agent-browser --cdp 9222 eval --stdin \
-  < .agents/skills/agent-testing/scripts/agent-gateway/probe.js
+  < .agents/skills/local-testing/scripts/agent-gateway/probe.js

 # 4. Send a tool-call message — manually or via type+press
 agent-browser --cdp 9222 eval "window.__PROBE_EVENT('SENT')"
@@ -34,15 +34,15 @@ agent-browser --cdp 9222 eval "window.__PROBE_EVENT('SENT')"
 #    rightmost inactive tab as AWAY — edit ROUND_TRIPS / DWELL_MS in the
 #    file if you want different timing)
 agent-browser --cdp 9222 eval --stdin \
-  < .agents/skills/agent-testing/scripts/agent-gateway/tab-switch.js
+  < .agents/skills/local-testing/scripts/agent-gateway/tab-switch.js

 # 6. Wait for streaming to finish, then dump
 agent-browser --cdp 9222 eval --stdin \
-  < .agents/skills/agent-testing/scripts/agent-gateway/probe-dump.js \
+  < .agents/skills/local-testing/scripts/agent-gateway/probe-dump.js \
  > /tmp/probe.json

 # 7. Analyze
-node .agents/skills/agent-testing/scripts/agent-gateway/analyze.mjs /tmp/probe.json
+node .agents/skills/local-testing/scripts/agent-gateway/analyze.mjs /tmp/probe.json
 ```

 The analyzer prints three sections: EVENTS, TIMELINE, REGRESSIONS. If
@@ -2,7 +2,7 @@

 **App name:** `Discord` | **Process name:** `Discord`

-See [references/osascript.md](../../references/osascript.md) for shared patterns.
+See [osascript-common.md](./osascript-common.md) for shared patterns.

 ## Activate & Navigate

@@ -92,6 +92,6 @@ echo "Screenshot saved to /tmp/discord-test-result.png"
 ## Script

 ```bash
-./.agents/skills/agent-testing/bot/discord/test-discord-bot.sh "bot-testing" "!ping"
-./.agents/skills/agent-testing/bot/discord/test-discord-bot.sh "bot-testing" "/ask Tell me a joke" 30
+./.agents/skills/local-testing/scripts/test-discord-bot.sh "bot-testing" "!ping"
+./.agents/skills/local-testing/scripts/test-discord-bot.sh "bot-testing" "/ask Tell me a joke" 30
 ```
@@ -2,7 +2,7 @@

 **App name:** `Lark` or `飞书` | **Process name:** `Lark` or `飞书`

-See [references/osascript.md](../../references/osascript.md) for shared patterns.
+See [osascript-common.md](./osascript-common.md) for shared patterns.

 ## Activate & Navigate

@@ -56,6 +56,6 @@ screencapture /tmp/lark-bot-response.png
 ## Script

 ```bash
-./.agents/skills/agent-testing/bot/lark/test-lark-bot.sh "bot-testing" "@MyBot hello"
-./.agents/skills/agent-testing/bot/lark/test-lark-bot.sh "bot-testing" "Help me with this" 30
+./.agents/skills/local-testing/scripts/test-lark-bot.sh "bot-testing" "@MyBot hello"
+./.agents/skills/local-testing/scripts/test-lark-bot.sh "bot-testing" "Help me with this" 30
 ```
@@ -2,7 +2,7 @@

 **App name:** `QQ` | **Process name:** `QQ`

-See [references/osascript.md](../../references/osascript.md) for shared patterns.
+See [osascript-common.md](./osascript-common.md) for shared patterns.

 ## Activate & Navigate

@@ -57,6 +57,6 @@ screencapture /tmp/qq-bot-response.png
 ## Script

 ```bash
-./.agents/skills/agent-testing/bot/qq/test-qq-bot.sh "bot-testing" "Hello bot" 15
-./.agents/skills/agent-testing/bot/qq/test-qq-bot.sh "MyBot" "/help" 10
+./.agents/skills/local-testing/scripts/test-qq-bot.sh "bot-testing" "Hello bot" 15
+./.agents/skills/local-testing/scripts/test-qq-bot.sh "MyBot" "/help" 10
 ```
@@ -12,13 +12,13 @@ General-purpose screen recording tool for the Electron app. Captures CDP screens

 ```bash
 # Start recording (Electron must be running with CDP)
-.agents/skills/agent-testing/scripts/record-app-screen.sh start [output_name]
+.agents/skills/local-testing/scripts/record-app-screen.sh start [output_name]

 # Stop recording and assemble video
-.agents/skills/agent-testing/scripts/record-app-screen.sh stop
+.agents/skills/local-testing/scripts/record-app-screen.sh stop

 # Check if recording is active
-.agents/skills/agent-testing/scripts/record-app-screen.sh status
+.agents/skills/local-testing/scripts/record-app-screen.sh status
 ```

 ### Arguments
@@ -74,10 +74,10 @@ The `.records/` directory is at the project root and is gitignored.

 ```bash
 # Start Electron
-.agents/skills/agent-testing/scripts/electron-dev.sh start
+.agents/skills/local-testing/scripts/electron-dev.sh start

 # Start recording
-.agents/skills/agent-testing/scripts/record-app-screen.sh start my-test
+.agents/skills/local-testing/scripts/record-app-screen.sh start my-test

 # Run automation
 agent-browser --cdp 9222 click @e61
@@ -86,14 +86,14 @@ agent-browser --cdp 9222 press Enter
 sleep 10

 # Stop and get results
-.agents/skills/agent-testing/scripts/record-app-screen.sh stop
+.agents/skills/local-testing/scripts/record-app-screen.sh stop
 # → .records/my-test.mp4 + .records/my-test/*.png
 ```

 ### Gateway Streaming Demo

 ```bash
-.agents/skills/agent-testing/scripts/electron-dev.sh start
+.agents/skills/local-testing/scripts/electron-dev.sh start

 # Inject gateway URL
 agent-browser --cdp 9222 eval --stdin << 'EOF'
@@ -106,19 +106,19 @@ agent-browser --cdp 9222 eval --stdin << 'EOF'
 EOF

 # Record
-.agents/skills/agent-testing/scripts/record-app-screen.sh start gateway-demo
+.agents/skills/local-testing/scripts/record-app-screen.sh start gateway-demo

 # Navigate to agent, send message, wait for completion...
 # (automation commands here)

-.agents/skills/agent-testing/scripts/record-app-screen.sh stop
+.agents/skills/local-testing/scripts/record-app-screen.sh stop
 open .records/gateway-demo.mp4
 ```

 ### Check Active Recording

 ```bash
-.agents/skills/agent-testing/scripts/record-app-screen.sh status
+.agents/skills/local-testing/scripts/record-app-screen.sh status
 # [record] Active recording
 #   Frames:      42 captured (running: yes)
 #   Screenshots: 14 captured (running: yes)
@@ -2,7 +2,7 @@

 **App name:** `Slack` | **Process name:** `Slack`

-See [references/osascript.md](../../references/osascript.md) for shared patterns.
+See [osascript-common.md](./osascript-common.md) for shared patterns.

 ## Activate & Navigate

@@ -68,6 +68,6 @@ screencapture /tmp/slack-bot-response.png
 ## Script

 ```bash
-./.agents/skills/agent-testing/bot/slack/test-slack-bot.sh "bot-testing" "@mybot hello"
-./.agents/skills/agent-testing/bot/slack/test-slack-bot.sh "bot-testing" "/ask What is 2+2?" 20
+./.agents/skills/local-testing/scripts/test-slack-bot.sh "bot-testing" "@mybot hello"
+./.agents/skills/local-testing/scripts/test-slack-bot.sh "bot-testing" "/ask What is 2+2?" 20
 ```
@@ -2,7 +2,7 @@

 **App name:** `Telegram` | **Process name:** `Telegram`

-See [references/osascript.md](../../references/osascript.md) for shared patterns.
+See [osascript-common.md](./osascript-common.md) for shared patterns.

 ## Activate & Navigate

@@ -75,6 +75,6 @@ curl -s "https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/getUpdates?limit=5" | j
 ## Script

 ```bash
-./.agents/skills/agent-testing/bot/telegram/test-telegram-bot.sh "MyTestBot" "/start"
-./.agents/skills/agent-testing/bot/telegram/test-telegram-bot.sh "GPTBot" "Hello" 60
+./.agents/skills/local-testing/scripts/test-telegram-bot.sh "MyTestBot" "/start"
+./.agents/skills/local-testing/scripts/test-telegram-bot.sh "GPTBot" "Hello" 60
 ```
@@ -2,7 +2,7 @@

 **App name:** `微信` or `WeChat` | **Process name:** `WeChat`

-See [references/osascript.md](../../references/osascript.md) for shared patterns.
+See [osascript-common.md](./osascript-common.md) for shared patterns.

 ## Activate & Navigate

@@ -76,6 +76,6 @@ screencapture /tmp/wechat-bot-response.png
 ## Script

 ```bash
-./.agents/skills/agent-testing/bot/wechat/test-wechat-bot.sh "文件传输助手" "test message" 5
-./.agents/skills/agent-testing/bot/wechat/test-wechat-bot.sh "MyBot" "Tell me a joke" 30
+./.agents/skills/local-testing/scripts/test-wechat-bot.sh "文件传输助手" "test message" 5
+./.agents/skills/local-testing/scripts/test-wechat-bot.sh "MyBot" "Tell me a joke" 30
 ```
@@ -60,5 +60,5 @@ echo "[$APP] Waiting ${WAIT}s for bot response..."
 sleep "$WAIT"

 echo "[$APP] Capturing screenshot..."
-"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
+"$SCRIPT_DIR/capture-app-window.sh" "$APP" "$SCREENSHOT"
 echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
@@ -80,5 +80,5 @@ echo "[$APP] Waiting ${WAIT}s for bot response..."
 sleep "$WAIT"

 echo "[$APP] Capturing screenshot..."
-"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
+"$SCRIPT_DIR/capture-app-window.sh" "$APP" "$SCREENSHOT"
 echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
@@ -72,5 +72,5 @@ echo "[$APP] Waiting ${WAIT}s for bot response..."
 sleep "$WAIT"

 echo "[$APP] Capturing screenshot..."
-"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
+"$SCRIPT_DIR/capture-app-window.sh" "$APP" "$SCREENSHOT"
 echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
@@ -60,5 +60,5 @@ echo "[$APP] Waiting ${WAIT}s for bot response..."
 sleep "$WAIT"

 echo "[$APP] Capturing screenshot..."
-"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
+"$SCRIPT_DIR/capture-app-window.sh" "$APP" "$SCREENSHOT"
 echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
@@ -75,5 +75,5 @@ echo "[$APP] Waiting ${WAIT}s for bot response..."
 sleep "$WAIT"

 echo "[$APP] Capturing screenshot..."
-"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
+"$SCRIPT_DIR/capture-app-window.sh" "$APP" "$SCREENSHOT"
 echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
@@ -81,5 +81,5 @@ echo "[$APP] Waiting ${WAIT}s for bot response..."
 sleep "$WAIT"

 echo "[$APP] Capturing screenshot..."
-"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
+"$SCRIPT_DIR/capture-app-window.sh" "$APP" "$SCREENSHOT"
 echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
@@ -1,6 +1,6 @@
 ---
 name: microcopy
-description: 'UI copy and microcopy guidelines. Use for user-facing copy, buttons, errors, empty states, onboarding, i18n wording, translation, or copy improvements in Chinese or English.'
+description: UI copy and microcopy guidelines. Use when writing UI text, buttons, error messages, empty states, onboarding, or any user-facing copy. Triggers on i18n translation, UI text writing, or copy improvement tasks. Supports both Chinese and English.
 user-invocable: false
 ---

@@ -1,6 +1,6 @@
 ---
 name: modal
-description: 'LobeHub imperative modal conventions. Use when creating or migrating modals, dialogs, popups, confirm flows, ModalHost wiring, createModal, confirmModal, useModalContext, or base-ui modal APIs.'
+description: "LobeHub imperative-modal conventions. Use whenever creating, editing, opening, or migrating a modal/dialog/popup — prefer `createModal` / `confirmModal` / `useModalContext` from `@lobehub/ui/base-ui` (headless) over the legacy root `@lobehub/ui` `createModal` (antd Modal props) and over any declarative `open` state + `<Modal />` pattern. Covers required `ModalHost` mounting, the `Content` + `index.tsx` file layout, `content` vs `children` slot, i18n inside `createModal()` (`import { t } from 'i18next'`), and migration notes. Triggers on `createModal`, `confirmModal`, `useModalContext`, `ModalHost`, `antd Modal`, `<Modal open>`, 'open a modal', 'popup', 'dialog', 'confirm dialog', '弹框', '弹窗', '确认框', 'migrate to base-ui'."
 user-invocable: false
 ---

@@ -1,69 +0,0 @@
----
-name: model-bank-metadata
-description: 'Backfill and maintain model-bank metadata (knowledgeCutoff, family, generation). Use when adding models, fixing cutoff/family data, running a metadata sweep across aiModels providers, or researching official knowledge cutoffs.'
-user-invocable: false
----
-
-# Model-Bank Metadata (knowledgeCutoff / family / generation)
-
-How to populate and maintain the three structured metadata fields on `packages/model-bank/src/aiModels/*.ts` model cards, at single-model scale (new model PR) or repo-wide scale (sweep across \~80 provider files / \~1900 entries).
-
-## Field semantics
-
-| Field             | Format                                                                              | Meaning                                                                                                                                                                                 |
-| ----------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `knowledgeCutoff` | `'YYYY-MM'` (or `'YYYY'` if only the year is published)                             | World-knowledge cutoff. When a vendor distinguishes a **"reliable knowledge cutoff"** from the broader training-data cutoff (Anthropic does), always use the **reliable** one.          |
-| `family`          | lowercase slug (`claude`, `gpt`, `o-series`, `qwen`, `deepseek`, `llama`, `glm`, …) | Model lineage, finer than `organization`. Lets the UI group models and match the same model across aggregator providers.                                                                |
-| `generation`      | family slug + version (`claude-4.6`, `gpt-5.2`, `qwen3.5`, `llama-3.1`)             | Generation within the family. Only set when confidently derivable from the model line's naming. Rolling aliases (`qwen-max`, `deepseek-chat`, `gemini-flash-latest`) get `family` only. |
-
-All three are optional. **The cardinal rule: only fill what an authoritative source states or naming rules derive — never guess.** An empty field is correct for vendors that publish nothing.
-
-No DB migration is ever needed for these: builtin models are merged from model-bank at read time (`repositories/aiInfra/index.ts` spreads the whole card), so new card fields flow to the client automatically.
-
-## Sourcing rules for knowledgeCutoff
-
-Accept only:
-
-- Vendor official docs (platform.openai.com / developers.openai.com, docs.x.ai, ai.google.dev, docs.anthropic.com / platform.claude.com)
-- Official Hugging Face org model cards (huggingface.co/meta-llama/..., etc.)
-- Official tech reports / system cards / launch blog posts
-
-Reject:
-
-- **Third-party aggregator sites** (aiknowledgecutoff.com and similar) — proven to copy one model's value across a whole family. A Cohere sweep once claimed `2024-06` for four distinct base models; none of the cited Cohere pages said that, and the only cutoff Cohere actually publishes is Feb 2023 for the 08-2024 Command R/R+ refresh.
-- **AWS Bedrock model cards as sole source** — proven to conflate launch date with knowledge cutoff (DeepSeek R1's card lists both as "Jan 2025"). If Bedrock is the only place a value appears, leave the field empty.
-- Inference from `releasedAt` — a release date is not a cutoff.
-
-Variant inheritance: dated snapshots (`-2024-08-06`), speed/price tiers of the same checkpoint, quantizations (`-fp8`, `-awq`), context-length variants (`-32k`), ollama `:NNb` tags, and cloud-prefixed ids (`anthropic.`/`us.`/`global.` Bedrock ids) share their base model's cutoff. **Distills do not inherit** from teacher or base — use the distill's own published value or leave empty. **Sizes within one generation can genuinely differ**: Llama 3 8B is Mar 2023 while 70B is Dec 2023 (per Meta's own card) — don't "fix" that to one family-wide value.
-
-Vendors that publish no cutoffs (leave empty, don't chase): Qwen, DeepSeek, GLM/Zhipu, ERNIE, Doubao, Hunyuan, SenseNova, Spark, MiniMax, StepFun, Yi (mostly), Moonshot.
-
-Known per-vendor footguns:
-
-- **Anthropic**: Opus 4.6 reliable cutoff is `2025-05`, Sonnet 4.6 is `2025-08` — easy to swap. Claude 3.7 is `2024-10` (system card: trained through Nov 2024, knowledge cutoff end of Oct 2024). Cite system cards / the models overview, not the Help Center article (a living page that drops retired models — citation rot).
-- **xAI**: docs.x.ai has one blanket sentence covering grok-3/grok-4; mini variants are not named there. Grok 4.20/4.3 have no official cutoff anywhere.
-- **OpenAI**: per-model docs pages (developers.openai.com/api/docs/models/<id>) state cutoffs explicitly, including snapshot differences (gpt-4-1106-preview `2023-04` vs gpt-4-0125-preview `2023-12`).
-
-## family/generation derivation
-
-Rule-based, no research needed: `scripts/derive-family.ts` holds the per-family regex rules. Traps already encoded there — keep them when extending:
-
-- Date suffixes are not versions: `claude-sonnet-4-20250514` is generation `claude-4`, not `claude-4.2`.
-- Size suffixes are not versions: `llama-3-8b` → `llama-3` (not `llama-3.8`); `gemma-7b-it` is **gemma-1** (not gemma-7).
-- Vendor spelling variants: `qwen2p5` = qwen2.5, `llama-v3p1` = llama-3.1, ollama `:NNb` tags, Bedrock `us.`/`global.`/`anthropic.` prefixes.
-- `claude-X.0` normalizes to `claude-X`.
-- Fable/Mythos-class ids (`claude-fable-5`) don't match the opus/sonnet/haiku regex — they are the Mythos class — `family: 'claude-mythos'`, `generation: 'mythos-5'` (set manually; the launch page calls Fable 5 "the generally available Mythos-class model").
-
-## Repo-wide sweep workflow
-
-1. **Extract ids**: `bun .agents/skills/model-bank-metadata/scripts/extract-model-ids.ts` → unique normalized chat-model ids (normalization = last path segment, lowercased). Non-chat types (image/video/embedding/tts) have no knowledge cutoff — skip them.
-2. **Research (multi-agent)**: chunk ids by family (≤50 per chunk) and fan out one research agent per chunk (Workflow tool), each returning `{id, cutoff, source}` with the sourcing rules above baked into the prompt, **plus** one adversarial verify agent per chunk that re-fetches cited sources and refutes unsupported claims. The verify pass is load-bearing: it caught the Cohere aggregator copy-paste and the AWS launch-date conflation.
-3. **Policy filter**: before applying, drop entries whose only source is a rejected category (check the returned `sources` map — e.g. drop everything sourced to aws.amazon.com).
-4. **Apply**: `bun scripts/apply-cutoffs.ts <map.json>` and `bun scripts/apply-family.ts <map.json>` (run from repo root). Both are idempotent codemods keyed on normalized id — aggregator providers get the same values automatically; entries that already have the field are skipped. They rely on the uniform prettier formatting of the data files (entries start `  {` / end `  },`, fields at 4-space indent).
-5. **Verify**: `cd packages/model-bank && bunx vitest run src/aiModels/__tests__/index.test.ts && bunx tsc --noEmit`.
-
-## Maintenance rules
-
-- **New model PRs** should fill all three fields inline, citing the official source in the PR body (see the Anthropic entries in `anthropic.ts` for reference values).
-- **After resolving merge conflicts** in model-bank data files, sanity-check that metadata didn't vanish: `git grep -c knowledgeCutoff -- 'packages/model-bank/src/aiModels/*.ts'` before vs after. A three-way stack of model PRs once silently dropped all 10 Anthropic cutoffs during conflict resolution.
-- Dirty ids exist in aggregator data (a sambanova id once carried a trailing tab). The codemods match ids verbatim — if a map key won't apply, check for invisible characters before assuming the model is missing.
@@ -1,73 +0,0 @@
-/**
- * One-off codemod: apply a canonical { normalizedModelId: 'YYYY-MM' } map onto
- * packages/model-bank/src/aiModels/*.ts, inserting `knowledgeCutoff` after the
- * `id:` line of every chat-model entry that matches and doesn't already have one.
- *
- * Relies on the uniform prettier formatting of these files:
- *   - each model entry starts with `  {` and ends with `  },` (2-space indent)
- *   - fields are at 4-space indent: `    id: '...'`, `    type: 'chat'`
- *
- * Usage: bun /tmp/apply-cutoffs.ts /tmp/cutoff-map.json
- */
-import { readdirSync, readFileSync, writeFileSync } from 'node:fs';
-import { join } from 'node:path';
-
-const mapPath = process.argv[2];
-if (!mapPath) throw new Error('usage: bun apply-cutoffs.ts <map.json>');
-const map: Record<string, string> = JSON.parse(readFileSync(mapPath, 'utf8'));
-
-const dir = 'packages/model-bank/src/aiModels';
-const normalize = (id: string) => id.split('/').pop()!.toLowerCase();
-
-let touchedFiles = 0;
-let inserted = 0;
-const matchedIds = new Set<string>();
-
-for (const file of readdirSync(dir).filter((f) => f.endsWith('.ts'))) {
-  const path = join(dir, file);
-  const lines = readFileSync(path, 'utf8').split('\n');
-  const out: string[] = [];
-  let changed = false;
-
-  let i = 0;
-  while (i < lines.length) {
-    if (lines[i] !== '  {') {
-      out.push(lines[i]);
-      i++;
-      continue;
-    }
-    // collect one model entry block
-    const start = i;
-    let end = i;
-    while (end < lines.length && lines[end] !== '  },') end++;
-    const block = lines.slice(start, end + 1);
-
-    const idLineIdx = block.findIndex((l) => /^ {4}id: '/.test(l));
-    const isChat = block.some((l) => /^ {4}type: 'chat',?$/.test(l));
-    const hasCutoff = block.some((l) => /^ {4}knowledgeCutoff:/.test(l));
-
-    if (idLineIdx >= 0 && isChat && !hasCutoff) {
-      const rawId = block[idLineIdx].match(/^ {4}id: '(.+)',$/)?.[1];
-      const norm = rawId ? normalize(rawId) : undefined;
-      const cutoff = norm ? map[norm] : undefined;
-      if (cutoff && /^\d{4}(?:-\d{2})?$/.test(cutoff)) {
-        block.splice(idLineIdx + 1, 0, `    knowledgeCutoff: '${cutoff}',`);
-        inserted++;
-        changed = true;
-        matchedIds.add(norm!);
-      }
-    }
-    out.push(...block);
-    i = end + 1;
-  }
-
-  if (changed) {
-    writeFileSync(path, out.join('\n'));
-    touchedFiles++;
-  }
-}
-
-console.log(`inserted ${inserted} knowledgeCutoff fields across ${touchedFiles} files`);
-console.log(`map ids used: ${matchedIds.size}/${Object.keys(map).length}`);
-const unused = Object.keys(map).filter((k) => !matchedIds.has(k));
-if (unused.length) console.log('unused map keys (first 20):', unused.slice(0, 20));
@@ -1,49 +0,0 @@
-import { readdirSync, readFileSync, writeFileSync } from 'node:fs';
-import { join } from 'node:path';
-
-const map: Record<string, { family: string; generation?: string }> = JSON.parse(
-  readFileSync('/tmp/family-map.json', 'utf8'),
-);
-const dir = 'packages/model-bank/src/aiModels';
-const normalize = (id: string) => id.split('/').pop()!.toLowerCase();
-
-let inserted = 0;
-let touchedFiles = 0;
-for (const file of readdirSync(dir).filter((f) => f.endsWith('.ts'))) {
-  const path = join(dir, file);
-  const lines = readFileSync(path, 'utf8').split('\n');
-  const out: string[] = [];
-  let changed = false;
-  let i = 0;
-  while (i < lines.length) {
-    if (lines[i] !== '  {') {
-      out.push(lines[i]);
-      i++;
-      continue;
-    }
-    let end = i;
-    while (end < lines.length && lines[end] !== '  },') end++;
-    const block = lines.slice(i, end + 1);
-    const idLineIdx = block.findIndex((l) => /^ {4}id: '/.test(l));
-    const isChat = block.some((l) => /^ {4}type: 'chat',?$/.test(l));
-    const hasFamily = block.some((l) => /^ {4}family:/.test(l));
-    if (idLineIdx >= 0 && isChat && !hasFamily) {
-      const rawId = block[idLineIdx].match(/^ {4}id: '(.+)',$/)?.[1];
-      const r = rawId ? map[normalize(rawId)] : undefined;
-      if (r) {
-        const add = [`    family: '${r.family}',`];
-        if (r.generation) add.push(`    generation: '${r.generation}',`);
-        block.splice(idLineIdx, 0, ...add);
-        inserted++;
-        changed = true;
-      }
-    }
-    out.push(...block);
-    i = end + 1;
-  }
-  if (changed) {
-    writeFileSync(path, out.join('\n'));
-    touchedFiles++;
-  }
-}
-console.log(`annotated ${inserted} model entries across ${touchedFiles} files`);
@@ -1,237 +0,0 @@
-/* eslint-disable regexp/no-unused-capturing-group */
-/**
- * Rule-based derivation of { family, generation } from normalized model ids.
- * Principle: only fill what is confidently derivable; otherwise omit.
- *
- * Usage: bun /tmp/derive-family.ts            # print distinct pairs for review
- *        bun /tmp/derive-family.ts --emit     # write /tmp/family-map.json
- */
-import { readFileSync, writeFileSync } from 'node:fs';
-
-const ids: string[] = JSON.parse(readFileSync('/tmp/model-ids.json', 'utf8'));
-
-type R = { family: string; generation?: string };
-
-const derive = (id: string): R | undefined => {
-  // strip cloud/bedrock prefixes for matching
-  const m = id.replace(/^(us\.|global\.|eu\.|apac\.)?(anthropic\.|meta\.|cohere\.|azure-)/, '');
-
-  // ---- anthropic ----
-  if (m.startsWith('claude')) {
-    // family = product-line tier (claude-opus/sonnet/haiku/instant); bare claude-2.x has no tier
-    const tier = m.match(/(opus|sonnet|haiku|instant)/)?.[1];
-    const family = tier ? `claude-${tier}` : 'claude';
-    let g = m.match(/^claude-(?:opus|sonnet|haiku)-(\d)[.-](\d)(?!\d)/); // claude-opus-4-8 / claude-haiku-4.5
-    if (g) return { family, generation: `claude-${g[1]}.${g[2]}` };
-    g = m.match(/^claude-(?:opus|sonnet|haiku)-(\d)(?!\d)/); // claude-opus-4
-    if (g) return { family, generation: `claude-${g[1]}` };
-    g = m.match(/^claude-(\d)[.-](\d)(?!\d)/); // claude-3-5-haiku / claude-3.7-sonnet / claude-2.1
-    if (g) return { family, generation: g[2] === '0' ? `claude-${g[1]}` : `claude-${g[1]}.${g[2]}` };
-    g = m.match(/^claude-(\d)(?!\d)/); // claude-3-haiku
-    if (g) return { family, generation: `claude-${g[1]}` };
-    if (m.startsWith('claude-instant')) return { family: 'claude-instant' };
-    if (/^claude-v?2/.test(m)) return { family: 'claude', generation: 'claude-2' };
-    return { family };
-  }
-
-  // ---- openai ----
-  if (/^(gpt-oss|gpt_oss)/.test(m) || m.startsWith('gpt-oss:'))
-    return { family: 'gpt-oss', generation: 'gpt-oss' };
-  if (/^(chatgpt-4o|gpt-4o)/.test(m)) return { family: 'gpt', generation: 'gpt-4o' };
-  if (/^gpt-(3\.5|35)/.test(m)) return { family: 'gpt', generation: 'gpt-3.5' };
-  if (m.startsWith('gpt-audio')) return { family: 'gpt', generation: 'gpt-audio' };
-  {
-    const g = m.match(/^gpt-(\d)\.(\d)/); // gpt-4.1 / gpt-5.2
-    if (g) return { family: 'gpt', generation: `gpt-${g[1]}.${g[2]}` };
-    const g2 = m.match(/^gpt-(\d)(?!\d)/); // gpt-4 / gpt-5
-    if (g2) return { family: 'gpt', generation: `gpt-${g2[1]}` };
-  }
-  {
-    const g = m.match(/^o([134])(-|$)/); // o1 / o3 / o4
-    if (g) return { family: 'o-series', generation: `o${g[1]}` };
-  }
-  if (/^(codex|computer-use-preview)/.test(m)) return { family: 'gpt' };
-
-  // ---- google ----
-  {
-    const g = m.match(/^gemini-(\d+(?:\.\d+)?)/);
-    if (g) return { family: 'gemini', generation: `gemini-${g[1]}` };
-    if (/^gemini-(pro|flash)/.test(m)) return { family: 'gemini' }; // rolling aliases
-    if (m.startsWith('gemma')) {
-      if (/^gemma-?\db/.test(m)) return { family: 'gemma', generation: 'gemma-1' };
-      const v = m.match(/^gemma-?(\d)(?!b)/);
-      return { family: 'gemma', generation: v ? `gemma-${v[1]}` : undefined };
-    }
-    if (/^(codegemma|learnlm|palm)/.test(m)) return { family: m.match(/^[a-z]+/)![0] };
-  }
-
-  // ---- qwen ----
-  if (m.startsWith('qwq')) return { family: 'qwen', generation: 'qwq' };
-  if (m.startsWith('qvq')) return { family: 'qwen', generation: 'qvq' };
-  if (m.startsWith('codeqwen')) return { family: 'qwen' };
-  if (m.startsWith('qwen')) {
-    const g =
-      m.match(/^qwen-?([123](?:\.\d+)?)(?![0-9b])/) || // qwen3.5-plus / qwen-3-14b / qwen2-7b / qwen1.5
-      m.match(/^qwen([23](?:\.\d+)?):/) || // qwen2.5:72b
-      m.match(/^qwen([23])p(\d)/); // qwen2p5 -> handled below
-    if (/^qwen(\d)p(\d)/.test(m)) {
-      const p = m.match(/^qwen(\d)p(\d)/)!;
-      return { family: 'qwen', generation: `qwen${p[1]}.${p[2]}` };
-    }
-    if (g) return { family: 'qwen', generation: `qwen${g[1]}` };
-    return { family: 'qwen' }; // qwen-max/plus/turbo/vl rolling aliases
-  }
-
-  // ---- deepseek ----
-  if (/^(deepseek|azure-deepseek|pro-deepseek)/.test(m) || m.startsWith('deepseek_')) {
-    const s = m.replace(/^pro-/, '').replaceAll('_', '-');
-    if (s.startsWith('deepseek-r1-distill'))
-      return { family: 'deepseek', generation: 'deepseek-r1-distill' };
-    if (s.startsWith('deepseek-r1')) return { family: 'deepseek', generation: 'deepseek-r1' };
-    const g = s.match(/^deepseek-(?:chat-)?v(\d(?:\.\d)?)/);
-    if (g) return { family: 'deepseek', generation: `deepseek-v${g[1]}` };
-    if (/^deepseek-(coder-v2|coder)/.test(s))
-      return { family: 'deepseek', generation: 'deepseek-coder' };
-    return { family: 'deepseek' }; // deepseek-chat / reasoner rolling aliases
-  }
-
-  // ---- meta llama ----
-  if (m.startsWith('codellama')) return { family: 'llama', generation: 'codellama' };
-  if (/^(meta-)?llama|^l3(\d)?-|^llava/.test(m)) {
-    if (m.startsWith('llava')) return { family: 'llava' };
-    const s = m.replace(/^meta-/, '');
-    const g =
-      s.match(/^llama-?([234])(?:[.-](\d))?(?![0-9b])/) || // llama-3.1 / llama3.3 / llama-4
-      s.match(/^llama-?v([234])p?(\d)?/) || // llama-v3p1
-      s.match(/^llama([234])[.:-](\d)?/);
-    if (g) {
-      const gen = g[2] ? `llama-${g[1]}.${g[2]}` : `llama-${g[1]}`;
-      return { family: 'llama', generation: gen };
-    }
-    if (m.startsWith('l3-')) return { family: 'llama', generation: 'llama-3' };
-    if (m.startsWith('l31-')) return { family: 'llama', generation: 'llama-3.1' };
-    return { family: 'llama' };
-  }
-
-  // ---- zhipu ----
-  if (/^(zai-)?glm/.test(m)) {
-    const s = m.replace(/^zai-/, '');
-    if (s.startsWith('glm-z1')) return { family: 'glm', generation: 'glm-z1' };
-    if (s.startsWith('glm-zero')) return { family: 'glm', generation: 'glm-zero' };
-    const g = s.match(/^glm-(\d(?:\.\d)?)/);
-    if (g) return { family: 'glm', generation: `glm-${g[1]}` };
-    return { family: 'glm' };
-  }
-  if (/^(charglm|codegeex|emohaa)/.test(m)) return { family: m.match(/^[a-z]+/)![0] };
-
-  // ---- mistral ----
-  if (
-    /^(open-)?(mistral|mixtral|ministral|codestral|devstral|magistral|pixtral|mathstral|labs-devstral|labs-leanstral|open-codestral)/.test(
-      m,
-    )
-  ) {
-    const fam = m.replace(/^(open-|labs-)/, '').match(/^[a-z]+/)![0];
-    return { family: fam };
-  }
-
-  // ---- xai ----
-  if (m.startsWith('grok')) {
-    const g = m.match(/^grok-(\d(?:\.\d+)?)/);
-    return { family: 'grok', generation: g ? `grok-${g[1]}` : undefined };
-  }
-
-  // ---- moonshot ----
-  if (m.startsWith('kimi')) {
-    const g = m.match(/^kimi-k(\d(?:\.\d)?)/);
-    return { family: 'kimi', generation: g ? `kimi-k${g[1]}` : undefined };
-  }
-  if (m.startsWith('moonshot-kimi-k2')) return { family: 'kimi', generation: 'kimi-k2' };
-  if (m.startsWith('moonshot-v1')) return { family: 'kimi', generation: 'moonshot-v1' };
-
-  // ---- minimax ----
-  if (m.startsWith('minimax')) {
-    if (m.startsWith('minimax-text')) return { family: 'minimax', generation: 'minimax-text-01' };
-    const g = m.match(/^minimax-m(\d(?:\.\d)?)/);
-    return { family: 'minimax', generation: g ? `minimax-m${g[1]}` : undefined };
-  }
-  if (m.startsWith('abab')) return { family: 'minimax', generation: 'abab' };
-
-  // ---- baidu ----
-  if (m.startsWith('ernie')) {
-    if (m.startsWith('ernie-x1')) return { family: 'ernie', generation: 'ernie-x1' };
-    const g = m.match(/^ernie-(\d\.\d)/);
-    return { family: 'ernie', generation: g ? `ernie-${g[1]}` : undefined };
-  }
-  if (m.startsWith('qianfan')) return { family: 'qianfan' };
-
-  // ---- bytedance ----
-  if (m.startsWith('doubao')) {
-    const g = m.match(/^doubao-seed-(\d[.-]\d|\d)/) || m.match(/^doubao-(\d\.\d)/);
-    return { family: 'doubao', generation: g ? `doubao-${g[1].replace('-', '.')}` : undefined };
-  }
-  if (/^(seed-oss|skylark)/.test(m)) return { family: m.startsWith('seed') ? 'doubao' : 'skylark' };
-
-  // ---- tencent ----
-  if (m.startsWith('hunyuan')) {
-    const g = m.match(/^hunyuan-(\d\.\d)/);
-    return { family: 'hunyuan', generation: g ? `hunyuan-${g[1]}` : undefined };
-  }
-  if (m.startsWith('hy3')) return { family: 'hunyuan', generation: 'hunyuan-3' };
-
-  // ---- others (family only / simple version) ----
-  if (m.startsWith('yi-')) return { family: 'yi' };
-  if (/^(command|c4ai-command)/.test(m)) return { family: 'command' };
-  if (/^(aya|c4ai-aya)/.test(m)) return { family: 'aya' };
-  if (/^phi-?(\d)?/.test(m) && m.startsWith('phi')) {
-    const g = m.match(/^phi-?(\d(?:\.\d)?)/);
-    return { family: 'phi', generation: g ? `phi-${g[1]}` : undefined };
-  }
-  if (m.startsWith('wizardlm')) return { family: 'wizardlm' };
-  if (m.startsWith('step-')) {
-    const g = m.match(/^step-(?:r1|(\d(?:\.\d)?))/);
-    return { family: 'step', generation: g?.[1] ? `step-${g[1]}` : undefined };
-  }
-  if (/^(internlm|intern-)/.test(m)) return { family: 'intern' };
-  if (m.startsWith('internvl')) return { family: 'internvl' };
-  if (m.startsWith('baichuan')) {
-    const g = m.match(/^baichuan-?(m?\d)/);
-    return { family: 'baichuan', generation: g ? `baichuan-${g[1]}` : undefined };
-  }
-  if (/^(sensechat|sensenova)/.test(m)) return { family: 'sensenova' };
-  if (/^(spark|generalv|4\.0ultra)/.test(m)) return { family: 'spark' };
-  if (/^(360gpt|360zhinao)/.test(m)) return { family: '360zhinao' };
-  if (/^(jamba|ai21-jamba)/.test(m)) return { family: 'jamba' };
-  if (m.startsWith('sonar')) return { family: 'sonar' };
-  if (/^(nova-lite|nova-micro|nova-pro)/.test(m)) return { family: 'nova' };
-  if (/^(ling|ring)-/.test(m)) return { family: m.match(/^[a-z]+/)![0] };
-  if (m.startsWith('longcat')) return { family: 'longcat' };
-  if (m.startsWith('mimo')) return { family: 'mimo' };
-  if (m.startsWith('taichu')) return { family: 'taichu' };
-  if (/^(hermes|nous-hermes)/.test(m)) return { family: 'hermes' };
-  if (m.startsWith('solar')) return { family: 'solar' };
-  if (m.startsWith('kat-coder')) return { family: 'kat-coder' };
-  if (m.startsWith('dbrx')) return { family: 'dbrx' };
-  if (m.startsWith('morph')) return { family: 'morph' };
-
-  return undefined;
-};
-
-const map: Record<string, R> = {};
-const pairs = new Map<string, number>();
-let derived = 0;
-for (const id of ids) {
-  const r = derive(id);
-  if (!r) continue;
-  derived++;
-  map[id] = r;
-  const key = `${r.family} :: ${r.generation ?? '—'}`;
-  pairs.set(key, (pairs.get(key) || 0) + 1);
-}
-
-console.log(`derived ${derived}/${ids.length}`);
-for (const [k, n] of [...pairs.entries()].sort()) console.log(String(n).padStart(4), k);
-
-if (process.argv.includes('--emit')) {
-  writeFileSync('/tmp/family-map.json', JSON.stringify(map, null, 1));
-  console.log('\nwritten /tmp/family-map.json');
-}
@@ -1,23 +0,0 @@
-/**
- * Extract unique normalized chat-model ids from packages/model-bank/src/aiModels/*.ts.
- * Normalization: last path segment, lowercased (matches the apply codemods).
- *
- * Usage (repo root): bun .agents/skills/model-bank-metadata/scripts/extract-model-ids.ts [out.json]
- * Default output: /tmp/model-ids.json
- */
-import { readdirSync, writeFileSync } from 'node:fs';
-import { join, resolve } from 'node:path';
-
-const dir = resolve('packages/model-bank/src/aiModels');
-const out = process.argv[2] || '/tmp/model-ids.json';
-
-const ids = new Set<string>();
-for (const f of readdirSync(dir).filter((f) => f.endsWith('.ts'))) {
-  const mod = await import(join(dir, f));
-  for (const m of mod.default || []) {
-    if (!m?.id || m.type !== 'chat') continue;
-    ids.add(m.id.split('/').pop()!.toLowerCase());
-  }
-}
-writeFileSync(out, JSON.stringify([...ids].sort(), null, 1));
-console.log(`${ids.size} unique normalized chat ids -> ${out}`);
@@ -1,6 +1,6 @@
 ---
 name: pr
-description: "Create a PR for the current branch (targets `canary` by default), including splitting one cross-layer branch into ordered stacked PRs so a lower layer (db / shared package / server TRPC) merges before its callers (desktop / CLI / UI). Use when the user asks to create / submit a PR, or to split a branch because clients call a server contract that isn't on the trunk yet. Triggers on 'pr', 'create pr', 'submit pr', 'open a PR', 'pull request', 'split this PR', 'stacked PR', 'backend should merge first', '提 PR', '提个 PR', '新建 PR', '拆 PR', '后端先合', '分层合并'."
+description: "Create a PR for the current branch. Use when the user asks to create a pull request, submit PR, or says 'pr'."
 user-invocable: true
 ---

@@ -71,82 +71,3 @@ Use `.github/PULL_REQUEST_TEMPLATE.md` as the body structure. Key sections:

 - **Language**: All PR content must be in English
 - If a PR already exists for the branch, inform the user instead of creating a duplicate
-
---
-
-# Stacked PRs (cross-layer feature)
-
-The steps above create **one** PR for the current branch. When a single branch lands across layers — `packages/database` schema/model → a shared `packages/*` lib → `src/server` TRPC → `apps/desktop` + `apps/cli` callers → `src/features` UI — shipping it as one PR can't merge safely: the clients call an endpoint that doesn't exist on the trunk until the same PR merges, so any partial/rollback or independent review breaks. Split it into **ordered PRs**, lower layer first.
-
-## The ordering rule
-
-A PR may only merge **after** every layer it calls is already on the trunk.
-
- The **server contract** (new TRPC procedure, changed return shape, new table/model) merges first.
- The **callers** (desktop, CLI, UI) merge after — they invoke that contract.
- Tie-break with one question: _"if this merged alone to `canary` right now, would it build and behave?"_ If no, it belongs in a later PR.
-
-## Which file goes in which PR
-
-The non-obvious calls:
-
- **Frontend that adapts to a contract change goes WITH the server PR.** If you widen a TRPC return shape (e.g. `listDevices` now returns `platform: string | null`), the component consuming it must change in the _same_ PR — otherwise the server PR breaks the build on its own. Contract + its in-repo consumers ship together.
- **A new shared package goes with its consumer**, not the server, unless the server imports it too. A `@lobechat/*` package imported only by desktop/CLI ships in the client PR. Don't carry an unused package in the lower PR.
- **Workspace dep declarations** (`package.json` `workspace:*`, `pnpm-workspace.yaml`) travel with the code that imports the package.
-
-## The git recipe — split an existing full branch
-
-Starting point: one branch (`feat/x`) with a single commit `<FULL>` containing everything, already pushed (so it's also safe on the remote).
-
-```bash
-# 1. Safety nets — make the full work unloseable before rewriting anything
-git branch backup/x-full <FULL>          # local ref to the full commit
-git branch feat/x-clients <FULL>         # the higher-layer branch starts here
-
-# 2. Rewrite the lower-layer branch to lower-layer files only
-git checkout feat/x                      # this becomes the SERVER PR
-git reset --hard origin/canary
-git checkout <FULL> -- <server/db files…>   # stages just those paths
-git commit -m "✨ feat(...): <server half>"
-git push --force-with-lease origin feat/x   # never --force; never push to canary
-
-# 3. Build the higher-layer branch STACKED on the lower branch
-git checkout feat/x-clients
-git reset --hard feat/x                  # base = the just-rewritten server HEAD
-git checkout backup/x-full -- <client/ui files…>   # only the remaining paths
-git commit -m "✨ feat(...): <client half>"
-git push -u origin feat/x-clients
-```
-
-Then open the higher PR **based on the lower branch**, not the trunk:
-
-```bash
-gh pr create --base feat/x --head feat/x-clients --title "…" --body "…"
-```
-
-`--base feat/x` keeps the diff client-only (no server files leak in) and makes it physically impossible to merge the clients before the server. **After the server PR merges to `canary`, retarget the client PR's base to `canary`** (GitHub usually auto-retargets when the base branch merges; note it in the PR body so a human confirms).
-
-## Verify the dependency actually holds
-
-The whole point is the higher layer needs the lower one. Prove it: on the stacked higher branch, type-check the caller and confirm the symbol the lower layer introduced resolves.
-
-```bash
-cd apps/cli && bun run type-check 2>&1 | grep -iE "connect\.ts|device\.register"
-# empty (re: your change) = the stacked base supplies device.register ✓
-```
-
-Filter to your touched files — this repo's standalone type-check emits pre-existing env noise (`__ELECTRON__`, `@/types/llm`, unbuilt `@lobechat/types`) that isn't yours.
-
-## PR + Linear bookkeeping
-
- **Each PR closes only its own layer's issues.** Server PR: `Closes LOBE-<server>`. Client PR: `Closes LOBE-<pkg> / <desktop> / <cli>`. Don't let one PR's body claim another layer's issue.
- Both PRs are `Part of LOBE-<parent>`.
- On PR creation, move each closed sub-issue to **In Review** (not Done) and add a completion comment — see the `linear` skill.
-
-## Gotchas
-
- **Never push to `canary`.** A split branch cut with `git checkout -b feat/x origin/canary` _tracks_ `origin/canary`, so a bare `git push` targets canary. Always `git push origin feat/x` with the explicit branch name.
- **`--force-with-lease`, not `--force`** when rewriting the lower branch — it aborts if the remote moved under you.
- **Back up before `reset --hard`.** Step 1's `backup/x-full` + the pushed remote branch mean the full commit is referenced by ≥3 refs before you rewrite anything. Verify with `git branch --contains <FULL>`.
- **Lockfiles:** this monorepo commits no root `pnpm-lock.yaml`, so a new `workspace:*` dep needs no lockfile churn. In a repo that _does_ commit one, regenerate it on each branch after the split.
- **Don't over-split.** Two PRs (contract / callers) is usually enough. A UI page that only reads an existing endpoint can be its own later PR, but don't fragment a single layer across PRs for its own sake.
@@ -1,6 +1,6 @@
 ---
 name: project-overview
-description: 'LobeHub open-source monorepo architecture map. Use when locating code layers, understanding apps/packages/src layout, business stubs, project structure, or onboarding to the repository.'
+description: Complete project architecture and structure guide. Use when exploring the codebase, understanding project organization, finding files, or needing comprehensive architectural context. Triggers on architecture questions, directory navigation, or project overview needs.
 user-invocable: false
 ---

@@ -13,12 +13,11 @@ user-invocable: false
 ## Project Description

 Open-source, modern-design AI Agent Workspace: **LobeHub** (previously LobeChat).
-This repo is the **open-source root** (`github.com/lobehub/lobehub`, package `@lobehub/lobehub`).

 **Supported platforms:**

 - Web desktop/mobile
- Desktop (Electron) — `apps/desktop`
+- Desktop (Electron)
 - Mobile app (React Native) — **separate repo, already launched** (not in this monorepo)

 **Logo emoji:** 🤯
@@ -48,29 +47,30 @@ This repo is the **open-source root** (`github.com/lobehub/lobehub`, package `@l

 ## Monorepo Layout

-Flat layout — `apps/`, `packages/`, and `src/` all sit at the repo root. No
-git submodules.
+This is a monorepo extending the open-source `lobehub` submodule. Two repos:
+
+- **cloud repo root** — `src/` and `packages/business/` (`config`, `const`, `model-runtime`) hold cloud-only SaaS code that overrides/extends the submodule. See `AGENTS.md` for the override mechanism.
+- **`lobehub/` submodule** — the open-source product core.
+
+### `lobehub/` submodule — key directories

 ```
-(repo root)
+lobehub/
 ├── apps/
-│   ├── cli/                  # LobeHub CLI
-│   ├── desktop/              # Electron desktop app
-│   ├── device-gateway/       # Device gateway service
-│   └── server/               # Next.js-backed server: featureFlags, globalConfig, modules, routers, services, utils, workflows (`@/server/*` alias)
-├── docs/                     # changelog, development, self-hosting, usage
-├── locales/                  # en-US, zh-CN, ...
-├── packages/                 # ~80 @lobechat/* workspace packages — `ls` for the full set. Key ones:
-│   ├── agent-runtime/        # Agent runtime core
+│   ├── cli/                 # LobeHub CLI
+│   ├── desktop/             # Electron desktop app
+│   └── device-gateway/      # Device gateway service
+├── docs/                    # changelog, development, self-hosting, usage
+├── locales/                 # en-US, zh-CN, ...
+├── packages/                # ~80 @lobechat/* workspace packages — `ls` for the full set. Key ones:
+│   ├── agent-runtime/        # Agent runtime
 │   ├── agent-signal/         # Agent Signal pipeline
-│   ├── agent-tracing/        # Tracing / snapshots
-│   ├── builtin-tool-*/       # Per-tool packages (calculator, web-browsing, claude-code, ...)
-│   ├── builtin-tools/        # Central registries that compose builtin-tool-*
+│   ├── builtin-tool-*/       # Builtin tool packages
+│   ├── builtin-tools/        # Builtin tool registries
 │   ├── context-engine/
 │   ├── database/             # src/{models,schemas,repositories}
 │   ├── model-bank/           # Model definitions & provider cards
 │   ├── model-runtime/        # src/{core,providers}
-│   ├── business/             # Open-source stubs (config, const, model-bank, model-runtime) — overridden by cloud
 │   ├── types/
 │   └── utils/
 └── src/
@@ -83,54 +83,55 @@ git submodules.
    ├── spa/                  # SPA entries + router config
    │   ├── entry.{web,mobile,desktop,popup}.tsx
    │   └── router/
-    ├── business/             # Open-source stubs (client/server) — cloud repo provides real impls
+    ├── business/             # Open-source stubs (~50) overridden by cloud src/business/
    ├── features/             # Domain business components
-    ├── store/                # ~30 zustand stores — `ls` for the full set
-    ├── server/               # standalone-Hono server pieces only: agent-hono, workflows-hono (main backend lives in `apps/server`)
+    ├── store/                # ~28 zustand stores — `ls` for the full set
+    ├── server/               # featureFlags, globalConfig, modules, routers, services
    └── ...                   # components, hooks, layout, libs, locales, services, types, utils
 ```

+### cloud repo — key directories
+
+```
+(cloud root)
+├── packages/business/        # Cloud overrides: config, const, model-runtime
+├── src/
+│   ├── business/             # Cloud impls of submodule stubs (client/server/locales)
+│   ├── routes/               # Cloud-only route groups: (cloud)/, embed/
+│   ├── store/                # Cloud-only stores (e.g. subscription/)
+│   ├── server/               # Cloud routers & services (billing, budget, risk control...)
+│   └── app/(backend)/cron/   # Vercel cron routes (schedules declared in root vercel.ts)
+└── vercel.ts                 # Cron schedule declarations
+```
+
+> File search rule: a path like `@/store/x` resolves cloud `src/store/x` first, then
+> `lobehub/packages/store/src/x`, then `lobehub/src/store/x`. Cloud override wins.
+
 ## Architecture Map

-| Layer            | Location                                                 |
-| ---------------- | -------------------------------------------------------- |
-| UI Components    | `src/components`, `src/features`                         |
-| SPA Pages        | `src/routes/`                                            |
-| React Router     | `src/spa/router/`                                        |
-| Global Providers | `src/layout`                                             |
-| Zustand Stores   | `src/store`                                              |
-| Client Services  | `src/services/`                                          |
-| REST API         | `src/app/(backend)/webapi`                               |
-| tRPC Routers     | `apps/server/src/routers/{async\|lambda\|mobile\|tools}` |
-| Server Services  | `apps/server/src/services` (can access DB)               |
-| Server Modules   | `apps/server/src/modules` (no DB access)                 |
-| Feature Flags    | `apps/server/src/featureFlags`                           |
-| Global Config    | `apps/server/src/globalConfig`                           |
-| DB Schema        | `packages/database/src/schemas`                          |
-| DB Model         | `packages/database/src/models`                           |
-| DB Repository    | `packages/database/src/repositories`                     |
-| Third-party      | `src/libs` (analytics, oidc, etc.)                       |
-| Builtin Tools    | `packages/builtin-tool-*`, `packages/builtin-tools`      |
-| Open-source stub | `src/business/*`, `packages/business/*` (this repo)      |
+| Layer            | Location                                             |
+| ---------------- | ---------------------------------------------------- |
+| UI Components    | `src/components`, `src/features`                     |
+| SPA Pages        | `src/routes/`                                        |
+| React Router     | `src/spa/router/`                                    |
+| Global Providers | `src/layout`                                         |
+| Zustand Stores   | `src/store`                                          |
+| Client Services  | `src/services/`                                      |
+| REST API         | `src/app/(backend)/webapi`                           |
+| tRPC Routers     | `src/server/routers/{async\|lambda\|mobile\|tools}`  |
+| Server Services  | `src/server/services` (can access DB)                |
+| Server Modules   | `src/server/modules` (no DB access)                  |
+| Feature Flags    | `src/server/featureFlags`                            |
+| Global Config    | `src/server/globalConfig`                            |
+| DB Schema        | `packages/database/src/schemas`                      |
+| DB Model         | `packages/database/src/models`                       |
+| DB Repository    | `packages/database/src/repositories`                 |
+| Third-party      | `src/libs` (analytics, oidc, etc.)                   |
+| Builtin Tools    | `src/tools`, `packages/builtin-tool-*`               |
+| Cloud-only       | `src/business/*`, `packages/business/*` (cloud repo) |

 ## Data Flow

 ```
 React UI → Store Actions → Client Service → TRPC Lambda → Server Services → DB Model → PostgreSQL
 ```
-
-## Note: Relationship to the Cloud Repo
-
-This open-source repo is consumed by a **separate, private cloud (SaaS) repo**
-as a git submodule mounted at `lobehub/`. The cloud repo provides:
-
- **`src/business/{client,server}`** and **`packages/business/*`** implementations
-  that override the stubs shipped here.
- Cloud-only routes (e.g. `(cloud)/`, `embed/`), cloud-only stores (e.g.
-  `subscription/`), cloud-only TRPC routers (billing, budget, risk control, …),
-  and Vercel cron routes under `src/app/(backend)/cron/`.
- File-resolution order in cloud: `@/store/x` → cloud `src/store/x` first, then
-  `lobehub/packages/store/src/x`, then `lobehub/src/store/x`. **Cloud override wins.**
-
-When working in this repo alone, ignore the cloud layer — the stubs in
-`src/business/` and `packages/business/` are the source of truth here.
@@ -1,6 +1,6 @@
 ---
 name: react
-description: 'LobeHub React component conventions. Use when editing TSX UI, choosing base-ui vs @lobehub/ui vs antd, styling with antd-style, routing, desktop variants, layouts, or component state.'
+description: 'Use when writing or editing any `.tsx` under `src/**`. Triggers: createStaticStyles, createStyles, cssVar, antd-style, Flexbox, Center, Select, Modal, Drawer, Button, Tooltip, DropdownMenu, Popover, Switch, ScrollArea, Link, useNavigate, react-router-dom, next/link, desktopRouter, componentMap.desktop, .desktop.tsx, new component, new page, edit layout, add styles, zustand selector, @lobehub/ui, antd import.'
 user-invocable: false
 ---

@@ -17,45 +17,22 @@ user-invocable: false
 ## Component Priority

 1. **`src/components`** — project-specific reusable components
-2. **`@lobehub/ui/base-ui`** — headless primitives. **If the component lives here, use it. Do NOT import the same-named root export.**
-3. **`@lobehub/ui`** — higher-level / antd-wrapping components (only when no base-ui equivalent)
-4. **antd** — only when neither base-ui nor `@lobehub/ui` root provides it
-5. **Custom implementation** — true last resort
+2. **`@lobehub/ui/base-ui`** — headless primitives (Select, Modal, DropdownMenu, Popover, Switch, ScrollArea…)
+3. **`@lobehub/ui`** — higher-level components (ActionIcon, Markdown, DragPage…)
+4. **Custom implementation** — last resort; never reach for antd directly

-If unsure about available components, search existing code or check `node_modules/@lobehub/ui/es/index.mjs` and `node_modules/@lobehub/ui/es/base-ui/`.
+If unsure about available components, search existing code or check `node_modules/@lobehub/ui/es/index.mjs`.

-### `@lobehub/ui/base-ui` — always prefer for these
+### Common @lobehub/ui Components

-| Component                                  | Import                                                                                                  |
-| ------------------------------------------ | ------------------------------------------------------------------------------------------------------- |
-| `Select` (+ `SelectProps`, `SelectOption`) | `import { Select } from '@lobehub/ui/base-ui';`                                                         |
-| `Modal` (imperative API)                   | `import { createModal, confirmModal, useModalContext, type ModalInstance } from '@lobehub/ui/base-ui';` |
-| `DropdownMenu`                             | `import { DropdownMenu } from '@lobehub/ui/base-ui';`                                                   |
-| `ContextMenu`                              | `import { ContextMenu } from '@lobehub/ui/base-ui';`                                                    |
-| `Popover`                                  | `import { Popover } from '@lobehub/ui/base-ui';`                                                        |
-| `ScrollArea`                               | `import { ScrollArea } from '@lobehub/ui/base-ui';`                                                     |
-| `Switch`                                   | `import { Switch } from '@lobehub/ui/base-ui';`                                                         |
-| `Toast`                                    | `import { Toast } from '@lobehub/ui/base-ui';`                                                          |
-| `FloatingSheet`                            | `import { FloatingSheet } from '@lobehub/ui/base-ui';`                                                  |
-
-For Modal specifically, see the dedicated **modal** skill — use the imperative `createModal({ content: … })` pattern over the legacy `<Modal open … />` declarative pattern. base-ui has its own `ModalHost` already mounted in `SPAGlobalProvider`.
-
-> Common slip: `import { Select } from '@lobehub/ui'` looks fine but it's the antd-backed Select. Use base-ui Select. Same for `Modal`, `DropdownMenu`, etc.
-
-### `@lobehub/ui` root — use when base-ui has no equivalent
-
-| Category     | Components                                                                            |
-| ------------ | ------------------------------------------------------------------------------------- |
-| General      | ActionIcon, ActionIconGroup, Block, Button, Icon                                      |
-| Data Display | Avatar, Collapse, Empty, Highlighter, Markdown, Tag, Tooltip                          |
-| Data Entry   | CodeEditor, CopyButton, EditableText, Form, Input, InputPassword, SearchBar, TextArea |
-| Feedback     | Alert, Drawer                                                                         |
-| Layout       | Center, DraggablePanel, Flexbox, Grid, Header, MaskShadow                             |
-| Navigation   | Burger, Menu, SideNav, Tabs                                                           |
-
-## State
-
-When a feature component manages more than 3 pieces of state (`useState`/`useReducer`/derived state), extract the logic into a custom hook (e.g. `useXxx`). Keep the component focused on rendering — the hook holds state and handlers, so logic can be unit-tested without rendering the component.
+| Category     | Components                                                                      |
+| ------------ | ------------------------------------------------------------------------------- |
+| General      | ActionIcon, ActionIconGroup, Block, Button, Icon                                |
+| Data Display | Avatar, Collapse, Empty, Highlighter, Markdown, Tag, Tooltip                    |
+| Data Entry   | CodeEditor, CopyButton, EditableText, Form, FormModal, Input, SearchBar, Select |
+| Feedback     | Alert, Drawer, Modal                                                            |
+| Layout       | Center, DraggablePanel, Flexbox, Grid, Header, MaskShadow                       |
+| Navigation   | Burger, Dropdown, Menu, SideNav, Tabs                                           |

 ## Layout

@@ -108,15 +85,12 @@ errorElement: <ErrorBoundary />;

 ## Common Mistakes

-| Mistake                                                            | Fix                                                                         |
-| ------------------------------------------------------------------ | --------------------------------------------------------------------------- |
-| Using `next/link` in SPA                                           | Use `react-router-dom` `Link`                                               |
-| Using antd directly                                                | Use `@lobehub/ui/base-ui` first, then `@lobehub/ui`                         |
-| `import { Select } from '@lobehub/ui'`                             | `import { Select } from '@lobehub/ui/base-ui'`                              |
-| `import { Modal } from '@lobehub/ui'` + `<Modal open>` declarative | `createModal` / `confirmModal` from `@lobehub/ui/base-ui` (see modal skill) |
-| `import { DropdownMenu/Popover/Switch } from '@lobehub/ui'`        | Import same name from `@lobehub/ui/base-ui` instead                         |
-| `createStyles` for static styles                                   | Use `createStaticStyles` + `cssVar`                                         |
-| Editing only `desktopRouter.config.tsx`                            | Must edit both `.tsx` and `.desktop.tsx`                                    |
-| Using `margin` for flex spacing                                    | Use `gap` prop on Flexbox                                                   |
-| Accessing zustand store without selector                           | Use selectors to access store data (see zustand skill)                      |
-| Text or icon-text actions built with `Flexbox`/`Text` + `onClick`  | Use `Button type={'text'} size={'small'}` with `icon` when needed           |
+| Mistake                                                           | Fix                                                               |
+| ----------------------------------------------------------------- | ----------------------------------------------------------------- |
+| Using `next/link` in SPA                                          | Use `react-router-dom` `Link`                                     |
+| Using antd directly                                               | Use `@lobehub/ui/base-ui` first, then `@lobehub/ui`               |
+| `createStyles` for static styles                                  | Use `createStaticStyles` + `cssVar`                               |
+| Editing only `desktopRouter.config.tsx`                           | Must edit both `.tsx` and `.desktop.tsx`                          |
+| Using `margin` for flex spacing                                   | Use `gap` prop on Flexbox                                         |
+| Accessing zustand store without selector                          | Use selectors to access store data (see zustand skill)            |
+| Text or icon-text actions built with `Flexbox`/`Text` + `onClick` | Use `Button type={'text'} size={'small'}` with `icon` when needed |
@@ -1,6 +1,6 @@
 ---
 name: response-compliance
-description: 'OpenResponses API compliance testing. Use for Response API endpoint tests, compliance runs, schema debugging, response api test, or openresponses test tasks.'
+description: OpenResponses API compliance testing. Use when testing the Response API endpoint, running compliance tests, or debugging Response API schema issues. Triggers on 'compliance', 'response api test', 'openresponses test'.
 ---

 # OpenResponses Compliance Test
@@ -1,6 +1,6 @@
 ---
 name: review-checklist
-description: 'LobeHub code review checklist. Use when reviewing a PR, diff, or branch for console leftovers, return await, secrets, i18n, desktop router drift, UI imports, migrations, or cloud impact.'
+description: 'Common recurring mistakes in LobeHub code review — console leftovers, missing return await, hardcoded secrets, hardcoded i18n strings, desktop router pair drift, antd vs @lobehub/ui, non-idempotent migrations, cloud impact red flags. Use as a quick checklist when reviewing PRs, diffs, or branch changes.'
 user-invocable: false
 ---

@@ -22,7 +22,6 @@ user-invocable: false

 - Bug fixes must include tests covering the fixed scenario
 - New logic (services, store actions, utilities) should have test coverage
- **New database Model/Repository** (`packages/database/src/models/**`, `src/repositories/**`) must ship a sibling `__tests__/<name>.test.ts` — incl. user-isolation tests; BM25 search guarded by `describe.skipIf(!isServerDB)` (see `/testing` → `db-model-test.md`)
 - Existing tests still cover the changed behavior?
 - Prefer `vi.spyOn` over `vi.mock` (see `/testing` skill)

@@ -1,142 +0,0 @@
---
-name: skills-audit
-description: 'Audit .agents/skills SKILL.md files. Use for recurring checks of duplicate, overlapping, stale, inconsistent, or broken skills and merge/delete candidates.'
-disable-model-invocation: true
-argument-hint: '[--verbose | --apply]'
---
-
-# Skills Audit
-
-Periodic review of the project-local skill set under `.agents/skills/`. The goal is to catch drift before the catalog becomes confusing — too many skills, overlapping triggers, descriptions that no longer match the body, references to skills that were renamed/deleted.
-
-**Recommended cadence:** weekly, or after any week where >1 skill was added/renamed.
-
-## Procedure
-
-### 1 — Inventory
-
-Build a fresh census of all SKILL.md files. Do NOT trust any prior cached list.
-
-```bash
-find .agents/skills -name SKILL.md | wc -l                      # total count
-find .agents/skills -name SKILL.md -exec wc -l {} \; | sort -rn # by body length
-```
-
-Group by domain in a mental table (DB / state / UI / agent / testing / workflow / docs / etc.). Note new arrivals since last audit (`git log --since="1 week ago" -- .agents/skills/`).
-
-### 2 — Pull frontmatter for all skills
-
-```bash
-# Extract name + description for each SKILL.md
-for f in .agents/skills/*/SKILL.md; do
-  echo "=== $(basename $(dirname $f)) ==="
-  awk '/^---$/{c++; next} c==1' "$f" | head -20
-done
-```
-
-Read the description block of every skill. The body can stay unread unless step 4 flags it.
-
-### 3 — Detect overlap / redundancy
-
-For each pair within the same domain, ask:
-
- **Same description**? → likely duplicate (one is probably a stale rename leftover, or a global-vs-local collision).
- **Trigger keywords substantially overlap**? → either merge, OR tighten one description so the model can choose unambiguously.
- **One skill's body says "see also: foo"**? → confirm `foo` still exists, AND confirm the cross-reference is still meaningful (the referenced skill may have absorbed the referrer's concerns).
- **Skill duplicates content from `AGENTS.md`**? → fold into AGENTS.md or slim the skill to just the delta.
-
-Common false positives (do NOT merge):
-
- `db-migrations` vs `drizzle` — distinct workflows (migration files vs schema authoring).
- `microcopy` vs `i18n` — content vs mechanics.
- `agent-runtime-hooks` vs `agent-tracing` vs `agent-signal` — different surfaces of the agent system.
- `testing` vs `agent-testing` — different test types.
-
-### 4 — Description format consistency
-
-Apply the **standard template**:
-
-```
-{Topic + key conventions or scope}. Use when {scenarios — verbs + nouns}. Triggers on {`code-symbols`, 'natural phrases', '中文'}.
-```
-
-Skills with `disable-model-invocation: true` (user-invoked only, slash commands) don't need `Triggers on` — they're never auto-routed.
-
-Flag descriptions that:
-
- ❌ Have NO `Use when` clause (model can't decide when to load it).
- ❌ Have NO `Triggers on` clause (and aren't `disable-model-invocation`).
- ❌ Use weird formats (numbered lists `(1)(2)(3)`, `Triggers:` colon instead of `Triggers on`, `MUST use when ...` as opening word).
- ❌ Are dramatically terse for a 200+ line body, or dramatically verbose for a 60-line body.
- ❌ Reference deleted/renamed skills.
-
-### 5 — Stale-skill check
-
-For narrow domain skills (e.g. `response-compliance`, one-off CLI workflows):
-
-```bash
-# Confirm the referenced code surface still exists
-rg -l "response-compliance|openresponses" packages/ src/              # adjust per skill
-git log --since="3 months ago" -- .agents/skills/ < skill > /SKILL.md # is it being maintained?
-```
-
-If the underlying surface is gone and the skill hasn't been edited in 3+ months → flag for archival.
-
-### 6 — Cross-reference integrity
-
-Any skill body mentioning another skill by name:
-
-```bash
-# Scan all skill bodies for skill-name references
-rg -o '`[a-z][a-z0-9-]+`' .agents/skills/*/SKILL.md | grep -v ':\s*$' | sort -u
-```
-
-For each name extracted, confirm `.agents/skills/<name>/SKILL.md` exists. Broken references happen after renames — fix them in the same audit pass.
-
-### 7 — Output report
-
-Produce a markdown summary back to the user with the same structure as the original audit (this skill was created during one):
-
-```markdown
-## 📊 Inventory
-
-{count, domain breakdown}
-
-## 🎯 Recommendations
-
-### 🔴 High confidence
-
- {action} — {reason}
-
-### 🟡 Medium confidence
-
- {action} — {reason needs verification}
-
-### 🟢 Low confidence / no-op
-
- {item considered but skipping because ...}
-
-## 📋 Suggested order
-
-{table of actions with risk + LOC estimate}
-```
-
-End by asking the user which actions to apply — do NOT auto-apply unless the user passed `--apply` and even then confirm destructive deletes individually.
-
-## Output rules
-
- Be specific. "Skill X overlaps with Y" is useless without naming the overlapping triggers.
- Cite line numbers when flagging description / body issues.
- Don't recommend merges unless the call sites would actually load the merged skill in the same context.
- Don't recommend deletes for skills that haven't been touched recently — "unused" can mean "stable", not "dead".
-
-## What NOT to do
-
- ❌ Don't rename skill directories without checking for cross-references AND user memory entries that name the old slug.
- ❌ Don't normalize a description by removing trigger keywords just to fit the template — the keywords are the routing signal.
- ❌ Don't fold a heavy 200+ line skill into another just because they share a domain — large skills get loaded selectively and merging makes everything load.
- ❌ Don't propose `.agents/skills/INDEX.md` or `<domain>-<skill>` prefix renames unless the user explicitly asks — costs > benefits for cosmetic reorgs.
-
-## Related history
-
- First audit: `chore/skills-audit` branch (2026-05-25) — deleted `source-command-dedupe`, renamed `data-fetching` → `data-fetching-architecture`, normalized 9 descriptions, created this skill.
@@ -0,0 +1,44 @@
+---
+name: 'source-command-dedupe'
+description: 'Find duplicate GitHub issues'
+---
+
+# source-command-dedupe
+
+Use this skill when the user asks to run the migrated source command `dedupe`.
+
+## Command Template
+
+Find up to 3 likely duplicate issues for a given GitHub issue.
+
+To do this, follow these steps precisely:
+
+1. Use an agent to check if the GitHub issue (a) is closed, (b) does not need to be deduped (e.g. because it is broad product feedback without a specific solution, or positive feedback), or (c) already has a duplicates comment that you made earlier. If so, do not proceed.
+2. Use an agent to view the GitHub issue, and ask the agent to return a summary of the issue.
+3. Then, launch 5 parallel agents to search GitHub for duplicates of this issue, using diverse keywords and search approaches, using the summary from step 2.
+4. Next, feed the results from steps 2 and 3 into another agent, so that it can filter out false positives that are likely not actually duplicates of the original issue. If there are no duplicates remaining, do not proceed.
+5. Finally, comment back on the issue with a list of up to three duplicate issues (or zero, if there are no likely duplicates)
+
+Notes (be sure to tell this to your agents, too):
+
+- Use `gh` to interact with GitHub, rather than web fetch
+- Do not use other tools, beyond `gh` (e.g. don't use other MCP servers, file edit, etc.)
+- Make a todo list first
+- For your comment, use the following format precisely (assuming for this example that you found 3 suspected duplicates):
+
+---
+
+Found 3 possible duplicate issues:
+
+1. <link to issue>
+2. <link to issue>
+3. <link to issue>
+
+This issue will be automatically closed as a duplicate in 3 days.
+
+- If your issue is a duplicate, please close it and 👍 the existing issue instead
+- To prevent auto-closure, add a comment or 👎 this comment
+
+> 🤖 Generated with Codex
+
+---
@@ -1,6 +1,6 @@
 ---
 name: spa-routes
-description: 'LobeHub SPA route architecture. Use when editing src/routes, src/features delegation, desktop/mobile/popup router configs, .desktop variants, route segments, redirects, or new pages.'
+description: MUST use when editing src/routes/ segments, src/spa/router/desktopRouter.config.tsx or desktopRouter.config.desktop.tsx (always change both together), mobileRouter.config.tsx, or when moving UI/logic between routes and src/features/.
 user-invocable: false
 ---

@@ -94,27 +94,6 @@ Anything that changes the tree (new segment, renamed `path`, moved layout, new c

 ---

-## 3b. Other `.desktop.{ts,tsx}` variants inside `src/routes/`
-
-The router pair is **not** the only `.desktop` variant pattern in this repo. Some route trees colocate a `<name>.desktop.{ts,tsx}` next to its base `<name>.{ts,tsx}` — Vite's resolver swaps in the `.desktop` file for Electron builds. Same drift risk as the router pair: editing only one side can break Electron silently.
-
-Known variants today:
-
-| Base file (web)                                       | Desktop file (Electron)                                       | Purpose                                                                                                                                    |
-| ----------------------------------------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
-| `src/routes/(main)/settings/features/componentMap.ts` | `src/routes/(main)/settings/features/componentMap.desktop.ts` | Settings tab → component map. Web uses dynamic `import()`; desktop uses sync imports. `componentMap.sync.test.ts` enforces identical keys. |
-| `src/routes/(main)/agent/index.tsx`                   | `src/routes/(main)/agent/index.desktop.tsx`                   | Page entry. Desktop variant overrides the web page wholesale (e.g. extra popup guards).                                                    |
-| `src/routes/(main)/group/index.tsx`                   | `src/routes/(main)/group/index.desktop.tsx`                   | Same pattern as agent.                                                                                                                     |
-
-**Rules:**
-
-1. After editing **any** `.ts`/`.tsx` under `src/routes/`, glob the same directory for a `<filename>.desktop.{ts,tsx}` sibling. If one exists, apply the equivalent change there in the same commit.
-2. When adding a new SettingsTab, register it in **both** `componentMap.ts` (with `dynamic(...)`) and `componentMap.desktop.ts` (with a sync `import`). `componentMap.sync.test.ts` will fail the build otherwise.
-3. When adding a new desktop-only page wholesale-override, prefer a single base file with platform-aware code over introducing a new `.desktop.tsx` variant — only add a new variant when the two trees genuinely diverge (different store wiring, different popup guards, etc.).
-4. When deleting, remove **both** files together.
-
---
-
 ## 4. How to Divide Files (route vs feature)

 | Question                                                 | Put in `src/routes/`                                     | Put in `src/features/`       |
@@ -1,6 +1,6 @@
 ---
 name: store-data-structures
-description: 'LobeHub Zustand store data-shape patterns. Use when designing store state, list/detail splits, normalized maps, reducers, messagesMap, topicsMap, or choosing shared type sources.'
+description: Zustand store data structure patterns for LobeHub. Covers List vs Detail data structures, Map + Reducer patterns, type definitions, and when to use each pattern. Use when designing store state, choosing data structures, or implementing list/detail pages.
 user-invocable: false
 ---

@@ -310,5 +310,5 @@ export interface BenchmarkListItem {

 ## Related Skills

- `data-fetching-architecture` — how to fetch and update this data
+- `data-fetching` — how to fetch and update this data
 - `zustand` — general Zustand patterns
@@ -1,6 +1,6 @@
 ---
 name: testing
-description: 'Vitest testing guide. Use when writing or updating tests, fixing failing tests, improving coverage, debugging test issues, or setting up mocks.'
+description: Testing guide using Vitest. Use when writing tests (.test.ts, .test.tsx), fixing failing tests, improving test coverage, or debugging test issues. Triggers on test creation, test debugging, mock setup, or test-related questions.
 user-invocable: false
 ---

@@ -14,21 +14,15 @@ user-invocable: false
 # Run specific test file
 bunx vitest run --silent='passed-only' '[file-path]'

-# Database package (client-db, PGlite — default, skips BM25/pg_search)
+# Database package (client)
 cd packages/database && bunx vitest run --silent='passed-only' '[file]'

-# Database package (server-db, Postgres — BM25/pgvector parity, what CI measures coverage in)
+# Database package (server)
 cd packages/database && TEST_SERVER_DB=1 bunx vitest run --silent='passed-only' '[file]'
 ```

 **Never run** `bun run test` - it runs all 3000+ tests (\~10 minutes).

-> **Database models/repositories:** every new file under `packages/database/src/models/**`
-> or `src/repositories/**` ships with a sibling `__tests__/<name>.test.ts` in the same PR.
-> Use the real DB via `getTestDB()` (integration style), guard BM25/full-text-search blocks
-> with `describe.skipIf(!isServerDB)`, and always test user-isolation. See
-> `references/db-model-test.md` for setup, schema gotchas, and the client-vs-server-db split.
-
 ## Test Categories

 | Category | Location                    | Config                          |
@@ -1,74 +1,95 @@
 # Database Model Testing Guide

-Test the `packages/database` Model and Repository layers.
+Test the `packages/database` Model layer.

-> **Rule: every new Model or Repository ships with a sibling test in the same PR.**
-> A new file under `src/models/**` or `src/repositories/**` must have a matching
-> `__tests__/<name>.test.ts`. Coverage runs in server-db mode in CI and the patch
-> gate will not always catch a brand-new untested file (a small new file barely
-> moves the project total) — so this is a convention, not something CI guarantees.
-> Start from the template: `packages/database/src/models/__tests__/_test_template.ts`.
-
-## Two test environments: client-db vs server-db
-
-`getTestDB()` (`src/core/getTestDB.ts`) returns different engines based on the
-`TEST_SERVER_DB` env var:
-
-| Mode                    | Engine                              | When               | Notes                                                                                                                                                               |
-| ----------------------- | ----------------------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **client-db** (default) | PGlite (in-memory)                  | `bunx vitest run`  | Migration runner **skips any SQL containing `pg_search` / `bm25`** — the ParadeDB BM25 `@@@` operator does not exist here.                                          |
-| **server-db**           | node-postgres → `DATABASE_TEST_URL` | `TEST_SERVER_DB=1` | CI uses the `paradedb/paradedb` image (has `pg_search`). **Coverage is measured in this mode** (`test:coverage` → `vitest.config.server.mts`, uploaded to Codecov). |
+## Dual Environment Verification (Required)

 ```bash
-# 1. Client environment (fast, default — what most local runs use)
-cd packages/database && bunx vitest run --silent='passed-only' '[file]'
+# 1. Client environment (fast)
+cd packages/database && TEST_SERVER_DB=0 bunx vitest run --silent='passed-only' '[file]'

-# 2. Server environment (BM25 / pg_search / pgvector parity, needs DATABASE_TEST_URL)
+# 2. Server environment (compatibility)
 cd packages/database && TEST_SERVER_DB=1 bunx vitest run --silent='passed-only' '[file]'
 ```

-Implication: client-db coverage **under-counts** any code that needs BM25 (e.g.
-`repositories/search/index.ts` reads near-0% locally but is fully covered in CI).
-Don't chase those lines locally — confirm via CI/Codecov.
+## User Permission Check - Security First 🔒

-## BM25 / full-text search → `describe.skipIf(!isServerDB)`
-
-Any method using the BM25 `@@@` operator or `sanitizeBm25` (keyword search:
-`queryByKeyword`, `searchAgents`, userMemory lexical search, …) **throws under
-PGlite** (often swallowed by a `catch` that returns `[]`, so the test silently
-fails with empty results). Guard those blocks so they only run in server-db:
+**Critical security requirement**: All user data operations must include permission checks.

 ```typescript
-// BM25 search requires the pg_search extension (ParadeDB), not available in PGlite
-const isServerDB = process.env.TEST_SERVER_DB === '1';
-describe.skipIf(!isServerDB)('queryByKeyword', () => {
-  /* ... */
+// ❌ DANGEROUS: Missing permission check
+update = async (id: string, data: Partial<MyModel>) => {
+  return this.db
+    .update(myTable)
+    .set(data)
+    .where(eq(myTable.id, id)) // Only checks ID
+    .returning();
+};
+
+// ✅ SECURE: Permission check included
+update = async (id: string, data: Partial<MyModel>) => {
+  return this.db
+    .update(myTable)
+    .set(data)
+    .where(
+      and(
+        eq(myTable.id, id),
+        eq(myTable.userId, this.userId), // ✅ Permission check
+      ),
+    )
+    .returning();
+};
+```
+
+## Test File Structure
+
+```typescript
+// @vitest-environment node
+describe('MyModel', () => {
+  describe('create', () => {
+    /* ... */
+  });
+  describe('queryAll', () => {
+    /* ... */
+  });
+  describe('update', () => {
+    it('should update own records');
+    it('should NOT update other users records'); // 🔒 Security
+  });
+  describe('delete', () => {
+    it('should delete own records');
+    it('should NOT delete other users records'); // 🔒 Security
+  });
+  describe('user isolation', () => {
+    it('should enforce user data isolation'); // 🔒 Core security
+  });
 });
 ```

-Convention already used in `session.test.ts`, `topic.query.test.ts`,
-`message.query.test.ts`, `home/index.test.ts`, `repositories/search/index.test.ts`.
-
-## Setup boilerplate
-
-Top-of-file pattern (see `_test_template.ts` for the full version). Use real DB
-integration via `getTestDB()` — **not a mocked `vi.fn()` db**; the integration
-style exercises real SQL and gives far deeper coverage.
+## Security Test Example

 ```typescript
-import { eq } from 'drizzle-orm';
-import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+it('should not update records of other users', async () => {
+  const [otherUserRecord] = await serverDB
+    .insert(myTable)
+    .values({ userId: 'other-user', data: 'original' })
+    .returning();

-import { getTestDB } from '../../core/getTestDB';
-import { users } from '../../schemas';
-import type { LobeChatDatabase } from '../../type';
-import { MyModel } from '../myModel';
+  const result = await myModel.update(otherUserRecord.id, { data: 'hacked' });

-const serverDB: LobeChatDatabase = await getTestDB(); // top-level await is fine
+  expect(result).toBeUndefined();
+  const unchanged = await serverDB.query.myTable.findFirst({
+    where: eq(myTable.id, otherUserRecord.id),
+  });
+  expect(unchanged?.data).toBe('original');
+});
+```

-const userId = 'my-model-test-user';
+## Data Management
+
+```typescript
+const userId = 'test-user';
 const otherUserId = 'other-user';
-const myModel = new MyModel(serverDB, userId);

 beforeEach(async () => {
  await serverDB.delete(users);
@@ -76,99 +97,40 @@ beforeEach(async () => {
 });

 afterEach(async () => {
-  await serverDB.delete(users); // cascades to user-scoped rows
+  await serverDB.delete(users);
 });
 ```

-Some tests need the Node environment (pgvector, server-only deps) — add
-`// @vitest-environment node` as the first line when required.
-
-## User permission check — security first 🔒
-
-**Every user-data operation must be ownership-scoped.** Always add a test proving
-another user cannot read/update/delete the row.
+## Foreign Key Handling

 ```typescript
-// ✅ SECURE: ownership in the WHERE clause
-update = async (id: string, data: Partial<MyModel>) =>
-  this.db
-    .update(myTable)
-    .set(data)
-    .where(and(eq(myTable.id, id), eq(myTable.userId, this.userId)))
-    .returning();
-```
-
-```typescript
-it("should NOT update another user's record", async () => {
-  const otherModel = new MyModel(serverDB, otherUserId);
-  const [row] = await otherModel.create({ data: 'original' });
-
-  await myModel.update(row.id, { data: 'hacked' });
-
-  const unchanged = await serverDB.query.myTable.findFirst({
-    where: eq(myTable.id, row.id),
-  });
-  expect(unchanged?.data).toBe('original');
-});
-```
-
-## What to cover
-
-Aim each model/repository as close to 100% as practical (excluding BM25):
-
- Every public method
- Both branches of conditionals; empty-list / `if (!x) return []` early returns
- Error fallbacks (e.g. decrypt/JSON-parse failure → `null`)
- Filters, pagination, ordering branches
- Ownership / user isolation, and workspace scoping if the model takes a `workspaceId`
-
-## Schema gotchas (real traps that fail inserts or types)
-
- **`workspaces`** requires `{ id, name, slug, primaryOwnerId }` and has **no
-  `userId` column** — `insert(workspaces).values({ id, name, slug, primaryOwnerId })`.
- **uuid columns**: a "not found" test must pass a _valid_ UUID
-  (`'00000000-0000-0000-0000-000000000000'`); a random string raises a `22P02`
-  DB error instead of returning `undefined`/`null`.
- **Enum / `$type` columns** are type-checked: e.g. `files.source` is a
-  `FileSource` enum (`image_generation` | `page-editor` | `video_generation`),
-  not free text — passing `'upload'` is a type error.
- Read the table's schema in `src/schemas/` for `notNull` columns **without
-  defaults**; you must supply those on insert.
-
-## Foreign key handling
-
-```typescript
-// ❌ Wrong: invalid foreign key
+// ❌ Wrong: Invalid foreign key
 const testData = { asyncTaskId: 'invalid-uuid', fileId: 'non-existent' };

-// ✅ Use null …
+// ✅ Correct: Use null
 const testData = { asyncTaskId: null, fileId: null };

-// ✅ … or create the referenced row first
-const [asyncTask] = await serverDB.insert(asyncTasks).values({ status: 'pending' }).returning();
-testData.asyncTaskId = asyncTask.id;
+// ✅ Or: Create referenced record first
+beforeEach(async () => {
+  const [asyncTask] = await serverDB
+    .insert(asyncTasks)
+    .values({ id: 'valid-id', status: 'pending' })
+    .returning();
+  testData.asyncTaskId = asyncTask.id;
+});
 ```

-## Predictable sorting
+## Predictable Sorting

 ```typescript
-// ✅ Use explicit timestamps — never rely on insert order
+// ✅ Use explicit timestamps
+const oldDate = new Date('2024-01-01T10:00:00Z');
+const newDate = new Date('2024-01-02T10:00:00Z');
 await serverDB.insert(table).values([
-  { ...data1, createdAt: new Date('2024-01-01T10:00:00Z') },
-  { ...data2, createdAt: new Date('2024-01-02T10:00:00Z') },
+  { ...data1, createdAt: oldDate },
+  { ...data2, createdAt: newDate },
 ]);
+
+// ❌ Don't rely on insert order
+await serverDB.insert(table).values([data1, data2]); // Unpredictable
 ```
-
-## Checking coverage of one file
-
-```bash
-# Per-file coverage; read the "Uncovered Line #s" column to find gaps
-cd packages/database
-bunx vitest run --coverage --silent='passed-only' '[test-file]' 2>&1 | grep '[sourceFile].ts'
-```
-
-## Before finishing
-
-1. Tests pass: `bunx vitest run --silent='passed-only' '[file]'`
-2. Types pass: `bun run type-check` (vitest uses esbuild and does **not**
-   type-check — a green test run can still have type errors).
@@ -1,6 +1,6 @@
 ---
 name: trpc-router
-description: 'TRPC router development guide. Use when creating or modifying apps/server/src/routers, adding procedures, or implementing server-side API endpoints.'
+description: TRPC router development guide. Use when creating or modifying TRPC routers (src/server/routers/**), adding procedures, or working with server-side API endpoints. Triggers on TRPC router creation, procedure implementation, or API endpoint tasks.
 user-invocable: false
 ---

@@ -8,9 +8,9 @@ user-invocable: false

 ## File Location

- Routers: `apps/server/src/routers/lambda/<domain>.ts`
- Helpers: `apps/server/src/routers/lambda/_helpers/`
- Schemas: `apps/server/src/routers/lambda/_schema/`
+- Routers: `src/server/routers/lambda/<domain>.ts`
+- Helpers: `src/server/routers/lambda/_helpers/`
+- Schemas: `src/server/routers/lambda/_schema/`

 ## Router Structure

--- a/Show More
+++ b/Show More