mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-14 11:40:07 +00:00
Compare commits
67 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b8b1ab6616 | |||
| 3891015a3d | |||
| 5babb7d826 | |||
| a7504b696a | |||
| 9dc4308942 | |||
| 082117998d | |||
| 9a74d6c045 | |||
| b1a4f24dc9 | |||
| c47551775b | |||
| 2d83300795 | |||
| 0915538da8 | |||
| 53fc0642e0 | |||
| a8c725abd5 | |||
| b8a7f6e9eb | |||
| bb594f87e2 | |||
| b0ee9b434e | |||
| cf2c5a1d37 | |||
| 0511e43a48 | |||
| 1f128f407f | |||
| f258a2e042 | |||
| 7996e1c431 | |||
| 93dddfc2e5 | |||
| 5e4186559b | |||
| 9bfd9bb4a5 | |||
| 9ca54135b5 | |||
| f162556607 | |||
| 3292ed83f9 | |||
| 561a38f788 | |||
| 71aaf0fac5 | |||
| 287601f8ec | |||
| b36f8781e6 | |||
| 705450a571 | |||
| 5272c7373f | |||
| fb24b6f1b7 | |||
| 2fd65fe8a3 | |||
| 35d5a2c937 | |||
| 42f40d2717 | |||
| ef8a644d8c | |||
| 81c84348bc | |||
| 8d7a0467db | |||
| e9522729c5 | |||
| cf01894077 | |||
| b5d945b1fd | |||
| cbee964582 | |||
| 87a38ad0c4 | |||
| f2d4745ad3 | |||
| 0167ac8e28 | |||
| b480227fd0 | |||
| 97ff98cada | |||
| 845d3ef58a | |||
| 906917362f | |||
| c69049d6da | |||
| 4f7356ffab | |||
| d20c82c115 | |||
| d617a6cd97 | |||
| 408391eeb6 | |||
| 4a2e671f55 | |||
| 695a261df1 | |||
| 39b723eff4 | |||
| 68937d842c | |||
| b66bc66260 | |||
| 4d06279abd | |||
| 1a8d33fbf4 | |||
| 2c086373cc | |||
| c7d49258f8 | |||
| 2280fd6ff9 | |||
| 8eb901c401 |
@@ -1,94 +0,0 @@
|
||||
---
|
||||
name: add-provider-doc
|
||||
description: Add documentation for a new AI provider — usage docs, env vars, Docker config, image resources.
|
||||
disable-model-invocation: true
|
||||
argument-hint: '[provider-name]'
|
||||
---
|
||||
|
||||
# Adding New AI Provider Documentation
|
||||
|
||||
Complete workflow for adding documentation for a new AI provider.
|
||||
|
||||
## Overview
|
||||
|
||||
1. Create usage documentation (EN + CN)
|
||||
2. Add environment variable documentation (EN + CN)
|
||||
3. Update Docker configuration files
|
||||
4. Update .env.example
|
||||
5. Prepare image resources
|
||||
|
||||
## Step 1: Create Provider Usage Documentation
|
||||
|
||||
### Required Files
|
||||
|
||||
- `docs/usage/providers/{provider-name}.mdx` (English)
|
||||
- `docs/usage/providers/{provider-name}.zh-CN.mdx` (Chinese)
|
||||
|
||||
### Key Requirements
|
||||
|
||||
- 5-6 screenshots showing the process
|
||||
- Cover image for the provider
|
||||
- Real registration and dashboard URLs
|
||||
- Pricing information callout
|
||||
- **Never include real API keys** - use placeholders
|
||||
|
||||
Reference: `docs/usage/providers/fal.mdx`
|
||||
|
||||
## Step 2: Update Environment Variables Documentation
|
||||
|
||||
### Files to Update
|
||||
|
||||
- `docs/self-hosting/environment-variables/model-provider.mdx` (EN)
|
||||
- `docs/self-hosting/environment-variables/model-provider.zh-CN.mdx` (CN)
|
||||
|
||||
### Content Format
|
||||
|
||||
```markdown
|
||||
### `{PROVIDER}_API_KEY`
|
||||
|
||||
- Type: Required
|
||||
- Description: API key from {Provider Name}
|
||||
- Example: `{api-key-format}`
|
||||
|
||||
### `{PROVIDER}_MODEL_LIST`
|
||||
|
||||
- Type: Optional
|
||||
- Description: Control model list. Use `+` to add, `-` to hide
|
||||
- Example: `-all,+model-1,+model-2=Display Name`
|
||||
```
|
||||
|
||||
## Step 3: Update Docker Files
|
||||
|
||||
Update all Dockerfiles at the **end** of ENV section:
|
||||
|
||||
- `Dockerfile`
|
||||
- `Dockerfile.database`
|
||||
- `Dockerfile.pglite`
|
||||
|
||||
```dockerfile
|
||||
# {New Provider}
|
||||
{PROVIDER}_API_KEY="" {PROVIDER}_MODEL_LIST=""
|
||||
```
|
||||
|
||||
## Step 4: Update .env.example
|
||||
|
||||
```bash
|
||||
### {Provider Name} ###
|
||||
# {PROVIDER}_API_KEY={prefix}-xxxxxxxx
|
||||
```
|
||||
|
||||
## Step 5: Image Resources
|
||||
|
||||
- Cover image
|
||||
- 3-4 API dashboard screenshots
|
||||
- 2-3 LobeHub configuration screenshots
|
||||
- Host on LobeHub CDN: `hub-apac-1.lobeobjects.space`
|
||||
|
||||
## Checklist
|
||||
|
||||
- [ ] EN + CN usage docs
|
||||
- [ ] EN + CN env var docs
|
||||
- [ ] All 3 Dockerfiles updated
|
||||
- [ ] .env.example updated
|
||||
- [ ] All images prepared
|
||||
- [ ] No real API keys in docs
|
||||
@@ -1,108 +0,0 @@
|
||||
---
|
||||
name: add-setting-env
|
||||
description: Add server-side environment variables that control default values for user settings.
|
||||
disable-model-invocation: true
|
||||
argument-hint: '[setting-name]'
|
||||
---
|
||||
|
||||
# Adding Environment Variable for User Settings
|
||||
|
||||
Add server-side environment variables to configure default values for user settings.
|
||||
|
||||
**Priority**: User Custom > Server Env Var > Hardcoded Default
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Define Environment Variable
|
||||
|
||||
Create `src/envs/<domain>.ts`:
|
||||
|
||||
```typescript
|
||||
import { createEnv } from '@t3-oss/env-nextjs';
|
||||
import { z } from 'zod';
|
||||
|
||||
export const get<Domain>Config = () => {
|
||||
return createEnv({
|
||||
server: {
|
||||
YOUR_ENV_VAR: z.coerce.number().min(MIN).max(MAX).optional(),
|
||||
},
|
||||
runtimeEnv: {
|
||||
YOUR_ENV_VAR: process.env.YOUR_ENV_VAR,
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
export const <domain>Env = get<Domain>Config();
|
||||
```
|
||||
|
||||
### 2. Update Type (if new domain)
|
||||
|
||||
Add to `packages/types/src/serverConfig.ts`:
|
||||
|
||||
```typescript
|
||||
import { User<Domain>Config } from './user/settings';
|
||||
|
||||
export interface GlobalServerConfig {
|
||||
<domain>?: PartialDeep<User<Domain>Config>;
|
||||
}
|
||||
```
|
||||
|
||||
**Prefer reusing existing types** from `packages/types/src/user/settings`.
|
||||
|
||||
### 3. Assemble Server Config (if new domain)
|
||||
|
||||
In `apps/server/src/globalConfig/index.ts`:
|
||||
|
||||
```typescript
|
||||
import { <domain>Env } from '@/envs/<domain>';
|
||||
|
||||
export const getServerGlobalConfig = async () => {
|
||||
const config: GlobalServerConfig = {
|
||||
<domain>: cleanObject({
|
||||
<settingName>: <domain>Env.YOUR_ENV_VAR,
|
||||
}),
|
||||
};
|
||||
return config;
|
||||
};
|
||||
```
|
||||
|
||||
### 4. Merge to User Store (if new domain)
|
||||
|
||||
In `src/store/user/slices/common/action.ts`:
|
||||
|
||||
```typescript
|
||||
const serverSettings: PartialDeep<UserSettings> = {
|
||||
<domain>: serverConfig.<domain>,
|
||||
};
|
||||
```
|
||||
|
||||
### 5. Update .env.example
|
||||
|
||||
```bash
|
||||
# <Description> (range/options, default: X)
|
||||
# YOUR_ENV_VAR=<example>
|
||||
```
|
||||
|
||||
### 6. Update Documentation
|
||||
|
||||
- `docs/self-hosting/environment-variables/basic.mdx` (EN)
|
||||
- `docs/self-hosting/environment-variables/basic.zh-CN.mdx` (CN)
|
||||
|
||||
## Example: AI_IMAGE_DEFAULT_IMAGE_NUM
|
||||
|
||||
```typescript
|
||||
// src/envs/image.ts
|
||||
AI_IMAGE_DEFAULT_IMAGE_NUM: z.coerce.number().min(1).max(20).optional(),
|
||||
|
||||
// packages/types/src/serverConfig.ts
|
||||
image?: PartialDeep<UserImageConfig>;
|
||||
|
||||
// apps/server/src/globalConfig/index.ts
|
||||
image: cleanObject({ defaultImageNum: imageEnv.AI_IMAGE_DEFAULT_IMAGE_NUM }),
|
||||
|
||||
// src/store/user/slices/common/action.ts
|
||||
image: serverConfig.image,
|
||||
|
||||
// .env.example
|
||||
# AI_IMAGE_DEFAULT_IMAGE_NUM=4
|
||||
```
|
||||
@@ -1,209 +0,0 @@
|
||||
---
|
||||
name: agent-runtime-hooks
|
||||
description: 'Agent runtime lifecycle hooks. Use for before/after tool or step hooks, tool mocks, human intervention, sub-agent calls, context compression, evals, tracing, callAgent, or lifecycle events.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Agent Runtime Hooks
|
||||
|
||||
Lifecycle hooks for observing and intercepting agent execution. Hooks are registered per-operation via `execAgent({ hooks })` and dispatched by `HookDispatcher`.
|
||||
|
||||
## Hook Types
|
||||
|
||||
16 hook types across 5 categories:
|
||||
|
||||
```
|
||||
execAgent({ hooks })
|
||||
│
|
||||
├─ beforeStep ──────────── Before each step executes
|
||||
│ │
|
||||
│ ├─ [call_llm] LLM inference
|
||||
│ │
|
||||
│ ├─ [call_tool]
|
||||
│ │ ├─ beforeToolCall ── Before tool executes (supports mocking)
|
||||
│ │ ├─ (tool execution)
|
||||
│ │ ├─ afterToolCall ─── After tool completes (observation only)
|
||||
│ │ └─ onToolCallError ─ Tool threw an exception
|
||||
│ │
|
||||
│ ├─ [request_human_approve]
|
||||
│ │ ├─ beforeHumanIntervention ── Before agent pauses
|
||||
│ │ ├─ afterHumanIntervention ─── After approve/reject + resume
|
||||
│ │ └─ onStopByHumanIntervention ── User rejected, agent halted
|
||||
│ │
|
||||
│ ├─ [compress_context]
|
||||
│ │ ├─ beforeCompact ──── Before compression starts
|
||||
│ │ ├─ afterCompact ───── After compression completes
|
||||
│ │ └─ onCompactError ─── Compression failed
|
||||
│ │
|
||||
│ ├─ [callAgent] (via execSubAgentTask)
|
||||
│ │ ├─ beforeCallAgent ── Before sub-agent starts
|
||||
│ │ ├─ afterCallAgent ─── After sub-agent completes
|
||||
│ │ └─ onCallAgentError ── Sub-agent failed
|
||||
│ │
|
||||
│ └─ afterStep ──────────── After step completes
|
||||
│
|
||||
├─ (next step...)
|
||||
│
|
||||
├─ onComplete ───────────── Operation reaches terminal state
|
||||
└─ onError ──────────────── Error during execution
|
||||
```
|
||||
|
||||
## Key Files
|
||||
|
||||
| File | Role |
|
||||
| --------------------------------------------------------------- | ------------------------------------------------------ |
|
||||
| `packages/agent-runtime/src/types/hooks.ts` | Type definitions (AgentHookType, all event interfaces) |
|
||||
| `apps/server/src/services/agentRuntime/hooks/types.ts` | Server-side types (AgentHook, re-exports) |
|
||||
| `apps/server/src/services/agentRuntime/hooks/HookDispatcher.ts` | Registration, dispatch, dispatchBeforeToolCall |
|
||||
| `apps/server/src/modules/AgentRuntime/RuntimeExecutors.ts` | Tool/Compact/HumanIntervention hook dispatch |
|
||||
| `apps/server/src/services/agentRuntime/AgentRuntimeService.ts` | Step hooks + HumanIntervention resume/reject |
|
||||
| `apps/server/src/services/aiAgent/index.ts` | CallAgent hook dispatch |
|
||||
|
||||
## Registration Flow
|
||||
|
||||
```ts
|
||||
const hooks: AgentHook[] = [
|
||||
{ id: 'my-hook', type: 'afterStep', handler: async (event) => { ... } },
|
||||
];
|
||||
await aiAgentService.execAgent({ agentId, prompt, hooks });
|
||||
// Internally: hookDispatcher.register(operationId, hooks)
|
||||
// Cleanup: hookDispatcher.unregister(operationId)
|
||||
```
|
||||
|
||||
## Hook Reference
|
||||
|
||||
### Step Level
|
||||
|
||||
**`beforeStep`** — Before each step. `event: AgentHookEvent`
|
||||
**`afterStep`** — After each step. `event: AgentHookEvent` (content, toolsCalling, totalCost, etc.)
|
||||
**`onComplete`** — Terminal state. `event: AgentHookEvent` (reason: done/error/interrupted/max_steps/cost_limit)
|
||||
**`onError`** — Error occurred. `event: AgentHookEvent` (errorMessage, errorDetail)
|
||||
|
||||
### Tool Call Level
|
||||
|
||||
**`beforeToolCall`** — Before tool executes. **Supports mocking** via `event.mock()`.
|
||||
|
||||
```ts
|
||||
// event: ToolCallHookEvent
|
||||
{
|
||||
(identifier, apiName, args, callIndex, stepIndex, operationId, mock);
|
||||
}
|
||||
// Mock example:
|
||||
event.mock({ content: '{"error":"rate limited"}' });
|
||||
```
|
||||
|
||||
Dispatch method: `hookDispatcher.dispatchBeforeToolCall()` (returns mock result or null).
|
||||
|
||||
**`afterToolCall`** — After tool completes. Observation only.
|
||||
|
||||
```ts
|
||||
// event: AfterToolCallHookEvent
|
||||
{
|
||||
(identifier, apiName, args, callIndex, content, success, mocked, executionTimeMs, stepIndex);
|
||||
}
|
||||
```
|
||||
|
||||
**`onToolCallError`** — Tool threw an exception (catch block, not just `success=false`).
|
||||
|
||||
```ts
|
||||
// event: ToolCallErrorHookEvent
|
||||
{
|
||||
(identifier, apiName, args, callIndex, error, stepIndex);
|
||||
}
|
||||
```
|
||||
|
||||
### Human Intervention
|
||||
|
||||
**`beforeHumanIntervention`** — Before agent pauses for approval.
|
||||
|
||||
```ts
|
||||
// event: BeforeHumanInterventionHookEvent
|
||||
{ operationId, stepIndex, pendingTools: [{ identifier, apiName }] }
|
||||
```
|
||||
|
||||
**`afterHumanIntervention`** — After approve/reject, agent resumes.
|
||||
|
||||
```ts
|
||||
// event: AfterHumanInterventionHookEvent
|
||||
{ operationId, action: 'approve' | 'reject' | 'rejectAndContinue', toolCallId?, rejectionReason? }
|
||||
```
|
||||
|
||||
**`onStopByHumanIntervention`** — User rejected, agent halted.
|
||||
|
||||
```ts
|
||||
// event: StopByHumanInterventionHookEvent
|
||||
{ operationId, toolCallId?, rejectionReason? }
|
||||
```
|
||||
|
||||
### Context Compression
|
||||
|
||||
**`beforeCompact`** — Before compression starts.
|
||||
|
||||
```ts
|
||||
// event: BeforeCompactHookEvent
|
||||
{
|
||||
(operationId, stepIndex, messageCount, tokenCount);
|
||||
}
|
||||
```
|
||||
|
||||
**`afterCompact`** — After compression completes.
|
||||
|
||||
```ts
|
||||
// event: AfterCompactHookEvent
|
||||
{
|
||||
(operationId, stepIndex, groupId, messagesBefore, messagesAfter, summary);
|
||||
}
|
||||
```
|
||||
|
||||
**`onCompactError`** — Compression failed.
|
||||
|
||||
```ts
|
||||
// event: CompactErrorHookEvent
|
||||
{
|
||||
(operationId, stepIndex, tokenCount, error);
|
||||
}
|
||||
```
|
||||
|
||||
### Sub-Agent (CallAgent)
|
||||
|
||||
**`beforeCallAgent`** — Before calling sub-agent. Dispatched on **parent** operation.
|
||||
|
||||
```ts
|
||||
// event: BeforeCallAgentHookEvent
|
||||
{
|
||||
(operationId, agentId, instruction);
|
||||
}
|
||||
```
|
||||
|
||||
**`afterCallAgent`** — Sub-agent completed. Dispatched on **parent** operation.
|
||||
|
||||
```ts
|
||||
// event: AfterCallAgentHookEvent
|
||||
{
|
||||
(operationId, agentId, subOperationId, threadId, success);
|
||||
}
|
||||
```
|
||||
|
||||
**`onCallAgentError`** — Sub-agent failed. Dispatched on **parent** operation.
|
||||
|
||||
```ts
|
||||
// event: CallAgentErrorHookEvent
|
||||
{
|
||||
(operationId, agentId, error);
|
||||
}
|
||||
```
|
||||
|
||||
Note: CallAgent hooks require `parentOperationId` in `ExecSubAgentTaskParams`.
|
||||
|
||||
## Design Notes
|
||||
|
||||
- **Fire-and-forget**: All handlers return `Promise<void>`. Errors are non-fatal.
|
||||
- **Exception**: `beforeToolCall` supports mock via `event.mock()` — uses `dispatchBeforeToolCall()` which returns the mock result.
|
||||
- **Sequential**: Same-type hooks run in registration order.
|
||||
- **Local only**: `beforeToolCall` mock only works in local mode (in-memory hooks). Webhook mode does not support mocking.
|
||||
- **Scoped per operation**: Auto-cleaned via `hookDispatcher.unregister()` on completion.
|
||||
- **Sandbox/MCP**: No separate hooks — they go through `executeTool`, so `beforeToolCall`/`afterToolCall` cover them. Use `event.identifier` to filter.
|
||||
|
||||
## Real-World Example: agent-evals
|
||||
|
||||
See `devtools/agent-evals/helpers/runner.ts` — `createEvalHooks()` uses `afterStep`, `onComplete`, `afterToolCall`, and `beforeToolCall` (for mock).
|
||||
@@ -1,95 +0,0 @@
|
||||
---
|
||||
name: agent-signal
|
||||
description: 'Build or extend LobeHub Agent Signal pipelines. Use for signal sources, signal/action types, policies, middleware, workflow handoff, dedupe, scope behavior, or observability.'
|
||||
---
|
||||
|
||||
# Agent Signal
|
||||
|
||||
Use this skill to implement event-driven background work for agents without coupling the work to the foreground chat request.
|
||||
|
||||
Agent Signal has one consistent shape:
|
||||
|
||||
`source event` -> `signal interpretation` -> `action execution` -> built-in result signals
|
||||
|
||||
## Start Here
|
||||
|
||||
1. Read `references/architecture.md` to map the package boundary, runtime queue, scope model, and async workflow handoff.
|
||||
2. Read `references/handlers.md` before writing any new policy, source handler, signal handler, or action handler.
|
||||
3. Read `references/observability.md` when you need tracing, metrics, debugging, or workflow snapshot visibility.
|
||||
|
||||
## Use The Right Entry Point
|
||||
|
||||
- Use `emitAgentSignalSourceEvent(...)` when a server-owned producer should execute the pipeline immediately.
|
||||
- Use `executeAgentSignalSourceEvent(...)` when a worker or controlled backend path already owns execution timing and may inject a runtime guard backend.
|
||||
- Use `enqueueAgentSignalSourceEvent(...)` when the caller should return quickly and let Upstash Workflow process the event out-of-band.
|
||||
- Use `emitAgentSignalSourceEventWithStore(...)` for isolated tests or evals that should avoid ambient Redis state.
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/index.ts`
|
||||
- `apps/server/src/workflows/agentSignal/index.ts`
|
||||
- `apps/server/src/workflows/agentSignal/run.ts`
|
||||
|
||||
## Core Model
|
||||
|
||||
- `source`: A normalized fact that happened. Sources come from producers such as runtime lifecycle events, user messages, or bot ingress.
|
||||
- `signal`: A semantic interpretation derived from one source or from another signal. Signals express meaning, routing, or policy state.
|
||||
- `action`: A concrete side effect planned from one signal. Actions do the work.
|
||||
- `policy`: An installable middleware bundle that registers source, signal, and action handlers.
|
||||
- `procedure`: Not a distinct runtime node. Treat "procedure" as the end-to-end flow for one use case: ingress source, matching handlers, planned actions, execution result, and observability.
|
||||
|
||||
Keep the boundaries strict:
|
||||
|
||||
- Add a new `source` when the outside world produced a new event.
|
||||
- Add a new `signal` when the system needs a reusable semantic interpretation.
|
||||
- Add a new `action` when the runtime needs a concrete side effect.
|
||||
- Add or update a `policy` when you are wiring those pieces together.
|
||||
|
||||
## Implementation Workflow
|
||||
|
||||
1. Decide whether the use case is synchronous or quiet background work.
|
||||
2. Define or reuse a source type in `apps/server/src/services/agentSignal/sourceTypes.ts`.
|
||||
3. Define or reuse signal and action types in `apps/server/src/services/agentSignal/policies/types.ts`.
|
||||
4. Implement handlers with `defineSourceHandler`, `defineSignalHandler`, or `defineActionHandler`.
|
||||
5. Bundle handlers with `defineAgentSignalHandlers(...)`.
|
||||
6. Register the policy in `apps/server/src/services/agentSignal/policies/index.ts` and pass it into the runtime factory if needed.
|
||||
7. Add or update ingress code that emits or enqueues the source event.
|
||||
8. Add observability and tests before considering the flow complete.
|
||||
|
||||
## Default Reading Set
|
||||
|
||||
- Shared semantic core:
|
||||
`packages/agent-signal/src/index.ts`
|
||||
`packages/agent-signal/src/base/builders.ts`
|
||||
`packages/agent-signal/src/base/types.ts`
|
||||
- Server-owned runtime and middleware:
|
||||
`apps/server/src/services/agentSignal/runtime/AgentSignalRuntime.ts`
|
||||
`apps/server/src/services/agentSignal/runtime/AgentSignalScheduler.ts`
|
||||
`apps/server/src/services/agentSignal/runtime/middleware.ts`
|
||||
`apps/server/src/services/agentSignal/runtime/context.ts`
|
||||
- Existing policy example:
|
||||
`apps/server/src/services/agentSignal/policies/analyzeIntent/index.ts`
|
||||
`apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
|
||||
`apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
|
||||
`apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
|
||||
`apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
|
||||
- Observability:
|
||||
`apps/server/src/services/agentSignal/observability/projector.ts`
|
||||
`apps/server/src/services/agentSignal/observability/traceEvents.ts`
|
||||
`packages/observability-otel/src/modules/agent-signal/index.ts`
|
||||
|
||||
## Implementation Rules
|
||||
|
||||
- Reuse existing source, signal, and action types before adding new ones.
|
||||
- Keep source handlers focused on interpretation and fan-out, not heavy side effects.
|
||||
- Keep action handlers responsible for side effects, idempotency, and executor-style result reporting.
|
||||
- Use stable ids and idempotency keys when the same source can arrive more than once.
|
||||
- Preserve scope discipline. The runtime uses `scopeKey` to serialize related background work.
|
||||
- Prefer the dedicated shared package types and builders from `@lobechat/agent-signal` for normalized nodes and result contracts.
|
||||
- Add focused tests near the touched runtime, policy, or store module. Existing tests under `apps/server/src/services/agentSignal/**/__tests__` are the reference pattern.
|
||||
|
||||
## References
|
||||
|
||||
- Architecture and boundaries: `references/architecture.md`
|
||||
- Writing handlers and policies: `references/handlers.md`
|
||||
- Observability, metrics, and debugging: `references/observability.md`
|
||||
@@ -1,4 +0,0 @@
|
||||
interface:
|
||||
display_name: 'Agent Signal'
|
||||
short_description: 'Build AgentSignal sources, signals, actions, and policies.'
|
||||
default_prompt: 'Use $agent-signal to add a new Agent Signal source, policy, handler, or observability flow.'
|
||||
@@ -1,199 +0,0 @@
|
||||
# Agent Signal Architecture
|
||||
|
||||
## Pipeline
|
||||
|
||||
Use this mental model first:
|
||||
|
||||
```text
|
||||
producer
|
||||
-> emitAgentSignalSourceEvent(...) or enqueueAgentSignalSourceEvent(...)
|
||||
-> emitSourceEvent(...)
|
||||
-> dedupe + scope lock + source normalization
|
||||
-> runtime.emitNormalized(source)
|
||||
-> source handlers
|
||||
-> signal handlers
|
||||
-> action handlers
|
||||
-> built-in result signals
|
||||
-> observability projection + persistence
|
||||
```
|
||||
|
||||
The scheduler is queue-driven, not hard-coded for one policy:
|
||||
|
||||
```text
|
||||
source node
|
||||
-> matching source handlers
|
||||
-> dispatch signals/actions
|
||||
-> matching signal handlers
|
||||
-> dispatch more signals/actions
|
||||
-> matching action handlers
|
||||
-> ExecutorResult
|
||||
-> signal.action.applied | signal.action.skipped | signal.action.failed
|
||||
```
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/index.ts`
|
||||
- `apps/server/src/services/agentSignal/sources/index.ts`
|
||||
- `apps/server/src/services/agentSignal/runtime/AgentSignalScheduler.ts`
|
||||
|
||||
## Package Boundaries
|
||||
|
||||
### `packages/agent-signal`
|
||||
|
||||
Treat this as the shared semantic core.
|
||||
|
||||
It provides:
|
||||
|
||||
- base node types: source, signal, action
|
||||
- builders: `createSource`, `createSignal`, `createAction`
|
||||
- built-in result signal types
|
||||
- runtime result contracts such as `RuntimeProcessorResult` and `ExecutorResult`
|
||||
|
||||
Read:
|
||||
|
||||
- `packages/agent-signal/src/base/types.ts`
|
||||
- `packages/agent-signal/src/base/builders.ts`
|
||||
- `packages/agent-signal/src/types/events.ts`
|
||||
- `packages/agent-signal/src/types/builtin.ts`
|
||||
|
||||
### `apps/server/src/services/agentSignal`
|
||||
|
||||
Treat this as the server-owned implementation layer.
|
||||
|
||||
It owns:
|
||||
|
||||
- source catalogs and payload maps
|
||||
- policy-specific signal and action catalogs
|
||||
- middleware registration
|
||||
- runtime scheduling and guard backends
|
||||
- Redis-backed dedupe, waypoint, and policy state
|
||||
- service entrypoints for synchronous and async execution
|
||||
|
||||
### `packages/observability-otel/src/modules/agent-signal`
|
||||
|
||||
Treat this as shared OTEL ownership for Agent Signal metrics and tracer instances.
|
||||
|
||||
## Core Vocabulary
|
||||
|
||||
### Source
|
||||
|
||||
A source is the normalized external fact that started the chain.
|
||||
|
||||
Examples:
|
||||
|
||||
- `agent.user.message`
|
||||
- `runtime.before_step`
|
||||
- `runtime.after_step`
|
||||
- `client.runtime.start`
|
||||
- `bot.message.merged`
|
||||
|
||||
Define source payloads in:
|
||||
|
||||
- `apps/server/src/services/agentSignal/sourceTypes.ts`
|
||||
|
||||
Build normalized sources in:
|
||||
|
||||
- `apps/server/src/services/agentSignal/sources/buildSource.ts`
|
||||
- `packages/agent-signal/src/base/builders.ts`
|
||||
|
||||
### Signal
|
||||
|
||||
A signal is a semantic interpretation. Signals should be reusable and meaning-oriented.
|
||||
|
||||
Examples from `analyzeIntent`:
|
||||
|
||||
- `signal.feedback.satisfaction`
|
||||
- `signal.feedback.domain.memory`
|
||||
- `signal.feedback.domain.prompt`
|
||||
- `signal.feedback.domain.skill`
|
||||
|
||||
Define server-owned signal types in:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/types.ts`
|
||||
|
||||
### Action
|
||||
|
||||
An action is a concrete side effect the runtime should execute.
|
||||
|
||||
Example:
|
||||
|
||||
- `action.user-memory.handle`
|
||||
|
||||
Action handlers usually:
|
||||
|
||||
- check idempotency
|
||||
- call tools, models, or services
|
||||
- return `ExecutorResult`
|
||||
|
||||
### Policy
|
||||
|
||||
A policy is an installable bundle of handlers. It is the composition unit that turns the generic runtime into a feature.
|
||||
|
||||
Example:
|
||||
|
||||
- `createAnalyzeIntentPolicy(...)`
|
||||
|
||||
### Procedure
|
||||
|
||||
"Procedure" is not a first-class type in this runtime. Use the word to describe one end-to-end use case:
|
||||
|
||||
1. define ingress source
|
||||
2. emit or enqueue the source
|
||||
3. interpret source into signals
|
||||
4. plan actions from signals
|
||||
5. execute actions
|
||||
6. persist trace and metrics
|
||||
|
||||
When a user asks for "the procedure", document the flow above and point to the exact producer, handlers, and execution entrypoint.
|
||||
|
||||
## Scope, Deduping, And Quiet Background Work
|
||||
|
||||
`scopeKey` is the serialization boundary for related work. It is used for:
|
||||
|
||||
- source dedupe windows
|
||||
- scope locks during source generation
|
||||
- runtime guard state
|
||||
- waypoint persistence for queued processing
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/sources/index.ts`
|
||||
- `apps/server/src/services/agentSignal/runtime/context.ts`
|
||||
- `apps/server/src/services/agentSignal/constants.ts`
|
||||
|
||||
Use `enqueueAgentSignalSourceEvent(...)` when the work should stay quiet and out-of-band. That path:
|
||||
|
||||
1. normalizes the source envelope
|
||||
2. derives or reuses `scopeKey`
|
||||
3. triggers `AgentSignalWorkflow`
|
||||
4. executes later in `runAgentSignalWorkflow`
|
||||
|
||||
This is the preferred path when the UI request should finish immediately and the policy can run in the background.
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/workflows/agentSignal/index.ts`
|
||||
- `apps/server/src/workflows/agentSignal/run.ts`
|
||||
|
||||
## Existing Example: `analyzeIntent`
|
||||
|
||||
Use `analyzeIntent` as the reference chain:
|
||||
|
||||
```text
|
||||
agent.user.message
|
||||
-> feedback satisfaction source handler
|
||||
-> signal.feedback.satisfaction
|
||||
-> feedback domain signal handler
|
||||
-> signal.feedback.domain.*
|
||||
-> feedback action planner
|
||||
-> action.user-memory.handle
|
||||
-> signal.action.applied | skipped | failed
|
||||
```
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/index.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
|
||||
@@ -1,228 +0,0 @@
|
||||
# Writing Handlers And Policies
|
||||
|
||||
## Fluent Registration API
|
||||
|
||||
Use the middleware helpers in `apps/server/src/services/agentSignal/runtime/middleware.ts`.
|
||||
|
||||
They provide:
|
||||
|
||||
- `defineSourceHandler(...)`
|
||||
- `defineSignalHandler(...)`
|
||||
- `defineActionHandler(...)`
|
||||
- `defineAgentSignalHandlers(...)`
|
||||
|
||||
These helpers do two jobs:
|
||||
|
||||
1. keep handler registration terse
|
||||
2. preserve strong typing when `listen` points at concrete source, signal, or action types
|
||||
|
||||
## Handler Shape
|
||||
|
||||
Each handler receives:
|
||||
|
||||
- the current runtime node
|
||||
- `RuntimeProcessorContext`
|
||||
|
||||
The context gives you:
|
||||
|
||||
- `scopeKey`
|
||||
- `now()`
|
||||
- `runtimeState.getGuardState(lane)`
|
||||
- `runtimeState.touchGuardState(lane, now?)`
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/runtime/context.ts`
|
||||
|
||||
## Return Contracts
|
||||
|
||||
Return one of these shapes:
|
||||
|
||||
- `void`: no fan-out, stop at this handler
|
||||
- `{ status: 'dispatch', signals?, actions? }`: continue the chain
|
||||
- `{ status: 'wait', pending? }`: pause for later host coordination
|
||||
- `{ status: 'schedule', nextHop }`: schedule another hop
|
||||
- `{ status: 'conclude', concluded? }`: stop with a terminal runtime result
|
||||
- `ExecutorResult`: only for action handlers that performed a concrete side effect
|
||||
|
||||
Read:
|
||||
|
||||
- `packages/agent-signal/src/base/types.ts`
|
||||
- `apps/server/src/services/agentSignal/runtime/AgentSignalScheduler.ts`
|
||||
|
||||
## Policy Composition Pattern
|
||||
|
||||
Use `defineAgentSignalHandlers([...])` to bundle related handlers into one policy.
|
||||
|
||||
Example from `analyzeIntent`:
|
||||
|
||||
```ts
|
||||
return defineAgentSignalHandlers([
|
||||
createFeedbackSatisfactionJudgeProcessor(...),
|
||||
createFeedbackDomainJudgeSignalHandler(...),
|
||||
createFeedbackActionPlannerSignalHandler(),
|
||||
defineUserMemoryActionHandler(...),
|
||||
]);
|
||||
```
|
||||
|
||||
That bundle is later passed into the runtime via:
|
||||
|
||||
- `createDefaultAgentSignalPolicies(...)`
|
||||
- `createAgentSignalRuntime({ policies })`
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/index.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/index.ts`
|
||||
|
||||
## Source Handler Pattern
|
||||
|
||||
Use a source handler when you are interpreting a producer event into semantic signals.
|
||||
|
||||
Reference:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackSatisfaction.ts`
|
||||
|
||||
Pattern:
|
||||
|
||||
```ts
|
||||
return defineSourceHandler(
|
||||
AGENT_SIGNAL_SOURCE_TYPES.agentUserMessage,
|
||||
'agent.user.message:my-handler',
|
||||
async (source, ctx): Promise<RuntimeProcessorResult | void> => {
|
||||
// interpret source payload
|
||||
// optionally use ctx.runtimeState
|
||||
|
||||
return {
|
||||
signals: [
|
||||
/* one or more semantic signals */
|
||||
],
|
||||
status: 'dispatch',
|
||||
};
|
||||
},
|
||||
);
|
||||
```
|
||||
|
||||
Write source handlers when:
|
||||
|
||||
- a raw message, lifecycle event, or bot ingress needs interpretation
|
||||
- the work is still semantic, not side-effectful
|
||||
|
||||
## Signal Handler Pattern
|
||||
|
||||
Use a signal handler when one semantic state should branch into more semantic states or planned actions.
|
||||
|
||||
References:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackDomain.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/feedbackAction.ts`
|
||||
|
||||
Pattern:
|
||||
|
||||
```ts
|
||||
return defineSignalHandler(
|
||||
MY_SIGNAL_TYPE,
|
||||
'signal.my-policy-router',
|
||||
async (signal): Promise<RuntimeProcessorResult | void> => {
|
||||
return {
|
||||
actions: [
|
||||
/* planned work */
|
||||
],
|
||||
status: 'dispatch',
|
||||
};
|
||||
},
|
||||
);
|
||||
```
|
||||
|
||||
Use signal handlers for:
|
||||
|
||||
- routing
|
||||
- fan-out
|
||||
- filtering
|
||||
- conflict resolution
|
||||
- converting interpretation into planned actions
|
||||
|
||||
## Action Handler Pattern
|
||||
|
||||
Use an action handler when the runtime should do actual work.
|
||||
|
||||
Reference:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
|
||||
|
||||
Pattern:
|
||||
|
||||
```ts
|
||||
return defineActionHandler(
|
||||
MY_ACTION_TYPE,
|
||||
'action.my-policy-executor',
|
||||
async (action, ctx): Promise<ExecutorResult> => {
|
||||
// run service/tool/model side effect
|
||||
// check idempotency if needed
|
||||
|
||||
return {
|
||||
actionId: action.actionId,
|
||||
attempt: {
|
||||
completedAt: ctx.now(),
|
||||
current: 1,
|
||||
startedAt,
|
||||
status: 'succeeded',
|
||||
},
|
||||
status: 'applied',
|
||||
};
|
||||
},
|
||||
);
|
||||
```
|
||||
|
||||
Keep these rules:
|
||||
|
||||
- perform idempotency checks here or immediately before side effects
|
||||
- return stable `actionId`
|
||||
- include failure detail in `error`
|
||||
- let the scheduler turn the `ExecutorResult` into built-in result signals
|
||||
|
||||
## Source, Signal, And Action Type Placement
|
||||
|
||||
Use this split:
|
||||
|
||||
- external event payloads:
|
||||
`apps/server/src/services/agentSignal/sourceTypes.ts`
|
||||
- policy-owned signal and action payloads:
|
||||
`apps/server/src/services/agentSignal/policies/types.ts`
|
||||
- normalized shared node contracts:
|
||||
`packages/agent-signal/src/base/types.ts`
|
||||
|
||||
Do not put app-specific signal catalogs into `packages/agent-signal`. That package should stay generic and reusable.
|
||||
|
||||
## Choosing The Right Node
|
||||
|
||||
Choose `source` when:
|
||||
|
||||
- the outside world emitted a new fact
|
||||
|
||||
Choose `signal` when:
|
||||
|
||||
- the system needs semantic meaning that downstream handlers can reuse
|
||||
|
||||
Choose `action` when:
|
||||
|
||||
- the runtime is ready for a concrete side effect
|
||||
|
||||
If a handler both interprets meaning and performs side effects, split it. That keeps chains inspectable and testable.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Prefer focused tests near the touched code.
|
||||
|
||||
Useful references:
|
||||
|
||||
- `apps/server/src/services/agentSignal/runtime/__tests__/AgentSignalRuntime.test.ts`
|
||||
- `apps/server/src/services/agentSignal/__tests__/index.integration.test.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/__tests__/*`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/__tests__/*`
|
||||
|
||||
Test at the smallest level that proves the behavior:
|
||||
|
||||
- handler unit test for one routing rule
|
||||
- runtime test for queue fan-out
|
||||
- integration test for service ingress and observability persistence
|
||||
@@ -1,118 +0,0 @@
|
||||
# Observability And Debugging
|
||||
|
||||
## OTEL Ownership
|
||||
|
||||
Use `packages/observability-otel/src/modules/agent-signal/index.ts` for the shared tracer and metrics.
|
||||
|
||||
Available instruments:
|
||||
|
||||
- `tracer`
|
||||
- `sourceCounter`
|
||||
- `signalCounter`
|
||||
- `actionCounter`
|
||||
- `actionResultCounter`
|
||||
- `chainCounter`
|
||||
- `signalActionTransitionCounter`
|
||||
- `chainDurationHistogram`
|
||||
- `actionDurationHistogram`
|
||||
|
||||
Use this module when you need shared telemetry ownership instead of creating feature-local meters or tracers.
|
||||
|
||||
## Projection Pipeline
|
||||
|
||||
After runtime execution, the service projects one compact observability model from the full chain.
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/observability/projector.ts`
|
||||
- `apps/server/src/services/agentSignal/observability/traceEvents.ts`
|
||||
- `apps/server/src/services/agentSignal/observability/store.ts`
|
||||
|
||||
Projection outputs:
|
||||
|
||||
- a trace envelope with source, signals, actions, results, edges, and handler runs
|
||||
- a compact telemetry record with dominant path, status breakdown, and chain metadata
|
||||
|
||||
This projection is built from:
|
||||
|
||||
- source node
|
||||
- emitted signals
|
||||
- planned actions
|
||||
- executor results
|
||||
|
||||
## How To Inspect A Chain
|
||||
|
||||
Use this order:
|
||||
|
||||
1. Inspect the source type and payload.
|
||||
2. Inspect emitted signals.
|
||||
3. Inspect planned actions.
|
||||
4. Inspect executor results.
|
||||
5. Inspect projected edges and dominant path.
|
||||
|
||||
The helper `toAgentSignalTraceEvents(...)` flattens a chain into compact event records suitable for tracing snapshots.
|
||||
|
||||
## Workflow Snapshot Bridge
|
||||
|
||||
Workflow-triggered runs do not naturally pass through the normal foreground runtime snapshot path, so `runAgentSignalWorkflow` adds a development-only bridge into `.agent-tracing/`.
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/workflows/agentSignal/run.ts`
|
||||
|
||||
Use that path when:
|
||||
|
||||
- the source was enqueued with `enqueueAgentSignalSourceEvent(...)`
|
||||
- you need local trace visibility for quiet background work
|
||||
|
||||
## Common Debug Questions
|
||||
|
||||
### The source emits but nothing happens
|
||||
|
||||
Check:
|
||||
|
||||
- feature gate enabled for the user
|
||||
- source type matches a registered source handler
|
||||
- dedupe or scope lock did not short-circuit generation
|
||||
|
||||
Read:
|
||||
|
||||
- `apps/server/src/services/agentSignal/index.ts`
|
||||
- `apps/server/src/services/agentSignal/sources/index.ts`
|
||||
|
||||
### The signal exists but no action runs
|
||||
|
||||
Check:
|
||||
|
||||
- the signal type has a registered signal handler
|
||||
- the signal handler returns `status: 'dispatch'`
|
||||
- the handler actually returned actions
|
||||
|
||||
### The action runs twice
|
||||
|
||||
Check:
|
||||
|
||||
- source dedupe key stability
|
||||
- action idempotency strategy
|
||||
- scope key stability across retries and workflow handoff
|
||||
|
||||
Reference:
|
||||
|
||||
- `apps/server/src/services/agentSignal/policies/actionIdempotency.ts`
|
||||
- `apps/server/src/services/agentSignal/policies/analyzeIntent/actions/userMemory.ts`
|
||||
|
||||
### Background runs are hard to discover
|
||||
|
||||
Check:
|
||||
|
||||
- workflow snapshot bridge in development
|
||||
- projected telemetry record contents
|
||||
- OTEL counters and histograms in the shared module
|
||||
|
||||
## Minimal Completion Checklist
|
||||
|
||||
- source ingress is testable
|
||||
- handler registration is discoverable from the policy factory
|
||||
- action executor returns structured results
|
||||
- projection includes the new path cleanly
|
||||
- tests cover at least one happy path and one no-op or failure path
|
||||
@@ -1,217 +0,0 @@
|
||||
---
|
||||
name: agent-testing
|
||||
description: >
|
||||
Agentic end-to-end testing for LobeHub: backend verification via the CLI,
|
||||
frontend verification via agent-browser (Electron), full-stack verification in
|
||||
the browser, and bot-channel verification via osascript. Local-first today,
|
||||
designed to extend to cloud automation. Triggers on 'cli test', 'test with cli',
|
||||
'verify with cli', 'backend test with cli', 'local test', 'test in electron',
|
||||
'test desktop', 'test bot', 'bot test', 'test in discord', 'test in telegram',
|
||||
'test in slack', 'test in wechat', 'test in weixin', 'test in lark', 'test in feishu',
|
||||
'test in qq', 'manual test', 'osascript', 'test report', or any local
|
||||
end-to-end verification task.
|
||||
---
|
||||
|
||||
# Agent Testing (Agentic End-to-End Verification)
|
||||
|
||||
One skill for all agentic end-to-end testing — local-first today, designed to
|
||||
also run as full cloud automation. Every test session follows the same
|
||||
four-step contract:
|
||||
|
||||
```
|
||||
Step 0: Env + Auth → Step 1: Pick surface → Step 2: Run → Step 3: Structured report
|
||||
```
|
||||
|
||||
## Step 0 — Environment setup + auth check (mandatory)
|
||||
|
||||
Step 0 is about getting the environment ready: **dependencies are healthy**
|
||||
and **auth is green**. A test run that dies halfway on a missing dependency or
|
||||
a login wall wastes the whole session — clear both gates BEFORE writing a
|
||||
single test step.
|
||||
|
||||
### 0.1 Dependencies are installed — root AND standalone apps
|
||||
|
||||
The root pnpm workspace does **NOT** cover every app: `pnpm-workspace.yaml`
|
||||
lists `packages/**`, `e2e`, `apps/server`, and only `apps/desktop/src/main` —
|
||||
**`apps/desktop` and `apps/cli` are standalone**, each keeping its own
|
||||
`node_modules` with its own links into `packages/`. A root install does not
|
||||
refresh them, so install in every app the test will touch:
|
||||
|
||||
```bash
|
||||
pnpm install # root workspace
|
||||
cd apps/desktop && pnpm install # Electron surface
|
||||
cd apps/cli && pnpm install # CLI surface
|
||||
```
|
||||
|
||||
Symptom of a stale standalone install: the build/launch fails to resolve a
|
||||
recently added workspace package — `Rolldown failed to resolve import
|
||||
"@lobechat/<pkg>"` (Electron) or `Cannot find module '@lobechat/<pkg>'` (CLI).
|
||||
|
||||
### 0.2 Run scripts from the repo root
|
||||
|
||||
All paths in this skill (`./.agents/skills/agent-testing/...`) are
|
||||
repo-root-relative, and background commands inherit the current working
|
||||
directory — a script launched while `cwd` is `apps/desktop` fails with
|
||||
`No such file or directory`. Verify `pwd` is the repo root before launching
|
||||
long-running scripts.
|
||||
|
||||
### 0.3 Auth is green
|
||||
|
||||
**Auth is the gate for all automated testing.**
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/scripts/setup-auth.sh status
|
||||
```
|
||||
|
||||
| Surface | Mechanism | One-key path | Standard check |
|
||||
| -------- | ------------------------------------------------- | ------------------------------ | -------------------------- |
|
||||
| CLI | OIDC Device Code Flow (`apps/cli/.lobehub-dev`) | `setup-auth.sh cli` | `setup-auth.sh status` |
|
||||
| Web | better-auth cookie injection into `agent-browser` | `pbpaste \| setup-auth.sh web` | `setup-auth.sh web-verify` |
|
||||
| Electron | App's own persistent login state | Log in once in the app | `app-probe.sh auth` |
|
||||
| Bot | Native apps already logged in | — | per-platform screenshot |
|
||||
|
||||
Login-state checks are standardized — do NOT hand-roll `window.__LOBE_STORES`
|
||||
eval snippets; use `scripts/app-probe.sh auth` (returns `{ isSignedIn, userId }`,
|
||||
works for Electron CDP and web sessions via `AB_TARGET`).
|
||||
|
||||
If `status` is not all green, fix auth first (the steps that need a human must be
|
||||
requested from the user explicitly). Full background and failure modes:
|
||||
[references/auth.md](./references/auth.md).
|
||||
|
||||
## Step 1 — Pick the surface by change scope
|
||||
|
||||
| Change scope | Default surface | Why | Guide |
|
||||
| ------------------------------------------------------- | ------------------------------------ | ----------------------------------------------------------------- | ---------------------------------- |
|
||||
| **Backend** (TRPC router / service / model / migration) | **CLI** | Fastest loop, text-assertable output, zero UI flakiness | [cli/index.md](./cli/index.md) |
|
||||
| **Pure frontend** (components, store, styles, UX) | **Electron** (agent-browser + CDP) | Primary product shape; `__LOBE_STORES` state introspection | [ui/electron.md](./ui/electron.md) |
|
||||
| **Full-stack** (new API + UI consuming it) | **Web** (browser + local dev server) | One surface where network requests and UI are observable together | [ui/web.md](./ui/web.md) |
|
||||
| **Bot channels** (Discord / WeChat / Lark / …) | Native app via osascript / bridge | Only way to exercise the real channel end-to-end | `bot/<platform>/index.md` |
|
||||
|
||||
Escalate, don't duplicate: verify a backend change with the CLI first; only add
|
||||
a UI pass when the change actually affects the UI.
|
||||
|
||||
### Environment support (local macOS vs cloud Linux)
|
||||
|
||||
The decisive constraint per surface is **how evidence (screenshots) is
|
||||
captured**: CDP-based capture (`agent-browser screenshot`) renders from the
|
||||
browser engine and needs no real display; OS-level capture (`screencapture`,
|
||||
osascript) is macOS-only.
|
||||
|
||||
| Surface | macOS (local) | Linux / cloud (headless) | Screenshot mechanism |
|
||||
| -------- | ------------- | --------------------------------------------------------- | ------------------------------------------------------ |
|
||||
| CLI | ✅ | ✅ | n/a — text output |
|
||||
| Web | ✅ | ✅ headless Chromium works natively | CDP — no display needed |
|
||||
| Electron | ✅ | ⚠️ runs, but needs a display server: wrap with `xvfb-run` | CDP works under Xvfb; `capture-app-window.sh` does NOT |
|
||||
| Bot | ✅ | ❌ osascript + native apps are macOS-only | macOS `screencapture` only |
|
||||
|
||||
When a test must stay cloud-portable, prefer CDP-based evidence over
|
||||
OS-level capture wherever both exist.
|
||||
|
||||
### Bot platforms
|
||||
|
||||
| Platform | Guide | Quick switcher |
|
||||
| ------------- | ------------------------------------------------ | --------------------- |
|
||||
| Discord | [bot/discord/index.md](./bot/discord/index.md) | `Cmd+K` |
|
||||
| Slack | [bot/slack/index.md](./bot/slack/index.md) | `Cmd+K` |
|
||||
| Telegram | [bot/telegram/index.md](./bot/telegram/index.md) | `Cmd+F` |
|
||||
| WeChat / 微信 | [bot/wechat/index.md](./bot/wechat/index.md) | `Cmd+F` |
|
||||
| Lark / 飞书 | [bot/lark/index.md](./bot/lark/index.md) | `Cmd+K` |
|
||||
| QQ | [bot/qq/index.md](./bot/qq/index.md) | `Cmd+F` |
|
||||
| iMessage | [bot/imessage/index.md](./bot/imessage/index.md) | bridge (no osascript) |
|
||||
|
||||
Each platform folder contains an `index.md` (activation, navigation,
|
||||
send-message, verification snippets) and a `test-<platform>-bot.sh` script
|
||||
sharing the interface:
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/<platform>/test-<platform>-bot.sh <channel_or_contact> <message> [wait_seconds] [screenshot_path]
|
||||
```
|
||||
|
||||
New to osascript automation? Read
|
||||
[references/osascript.md](./references/osascript.md) first — it is a general
|
||||
macOS-automation asset (activate, type, paste, screenshot, accessibility reads,
|
||||
gotchas), not bot-specific.
|
||||
|
||||
## Step 2 — Run
|
||||
|
||||
Surface guides above carry the detailed workflows. Shared infrastructure:
|
||||
|
||||
| Need | Where |
|
||||
| ------------------------------------ | -------------------------------------------------------------------- |
|
||||
| Start / restart the local dev server | [references/dev-server.md](./references/dev-server.md) |
|
||||
| `agent-browser` command reference | [references/agent-browser.md](./references/agent-browser.md) |
|
||||
| osascript patterns (general macOS) | [references/osascript.md](./references/osascript.md) |
|
||||
| Agent gateway probing | [references/agent-gateway.md](./references/agent-gateway.md) |
|
||||
| Screen recording | [references/record-app-screen.md](./references/record-app-screen.md) |
|
||||
|
||||
### Scripts
|
||||
|
||||
All under `.agents/skills/agent-testing/scripts/`:
|
||||
|
||||
| Script | Usage |
|
||||
| ------------------------- | ---------------------------------------------------------------------------- |
|
||||
| `setup-auth.sh` | One-stop auth setup & status check (`status` / `cli` / `web`) |
|
||||
| `app-probe.sh` | LobeHub app probes: `auth` / `route` / `ops` / `goto <path>` / `errors` |
|
||||
| `record-gif.sh` | Frame-sequence → GIF for time-based behavior (streaming, timers, animations) |
|
||||
| `report-init.sh` | Scaffold a structured test report (Step 3) |
|
||||
| `electron-dev.sh` | Manage Electron dev env (start/stop/status/restart, CDP 9222) |
|
||||
| `capture-app-window.sh` | Screenshot a specific app window (general; used by bot tests) |
|
||||
| `record-app-screen.sh` | Record app screen (video + periodic screenshots) |
|
||||
| `record-electron-demo.sh` | Record Electron app demo with ffmpeg |
|
||||
| `agent-gateway/` | Gateway probe / dump / analyze tools |
|
||||
|
||||
`app-probe.sh` is the LobeHub-specific fast path into app state — auth check,
|
||||
current route, running operations, and `goto <path>` quick navigation
|
||||
(`/agent/<agentId>/<topicId>`, `/task/<taskId>`, `/settings`, …) so a test can
|
||||
jump straight to the state under test instead of clicking through the UI. See
|
||||
[ui/electron.md](./ui/electron.md#lobehub-probes--quick-navigation) for usage.
|
||||
|
||||
## Step 3 — Structured report (mandatory deliverable)
|
||||
|
||||
Every automated test session ends with a structured, evidence-backed report —
|
||||
not a chat-only summary. Scaffold it up front and fill it as you test:
|
||||
|
||||
```bash
|
||||
DIR=$(./.agents/skills/agent-testing/scripts/report-init.sh my-feature "Verify my feature")
|
||||
# ... test, saving screenshots / CLI transcripts into $DIR/assets/ ...
|
||||
# fill $DIR/report.md (scope, case table with inline evidence, verdict, score) and $DIR/result.json
|
||||
```
|
||||
|
||||
Reports live in `.records/reports/<timestamp>-<slug>/` (gitignored): `report.md`
|
||||
(human-readable, with screenshots/GIFs embedded directly in the case table),
|
||||
`result.json` (machine-readable pass/fail + score), `assets/` (evidence).
|
||||
Format spec and evidence rules:
|
||||
[references/report.md](./references/report.md).
|
||||
|
||||
Two hard rules worth front-loading:
|
||||
|
||||
- **Report language = the user's conversation language.** Write the ENTIRE
|
||||
`report.md` (headings included) in the language the user is conversing in —
|
||||
no mixed English. `result.json` keys/status values stay English.
|
||||
- **The case table is the main reading surface.** Prefer the compact
|
||||
`# | case | result | key observation | evidence` shape and embed the
|
||||
screenshot/GIF in the evidence cell. Use separate evidence sections only for
|
||||
long CLI transcripts, HAR summaries, or supplemental detail.
|
||||
- **Time-based behavior needs a GIF, not a screenshot.** If a case asserts
|
||||
change over time (streaming output, a ticking timer, loading states,
|
||||
animations), record it with `scripts/record-gif.sh` and embed the GIF —
|
||||
a static screenshot cannot prove the behavior.
|
||||
|
||||
## Directory map
|
||||
|
||||
```
|
||||
agent-testing/
|
||||
├── SKILL.md # this router
|
||||
├── cli/index.md # backend verification via the LobeHub CLI
|
||||
├── ui/electron.md # pure-frontend verification in the desktop app
|
||||
├── ui/web.md # full-stack verification in the browser
|
||||
├── bot/<platform>/ # bot-channel verification (osascript / bridge)
|
||||
├── references/ # shared knowledge: auth, dev-server, agent-browser, osascript, report
|
||||
└── scripts/ # setup-auth, report-init, electron-dev, capture, recording, gateway
|
||||
```
|
||||
|
||||
## Gotchas
|
||||
|
||||
- agent-browser: see [references/agent-browser.md](./references/agent-browser.md#gotchas)
|
||||
- Electron: see [ui/electron.md](./ui/electron.md#electron-gotchas)
|
||||
- osascript: see [references/osascript.md](./references/osascript.md#gotchas)
|
||||
@@ -1,97 +0,0 @@
|
||||
# Discord Bot Testing
|
||||
|
||||
**App name:** `Discord` | **Process name:** `Discord`
|
||||
|
||||
See [references/osascript.md](../../references/osascript.md) for shared patterns.
|
||||
|
||||
## Activate & Navigate
|
||||
|
||||
```bash
|
||||
# Activate Discord
|
||||
osascript -e 'tell application "Discord" to activate'
|
||||
sleep 1
|
||||
|
||||
# Open Quick Switcher (Cmd+K) to navigate to a channel
|
||||
osascript -e 'tell application "System Events" to keystroke "k" using command down'
|
||||
sleep 0.5
|
||||
osascript -e 'tell application "System Events" to keystroke "bot-testing"'
|
||||
sleep 1
|
||||
osascript -e 'tell application "System Events" to key code 36' # Enter
|
||||
sleep 2
|
||||
```
|
||||
|
||||
## Send Message to Bot
|
||||
|
||||
```bash
|
||||
# The message input is focused after navigating to a channel
|
||||
# Type a message
|
||||
osascript -e 'tell application "System Events" to keystroke "/hello"'
|
||||
sleep 0.5
|
||||
osascript -e 'tell application "System Events" to key code 36' # Enter
|
||||
```
|
||||
|
||||
## Send Long Message (via clipboard)
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
tell application "Discord" to activate
|
||||
delay 0.5
|
||||
set the clipboard to "Write a 3000 word essay about space exploration"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Verify Bot Response
|
||||
|
||||
```bash
|
||||
# Wait for bot to respond, then screenshot
|
||||
sleep 10
|
||||
screencapture /tmp/discord-bot-response.png
|
||||
# Read with the Read tool for visual verification
|
||||
```
|
||||
|
||||
## Full Bot Test Example
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
# test-discord-bot.sh — Send message and verify bot response
|
||||
|
||||
# 1. Activate Discord and navigate to channel
|
||||
osascript -e '
|
||||
tell application "Discord" to activate
|
||||
delay 1
|
||||
-- Quick Switcher
|
||||
tell application "System Events" to keystroke "k" using command down
|
||||
delay 0.5
|
||||
tell application "System Events" to keystroke "bot-testing"
|
||||
delay 1
|
||||
tell application "System Events" to key code 36
|
||||
delay 2
|
||||
'
|
||||
|
||||
# 2. Send test message
|
||||
osascript -e '
|
||||
set the clipboard to "!ping"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
|
||||
# 3. Wait for response and capture
|
||||
sleep 5
|
||||
screencapture /tmp/discord-test-result.png
|
||||
echo "Screenshot saved to /tmp/discord-test-result.png"
|
||||
```
|
||||
|
||||
## Script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/discord/test-discord-bot.sh "bot-testing" "!ping"
|
||||
./.agents/skills/agent-testing/bot/discord/test-discord-bot.sh "bot-testing" "/ask Tell me a joke" 30
|
||||
```
|
||||
@@ -1,64 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-discord-bot.sh — Send a message to a Discord bot and capture the response
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/test-discord-bot.sh <channel> <message> [wait_seconds] [screenshot_path]
|
||||
#
|
||||
# channel — Channel name to navigate to via Quick Switcher (Cmd+K)
|
||||
# message — Message to send to the bot
|
||||
# wait_seconds — Seconds to wait for bot response (default: 10)
|
||||
# screenshot_path — Output screenshot path (default: /tmp/discord-bot-test.png)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Discord desktop app installed and logged in
|
||||
# - Accessibility permission granted (System Preferences > Privacy > Accessibility)
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/test-discord-bot.sh "bot-testing" "!ping"
|
||||
# ./scripts/test-discord-bot.sh "bot-testing" "/ask Tell me a joke" 30
|
||||
# ./scripts/test-discord-bot.sh "general" "Hello bot" 15 /tmp/my-test.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
CHANNEL="${1:?Usage: test-discord-bot.sh <channel> <message> [wait_seconds] [screenshot_path]}"
|
||||
MESSAGE="${2:?Usage: test-discord-bot.sh <channel> <message> [wait_seconds] [screenshot_path]}"
|
||||
WAIT="${3:-10}"
|
||||
SCREENSHOT="${4:-/tmp/discord-bot-test.png}"
|
||||
|
||||
APP="Discord"
|
||||
|
||||
echo "[$APP] Activating..."
|
||||
osascript -e "tell application \"$APP\" to activate"
|
||||
sleep 1
|
||||
|
||||
echo "[$APP] Navigating to channel: $CHANNEL"
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
-- Quick Switcher
|
||||
keystroke "k" using command down
|
||||
delay 0.8
|
||||
keystroke "'"$CHANNEL"'"
|
||||
delay 1.5
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
|
||||
echo "[$APP] Sending message: $MESSAGE"
|
||||
osascript -e '
|
||||
set the clipboard to "'"$MESSAGE"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
|
||||
echo "[$APP] Waiting ${WAIT}s for bot response..."
|
||||
sleep "$WAIT"
|
||||
|
||||
echo "[$APP] Capturing screenshot..."
|
||||
"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
|
||||
echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
|
||||
@@ -1,232 +0,0 @@
|
||||
# iMessage Desktop bridge regression test
|
||||
|
||||
The iMessage channel is different from the other bot platforms: there is **no
|
||||
native app to drive with osascript**. Instead the Desktop app runs a local
|
||||
**BlueBubbles bridge** — a small HTTP server in the Electron main process that
|
||||
registers a webhook on a local [BlueBubbles](https://bluebubbles.app/) server,
|
||||
receives iMessage events, and forwards them to LobeHub Cloud.
|
||||
|
||||
So the test surface is three layers:
|
||||
|
||||
1. **Electron main IPC** — `imessageBridge.*` handlers (`getStatus`,
|
||||
`testConfig`, `upsertConfig`, `removeConfig`, `start`, `stop`)
|
||||
2. **Local bridge HTTP server** — `http://127.0.0.1:<port>/webhooks/bluebubbles/<appId>?secret=<secret>`
|
||||
3. **BlueBubbles REST API** — `http://127.0.0.1:1234/api/v1/*` (webhook + server/info)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- A running **BlueBubbles server** (macOS, default `http://127.0.0.1:1234`) with
|
||||
a known password. Sanity check:
|
||||
```bash
|
||||
curl -sS -m4 -o /dev/null -w '%{http_code}\n' \
|
||||
"http://127.0.0.1:1234/api/v1/server/info?password=<PW>" # expect 200
|
||||
```
|
||||
- **Electron dev running with CDP**: `./.agents/skills/agent-testing/scripts/electron-dev.sh start`
|
||||
- The **iMessage Desktop branch** checked out (the `imessageBridge` IPC group
|
||||
and `@lobechat/chat-adapter-imessage` must be compiled into the main bundle).
|
||||
Run `pnpm install --ignore-scripts` at the repo root **and** in `apps/desktop/`
|
||||
after switching branches — the new workspace package must be linked or the
|
||||
main build fails to resolve `@lobechat/chat-adapter-imessage`.
|
||||
|
||||
## Fast path: automated script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/imessage/test-imessage-bridge.sh '<bluebubbles_password>' [bb_url] [cdp_port]
|
||||
```
|
||||
|
||||
Asserts the whole flow and self-cleans (unique `applicationId` per run, removes
|
||||
its bridge config + BlueBubbles webhook on exit). Exit 0 = all green. It covers:
|
||||
|
||||
- BlueBubbles reachable + password valid; Electron CDP reachable; IPC available
|
||||
- `testConfig` happy path → success
|
||||
- `testConfig` wrong password → rejected; unreachable URL → rejected
|
||||
- `upsertConfig` **first-time save → success** (Bug #1 regression guard, below)
|
||||
- `getStatus` → `running:true`, config persisted, password redacted (`blueBubblesPasswordSet`)
|
||||
- BlueBubbles webhook actually registered for the appId
|
||||
- Local bridge HTTP server: wrong secret → 401; valid secret → past auth
|
||||
|
||||
The password is passed as argv (visible in `ps`) — local dev only, don't use a
|
||||
real secret on a shared machine.
|
||||
|
||||
## Layer 1 — IPC probes (no UI)
|
||||
|
||||
The renderer exposes the main-process handlers via `window.electronAPI.invoke`.
|
||||
This is the quickest way to exercise the bridge without clicking:
|
||||
|
||||
```bash
|
||||
# baseline
|
||||
agent-browser --cdp 9222 eval \
|
||||
"(async()=>JSON.stringify(await window.electronAPI.invoke('imessageBridge.getStatus',{})))()"
|
||||
|
||||
# test a connection (note: password as a JS string)
|
||||
agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
|
||||
(async function () {
|
||||
try {
|
||||
var r = await window.electronAPI.invoke('imessageBridge.testConfig', {
|
||||
applicationId: 'probe',
|
||||
blueBubblesServerUrl: 'http://127.0.0.1:1234',
|
||||
blueBubblesPassword: 'PASTE_PW',
|
||||
enabled: true,
|
||||
webhookSecret: 'probe-secret',
|
||||
});
|
||||
return JSON.stringify(r); // { success: true }
|
||||
} catch (e) { return 'ERR: ' + (e.message || e); }
|
||||
})()
|
||||
EVALEOF
|
||||
```
|
||||
|
||||
`upsertConfig` persists to the Electron store, starts the local HTTP server, and
|
||||
registers the BlueBubbles webhook. `removeConfig` + `stop` reverse it.
|
||||
|
||||
## Layer 2 — full UI flow (agent-browser)
|
||||
|
||||
The bridge settings only render in Desktop (`isDesktop` guard) under the agent's
|
||||
**Channel → iMessage** screen. The platform tile only appears as a real (non
|
||||
"Coming Soon") entry once the server registers `imessage` **and** the frontend
|
||||
drops it from `COMING_SOON_PLATFORMS` (`src/routes/(main)/agent/channel/const.ts`).
|
||||
|
||||
```bash
|
||||
agent-browser --cdp 9222 open "http://localhost:5173/agent/<aid>/channel"
|
||||
agent-browser --cdp 9222 wait --load networkidle && agent-browser --cdp 9222 wait 1500
|
||||
|
||||
# confirm the remote backend lists imessage (it must be registered + deployed)
|
||||
agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
|
||||
(async function(){
|
||||
var url='lobe-backend://lobe/trpc/lambda/agentBotProvider.listPlatforms?input='+encodeURIComponent('{"json":null,"meta":{"values":["undefined"],"v":1}}');
|
||||
var d=await (await fetch(url,{credentials:'include'})).json();
|
||||
var p=d.result?.data?.json||d;
|
||||
return JSON.stringify(p.map(function(x){return x.id;}));
|
||||
})()
|
||||
EVALEOF
|
||||
|
||||
# click the iMessage tile, then fill the form by ref
|
||||
agent-browser --cdp 9222 eval "(()=>{var b=[...document.querySelectorAll('aside button')].find(x=>/imessage/i.test(x.textContent));b&&b.click();})()"
|
||||
agent-browser --cdp 9222 wait 1500
|
||||
agent-browser --cdp 9222 snapshot -i | grep -iE "127.0.0.1:1234|Application ID|Webhook Secret|Test BlueBubbles|Save Bridge"
|
||||
```
|
||||
|
||||
Field refs (from the snapshot): Application ID, Webhook Secret, BlueBubbles
|
||||
Server URL (`placeholder="http://127.0.0.1:1234"`), and a **nested** textbox right
|
||||
under the URL one is the BlueBubbles Password. Fill with `fill` (real input
|
||||
events — `eval`-setting React inputs won't fire onChange), click **Test
|
||||
BlueBubbles**, then **Save Bridge**. Read the antd toast immediately (it
|
||||
auto-dismisses):
|
||||
|
||||
```bash
|
||||
agent-browser --cdp 9222 eval \
|
||||
"JSON.stringify([...new Set([...document.querySelectorAll('.ant-message-custom-content')].map(n=>n.textContent.trim()))])"
|
||||
# Test → "BlueBubbles connection passed"
|
||||
# Save → "iMessage Desktop bridge saved"
|
||||
```
|
||||
|
||||
Verify the end state via BlueBubbles + IPC:
|
||||
|
||||
```bash
|
||||
curl -sS "http://127.0.0.1:1234/api/v1/webhook?password=<PW>" # webhook for the appId present
|
||||
agent-browser --cdp 9222 eval "(async()=>JSON.stringify(await window.electronAPI.invoke('imessageBridge.getStatus',{})))()"
|
||||
# running:true, serverUrl: http://127.0.0.1:33270, configs[].blueBubblesPasswordSet:true
|
||||
```
|
||||
|
||||
Cleanup: `removeConfig` + `stop` via IPC, then `DELETE /api/v1/webhook/<id>` on
|
||||
BlueBubbles.
|
||||
|
||||
## Outbound send test (desktop → BlueBubbles → iMessage)
|
||||
|
||||
Verifies the leg the bridge uses to _reply_: `BlueBubblesApiClient.sendText`
|
||||
→ `POST /api/v1/message/text`. Run the helper against your own number:
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/imessage/send-imessage-test.sh '<bb_password>' '+<E164>' # e.g. +15551234567
|
||||
```
|
||||
|
||||
**Gotcha that bites everyone:** with `method=apple-script` and a _new_
|
||||
conversation, the HTTP POST often **times out** even though the message is
|
||||
sent. Never judge success by the HTTP response. Instead poll
|
||||
`POST /api/v1/message/query` and read the matching `isFromMe:true` row's
|
||||
`error` field:
|
||||
|
||||
- `error: 0` (or null) → sent OK
|
||||
- non-zero `error` → real send failure
|
||||
|
||||
The script does exactly this: fires the send, ignores the timeout, then matches
|
||||
its marker text in the message store and asserts `error == 0`.
|
||||
|
||||
Two more notes:
|
||||
|
||||
- Use a full E.164 handle (`iMessage;-;+<countrycode><number>`) or an Apple ID
|
||||
email. Looking the chat up by guid afterwards may 404 if BB filed the message
|
||||
under a differently-formatted guid — that's a lookup quirk, not a send failure.
|
||||
- Sending to _your own_ number round-trips: BB records both the outgoing
|
||||
(`fromMe:true`) and an incoming copy (`fromMe:false`).
|
||||
|
||||
## Inbound e2e test (iMessage → cloud agent → reply)
|
||||
|
||||
Full inbound chain: a message arrives → BlueBubbles fires its `new-message`
|
||||
webhook → local bridge (`:33270`) → `forwardWebhook` POSTs to
|
||||
`<remote>/api/agent/webhooks/imessage/<appId>?secret=…` → cloud agent → reply
|
||||
flows back via Device Gateway → BB `sendText`.
|
||||
|
||||
Prerequisites:
|
||||
|
||||
- A cloud bot provider for the same `applicationId` exists and is **connected**
|
||||
(Save Configuration + the device gateway connected — a _disconnected_ gateway
|
||||
yields `DEVICE_NOT_FOUND` on connect and blocks the reply leg).
|
||||
- The `imessage` Labs toggle is on (otherwise the channel is gated to "Coming
|
||||
Soon"), and `webhookSecret` matches on both ends (auto-generated on save).
|
||||
|
||||
Two ways to drive it:
|
||||
|
||||
1. **Second device / Apple ID (recommended).** Have _another_ Apple ID message
|
||||
the BB-hosted number (e.g. "please reply pong"). The bot replies; you see it
|
||||
on the other device. **No loop risk** — the reply goes to the other party,
|
||||
not back to itself.
|
||||
2. **Send to your own number (quick, loop-aware).** `sendText` to the hosted
|
||||
number; the loopback _incoming_ copy (`isFromMe:false`) triggers the bot.
|
||||
Watch the reply land in `message/query` as a `fromMe:true` row.
|
||||
|
||||
**Loop guard — why a self-send doesn't spin forever:** the Chat SDK adapter
|
||||
drops any `isFromMe` message before dispatch
|
||||
(`packages/chat-adapter-imessage/src/adapter.ts`: `if (message.isFromMe) return`).
|
||||
The bot's own reply (`isFromMe:true`) is never re-processed, so in the normal
|
||||
case (someone else → bot → reply to them) there is no loop. The self-send case
|
||||
is a **test-only edge**: the bot's reply also round-trips to your number, and
|
||||
only the adapter's `isFromMe` check stops a second pass. Keep the prompt
|
||||
conversational (so the bot doesn't keep finding something to answer), and
|
||||
**turn the `imessage` lab off / remove the config when done** — never leave a
|
||||
self-send bot running unattended.
|
||||
|
||||
Watch the chain live:
|
||||
|
||||
```bash
|
||||
tail -f /tmp/electron-dev.log | grep -iE "imessage|bridge|forward|Message API"
|
||||
# the agent reply shows up as a fromMe:true row with the bot's text:
|
||||
curl -sS -X POST "http://127.0.0.1:1234/api/v1/message/query?password=<PW>" \
|
||||
-H 'Content-Type: application/json' -d '{"limit":5,"sort":"DESC"}'
|
||||
```
|
||||
|
||||
`startTyping` will log a Private-API error unless BlueBubbles has the Private
|
||||
API helper set up (needs a jailbroken / SIP-disabled Mac) — it's logged and
|
||||
ignored; text replies still work.
|
||||
|
||||
## Known bugs / gotchas
|
||||
|
||||
- **Bug #1 — first-time save (fixed; guarded by the script).** BlueBubbles'
|
||||
`GET /api/v1/webhook?url=<unregistered>` returns **HTTP 500**
|
||||
(`Cannot read properties of null (reading 'events')`). The bridge must list
|
||||
**all** webhooks and match client-side, never pass the `?url=` filter. If you
|
||||
see `upsertConfig` fail with "An unhandled error has occurred!" originating in
|
||||
`listWebhooks`, this regressed.
|
||||
- **Save leaves a half-state on webhook failure.** `upsertConfig` writes the
|
||||
config + starts the HTTP server _before_ registering the webhook, so a webhook
|
||||
failure still reports `running:true` with the config persisted but no
|
||||
BlueBubbles webhook. Always assert the BlueBubbles webhook list, not just IPC
|
||||
status.
|
||||
- **Unknown appId / forward failure → 500.** Posting to the local bridge for an
|
||||
unknown appId, or when no cloud bot is bound, returns 500 (BlueBubbles retries
|
||||
on 5xx). Auth (wrong secret → 401) is enforced before that.
|
||||
- **Backend deploy lag.** Desktop dev proxies tRPC through `lobe-backend://` to
|
||||
the _remote_ server. iMessage only appears in `listPlatforms` once the server
|
||||
registration is deployed there, regardless of local branch.
|
||||
- **Restart to load main-process fixes.** Editing `imessageBridgeSrv.ts` /
|
||||
`@lobechat/chat-adapter-imessage` needs `electron-dev.sh restart` — main isn't
|
||||
hot-replaced. On restart, enabled configs auto-register their webhook again.
|
||||
@@ -1,81 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# send-imessage-test.sh — Verify the outbound leg: desktop → BlueBubbles → iMessage
|
||||
#
|
||||
# Sends one real iMessage via the same REST call the Desktop bridge uses
|
||||
# (`POST /api/v1/message/text`, which BlueBubblesApiClient.sendText wraps) and
|
||||
# confirms it actually went out.
|
||||
#
|
||||
# KEY GOTCHA: with method=apple-script and a NEW conversation, the HTTP request
|
||||
# often TIMES OUT even though the message is sent. Do NOT treat the timeout as a
|
||||
# failure — instead poll `POST /api/v1/message/query` and check the message's
|
||||
# `error` field (0 = sent OK). This script does that for you.
|
||||
#
|
||||
# This sends a REAL message, so it has side effects. Target your own number.
|
||||
#
|
||||
# Usage:
|
||||
# ./send-imessage-test.sh <bb_password> <target_e164> [message] [bb_url]
|
||||
#
|
||||
# Example (send to your own phone, E.164 with country code):
|
||||
# ./send-imessage-test.sh 'my-bb-pass' '+15551234567'
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
BB_PASS="${1:?Usage: $0 <bb_password> <target_e164(+countrycode)> [message] [bb_url]}"
|
||||
TARGET="${2:?Need a target handle in E.164, e.g. +15551234567 (or an Apple ID email)}"
|
||||
MARKER="lobe-imsg-test-$(date +%s)"
|
||||
MESSAGE="${3:-[${MARKER}] desktop bridge → BlueBubbles → iMessage outbound check}"
|
||||
BB_URL="${4:-http://127.0.0.1:1234}"
|
||||
|
||||
CHAT_GUID="iMessage;-;${TARGET}"
|
||||
|
||||
echo "[send-test] target=${TARGET} marker=${MARKER}"
|
||||
|
||||
# 1) Fire the send. apple-script on a new chat may hang the HTTP response, so we
|
||||
# cap it short and ignore a timeout — step 2 is the source of truth.
|
||||
python3 - "$BB_PASS" "$BB_URL" "$CHAT_GUID" "$MESSAGE" <<'PY' || true
|
||||
import json,sys,urllib.request,urllib.parse,uuid
|
||||
pw,base,guid,msg=sys.argv[1:5]
|
||||
url=base+"/api/v1/message/text?password="+urllib.parse.quote(pw)
|
||||
body={"chatGuid":guid,"message":msg,"method":"apple-script","tempGuid":str(uuid.uuid4())}
|
||||
req=urllib.request.Request(url,data=json.dumps(body).encode("utf-8"),
|
||||
headers={"Content-Type":"application/json"},method="POST")
|
||||
try:
|
||||
r=urllib.request.urlopen(req,timeout=8)
|
||||
print("[send-test] HTTP",r.status,"(immediate response)")
|
||||
except urllib.error.HTTPError as e:
|
||||
print("[send-test] HTTP",e.code,e.read().decode()[:200])
|
||||
except Exception as e:
|
||||
print("[send-test] HTTP request returned no body (likely apple-script delay):",type(e).__name__)
|
||||
PY
|
||||
|
||||
# 2) Source of truth: find our marker in the message store and read its error.
|
||||
echo "[send-test] verifying via message/query (the HTTP timeout above is expected)…"
|
||||
sleep 3
|
||||
python3 - "$BB_PASS" "$BB_URL" "$MARKER" <<'PY'
|
||||
import json,sys,time,urllib.request,urllib.parse
|
||||
pw,base,marker=sys.argv[1:4]
|
||||
url=base+"/api/v1/message/query?password="+urllib.parse.quote(pw)
|
||||
def query():
|
||||
body={"limit":15,"offset":0,"with":["chats"],"sort":"DESC"}
|
||||
req=urllib.request.Request(url,data=json.dumps(body).encode(),
|
||||
headers={"Content-Type":"application/json"},method="POST")
|
||||
return json.load(urllib.request.urlopen(req,timeout=12)).get("data") or []
|
||||
hit=None
|
||||
for _ in range(5):
|
||||
for m in query():
|
||||
if marker in (m.get("text") or "") and m.get("isFromMe"):
|
||||
hit=m; break
|
||||
if hit: break
|
||||
time.sleep(2)
|
||||
if not hit:
|
||||
print("[send-test] ✗ outbound message not found in BB store — send likely failed")
|
||||
sys.exit(1)
|
||||
err=hit.get("error")
|
||||
if err in (0,None):
|
||||
print("[send-test] ✓ outbound message sent (fromMe=True, error=%s)"%err)
|
||||
print("[send-test] → confirm it arrived in the Messages app on the target device")
|
||||
else:
|
||||
print("[send-test] ✗ BlueBubbles reported send error=%s"%err)
|
||||
sys.exit(1)
|
||||
PY
|
||||
@@ -1,187 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-imessage-bridge.sh — Regression test for the iMessage Desktop bridge
|
||||
#
|
||||
# Drives the Electron main-process `imessageBridge.*` IPC handlers plus the
|
||||
# local bridge HTTP server and the BlueBubbles server, asserting the full
|
||||
# connect/configure flow. Use it to regression-test PR work on the iMessage
|
||||
# channel (BlueBubbles bridge) without clicking through the UI every time.
|
||||
#
|
||||
# Prerequisites:
|
||||
# 1. BlueBubbles server running and reachable (default http://127.0.0.1:1234)
|
||||
# 2. Electron dev running with CDP — `electron-dev.sh start`
|
||||
# 3. `agent-browser` on PATH, connected to the same CDP port
|
||||
#
|
||||
# Usage:
|
||||
# ./test-imessage-bridge.sh <bluebubbles_password> [bb_url] [cdp_port]
|
||||
#
|
||||
# Example:
|
||||
# ./test-imessage-bridge.sh 'my-bb-password'
|
||||
# ./test-imessage-bridge.sh 'my-bb-password' http://127.0.0.1:1234 9222
|
||||
#
|
||||
# Notes:
|
||||
# - The password is passed as an argv, so it is visible in `ps`. This is a
|
||||
# local dev tool; do not run it on shared machines with a real secret.
|
||||
# - It uses a unique applicationId per run (imsg-regression-$$) and cleans up
|
||||
# its own bridge config + BlueBubbles webhook on exit, so it is safe to
|
||||
# re-run and does not disturb real configs.
|
||||
set -euo pipefail
|
||||
|
||||
BB_PASS="${1:?Usage: $0 <bluebubbles_password> [bb_url] [cdp_port]}"
|
||||
BB_URL="${2:-http://127.0.0.1:1234}"
|
||||
CDP_PORT="${3:-9222}"
|
||||
|
||||
APP_ID="imsg-regression-$$"
|
||||
SECRET="regression-secret-$$"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# ── Output helpers ───────────────────────────────────────────────────
|
||||
ok() { echo " ✓ $1"; PASS=$((PASS + 1)); }
|
||||
bad() { echo " ✗ $1 — $2"; FAIL=$((FAIL + 1)); }
|
||||
note() { echo "[imsg-test] $1"; }
|
||||
|
||||
# ── BlueBubbles REST helpers ─────────────────────────────────────────
|
||||
bb_get_webhooks() {
|
||||
curl -sS -m 8 "${BB_URL}/api/v1/webhook?password=${BB_PASS}"
|
||||
}
|
||||
|
||||
# Delete every webhook whose URL mentions our APP_ID (cleanup is idempotent).
|
||||
bb_cleanup_webhooks() {
|
||||
local ids
|
||||
ids=$(bb_get_webhooks | python3 -c '
|
||||
import json,sys
|
||||
try: d=json.load(sys.stdin)
|
||||
except Exception: sys.exit(0)
|
||||
for w in (d.get("data") or []):
|
||||
if "'"$APP_ID"'" in (w.get("url") or ""): print(w["id"])
|
||||
' 2>/dev/null || true)
|
||||
for id in $ids; do
|
||||
curl -sS -m 8 -X DELETE "${BB_URL}/api/v1/webhook/${id}?password=${BB_PASS}" >/dev/null 2>&1 || true
|
||||
done
|
||||
}
|
||||
|
||||
# ── IPC helper (drives the Electron renderer's electronAPI bridge) ───
|
||||
# Runs a JS snippet that returns a string token; prints the raw token.
|
||||
# The BlueBubbles password is base64-injected (atob) so special chars in the
|
||||
# secret never need shell/JS quoting.
|
||||
ipc_eval() {
|
||||
local js="$1"
|
||||
agent-browser --cdp "$CDP_PORT" eval -b "$(printf '%s' "$js" | base64)" 2>/dev/null
|
||||
}
|
||||
|
||||
PASS_B64=$(printf '%s' "$BB_PASS" | base64)
|
||||
|
||||
# Emit an inline JS object literal for the bridge config. $1 overrides the
|
||||
# password expression (defaults to atob of the real password); pass a JS string
|
||||
# literal like "'wrong'" to test the rejection path.
|
||||
ipc_config_js() {
|
||||
local pwexpr="${1:-atob('${PASS_B64}')}"
|
||||
printf "{applicationId:'%s',blueBubblesServerUrl:'%s',blueBubblesPassword:%s,enabled:true,webhookSecret:'%s'}" \
|
||||
"$APP_ID" "$BB_URL" "$pwexpr" "$SECRET"
|
||||
}
|
||||
|
||||
# ── Preflight ────────────────────────────────────────────────────────
|
||||
note "BlueBubbles: ${BB_URL} CDP: ${CDP_PORT} appId: ${APP_ID}"
|
||||
|
||||
code=$(curl -sS -m 6 -o /dev/null -w '%{http_code}' \
|
||||
"${BB_URL}/api/v1/server/info?password=${BB_PASS}" || echo 000)
|
||||
if [ "$code" = "200" ]; then ok "BlueBubbles reachable + password valid"; else
|
||||
bad "BlueBubbles preflight" "HTTP $code (is BlueBubbles running on ${BB_URL}?)"
|
||||
echo "Aborting — fix BlueBubbles first."; exit 1
|
||||
fi
|
||||
|
||||
if ! curl -sf --max-time 3 "http://localhost:${CDP_PORT}/json/version" >/dev/null 2>&1; then
|
||||
bad "Electron CDP preflight" "CDP ${CDP_PORT} unreachable — run electron-dev.sh start"
|
||||
echo "Aborting."; exit 1
|
||||
fi
|
||||
ok "Electron CDP reachable"
|
||||
|
||||
# Bridge must expose the IPC group (built from this branch's code).
|
||||
probe=$(ipc_eval "(async()=>{try{var s=await window.electronAPI.invoke('imessageBridge.getStatus',{});return 'OK:'+JSON.stringify(s);}catch(e){return 'ERR:'+(e.message||e);}})()")
|
||||
case "$probe" in
|
||||
*OK:*) ok "imessageBridge IPC available" ;;
|
||||
*) bad "imessageBridge IPC" "got: $probe (is the iMessage Desktop branch checked out?)"; echo "Aborting."; exit 1 ;;
|
||||
esac
|
||||
|
||||
# Start clean: remove any leftover config for this appId + BB webhooks.
|
||||
ipc_eval "(async()=>{try{await window.electronAPI.invoke('imessageBridge.removeConfig',{applicationId:'${APP_ID}'});}catch(e){}return 'done';})()" >/dev/null
|
||||
bb_cleanup_webhooks
|
||||
|
||||
# ── testConfig: happy path ───────────────────────────────────────────
|
||||
r=$(ipc_eval "(async()=>{try{var c=$(ipc_config_js);var x=await window.electronAPI.invoke('imessageBridge.testConfig',c);return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
|
||||
case "$r" in
|
||||
*OK:*success*true*) ok "testConfig with valid password → success" ;;
|
||||
*) bad "testConfig (valid)" "got: $r" ;;
|
||||
esac
|
||||
|
||||
# ── testConfig: wrong password rejects ───────────────────────────────
|
||||
r=$(ipc_eval "(async()=>{try{var c=$(ipc_config_js "'definitely-wrong-password'");var x=await window.electronAPI.invoke('imessageBridge.testConfig',c);return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
|
||||
case "$r" in
|
||||
*ERR:*) ok "testConfig with wrong password → rejected" ;;
|
||||
*) bad "testConfig (wrong password)" "expected rejection, got: $r" ;;
|
||||
esac
|
||||
|
||||
# ── testConfig: unreachable URL rejects ──────────────────────────────
|
||||
r=$(ipc_eval "(async()=>{try{var x=await window.electronAPI.invoke('imessageBridge.testConfig',{applicationId:'${APP_ID}',blueBubblesServerUrl:'http://127.0.0.1:65530',blueBubblesPassword:atob('${PASS_B64}'),enabled:true,webhookSecret:'${SECRET}'});return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
|
||||
case "$r" in
|
||||
*ERR:*) ok "testConfig with unreachable URL → rejected" ;;
|
||||
*) bad "testConfig (unreachable)" "expected rejection, got: $r" ;;
|
||||
esac
|
||||
|
||||
# ── upsertConfig: FIRST-TIME registration (Bug #1 regression guard) ──
|
||||
# BlueBubbles' GET /webhook?url=<unregistered> returns HTTP 500. The bridge
|
||||
# must list ALL webhooks and match client-side, otherwise this first save
|
||||
# fails. This assertion guards that fix.
|
||||
r=$(ipc_eval "(async()=>{try{var c=$(ipc_config_js);var x=await window.electronAPI.invoke('imessageBridge.upsertConfig',c);return 'OK:'+JSON.stringify(x);}catch(e){return 'ERR:'+(e.message||e);}})()")
|
||||
case "$r" in
|
||||
*OK:*success*true*) ok "upsertConfig first-time save → success (Bug #1 guard)" ;;
|
||||
*) bad "upsertConfig (first-time)" "got: $r" ;;
|
||||
esac
|
||||
|
||||
# ── getStatus: bridge running + config persisted ─────────────────────
|
||||
# Return a quote-free token so grep isn't tripped up by agent-browser's
|
||||
# JSON-string escaping of the eval result.
|
||||
r=$(ipc_eval "(async()=>{var s=await window.electronAPI.invoke('imessageBridge.getStatus',{});var c=(s.configs||[]).find(function(x){return x.applicationId==='${APP_ID}';});return 'RUN='+(s.running?'Y':'N')+' CFG='+(c?'Y':'N')+' PW='+((c&&c.blueBubblesPasswordSet)?'Y':'N');})()")
|
||||
echo "$r" | grep -q 'RUN=Y' && ok "bridge running" || bad "bridge running" "got: $r"
|
||||
echo "$r" | grep -q 'CFG=Y' && ok "config persisted" || bad "config persisted" "got: $r"
|
||||
echo "$r" | grep -q 'PW=Y' && ok "password stored (redacted in status)" || bad "password stored" "got: $r"
|
||||
|
||||
# ── BlueBubbles webhook actually registered ──────────────────────────
|
||||
if bb_get_webhooks | grep -q "${APP_ID}"; then
|
||||
ok "BlueBubbles webhook registered for appId"
|
||||
else
|
||||
bad "BlueBubbles webhook" "no webhook URL containing ${APP_ID}"
|
||||
fi
|
||||
|
||||
# ── Local bridge HTTP server: secret enforcement ─────────────────────
|
||||
BRIDGE_URL=$(ipc_eval "(async()=>{var s=await window.electronAPI.invoke('imessageBridge.getStatus',{});return s.serverUrl||'';})()" | tr -d '"')
|
||||
if [ -n "$BRIDGE_URL" ]; then
|
||||
# wrong secret → 401
|
||||
code=$(curl -sS -m 6 -o /dev/null -w '%{http_code}' -X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${BRIDGE_URL}/webhooks/bluebubbles/${APP_ID}?secret=WRONG" \
|
||||
-d '{"type":"new-message","data":{"guid":"x"}}' || echo 000)
|
||||
[ "$code" = "401" ] && ok "local bridge rejects wrong secret (401)" || bad "local bridge wrong secret" "expected 401, got $code"
|
||||
|
||||
# right secret → passes auth (reaches forward; without a bound cloud bot it
|
||||
# returns 5xx — that's fine, we're only asserting auth + routing here)
|
||||
code=$(curl -sS -m 6 -o /dev/null -w '%{http_code}' -X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${BRIDGE_URL}/webhooks/bluebubbles/${APP_ID}?secret=${SECRET}" \
|
||||
-d '{"type":"new-message","data":{"guid":"x","text":"hi"}}' || echo 000)
|
||||
[ "$code" != "401" ] && ok "local bridge accepts valid secret (HTTP $code, past auth)" || bad "local bridge valid secret" "got 401 with correct secret"
|
||||
else
|
||||
bad "local bridge URL" "getStatus returned no serverUrl"
|
||||
fi
|
||||
|
||||
# ── Cleanup ──────────────────────────────────────────────────────────
|
||||
ipc_eval "(async()=>{try{await window.electronAPI.invoke('imessageBridge.removeConfig',{applicationId:'${APP_ID}'});await window.electronAPI.invoke('imessageBridge.stop',{});}catch(e){}return 'cleaned';})()" >/dev/null
|
||||
bb_cleanup_webhooks
|
||||
note "cleaned up config + BlueBubbles webhook for ${APP_ID}"
|
||||
|
||||
# ── Summary ──────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[imsg-test] PASS=${PASS} FAIL=${FAIL}"
|
||||
[ "$FAIL" -eq 0 ] || exit 1
|
||||
@@ -1,61 +0,0 @@
|
||||
# Lark / 飞书 Bot Testing
|
||||
|
||||
**App name:** `Lark` or `飞书` | **Process name:** `Lark` or `飞书`
|
||||
|
||||
See [references/osascript.md](../../references/osascript.md) for shared patterns.
|
||||
|
||||
## Activate & Navigate
|
||||
|
||||
```bash
|
||||
# Activate Lark (auto-detects Lark or 飞书)
|
||||
osascript -e 'tell application "Lark" to activate' 2> /dev/null \
|
||||
|| osascript -e 'tell application "飞书" to activate'
|
||||
sleep 1
|
||||
|
||||
# Quick Switcher / Search (Cmd+K)
|
||||
osascript -e 'tell application "System Events" to keystroke "k" using command down'
|
||||
sleep 0.5
|
||||
osascript -e '
|
||||
set the clipboard to "bot-testing"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 1.5
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
```
|
||||
|
||||
## Send Message to Bot
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
set the clipboard to "@MyBot help me with this task"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Verify Response
|
||||
|
||||
```bash
|
||||
sleep 10
|
||||
screencapture /tmp/lark-bot-response.png
|
||||
```
|
||||
|
||||
## Lark-Specific Notes
|
||||
|
||||
- App name varies: `Lark` (international) vs `飞书` (China mainland) — the script auto-detects
|
||||
- Uses `Cmd+K` for quick search (same as Discord/Slack)
|
||||
- Enter sends message by default
|
||||
- Always use clipboard paste for CJK characters
|
||||
|
||||
## Script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/lark/test-lark-bot.sh "bot-testing" "@MyBot hello"
|
||||
./.agents/skills/agent-testing/bot/lark/test-lark-bot.sh "bot-testing" "Help me with this" 30
|
||||
```
|
||||
@@ -1,84 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-lark-bot.sh — Send a message to a Lark/Feishu bot and capture the response
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/test-lark-bot.sh <chat> <message> [wait_seconds] [screenshot_path]
|
||||
#
|
||||
# chat — Chat or contact name to search for
|
||||
# message — Message to send to the bot
|
||||
# wait_seconds — Seconds to wait for bot response (default: 10)
|
||||
# screenshot_path — Output screenshot path (default: /tmp/lark-bot-test.png)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Lark (飞书) desktop app installed and logged in
|
||||
# - Accessibility permission granted (System Preferences > Privacy > Accessibility)
|
||||
#
|
||||
# Notes:
|
||||
# - The app name may be "Lark" or "飞书" depending on version/locale
|
||||
# - Uses Cmd+K to open search/quick switcher
|
||||
# - Enter sends message by default
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/test-lark-bot.sh "TestBot" "Hello"
|
||||
# ./scripts/test-lark-bot.sh "bot-testing" "/ask Tell me a joke" 30
|
||||
# ./scripts/test-lark-bot.sh "MyBot" "Help me summarize this" 60 /tmp/my-test.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
CHAT="${1:?Usage: test-lark-bot.sh <chat> <message> [wait_seconds] [screenshot_path]}"
|
||||
MESSAGE="${2:?Usage: test-lark-bot.sh <chat> <message> [wait_seconds] [screenshot_path]}"
|
||||
WAIT="${3:-10}"
|
||||
SCREENSHOT="${4:-/tmp/lark-bot-test.png}"
|
||||
|
||||
# Detect app name — "Lark" or "飞书"
|
||||
APP=""
|
||||
if osascript -e 'tell application "Lark" to name' &>/dev/null; then
|
||||
APP="Lark"
|
||||
elif osascript -e 'tell application "飞书" to name' &>/dev/null; then
|
||||
APP="飞书"
|
||||
else
|
||||
echo "[error] Lark/飞书 app not found. Install Lark or 飞书."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[$APP] Activating..."
|
||||
osascript -e "tell application \"$APP\" to activate"
|
||||
sleep 1
|
||||
|
||||
echo "[$APP] Searching for chat: $CHAT"
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
-- Quick Switcher / Search (Cmd+K)
|
||||
keystroke "k" using command down
|
||||
delay 0.8
|
||||
end tell
|
||||
'
|
||||
# Use clipboard for chat name (supports CJK characters)
|
||||
osascript -e '
|
||||
set the clipboard to "'"$CHAT"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 1.5
|
||||
key code 36 -- Enter to select first result
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
|
||||
echo "[$APP] Sending message: $MESSAGE"
|
||||
osascript -e '
|
||||
set the clipboard to "'"$MESSAGE"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter to send
|
||||
end tell
|
||||
'
|
||||
|
||||
echo "[$APP] Waiting ${WAIT}s for bot response..."
|
||||
sleep "$WAIT"
|
||||
|
||||
echo "[$APP] Capturing screenshot..."
|
||||
"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
|
||||
echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
|
||||
@@ -1,62 +0,0 @@
|
||||
# QQ Bot Testing
|
||||
|
||||
**App name:** `QQ` | **Process name:** `QQ`
|
||||
|
||||
See [references/osascript.md](../../references/osascript.md) for shared patterns.
|
||||
|
||||
## Activate & Navigate
|
||||
|
||||
```bash
|
||||
osascript -e 'tell application "QQ" to activate'
|
||||
sleep 1
|
||||
|
||||
# Search for contact/group (Cmd+F)
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "f" using command down
|
||||
delay 0.8
|
||||
end tell
|
||||
'
|
||||
osascript -e '
|
||||
set the clipboard to "bot-testing"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 1.5
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
```
|
||||
|
||||
## Send Message to Bot
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
set the clipboard to "Hello bot!"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Verify Response
|
||||
|
||||
```bash
|
||||
sleep 10
|
||||
screencapture /tmp/qq-bot-response.png
|
||||
```
|
||||
|
||||
## QQ-Specific Notes
|
||||
|
||||
- Enter sends message by default; Shift+Enter for newlines
|
||||
- Uses `Cmd+F` for search (not `Cmd+K` like Discord/Slack/Lark)
|
||||
- Always use clipboard paste for CJK characters
|
||||
|
||||
## Script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/qq/test-qq-bot.sh "bot-testing" "Hello bot" 15
|
||||
./.agents/skills/agent-testing/bot/qq/test-qq-bot.sh "MyBot" "/help" 10
|
||||
```
|
||||
@@ -1,76 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-qq-bot.sh — Send a message to a QQ bot and capture the response
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/test-qq-bot.sh <contact> <message> [wait_seconds] [screenshot_path]
|
||||
#
|
||||
# contact — Contact, group, or bot name to search for
|
||||
# message — Message to send
|
||||
# wait_seconds — Seconds to wait for bot response (default: 10)
|
||||
# screenshot_path — Output screenshot path (default: /tmp/qq-bot-test.png)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - QQ desktop app installed and logged in
|
||||
# - Accessibility permission granted (System Preferences > Privacy > Accessibility)
|
||||
#
|
||||
# Notes:
|
||||
# - The app name is "QQ"
|
||||
# - Uses Cmd+F to open search
|
||||
# - Enter sends message by default; Shift+Enter for newlines
|
||||
# - Uses clipboard paste for CJK character support
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/test-qq-bot.sh "TestBot" "Hello"
|
||||
# ./scripts/test-qq-bot.sh "bot-testing" "Hello bot" 30
|
||||
# ./scripts/test-qq-bot.sh "MyBot" "/help" 15 /tmp/my-test.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
CONTACT="${1:?Usage: test-qq-bot.sh <contact> <message> [wait_seconds] [screenshot_path]}"
|
||||
MESSAGE="${2:?Usage: test-qq-bot.sh <contact> <message> [wait_seconds] [screenshot_path]}"
|
||||
WAIT="${3:-10}"
|
||||
SCREENSHOT="${4:-/tmp/qq-bot-test.png}"
|
||||
|
||||
APP="QQ"
|
||||
|
||||
echo "[$APP] Activating..."
|
||||
osascript -e "tell application \"$APP\" to activate"
|
||||
sleep 1
|
||||
|
||||
echo "[$APP] Searching for contact: $CONTACT"
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
-- Search (Cmd+F)
|
||||
keystroke "f" using command down
|
||||
delay 0.8
|
||||
end tell
|
||||
'
|
||||
# Use clipboard for contact name (supports CJK characters)
|
||||
osascript -e '
|
||||
set the clipboard to "'"$CONTACT"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 1.5
|
||||
key code 36 -- Enter to select first result
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
|
||||
echo "[$APP] Sending message: $MESSAGE"
|
||||
osascript -e '
|
||||
set the clipboard to "'"$MESSAGE"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter to send
|
||||
end tell
|
||||
'
|
||||
|
||||
echo "[$APP] Waiting ${WAIT}s for bot response..."
|
||||
sleep "$WAIT"
|
||||
|
||||
echo "[$APP] Capturing screenshot..."
|
||||
"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
|
||||
echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
|
||||
@@ -1,73 +0,0 @@
|
||||
# Slack Bot Testing
|
||||
|
||||
**App name:** `Slack` | **Process name:** `Slack`
|
||||
|
||||
See [references/osascript.md](../../references/osascript.md) for shared patterns.
|
||||
|
||||
## Activate & Navigate
|
||||
|
||||
```bash
|
||||
# Activate Slack
|
||||
osascript -e 'tell application "Slack" to activate'
|
||||
sleep 1
|
||||
|
||||
# Quick Switcher (Cmd+K)
|
||||
osascript -e 'tell application "System Events" to keystroke "k" using command down'
|
||||
sleep 0.5
|
||||
osascript -e 'tell application "System Events" to keystroke "bot-testing"'
|
||||
sleep 1
|
||||
osascript -e 'tell application "System Events" to key code 36' # Enter
|
||||
sleep 2
|
||||
```
|
||||
|
||||
## Send Message to Bot
|
||||
|
||||
```bash
|
||||
# Direct message input (focused after channel nav)
|
||||
osascript -e 'tell application "System Events" to keystroke "@mybot hello"'
|
||||
sleep 0.3
|
||||
osascript -e 'tell application "System Events" to key code 36'
|
||||
```
|
||||
|
||||
## Send Long Message
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
tell application "Slack" to activate
|
||||
delay 0.5
|
||||
set the clipboard to "A long test message for the bot..."
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Slash Command Test
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
tell application "Slack" to activate
|
||||
delay 0.5
|
||||
tell application "System Events"
|
||||
keystroke "/ask What is the meaning of life?"
|
||||
delay 0.5
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Verify Response
|
||||
|
||||
```bash
|
||||
sleep 10
|
||||
screencapture /tmp/slack-bot-response.png
|
||||
```
|
||||
|
||||
## Script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/slack/test-slack-bot.sh "bot-testing" "@mybot hello"
|
||||
./.agents/skills/agent-testing/bot/slack/test-slack-bot.sh "bot-testing" "/ask What is 2+2?" 20
|
||||
```
|
||||
@@ -1,64 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-slack-bot.sh — Send a message to a Slack bot and capture the response
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/test-slack-bot.sh <channel> <message> [wait_seconds] [screenshot_path]
|
||||
#
|
||||
# channel — Channel name to navigate to via Quick Switcher (Cmd+K)
|
||||
# message — Message to send (e.g., "@mybot hello" or "/ask question")
|
||||
# wait_seconds — Seconds to wait for bot response (default: 10)
|
||||
# screenshot_path — Output screenshot path (default: /tmp/slack-bot-test.png)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Slack desktop app installed and logged in
|
||||
# - Accessibility permission granted (System Preferences > Privacy > Accessibility)
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/test-slack-bot.sh "bot-testing" "@mybot hello"
|
||||
# ./scripts/test-slack-bot.sh "bot-testing" "/ask What is 2+2?" 20
|
||||
# ./scripts/test-slack-bot.sh "general" "Hey bot" 15 /tmp/my-test.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
CHANNEL="${1:?Usage: test-slack-bot.sh <channel> <message> [wait_seconds] [screenshot_path]}"
|
||||
MESSAGE="${2:?Usage: test-slack-bot.sh <channel> <message> [wait_seconds] [screenshot_path]}"
|
||||
WAIT="${3:-10}"
|
||||
SCREENSHOT="${4:-/tmp/slack-bot-test.png}"
|
||||
|
||||
APP="Slack"
|
||||
|
||||
echo "[$APP] Activating..."
|
||||
osascript -e "tell application \"$APP\" to activate"
|
||||
sleep 1
|
||||
|
||||
echo "[$APP] Navigating to channel: $CHANNEL"
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
-- Quick Switcher
|
||||
keystroke "k" using command down
|
||||
delay 0.8
|
||||
keystroke "'"$CHANNEL"'"
|
||||
delay 1.5
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
|
||||
echo "[$APP] Sending message: $MESSAGE"
|
||||
osascript -e '
|
||||
set the clipboard to "'"$MESSAGE"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
|
||||
echo "[$APP] Waiting ${WAIT}s for bot response..."
|
||||
sleep "$WAIT"
|
||||
|
||||
echo "[$APP] Capturing screenshot..."
|
||||
"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
|
||||
echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
|
||||
@@ -1,80 +0,0 @@
|
||||
# Telegram Bot Testing
|
||||
|
||||
**App name:** `Telegram` | **Process name:** `Telegram`
|
||||
|
||||
See [references/osascript.md](../../references/osascript.md) for shared patterns.
|
||||
|
||||
## Activate & Navigate
|
||||
|
||||
```bash
|
||||
# Activate Telegram
|
||||
osascript -e 'tell application "Telegram" to activate'
|
||||
sleep 1
|
||||
|
||||
# Search for a bot (Cmd+F or click search)
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "f" using command down
|
||||
delay 0.5
|
||||
keystroke "MyTestBot"
|
||||
delay 1
|
||||
key code 36 -- Enter to select
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
```
|
||||
|
||||
## Send Message to Bot
|
||||
|
||||
```bash
|
||||
# After navigating to bot chat, input is focused
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "/start"
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Send Long Message
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
tell application "Telegram" to activate
|
||||
delay 0.5
|
||||
set the clipboard to "Tell me about quantum computing in detail"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Verify Response
|
||||
|
||||
```bash
|
||||
sleep 10
|
||||
screencapture /tmp/telegram-bot-response.png
|
||||
```
|
||||
|
||||
## Telegram Bot API (programmatic alternative)
|
||||
|
||||
For sending messages directly to the bot's chat without UI:
|
||||
|
||||
```bash
|
||||
# Send message as the bot (for testing webhooks/responses)
|
||||
curl -s "https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage" \
|
||||
-d "chat_id=$CHAT_ID&text=test message"
|
||||
|
||||
# Get recent updates
|
||||
curl -s "https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/getUpdates?limit=5" | jq .
|
||||
```
|
||||
|
||||
## Script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/telegram/test-telegram-bot.sh "MyTestBot" "/start"
|
||||
./.agents/skills/agent-testing/bot/telegram/test-telegram-bot.sh "GPTBot" "Hello" 60
|
||||
```
|
||||
@@ -1,79 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-telegram-bot.sh — Send a message to a Telegram bot and capture the response
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/test-telegram-bot.sh <bot_or_chat> <message> [wait_seconds] [screenshot_path]
|
||||
#
|
||||
# bot_or_chat — Bot username or chat name to search for
|
||||
# message — Message to send to the bot
|
||||
# wait_seconds — Seconds to wait for bot response (default: 10)
|
||||
# screenshot_path — Output screenshot path (default: /tmp/telegram-bot-test.png)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Telegram desktop app installed and logged in
|
||||
# - Accessibility permission granted (System Preferences > Privacy > Accessibility)
|
||||
#
|
||||
# Notes:
|
||||
# - The app name may be "Telegram" or "Telegram Desktop" depending on installation
|
||||
# - Uses Cmd+F to search for the bot, then Enter to open the chat
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/test-telegram-bot.sh "MyTestBot" "/start"
|
||||
# ./scripts/test-telegram-bot.sh "MyTestBot" "Hello bot" 30
|
||||
# ./scripts/test-telegram-bot.sh "GPTBot" "/ask What is AI?" 60 /tmp/my-test.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
BOT="${1:?Usage: test-telegram-bot.sh <bot_or_chat> <message> [wait_seconds] [screenshot_path]}"
|
||||
MESSAGE="${2:?Usage: test-telegram-bot.sh <bot_or_chat> <message> [wait_seconds] [screenshot_path]}"
|
||||
WAIT="${3:-10}"
|
||||
SCREENSHOT="${4:-/tmp/telegram-bot-test.png}"
|
||||
|
||||
# Detect app name — "Telegram" or "Telegram Desktop"
|
||||
APP=""
|
||||
if osascript -e 'tell application "Telegram" to name' &>/dev/null; then
|
||||
APP="Telegram"
|
||||
elif osascript -e 'tell application "Telegram Desktop" to name' &>/dev/null; then
|
||||
APP="Telegram Desktop"
|
||||
else
|
||||
echo "[error] Telegram app not found. Install Telegram or Telegram Desktop."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[$APP] Activating..."
|
||||
osascript -e "tell application \"$APP\" to activate"
|
||||
sleep 1
|
||||
|
||||
echo "[$APP] Searching for: $BOT"
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
-- Search (Escape first to clear any existing state)
|
||||
key code 53 -- Escape
|
||||
delay 0.3
|
||||
keystroke "f" using command down
|
||||
delay 0.8
|
||||
keystroke "'"$BOT"'"
|
||||
delay 2
|
||||
key code 36 -- Enter to select first result
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
|
||||
echo "[$APP] Sending message: $MESSAGE"
|
||||
osascript -e '
|
||||
set the clipboard to "'"$MESSAGE"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter
|
||||
end tell
|
||||
'
|
||||
|
||||
echo "[$APP] Waiting ${WAIT}s for bot response..."
|
||||
sleep "$WAIT"
|
||||
|
||||
echo "[$APP] Capturing screenshot..."
|
||||
"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
|
||||
echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
|
||||
@@ -1,81 +0,0 @@
|
||||
# WeChat / 微信 Bot Testing
|
||||
|
||||
**App name:** `微信` or `WeChat` | **Process name:** `WeChat`
|
||||
|
||||
See [references/osascript.md](../../references/osascript.md) for shared patterns.
|
||||
|
||||
## Activate & Navigate
|
||||
|
||||
```bash
|
||||
# Activate WeChat
|
||||
osascript -e 'tell application "微信" to activate'
|
||||
sleep 1
|
||||
|
||||
# Search for a contact/bot (Cmd+F)
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "f" using command down
|
||||
delay 0.5
|
||||
keystroke "TestBot"
|
||||
delay 1
|
||||
key code 36 -- Enter to select
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
```
|
||||
|
||||
## Send Message
|
||||
|
||||
```bash
|
||||
# After navigating to a chat, the input is focused
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "Hello bot!"
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Send Long Message (clipboard)
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
tell application "微信" to activate
|
||||
delay 0.5
|
||||
set the clipboard to "Please help me with this task..."
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
## Verify Response
|
||||
|
||||
```bash
|
||||
sleep 10
|
||||
screencapture /tmp/wechat-bot-response.png
|
||||
```
|
||||
|
||||
## WeChat-Specific Notes
|
||||
|
||||
- WeChat macOS app name can be `微信` or `WeChat` depending on system language. Try both:
|
||||
```bash
|
||||
osascript -e 'tell application "微信" to activate' 2> /dev/null \
|
||||
|| osascript -e 'tell application "WeChat" to activate'
|
||||
```
|
||||
- WeChat uses **Enter** to send (not Cmd+Enter by default, but configurable)
|
||||
- For multi-line messages without sending, use **Shift+Enter**:
|
||||
```bash
|
||||
osascript -e 'tell application "System Events" to key code 36 using shift down'
|
||||
```
|
||||
- Always use clipboard paste for CJK characters — `keystroke` mangles non-ASCII
|
||||
|
||||
## Script
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/bot/wechat/test-wechat-bot.sh "文件传输助手" "test message" 5
|
||||
./.agents/skills/agent-testing/bot/wechat/test-wechat-bot.sh "MyBot" "Tell me a joke" 30
|
||||
```
|
||||
@@ -1,85 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# test-wechat-bot.sh — Send a message to a WeChat bot and capture the response
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/test-wechat-bot.sh <contact> <message> [wait_seconds] [screenshot_path]
|
||||
#
|
||||
# contact — Contact or bot name to search for
|
||||
# message — Message to send
|
||||
# wait_seconds — Seconds to wait for bot response (default: 10)
|
||||
# screenshot_path — Output screenshot path (default: /tmp/wechat-bot-test.png)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - WeChat (微信) desktop app installed and logged in
|
||||
# - Accessibility permission granted (System Preferences > Privacy > Accessibility)
|
||||
#
|
||||
# Notes:
|
||||
# - The app name may be "微信" or "WeChat" depending on system language
|
||||
# - WeChat sends on Enter by default; use Shift+Enter for newlines
|
||||
# - For Chinese text, always uses clipboard paste (keystroke can't handle CJK)
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/test-wechat-bot.sh "TestBot" "Hello"
|
||||
# ./scripts/test-wechat-bot.sh "文件传输助手" "test message" 5
|
||||
# ./scripts/test-wechat-bot.sh "MyBot" "Tell me a joke" 30 /tmp/my-test.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
CONTACT="${1:?Usage: test-wechat-bot.sh <contact> <message> [wait_seconds] [screenshot_path]}"
|
||||
MESSAGE="${2:?Usage: test-wechat-bot.sh <contact> <message> [wait_seconds] [screenshot_path]}"
|
||||
WAIT="${3:-10}"
|
||||
SCREENSHOT="${4:-/tmp/wechat-bot-test.png}"
|
||||
|
||||
# Detect app name — "微信" or "WeChat"
|
||||
APP=""
|
||||
if osascript -e 'tell application "微信" to name' &>/dev/null; then
|
||||
APP="微信"
|
||||
elif osascript -e 'tell application "WeChat" to name' &>/dev/null; then
|
||||
APP="WeChat"
|
||||
else
|
||||
echo "[error] WeChat app not found. Install 微信 (WeChat)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[$APP] Activating..."
|
||||
osascript -e "tell application \"$APP\" to activate"
|
||||
sleep 1
|
||||
|
||||
echo "[$APP] Searching for contact: $CONTACT"
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
-- Search (Cmd+F)
|
||||
keystroke "f" using command down
|
||||
delay 0.8
|
||||
end tell
|
||||
'
|
||||
# Use clipboard for contact name (supports CJK characters)
|
||||
osascript -e '
|
||||
set the clipboard to "'"$CONTACT"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 1.5
|
||||
key code 36 -- Enter to select first result
|
||||
end tell
|
||||
'
|
||||
sleep 2
|
||||
|
||||
echo "[$APP] Sending message: $MESSAGE"
|
||||
# Always use clipboard paste — keystroke can't handle CJK or special characters
|
||||
osascript -e '
|
||||
set the clipboard to "'"$MESSAGE"'"
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36 -- Enter to send
|
||||
end tell
|
||||
'
|
||||
|
||||
echo "[$APP] Waiting ${WAIT}s for bot response..."
|
||||
sleep "$WAIT"
|
||||
|
||||
echo "[$APP] Capturing screenshot..."
|
||||
"$SCRIPT_DIR/../../scripts/capture-app-window.sh" "$APP" "$SCREENSHOT"
|
||||
echo "[$APP] Done! Screenshot saved to $SCREENSHOT"
|
||||
@@ -1,142 +0,0 @@
|
||||
# CLI Backend Verification
|
||||
|
||||
Default surface for verifying **backend changes** (TRPC routers, services,
|
||||
models, migrations) end-to-end: fastest loop, text-assertable output, zero UI
|
||||
flakiness.
|
||||
|
||||
## When to use
|
||||
|
||||
- Verifying TRPC router / service / model changes end-to-end
|
||||
- Testing new API fields or response structure changes
|
||||
- Validating CLI command output after backend modifications
|
||||
- Debugging data flow issues between server and CLI
|
||||
|
||||
## Prerequisites
|
||||
|
||||
| Requirement | Details |
|
||||
| ------------ | --------------------------------------------------------------------------------- |
|
||||
| Dev server | `localhost:3010` — see [../references/dev-server.md](../references/dev-server.md) |
|
||||
| CLI source | `apps/cli/` — runs from source, no rebuild; standalone `node_modules` — run `pnpm install` inside `apps/cli/` (root install does not cover it) |
|
||||
| CLI dev mode | `LOBEHUB_CLI_HOME=.lobehub-dev` for isolated credentials |
|
||||
| Auth | Device Code Flow login — see [../references/auth.md](../references/auth.md) |
|
||||
|
||||
All CLI dev commands run from `apps/cli/`. Subsequent examples use `$CLI`:
|
||||
|
||||
```bash
|
||||
CLI="LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts"
|
||||
```
|
||||
|
||||
## Workflow
|
||||
|
||||
### Step 1 — Server up?
|
||||
|
||||
See [../references/dev-server.md](../references/dev-server.md) for the health
|
||||
check, start, and restart commands. Server-side code changes require a restart.
|
||||
|
||||
### Step 2 — Auth ready?
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/scripts/setup-auth.sh status
|
||||
```
|
||||
|
||||
If the CLI is not logged in, **the user must run the login themselves**
|
||||
(interactive browser authorization):
|
||||
|
||||
```bash
|
||||
cd apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server http://localhost:3010
|
||||
```
|
||||
|
||||
Credentials persist in `apps/cli/.lobehub-dev/`. Details:
|
||||
[../references/auth.md](../references/auth.md).
|
||||
|
||||
### Step 3 — Test with CLI commands
|
||||
|
||||
CLI runs from source, so CLI-side code changes take effect immediately without
|
||||
rebuilding:
|
||||
|
||||
```bash
|
||||
cd apps/cli
|
||||
$CLI <command>
|
||||
```
|
||||
|
||||
Capture output for the report as you go (e.g. `$CLI task list | tee "$DIR/assets/task-list.txt"`).
|
||||
|
||||
### Step 4 — Clean up test data
|
||||
|
||||
```bash
|
||||
$CLI task delete < id > -y
|
||||
$CLI agent delete < id > -y
|
||||
```
|
||||
|
||||
### Step 5 — Report
|
||||
|
||||
Finish with a structured report —
|
||||
[../references/report.md](../references/report.md). CLI evidence = exact
|
||||
command + trimmed output.
|
||||
|
||||
## Common testing patterns
|
||||
|
||||
### Task system
|
||||
|
||||
```bash
|
||||
$CLI task list
|
||||
$CLI task create -n "Root Task" -i "Test instruction"
|
||||
$CLI task create -n "Child Task" -i "Sub instruction" --parent T-1
|
||||
$CLI task view T-1
|
||||
$CLI task tree T-1
|
||||
$CLI task edit T-1 --status running
|
||||
$CLI task comment T-1 -m "Test comment"
|
||||
$CLI task delete T-1 -y
|
||||
```
|
||||
|
||||
### Agent system
|
||||
|
||||
```bash
|
||||
$CLI agent list
|
||||
$CLI agent view <agent-id>
|
||||
$CLI agent run <agent-id> -m "Test prompt"
|
||||
```
|
||||
|
||||
### Document & knowledge base
|
||||
|
||||
```bash
|
||||
$CLI doc list
|
||||
$CLI doc create -t "Test Doc" -c "Content here"
|
||||
$CLI doc view <doc-id>
|
||||
$CLI kb list
|
||||
$CLI kb tree <kb-id>
|
||||
```
|
||||
|
||||
### Model & provider
|
||||
|
||||
```bash
|
||||
$CLI model list
|
||||
$CLI provider list
|
||||
$CLI provider test <provider-id>
|
||||
```
|
||||
|
||||
## Dev-test cycle
|
||||
|
||||
```
|
||||
1. Make code changes (service/model/router/type)
|
||||
|
|
||||
2. Run unit tests (fast feedback)
|
||||
bunx vitest run --silent='passed-only' '<test-file>'
|
||||
|
|
||||
3. Restart dev server (if server-side changes — see dev-server.md)
|
||||
|
|
||||
4. CLI verification (end-to-end)
|
||||
$CLI <command>
|
||||
|
|
||||
5. Clean up test data + write the report
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Issue | Solution |
|
||||
| --------------------------- | ----------------------------------------------- |
|
||||
| `No authentication found` | Run `login --server http://localhost:3010` |
|
||||
| `UNAUTHORIZED` on API calls | Token expired; re-run login |
|
||||
| `ECONNREFUSED` | Dev server not running — see dev-server.md |
|
||||
| CLI shows old data/behavior | Server needs restart to pick up code changes |
|
||||
| Login opens wrong server | Must use `--server` flag (env var doesn't work) |
|
||||
@@ -1,257 +0,0 @@
|
||||
# agent-browser CLI Reference
|
||||
|
||||
Generic reference for the `agent-browser` CLI — automate Chromium-based apps (Electron, Chrome, web) via Chrome DevTools Protocol. LobeHub-specific patterns live in [../ui/electron.md](../ui/electron.md) and [../ui/web.md](../ui/web.md); authentication recipes live in [auth.md](./auth.md).
|
||||
|
||||
Use `agent-browser` to automate Chromium-based apps via Chrome DevTools Protocol.
|
||||
|
||||
Install via `npm i -g agent-browser`, `brew install agent-browser`, or `cargo install agent-browser`. Run `agent-browser install` to download Chrome. Run `agent-browser upgrade` to update.
|
||||
|
||||
## Core Workflow
|
||||
|
||||
Every browser automation follows this pattern:
|
||||
|
||||
1. **Navigate**: `agent-browser open <url>`
|
||||
2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
|
||||
3. **Interact**: Use refs to click, fill, select
|
||||
4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
|
||||
|
||||
```bash
|
||||
agent-browser open https://example.com/form
|
||||
agent-browser snapshot -i
|
||||
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
|
||||
|
||||
agent-browser fill @e1 "user@example.com"
|
||||
agent-browser fill @e2 "password123"
|
||||
agent-browser click @e3
|
||||
agent-browser wait --load networkidle
|
||||
agent-browser snapshot -i # Check result
|
||||
```
|
||||
|
||||
## Command Chaining
|
||||
|
||||
```bash
|
||||
# Chain open + wait + snapshot in one call
|
||||
agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
|
||||
```
|
||||
|
||||
Use `&&` when you don't need to read intermediate output. Run commands separately when you need to parse output first (e.g., snapshot to discover refs, then interact).
|
||||
|
||||
## Essential Commands
|
||||
|
||||
```bash
|
||||
# Navigation
|
||||
agent-browser open <url> # Navigate (aliases: goto, navigate)
|
||||
agent-browser close # Close browser
|
||||
agent-browser close --all # Close all active sessions
|
||||
|
||||
# Snapshot
|
||||
agent-browser snapshot -i # Interactive elements with refs (recommended)
|
||||
agent-browser snapshot -s "#selector" # Scope to CSS selector
|
||||
|
||||
# Interaction (use @refs from snapshot)
|
||||
agent-browser click @e1 # Click element
|
||||
agent-browser click @e1 --new-tab # Click and open in new tab
|
||||
agent-browser fill @e2 "text" # Clear and type text
|
||||
agent-browser type @e2 "text" # Type without clearing
|
||||
agent-browser select @e1 "option" # Select dropdown option
|
||||
agent-browser check @e1 # Check checkbox
|
||||
agent-browser press Enter # Press key
|
||||
agent-browser keyboard type "text" # Type at current focus (no selector)
|
||||
agent-browser keyboard inserttext "text" # Insert without key events
|
||||
agent-browser scroll down 500 # Scroll page
|
||||
agent-browser scroll down 500 --selector "div.content" # Scroll within container
|
||||
|
||||
# Get information
|
||||
agent-browser get text @e1 # Get element text
|
||||
agent-browser get url # Get current URL
|
||||
agent-browser get title # Get page title
|
||||
agent-browser get cdp-url # Get CDP WebSocket URL
|
||||
|
||||
# Wait
|
||||
agent-browser wait @e1 # Wait for element
|
||||
agent-browser wait --load networkidle # Wait for network idle
|
||||
agent-browser wait --url "**/page" # Wait for URL pattern
|
||||
agent-browser wait 2000 # Wait milliseconds
|
||||
agent-browser wait --text "Welcome" # Wait for text to appear
|
||||
agent-browser wait --fn "!document.body.innerText.includes('Loading...')" # Wait for text to disappear
|
||||
agent-browser wait "#spinner" --state hidden # Wait for element to disappear
|
||||
|
||||
# Downloads
|
||||
agent-browser download @e1 ./file.pdf # Click element to trigger download
|
||||
agent-browser wait --download ./output.zip # Wait for any download to complete
|
||||
|
||||
# Network
|
||||
agent-browser network requests # Inspect tracked requests
|
||||
agent-browser network requests --type xhr,fetch # Filter by resource type
|
||||
agent-browser network requests --method POST # Filter by HTTP method
|
||||
agent-browser network route "**/api/*" --abort # Block matching requests
|
||||
agent-browser network har start # Start HAR recording
|
||||
agent-browser network har stop ./capture.har # Stop and save HAR file
|
||||
|
||||
# Viewport & Device Emulation
|
||||
agent-browser set viewport 1920 1080 # Set viewport size (default: 1280x720)
|
||||
agent-browser set viewport 1920 1080 2 # 2x retina
|
||||
agent-browser set device "iPhone 14" # Emulate device (viewport + user agent)
|
||||
|
||||
# Capture
|
||||
agent-browser screenshot # Screenshot to temp dir
|
||||
agent-browser screenshot --full # Full page screenshot
|
||||
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
|
||||
agent-browser pdf output.pdf # Save as PDF
|
||||
|
||||
# Clipboard
|
||||
agent-browser clipboard read # Read text from clipboard
|
||||
agent-browser clipboard write "text" # Write text to clipboard
|
||||
agent-browser clipboard copy # Copy current selection
|
||||
agent-browser clipboard paste # Paste from clipboard
|
||||
|
||||
# Dialogs (alert, confirm, prompt, beforeunload)
|
||||
agent-browser dialog accept # Accept dialog
|
||||
agent-browser dialog accept "input" # Accept prompt dialog with text
|
||||
agent-browser dialog dismiss # Dismiss/cancel dialog
|
||||
agent-browser dialog status # Check if dialog is open
|
||||
|
||||
# Diff (compare page states)
|
||||
agent-browser diff snapshot # Compare current vs last snapshot
|
||||
agent-browser diff screenshot --baseline before.png # Visual pixel diff
|
||||
agent-browser diff url <url1> <url2> # Compare two pages
|
||||
|
||||
# Streaming
|
||||
agent-browser stream enable # Start WebSocket streaming
|
||||
agent-browser stream status # Inspect streaming state
|
||||
agent-browser stream disable # Stop streaming
|
||||
```
|
||||
|
||||
## Batch Execution
|
||||
|
||||
```bash
|
||||
echo '[
|
||||
["open", "https://example.com"],
|
||||
["snapshot", "-i"],
|
||||
["click", "@e1"],
|
||||
["screenshot", "result.png"]
|
||||
]' | agent-browser batch --json
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
```bash
|
||||
# Option 1: Auth vault (credentials stored encrypted)
|
||||
echo "$PASSWORD" | agent-browser auth save myapp --url https://app.example.com/login --username user --password-stdin
|
||||
agent-browser auth login myapp
|
||||
|
||||
# Option 2: Session name (auto-save/restore cookies + localStorage)
|
||||
agent-browser --session-name myapp open https://app.example.com/login
|
||||
agent-browser close # State auto-saved
|
||||
agent-browser --session-name myapp open https://app.example.com/dashboard # Auto-restored
|
||||
|
||||
# Option 3: Persistent profile
|
||||
agent-browser --profile ~/.myapp open https://app.example.com/login
|
||||
|
||||
# Option 4: State file
|
||||
agent-browser state save auth.json
|
||||
agent-browser state load auth.json
|
||||
```
|
||||
|
||||
### LobeHub dev server — inject better-auth cookie
|
||||
|
||||
`agent-browser --headed` on macOS can create an off-screen Chromium window, blocking manual login. For a local LobeHub dev server (e.g. `localhost:3010`), copy the `better-auth.session_token` cookie out of a **Network request** in the user's own Chrome DevTools and load it via `state load`. See [auth.md](./auth.md) for the full recipe.
|
||||
|
||||
## Semantic Locators (Alternative to Refs)
|
||||
|
||||
```bash
|
||||
agent-browser find text "Sign In" click
|
||||
agent-browser find label "Email" fill "user@test.com"
|
||||
agent-browser find role button click --name "Submit"
|
||||
agent-browser find placeholder "Search" type "query"
|
||||
agent-browser find testid "submit-btn" click
|
||||
```
|
||||
|
||||
## JavaScript Evaluation (eval)
|
||||
|
||||
```bash
|
||||
# Simple expressions
|
||||
agent-browser eval 'document.title'
|
||||
|
||||
# Complex JS: use --stdin with heredoc (RECOMMENDED)
|
||||
agent-browser eval --stdin << 'EVALEOF'
|
||||
JSON.stringify(
|
||||
Array.from(document.querySelectorAll("img"))
|
||||
.filter(i => !i.alt)
|
||||
.map(i => ({ src: i.src.split("/").pop(), width: i.width }))
|
||||
)
|
||||
EVALEOF
|
||||
|
||||
# Base64 encoding (avoids all shell escaping issues)
|
||||
agent-browser eval -b "$(echo -n 'document.title' | base64)"
|
||||
```
|
||||
|
||||
## Ref Lifecycle
|
||||
|
||||
Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after clicking links/buttons that navigate, form submissions, or dynamic content loading.
|
||||
|
||||
## Annotated Screenshots (Vision Mode)
|
||||
|
||||
```bash
|
||||
agent-browser screenshot --annotate
|
||||
# Output includes the image path and a legend:
|
||||
# [1] @e1 button "Submit"
|
||||
# [2] @e2 link "Home"
|
||||
agent-browser click @e2 # Click using ref from annotated screenshot
|
||||
```
|
||||
|
||||
## Parallel Sessions
|
||||
|
||||
```bash
|
||||
agent-browser --session site1 open https://site-a.com
|
||||
agent-browser --session site2 open https://site-b.com
|
||||
agent-browser session list
|
||||
```
|
||||
|
||||
## Connect to Existing Chrome
|
||||
|
||||
```bash
|
||||
agent-browser --auto-connect snapshot # Auto-discover running Chrome
|
||||
agent-browser --cdp 9222 snapshot # Explicit CDP port
|
||||
```
|
||||
|
||||
## iOS Simulator (Mobile Safari)
|
||||
|
||||
```bash
|
||||
agent-browser device list
|
||||
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
|
||||
agent-browser -p ios snapshot -i
|
||||
agent-browser -p ios tap @e1
|
||||
agent-browser -p ios swipe up
|
||||
agent-browser -p ios screenshot mobile.png
|
||||
agent-browser -p ios close
|
||||
```
|
||||
|
||||
## Observability Dashboard
|
||||
|
||||
```bash
|
||||
agent-browser dashboard install
|
||||
agent-browser dashboard start # Background server on port 4848
|
||||
agent-browser dashboard stop
|
||||
```
|
||||
|
||||
## Cloud Providers
|
||||
|
||||
Use `-p <provider>` to run against cloud browsers: `agentcore`, `browserbase`, `browserless`, `browseruse`, `kernel`.
|
||||
|
||||
## Browser Engine Selection
|
||||
|
||||
```bash
|
||||
agent-browser --engine lightpanda open example.com # 10x faster, 10x less memory
|
||||
```
|
||||
|
||||
## Gotchas
|
||||
|
||||
- **Daemon can get stuck** — if commands hang, `agent-browser close --all` or `pkill -f agent-browser` to reset
|
||||
- **HMR invalidates everything** — after code changes, refs break. Re-snapshot or restart
|
||||
- **`snapshot -i` doesn't find contenteditable** — use `snapshot -i -C` for rich text editors
|
||||
- **`fill` doesn't work on contenteditable** — use `type` for chat inputs
|
||||
- **Screenshots go to `~/.agent-browser/tmp/screenshots/`** — read them with the `Read` tool
|
||||
- **Dialogs block all commands** — if commands time out, check `agent-browser dialog status`
|
||||
- **Default timeout is 25s** — override with `AGENT_BROWSER_DEFAULT_TIMEOUT` (ms) or use explicit waits
|
||||
- **Shell quoting corrupts eval** — use `eval --stdin <<'EVALEOF'` for complex JS
|
||||
@@ -1,93 +0,0 @@
|
||||
# LobeHub gateway streaming + tab-switch test harness
|
||||
|
||||
Captures store + DOM state at 200ms intervals so we can prove or disprove
|
||||
claims like "切回 tab 后消息回到了很早以前". Built for gateway-mode chat but
|
||||
works for any LobeHub streaming session.
|
||||
|
||||
## Files
|
||||
|
||||
`scripts/agent-gateway/`
|
||||
|
||||
| File | Role |
|
||||
| --------------- | ---------------------------------------------------------------- |
|
||||
| `probe.js` | Injects a 200ms sampler + `__PROBE_EVENT` marker + `__switchTab` |
|
||||
| `probe-dump.js` | Stops the sampler and returns `{events, samples}` as JSON string |
|
||||
| `tab-switch.js` | Runs N round-trip switches between two tabs, marks each step |
|
||||
| `analyze.mjs` | Node post-processor: timeline + regression detection |
|
||||
|
||||
## Standard workflow
|
||||
|
||||
```bash
|
||||
# 1. Start Electron with CDP
|
||||
./.agents/skills/agent-testing/scripts/electron-dev.sh start
|
||||
|
||||
# 2. Navigate to a chat, switch runtime to Cloud Sandbox (gateway mode)
|
||||
|
||||
# 3. Install the probe + helpers
|
||||
agent-browser --cdp 9222 eval --stdin \
|
||||
< .agents/skills/agent-testing/scripts/agent-gateway/probe.js
|
||||
|
||||
# 4. Send a tool-call message — manually or via type+press
|
||||
agent-browser --cdp 9222 eval "window.__PROBE_EVENT('SENT')"
|
||||
|
||||
# 5. Run the multi-switch driver (auto-picks active tab as BACK and the
|
||||
# rightmost inactive tab as AWAY — edit ROUND_TRIPS / DWELL_MS in the
|
||||
# file if you want different timing)
|
||||
agent-browser --cdp 9222 eval --stdin \
|
||||
< .agents/skills/agent-testing/scripts/agent-gateway/tab-switch.js
|
||||
|
||||
# 6. Wait for streaming to finish, then dump
|
||||
agent-browser --cdp 9222 eval --stdin \
|
||||
< .agents/skills/agent-testing/scripts/agent-gateway/probe-dump.js \
|
||||
> /tmp/probe.json
|
||||
|
||||
# 7. Analyze
|
||||
node .agents/skills/agent-testing/scripts/agent-gateway/analyze.mjs /tmp/probe.json
|
||||
```
|
||||
|
||||
The analyzer prints three sections: EVENTS, TIMELINE, REGRESSIONS. If
|
||||
REGRESSIONS is non-empty it means content/reasoning/childN dropped on the
|
||||
same topic — the symptom users describe.
|
||||
|
||||
## What the probe tracks (and why)
|
||||
|
||||
`chat.messagesMap` only stores the top-level `assistantGroup` shell. The
|
||||
actual streamed content, reasoning, and tool calls live in
|
||||
`assistantGroup.children: AssistantContentBlock[]`. Any probe that only
|
||||
reads `m.content` / `m.reasoning` will see zeros throughout streaming and
|
||||
miss everything that matters. probe.js walks both levels and sums:
|
||||
|
||||
- `cT` total content length
|
||||
- `rT` total reasoning length
|
||||
- `toolT` total tool-call count
|
||||
- `childN` number of content blocks
|
||||
|
||||
Plus DOM-side signals (`domLen`, search/crawl indicator counts) so you can
|
||||
tell store-side regressions apart from render-side regressions.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- **Optimistic new-topic state.** Before the first chunk lands, messages
|
||||
live under the `<scope>_new` key with `tmp_*` ids and no `topicId` field.
|
||||
probe.js falls back to those when `activeTopicId` is null.
|
||||
- **Reasoning resets to 0 are not bugs.** When the assistant finishes
|
||||
thinking and starts tool-use or text, the streaming reasoning buffer
|
||||
empties and the finalised reasoning gets sealed into a completed block.
|
||||
Filter these out manually if needed.
|
||||
- **DOM length jitters by a handful of chars** because counters like "(10)"
|
||||
in tool-call labels change as results arrive. analyze.mjs only flags
|
||||
`domLen` drops greater than 100 chars to ignore that noise.
|
||||
- **Never identify tabs by innerText.** The active tab's text embeds a
|
||||
` · <agent name>` suffix, so a search like `'LobeHub Growth'` matches the
|
||||
active tab when the active agent happens to be LobeHub Growth — and you
|
||||
end up clicking the tab you're already on. probe.js uses the stable
|
||||
`data-contextmenu-trigger` attribute (a React `useId()` value that's set
|
||||
per-tab and survives focus changes) plus `data-active="true"` to mark
|
||||
the active one. Helpers exposed:
|
||||
`__listTabs()` / `__clickTabByKey(key)` / `__clickTabByIndex(i)` /
|
||||
`__activeTabKey()`.
|
||||
- **`tab-switch.js` fires-and-forgets.** The IIFE kicks off an async loop
|
||||
and returns immediately so the agent-browser CLI eval doesn't blow past
|
||||
its default 25 s timeout. Wait on the `SWITCH_LOOP_DONE` event marker
|
||||
before dumping. Re-running while a loop is in flight is refused — the
|
||||
chaotic data from overlapping runs is not worth debugging.
|
||||
@@ -1,123 +0,0 @@
|
||||
# Auth Setup for Local Agent Testing
|
||||
|
||||
**Auth is the gate for all automated testing.** Prepare and verify it before
|
||||
writing any test step. The one-stop entry point is:
|
||||
|
||||
```bash
|
||||
SCRIPT=".agents/skills/agent-testing/scripts/setup-auth.sh"
|
||||
|
||||
$SCRIPT status # check server + CLI + web auth readiness
|
||||
$SCRIPT cli # interactive CLI device-code login (must be run by the user)
|
||||
pbpaste | $SCRIPT web # inject a copied Cookie header into the agent-browser session
|
||||
$SCRIPT web-verify # live-check that the agent-browser session is authenticated
|
||||
```
|
||||
|
||||
`SERVER_URL` defaults to `http://localhost:3010` (this repo's `dev:next` port).
|
||||
Override it when testing against another server (e.g. `SERVER_URL=http://localhost:3011`
|
||||
in the cloud repo).
|
||||
|
||||
## Per-surface overview
|
||||
|
||||
| Surface | Mechanism | Persistence | Human interaction |
|
||||
| -------- | ---------------------------------------- | ----------------------------------------------------------------- | ----------------------------------------------- |
|
||||
| CLI | OIDC Device Code Flow | `apps/cli/.lobehub-dev/settings.json` | Yes — browser authorization, every token expiry |
|
||||
| Web | better-auth cookie injection | `~/.lobehub-agent-testing/web-state.json` + agent-browser session | Copy the Cookie header once per token rotation |
|
||||
| Electron | App's own login state | Electron user-data dir | Log in once manually in the app |
|
||||
| Bot | Native apps (Discord/WeChat/…) logged in | Each app's own session | Once per app |
|
||||
|
||||
## CLI — Device Code Flow
|
||||
|
||||
Credentials are isolated from the user's real CLI config via
|
||||
`LOBEHUB_CLI_HOME=.lobehub-dev` (kept inside `apps/cli/`, gitignored).
|
||||
|
||||
Login requires interactive browser authorization, so **the user must run it
|
||||
themselves** (e.g. via the `!` prefix in Claude Code):
|
||||
|
||||
```bash
|
||||
cd apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server http://localhost:3010
|
||||
```
|
||||
|
||||
- The `--server` flag is required — an env var does NOT work and login will hit
|
||||
the wrong server without it.
|
||||
- Check state without logging in: `setup-auth.sh status` (verifies
|
||||
`settings.json` exists and `serverUrl` matches).
|
||||
- `UNAUTHORIZED` on API calls means the token expired — re-run login.
|
||||
|
||||
## Web — better-auth cookie injection (agent-browser)
|
||||
|
||||
`agent-browser --headed` on macOS often creates the Chromium window off-screen —
|
||||
the user can't see or interact with it, so manual login inside the agent-browser
|
||||
session fails. Instead, copy the **better-auth session cookie** out of the
|
||||
user's own logged-in Chrome and inject it as a Playwright-style state file.
|
||||
|
||||
Do **not** use this on production URLs — only local dev. Treat the cookie as a
|
||||
secret: don't paste it into shared logs, PRs, or commit it anywhere.
|
||||
|
||||
### One-key path
|
||||
|
||||
1. Ask the user to copy the Cookie header **from a Network request, NOT
|
||||
`document.cookie`** (`document.cookie` cannot see HttpOnly cookies, which is
|
||||
exactly where better-auth puts its session):
|
||||
- Open the logged-in tab (`http://localhost:<port>/…`) in Chrome.
|
||||
- `Cmd+Option+I` → **Network** tab → refresh → click any same-origin request.
|
||||
- Under **Request Headers**, right-click the `Cookie:` line → **Copy value**.
|
||||
2. Inject and verify in one shot:
|
||||
|
||||
```bash
|
||||
pbpaste | ./.agents/skills/agent-testing/scripts/setup-auth.sh web
|
||||
```
|
||||
|
||||
The script filters the header down to the better-auth cookies
|
||||
(`better-auth.session_token`, `better-auth.state`), builds the Playwright
|
||||
`storageState` JSON, loads it into the `agent-browser` session (default name
|
||||
`lobehub-dev`), opens `SERVER_URL`, and asserts the URL is not `/signin`.
|
||||
|
||||
### Using the authenticated session
|
||||
|
||||
```bash
|
||||
agent-browser --session lobehub-dev open "http://localhost:3010/"
|
||||
agent-browser --session lobehub-dev snapshot -i | head -20
|
||||
# Look for the user's avatar/name in the sidebar, or absence of the signin form.
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
- `storageState` doesn't enforce the HttpOnly flag on load — the script stores
|
||||
cookies with `httpOnly: false`, which is fine for local dev and sidesteps a
|
||||
CDP-context quirk where HttpOnly cookies sometimes fail to attach.
|
||||
- The state file is kept at `~/.lobehub-agent-testing/web-state.json` so
|
||||
`setup-auth.sh status` can report web-auth readiness across sessions.
|
||||
|
||||
### Common failure modes
|
||||
|
||||
| Symptom | Cause | Fix |
|
||||
| --------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------- |
|
||||
| Still redirects to `/signin` after injection | User pasted from `document.cookie` → missed HttpOnly session | Re-pull from Network request Headers, not console |
|
||||
| Script reports `no better-auth cookies found` | Separator wrong, or user pasted URL-decoded value | Keep the raw `Cookie:` header as-is |
|
||||
| Login works briefly then expires | `better-auth.session_token` rotated (user logged out / signed in again) | Re-copy and re-inject |
|
||||
| Domain mismatch | Cookie domain must be `localhost` literally, no leading dot for local dev | — |
|
||||
|
||||
## Electron
|
||||
|
||||
The desktop app keeps its own persistent login state in its user-data
|
||||
directory — log in once manually inside the app and it survives restarts of
|
||||
`electron-dev.sh`. No injection needed. The standard check (do NOT hand-roll a
|
||||
store eval) once Electron is up with CDP:
|
||||
|
||||
```bash
|
||||
./.agents/skills/agent-testing/scripts/app-probe.sh auth
|
||||
# → {"ok":true,"isSignedIn":true,"userId":"user_xxx"}
|
||||
```
|
||||
|
||||
`setup-auth.sh status` runs this probe automatically when CDP 9222 is
|
||||
reachable.
|
||||
|
||||
## Scope
|
||||
|
||||
These recipes only cover **local dev** authentication. They do not:
|
||||
|
||||
- Work for production — production cookies are `Secure; HttpOnly; Domain=.lobehub.com`
|
||||
and must be delivered over HTTPS.
|
||||
- Replace real OAuth flows — tests that must exercise the login UI itself need a
|
||||
real Chromium with `--remote-debugging-port` or a bot account.
|
||||
- Flow cookies back to the user's Chrome — injection is one-way.
|
||||
@@ -1,55 +0,0 @@
|
||||
# Local Dev Server
|
||||
|
||||
Single source of truth for starting / restarting the backend that all test
|
||||
surfaces (CLI, Electron, Web) hit.
|
||||
|
||||
## Ports & modes
|
||||
|
||||
| Command | What it runs | Port |
|
||||
| ------------------- | --------------------------------------------------------- | --------------------------------- |
|
||||
| `pnpm run dev:next` | Next.js backend (API + auth) | `3010` |
|
||||
| `bun run dev` | Full-stack (Next.js + Vite SPA, via `devStartupSequence`) | `3010` (API) + SPA |
|
||||
| `bun run dev:spa` | Vite SPA only, proxies API to `3010` | `9876` (prints a Debug Proxy URL) |
|
||||
|
||||
In the **cloud repo** (where this repo is the `lobehub/` submodule) the dev
|
||||
server conventionally runs on `3011` — set `SERVER_URL=http://localhost:3011`
|
||||
for the scripts in this skill when testing there.
|
||||
|
||||
## Health check
|
||||
|
||||
```bash
|
||||
curl -s -o /dev/null -w '%{http_code}' http://localhost:3010/
|
||||
```
|
||||
|
||||
## Start / restart
|
||||
|
||||
```bash
|
||||
# Start (from repo root)
|
||||
pnpm run dev:next
|
||||
|
||||
# Restart — required to pick up server-side code changes
|
||||
lsof -ti:3010 | xargs kill
|
||||
pnpm run dev:next
|
||||
```
|
||||
|
||||
## When a server restart is needed
|
||||
|
||||
Next.js hot-reload may not pick up changes in workspace packages — restart when
|
||||
in doubt.
|
||||
|
||||
| Change location | Restart? |
|
||||
| ----------------------------------------------- | -------- |
|
||||
| `apps/server/src/` (routers, services, modules) | Yes |
|
||||
| `src/server/` (agent-hono, workflows-hono) | Yes |
|
||||
| `packages/database/` (models) | Yes |
|
||||
| `packages/types/` | Yes |
|
||||
| `packages/prompts/` | Yes |
|
||||
| `apps/cli/` (CLI runs from source) | No |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Issue | Solution |
|
||||
| ------------------------- | ------------------------------------------------------- |
|
||||
| `ECONNREFUSED` | Server not running — start it |
|
||||
| `EADDRINUSE` on the port | Already running — `lsof -ti:<port> \| xargs kill` first |
|
||||
| Stale data / old behavior | Server needs a restart to pick up code changes |
|
||||
@@ -1,217 +0,0 @@
|
||||
# osascript Common Patterns
|
||||
|
||||
Shared AppleScript / `osascript` patterns used by all platform bot tests. Read this first, then refer to the per-platform file for app-specific quirks.
|
||||
|
||||
## Core Patterns
|
||||
|
||||
### Activate an App
|
||||
|
||||
```bash
|
||||
osascript -e 'tell application "Discord" to activate'
|
||||
```
|
||||
|
||||
### Type Text
|
||||
|
||||
```bash
|
||||
# Type character by character (reliable, but slow for long text)
|
||||
osascript -e 'tell application "System Events" to keystroke "Hello world"'
|
||||
|
||||
# Press Enter
|
||||
osascript -e 'tell application "System Events" to key code 36'
|
||||
|
||||
# Press Tab
|
||||
osascript -e 'tell application "System Events" to key code 48'
|
||||
|
||||
# Press Escape
|
||||
osascript -e 'tell application "System Events" to key code 53'
|
||||
```
|
||||
|
||||
### Paste from Clipboard (fast, for long text)
|
||||
|
||||
```bash
|
||||
# Set clipboard and paste — much faster than keystroke for long messages
|
||||
osascript -e 'set the clipboard to "Your long message here"'
|
||||
osascript -e 'tell application "System Events" to keystroke "v" using command down'
|
||||
```
|
||||
|
||||
Or in one shot:
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
set the clipboard to "Your long message here"
|
||||
tell application "System Events" to keystroke "v" using command down
|
||||
'
|
||||
```
|
||||
|
||||
### Keyboard Shortcuts
|
||||
|
||||
```bash
|
||||
# Cmd+K (quick switcher in Discord/Slack)
|
||||
osascript -e 'tell application "System Events" to keystroke "k" using command down'
|
||||
|
||||
# Cmd+F (search)
|
||||
osascript -e 'tell application "System Events" to keystroke "f" using command down'
|
||||
|
||||
# Cmd+N (new message/chat)
|
||||
osascript -e 'tell application "System Events" to keystroke "n" using command down'
|
||||
|
||||
# Cmd+Shift+K (example: multi-modifier)
|
||||
osascript -e 'tell application "System Events" to keystroke "k" using {command down, shift down}'
|
||||
```
|
||||
|
||||
### Click at Position
|
||||
|
||||
```bash
|
||||
# Click at absolute screen coordinates
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
click at {500, 300}
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
### Get Window Info
|
||||
|
||||
```bash
|
||||
# Get window position and size
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
tell process "Discord"
|
||||
get {position, size} of window 1
|
||||
end tell
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
### Screenshot
|
||||
|
||||
```bash
|
||||
# Full screen
|
||||
screencapture /tmp/screenshot.png
|
||||
|
||||
# Interactive region select
|
||||
screencapture -i /tmp/screenshot.png
|
||||
|
||||
# Specific window (by window ID from CGWindowList)
|
||||
screencapture -l < WINDOW_ID > /tmp/screenshot.png
|
||||
```
|
||||
|
||||
To get window ID for a specific app:
|
||||
|
||||
```bash
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
tell process "Discord"
|
||||
get id of window 1
|
||||
end tell
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
### Read Accessibility Elements
|
||||
|
||||
```bash
|
||||
# Get all UI elements of the frontmost window (can be slow/large)
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
tell process "Discord"
|
||||
entire contents of window 1
|
||||
end tell
|
||||
end tell
|
||||
'
|
||||
|
||||
# Get a specific element's value
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
tell process "Discord"
|
||||
get value of text field 1 of window 1
|
||||
end tell
|
||||
end tell
|
||||
'
|
||||
```
|
||||
|
||||
> **Warning:** `entire contents` can be extremely slow on complex UIs. Prefer screenshots + `Read` tool for visual verification.
|
||||
|
||||
### Read Screen Text via Clipboard
|
||||
|
||||
For reading the latest message or response from an app:
|
||||
|
||||
```bash
|
||||
# Select all text in the focused area and copy
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "a" using command down
|
||||
keystroke "c" using command down
|
||||
end tell
|
||||
'
|
||||
sleep 0.5
|
||||
# Read clipboard
|
||||
pbpaste
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Bot Testing Workflow
|
||||
|
||||
Regardless of platform, the pattern is:
|
||||
|
||||
```bash
|
||||
APP_NAME="Discord" # or "Slack", "Telegram", "微信"
|
||||
CHANNEL="bot-testing"
|
||||
MESSAGE="Hello bot!"
|
||||
WAIT_SECONDS=10
|
||||
|
||||
# 1. Activate
|
||||
osascript -e "tell application \"$APP_NAME\" to activate"
|
||||
sleep 1
|
||||
|
||||
# 2. Navigate to channel/chat (via Quick Switcher or Search)
|
||||
osascript -e 'tell application "System Events" to keystroke "k" using command down'
|
||||
sleep 0.5
|
||||
osascript -e "tell application \"System Events\" to keystroke \"$CHANNEL\""
|
||||
sleep 1
|
||||
osascript -e 'tell application "System Events" to key code 36'
|
||||
sleep 2
|
||||
|
||||
# 3. Send message
|
||||
osascript -e "set the clipboard to \"$MESSAGE\""
|
||||
osascript -e '
|
||||
tell application "System Events"
|
||||
keystroke "v" using command down
|
||||
delay 0.3
|
||||
key code 36
|
||||
end tell
|
||||
'
|
||||
|
||||
# 4. Wait for bot response
|
||||
sleep "$WAIT_SECONDS"
|
||||
|
||||
# 5. Screenshot for verification
|
||||
screencapture /tmp/"${APP_NAME,,}"-bot-test.png
|
||||
echo "Result saved to /tmp/${APP_NAME,,}-bot-test.png"
|
||||
```
|
||||
|
||||
### Tips
|
||||
|
||||
- **Use clipboard paste** (`Cmd+V`) for messages containing special characters or long text — `keystroke` can mangle non-ASCII
|
||||
- **Add `delay`** between actions — apps need time to process UI events
|
||||
- **Screenshot for verification** — use `screencapture` + `Read` tool for visual checks
|
||||
- **Use a dedicated test channel/chat** — avoid polluting real conversations
|
||||
- **Check app name** — some apps have different names in different locales (e.g., `微信` vs `WeChat`)
|
||||
- **Accessibility permissions required** — System Events automation requires granting Accessibility access in System Preferences > Privacy & Security > Accessibility
|
||||
|
||||
---
|
||||
|
||||
## Gotchas
|
||||
|
||||
- **Accessibility permission required** — first run will prompt for access; grant it in System Preferences > Privacy & Security > Accessibility for Terminal / iTerm / Claude Code
|
||||
- **`keystroke` is slow for long text** — always use clipboard paste (`Cmd+V`) for messages over \~20 characters
|
||||
- **`keystroke` can mangle non-ASCII** — use clipboard paste for Chinese, emoji, or special characters
|
||||
- **`key code 36` is Enter** — this is the hardware key code, works regardless of keyboard layout
|
||||
- **`entire contents` is extremely slow** — avoid for complex UIs; use screenshots instead
|
||||
- **App name varies by locale** — `微信` vs `WeChat`, `企业微信` vs `WeCom`; handle both
|
||||
- **WeChat Enter sends immediately** — use `Shift+Enter` for newlines within a message
|
||||
- **Rate limiting** — don't send messages too fast; platforms may throttle or flag automated input
|
||||
- **Lark / 飞书 app name varies** — `Lark` (international) vs `飞书` (China mainland); scripts auto-detect
|
||||
- **QQ uses `Cmd+F` for search** — not `Cmd+K` like Discord/Slack/Lark
|
||||
- **Bot response times vary** — AI-powered bots may take 10-60s; use generous sleep values
|
||||
@@ -1,142 +0,0 @@
|
||||
# record-app-screen.sh
|
||||
|
||||
General-purpose screen recording tool for the Electron app. Captures CDP screenshots as video frames and gallery snapshots, then assembles into an MP4 on stop.
|
||||
|
||||
## Why CDP Screenshots Instead of ffmpeg Screen Capture
|
||||
|
||||
- **Works on any screen** — CDP screenshots capture the browser viewport directly, so external monitors, Retina scaling, and window positioning are all handled automatically
|
||||
- **No signal handling issues** — ffmpeg-static (npm) produces corrupt MP4 files when killed (missing moov atom). CDP screenshots avoid this entirely
|
||||
- **Consistent output** — Screenshots are resolution-independent and don't require crop coordinate calculations
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Start recording (Electron must be running with CDP)
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh start [output_name]
|
||||
|
||||
# Stop recording and assemble video
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh stop
|
||||
|
||||
# Check if recording is active
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh status
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
| Argument | Default | Description |
|
||||
| ------------- | --------------------------- | -------------------------- |
|
||||
| `output_name` | `recording-YYYYMMDD-HHMMSS` | Base name for output files |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| ---------------------- | ------- | -------------------------------------- |
|
||||
| `CDP_PORT` | `9222` | Chrome DevTools Protocol port |
|
||||
| `SCREENSHOT_INTERVAL` | `3` | Seconds between gallery screenshots |
|
||||
| `VIDEO_FRAME_INTERVAL` | `0.5` | Seconds between video frames (\~2 fps) |
|
||||
|
||||
## Output Structure
|
||||
|
||||
```
|
||||
.records/
|
||||
<name>.mp4 # Video assembled from frames (~2 fps)
|
||||
<name>/ # Gallery screenshots (every 3s)
|
||||
0000.png
|
||||
0001.png
|
||||
0002.png
|
||||
...
|
||||
```
|
||||
|
||||
The `.records/` directory is at the project root and is gitignored.
|
||||
|
||||
## How It Works
|
||||
|
||||
### Start
|
||||
|
||||
1. Creates two background loops:
|
||||
- **Video frames** — `agent-browser screenshot` every `VIDEO_FRAME_INTERVAL` seconds into a temp directory (`/tmp/record-frames-XXXXXX/`)
|
||||
- **Gallery screenshots** — `agent-browser screenshot` every `SCREENSHOT_INTERVAL` seconds into `.records/<name>/`
|
||||
2. Saves PIDs and paths to `/tmp/record-app-screen.pids` and `/tmp/record-app-screen.state`
|
||||
|
||||
### Stop
|
||||
|
||||
1. Kills both background loops
|
||||
2. Assembles video frames into MP4 using ffmpeg:
|
||||
```
|
||||
ffmpeg -framerate 2 -i frame_%06d.png -c:v libx264 -crf 23 -pix_fmt yuv420p <output>.mp4
|
||||
```
|
||||
3. Cleans up temp frame directory
|
||||
4. Reports file sizes and paths
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Basic Test Recording
|
||||
|
||||
```bash
|
||||
# Start Electron
|
||||
.agents/skills/agent-testing/scripts/electron-dev.sh start
|
||||
|
||||
# Start recording
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh start my-test
|
||||
|
||||
# Run automation
|
||||
agent-browser --cdp 9222 click @e61
|
||||
agent-browser --cdp 9222 type @e42 "hello"
|
||||
agent-browser --cdp 9222 press Enter
|
||||
sleep 10
|
||||
|
||||
# Stop and get results
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh stop
|
||||
# → .records/my-test.mp4 + .records/my-test/*.png
|
||||
```
|
||||
|
||||
### Gateway Streaming Demo
|
||||
|
||||
```bash
|
||||
.agents/skills/agent-testing/scripts/electron-dev.sh start
|
||||
|
||||
# Inject gateway URL
|
||||
agent-browser --cdp 9222 eval --stdin << 'EOF'
|
||||
(function() {
|
||||
var store = window.global_serverConfigStore;
|
||||
store.setState({ serverConfig: { ...store.getState().serverConfig,
|
||||
agentGatewayUrl: 'https://agent-gateway.lobehub.com' } });
|
||||
return 'ready';
|
||||
})()
|
||||
EOF
|
||||
|
||||
# Record
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh start gateway-demo
|
||||
|
||||
# Navigate to agent, send message, wait for completion...
|
||||
# (automation commands here)
|
||||
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh stop
|
||||
open .records/gateway-demo.mp4
|
||||
```
|
||||
|
||||
### Check Active Recording
|
||||
|
||||
```bash
|
||||
.agents/skills/agent-testing/scripts/record-app-screen.sh status
|
||||
# [record] Active recording
|
||||
# Frames: 42 captured (running: yes)
|
||||
# Screenshots: 14 captured (running: yes)
|
||||
# Output: .records/my-test.mp4
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- **ffmpeg** — For video assembly. Install via `bun add -g ffmpeg-static` or `brew install ffmpeg`
|
||||
- **agent-browser** — For CDP screenshots. Install via `npm i -g agent-browser`
|
||||
- **Electron app running** — With CDP enabled (use `electron-dev.sh start`)
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Problem | Solution |
|
||||
| ----------------------------------- | ------------------------------------------------------------------------------------------------------------ |
|
||||
| "No active recording found" on stop | PID file was cleaned up. Check if background processes are still running with `ps aux \| grep agent-browser` |
|
||||
| "A recording is already active" | Run `stop` first, or manually clean: `rm /tmp/record-app-screen.pids /tmp/record-app-screen.state` |
|
||||
| Video is 0 bytes | No frames were captured. Ensure Electron is running and CDP port is correct |
|
||||
| Screenshots are blank/white | SPA may not have loaded yet. Wait for `electron-dev.sh` to report "Renderer ready" |
|
||||
| ffmpeg assembly fails | Check `/tmp/ffmpeg-assemble.log`. Ensure ffmpeg is installed and frames exist |
|
||||
@@ -1,156 +0,0 @@
|
||||
# Structured Test Reports
|
||||
|
||||
Every automated test session ends with a structured, evidence-backed report.
|
||||
A chat-only summary is not an acceptable deliverable: the report is what the
|
||||
user (or a reviewer, or a later agent) audits without replaying the session.
|
||||
|
||||
## Location & layout
|
||||
|
||||
Reports live under `.records/reports/` (gitignored, like all `.records/`
|
||||
output):
|
||||
|
||||
```
|
||||
.records/reports/<YYYYMMDD-HHMMSS>-<slug>/
|
||||
├── report.md # human-readable report (case table with inline screenshots, verdict)
|
||||
├── result.json # machine-readable results (pass/fail counts, score)
|
||||
└── assets/ # evidence: screenshots, HAR files, CLI transcripts
|
||||
```
|
||||
|
||||
## Workflow
|
||||
|
||||
1. **Scaffold up front** — before running the first test step:
|
||||
|
||||
```bash
|
||||
DIR=$(./.agents/skills/agent-testing/scripts/report-init.sh < slug > "<title>")
|
||||
```
|
||||
|
||||
The script creates the directory, pre-fills branch / commit / date in both
|
||||
files, and prints the directory path. The scaffold uses the compact report
|
||||
shape below; translate its headings and table labels to the user's language
|
||||
before delivery if needed.
|
||||
|
||||
2. **Collect evidence as you test** — every asserted behavior gets one evidence
|
||||
item in `$DIR/assets/`:
|
||||
- UI (static state): `agent-browser screenshot` or `capture-app-window.sh`,
|
||||
then **verify the screenshot with the Read tool before citing it** —
|
||||
never cite an image you haven't looked at.
|
||||
- UI (time-based behavior): **screenshot vs GIF is a judgment you must
|
||||
make per case.** If the assertion is about change over time — streaming
|
||||
output, a ticking timer, loading/progress states, animations,
|
||||
appear/disappear transitions — a static screenshot cannot prove it.
|
||||
Record a frame sequence and synthesize a GIF:
|
||||
|
||||
```bash
|
||||
# start recording (background), trigger the behavior, wait for it to finish
|
||||
../scripts/record-gif.sh "$DIR/assets/case2-streaming.gif" 12 2 &
|
||||
GIF_PID=$!
|
||||
# ... drive the scenario ...
|
||||
wait $GIF_PID
|
||||
```
|
||||
|
||||
Embed it like an image: ``. Verify
|
||||
at least the first/last frames visually (Read the GIF) before citing.
|
||||
|
||||
- CLI: exact command + trimmed output (`$CLI task list | tee "$DIR/assets/task-list.txt"`).
|
||||
- Network: `agent-browser network requests` dumps or HAR files.
|
||||
|
||||
3. **Fill `report.md` as you go** — don't reconstruct from memory at the end.
|
||||
The primary evidence belongs in the case table itself: each row should pair
|
||||
the assertion with the screenshot/GIF/link that proves it, so readers can
|
||||
scan the result without jumping between sections.
|
||||
|
||||
4. **Set the verdict** in both `report.md` and `result.json`, then link the
|
||||
report directory in your final answer to the user.
|
||||
|
||||
## Report language (hard rule)
|
||||
|
||||
**`report.md` MUST be written in the language the user is conversing in** —
|
||||
the whole file, headings included. If the conversation is in Chinese, the
|
||||
report is in Chinese; do not mix English prose into it. The scaffold headings
|
||||
are placeholders — translate them when filling if the user is not conversing in
|
||||
the scaffold language. Exceptions that stay as-is: code/commands, identifiers,
|
||||
log excerpts, and `result.json` (its keys and status values are machine-read
|
||||
and stay English; the `title` and case `name` fields follow the user's
|
||||
language).
|
||||
|
||||
## report.md sections
|
||||
|
||||
Default report shape:
|
||||
|
||||
| Section | Content |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------- |
|
||||
| **Scope** | What changed / what is being verified; branch, commit, date, surface, entry URL/page, focus |
|
||||
| **Cases** | Compact table: `# \| Case \| Result \| Key observation \| Evidence` |
|
||||
| **Verdict** | Overall verdict first (`pass` / `partial` / `fail`), then the concise reasons and follow-ups |
|
||||
| **Verification** | Commands or automated checks run in this session, with trimmed results |
|
||||
| **Score** | Pass/fail/blocked counts, optional 0–100 score |
|
||||
|
||||
The case table is the main reading surface. Prefer one clear row per user
|
||||
scenario or regression assertion, and put the screenshot/GIF directly in the
|
||||
`Evidence` cell:
|
||||
|
||||
```markdown
|
||||
| # | Case | Result | Key observation | Evidence |
|
||||
| --- | ------------------------ | ------ | ----------------------------------------------------------------- | ------------------------------------------------ |
|
||||
| 1 | Create a new page | pass | Title and body persisted after refresh |  |
|
||||
| 2 | Respect requested length | fail | Requested about 600 Chinese characters; final body was about 1286 |  |
|
||||
```
|
||||
|
||||
Avoid the old wide table with separate `steps`, `expected`, and `actual`
|
||||
columns unless the test is purely non-visual and truly needs that breakdown.
|
||||
For UI reports, those columns make screenshot-backed reading harder. Put
|
||||
procedural detail in the row's key observation only when it changes the
|
||||
interpretation of the result.
|
||||
|
||||
Use an extra evidence/detail section only when the inline table cannot carry
|
||||
the material cleanly, such as long CLI transcripts, HAR summaries, or multiple
|
||||
screenshots for one case. In that situation, keep the table evidence cell as a
|
||||
short inline proof or link, then put the longer material under `Verification`
|
||||
or a brief `Additional Evidence` section.
|
||||
|
||||
Status values: `pass` / `fail` / `blocked` (couldn't run — e.g. auth or env
|
||||
missing; a blocked case is not a pass).
|
||||
|
||||
## result.json schema
|
||||
|
||||
```json
|
||||
{
|
||||
"branch": "feat/task-tree",
|
||||
"cases": [
|
||||
{
|
||||
"id": "1",
|
||||
"name": "task tree returns nested children",
|
||||
"surface": "cli",
|
||||
"status": "pass",
|
||||
"evidence": ["assets/task-tree.txt"]
|
||||
}
|
||||
],
|
||||
"commit": "abc1234",
|
||||
"createdAt": "2026-06-11T15:30:00+08:00",
|
||||
"summary": {
|
||||
"total": 1,
|
||||
"passed": 1,
|
||||
"failed": 0,
|
||||
"blocked": 0,
|
||||
"score": 100,
|
||||
"verdict": "pass"
|
||||
},
|
||||
"surfaces": ["cli"],
|
||||
"title": "Verify task tree API"
|
||||
}
|
||||
```
|
||||
|
||||
`score` is optional — use it when the verdict has a subjective component (UI
|
||||
polish, copy quality); omit it for purely binary runs. `verdict` is the single
|
||||
word the user reads first: `pass`, `fail`, or `partial`.
|
||||
|
||||
## Rules
|
||||
|
||||
- **No evidence, no claim** — every `pass`/`fail` in the case table must link
|
||||
at least one asset.
|
||||
- **Screenshots must be visually verified** with the Read tool before being
|
||||
cited.
|
||||
- **Report failures faithfully** — a failing case with clear evidence is a good
|
||||
report; a vague green one is not.
|
||||
- If coverage was cut (cases skipped, surfaces not exercised), say so in the
|
||||
Verdict section — silent truncation reads as "covered everything".
|
||||
@@ -1,243 +0,0 @@
|
||||
// Analyzer for probe-events dumps. Reads a JSON file produced by `run.ts dump`
|
||||
// and prints a layered breakdown:
|
||||
//
|
||||
// 1. STREAM EVENTS — every non-chunk WS/SSE event in receipt order
|
||||
// 2. CHUNKS SUMMARY — collapsed per-step chunk counts (otherwise floods)
|
||||
// 3. ACTION CALLS — replaceMessages / refreshMessages / MARK:* with stack
|
||||
// 4. CORRELATION — calls ↔ nearest stream event within ±300ms
|
||||
// 5. PER-KEY ASSISTANT GROWTH — for each messagesMap key, when the leading
|
||||
// assistant message's cLen / rLen actually moves (this is what reveals
|
||||
// "chunks arrived but the message never grew" regressions)
|
||||
// 6. ROLLBACKS — msgN / childN / role drops in the active-topic timeline
|
||||
//
|
||||
// Usage:
|
||||
// bun run .agents/skills/agent-testing/scripts/agent-gateway/analyze-events.ts <dump.json>
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
|
||||
import type {
|
||||
ProbeActionCall,
|
||||
ProbeDump,
|
||||
ProbeMessageSummary,
|
||||
ProbeStreamEvent,
|
||||
ProbeTimelineSample,
|
||||
} from './types';
|
||||
|
||||
const file = process.argv[2];
|
||||
if (!file) {
|
||||
console.error('usage: bun run analyze-events.ts <dump.json>');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const raw = readFileSync(file, 'utf8');
|
||||
// agent-browser eval --stdin wraps return values in quotes when the value is
|
||||
// a string — so the JSON file may be double-encoded depending on how it was
|
||||
// captured. Handle both.
|
||||
const parsedOnce = JSON.parse(raw) as ProbeDump | string;
|
||||
const dump: ProbeDump = typeof parsedOnce === 'string' ? JSON.parse(parsedOnce) : parsedOnce;
|
||||
|
||||
const { streamEvents = [], actionCalls = [], timeline = [] } = dump;
|
||||
|
||||
const pad = (v: unknown, n: number) => String(v).padStart(n);
|
||||
|
||||
// ── META ───────────────────────────────────────────────────────────
|
||||
console.log('=== META ===');
|
||||
console.log(` events: ${streamEvents.length}`);
|
||||
console.log(` calls: ${actionCalls.length}`);
|
||||
console.log(` timeline: ${timeline.length}`);
|
||||
|
||||
// ── 1. STREAM EVENTS (non-chunk) ───────────────────────────────────
|
||||
const nonChunkEvents = streamEvents.filter((e) => e.type !== 'stream_chunk');
|
||||
const chunkEvents = streamEvents.filter((e) => e.type === 'stream_chunk');
|
||||
|
||||
console.log(
|
||||
`\n=== STREAM EVENTS (${nonChunkEvents.length} non-chunk + ${chunkEvents.length} chunks elided) ===`,
|
||||
);
|
||||
for (const e of nonChunkEvents) {
|
||||
const dataStr = e.dataKeys?.length ? ` [${e.dataKeys.join(',')}]` : '';
|
||||
const data = e.data as Record<string, unknown> | undefined;
|
||||
const uiHint = data?.uiMessagesPreview
|
||||
? ` uiPreview=${JSON.stringify(data.uiMessagesPreview)}`
|
||||
: data?.uiMessagesTotal
|
||||
? ` uiTotal=${data.uiMessagesTotal}`
|
||||
: '';
|
||||
const phaseHint = data?.phase ? ` phase=${data.phase}` : '';
|
||||
const extra = e.serverType ? ` serverType=${e.serverType}` : '';
|
||||
console.log(
|
||||
` t=${pad(e.t, 7)} [${(e.transport ?? '?').padEnd(3)}] step=${pad(e.stepIndex ?? '-', 2)} ` +
|
||||
`type=${(e.type ?? '').padEnd(22)} op=${e.opIdTail ?? '-'}${phaseHint}${uiHint}${extra}${dataStr}`,
|
||||
);
|
||||
}
|
||||
|
||||
// ── 2. CHUNK SUMMARY ───────────────────────────────────────────────
|
||||
console.log('\n=== CHUNKS SUMMARY (per step / chunkType) ===');
|
||||
const chunkBuckets = new Map<string, { count: number; firstT: number; lastT: number }>();
|
||||
for (const c of chunkEvents) {
|
||||
const data = c.data as Record<string, unknown> | undefined;
|
||||
const ct = (data?.chunkType as string | undefined) ?? '?';
|
||||
const key = `step=${c.stepIndex ?? '-'} chunkType=${ct.padEnd(8)} op=${c.opIdTail}`;
|
||||
const slot = chunkBuckets.get(key);
|
||||
if (slot) {
|
||||
slot.count += 1;
|
||||
slot.lastT = c.t;
|
||||
} else {
|
||||
chunkBuckets.set(key, { count: 1, firstT: c.t, lastT: c.t });
|
||||
}
|
||||
}
|
||||
for (const [k, v] of chunkBuckets) {
|
||||
console.log(` ${k} count=${pad(v.count, 4)} t=${pad(v.firstT, 7)}..${pad(v.lastT, 7)}`);
|
||||
}
|
||||
|
||||
// ── 3. ACTION CALLS ───────────────────────────────────────────────
|
||||
console.log('\n=== ACTION CALLS (replace/refresh/MARK) ===');
|
||||
for (const c of actionCalls) {
|
||||
if (c.name?.startsWith('MARK:')) {
|
||||
console.log(` t=${pad(c.t, 7)} ${c.name}`);
|
||||
continue;
|
||||
}
|
||||
const snapshot = (c.args as any)?.snapshot as
|
||||
| Array<{ id: string; role: string; cLen: number; rLen: number }>
|
||||
| undefined;
|
||||
const snapStr = snapshot?.length
|
||||
? ' snapshot=' + snapshot.map((m) => `${m.id}:${m.role}/c${m.cLen}/r${m.rLen}`).join(' | ')
|
||||
: '';
|
||||
const summary =
|
||||
c.name === 'replaceMessages'
|
||||
? `count=${c.args?.count} action=${(c.args?.params as any)?.action ?? '-'}${snapStr}`
|
||||
: c.name === 'refreshMessages'
|
||||
? `ctx=${JSON.stringify(c.args?.context)}`
|
||||
: c.error
|
||||
? `error=${c.error}`
|
||||
: '';
|
||||
console.log(` t=${pad(c.t, 7)} ${c.name.padEnd(20)} ${summary}`);
|
||||
if (c.stack) {
|
||||
const frames = c.stack
|
||||
.split(' ← ')
|
||||
.filter((f) => !!f && !f.includes('Object.<anonymous>'))
|
||||
.slice(0, 3);
|
||||
for (const f of frames) console.log(` ↳ ${f}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 4. CORRELATION ────────────────────────────────────────────────
|
||||
function nearestEventForCall(
|
||||
call: ProbeActionCall,
|
||||
windowMs = 300,
|
||||
): { event: ProbeStreamEvent; delta: number } | null {
|
||||
let best: ProbeStreamEvent | null = null;
|
||||
let bestDelta = Infinity;
|
||||
for (const e of streamEvents) {
|
||||
const d = Math.abs(e.t - call.t);
|
||||
if (d < bestDelta && d <= windowMs) {
|
||||
bestDelta = d;
|
||||
best = e;
|
||||
}
|
||||
}
|
||||
return best ? { event: best, delta: bestDelta } : null;
|
||||
}
|
||||
|
||||
console.log('\n=== CORRELATION (replace/refresh ↔ nearest event within ±300ms) ===');
|
||||
for (const c of actionCalls) {
|
||||
if (c.name !== 'refreshMessages' && c.name !== 'replaceMessages') continue;
|
||||
const hit = nearestEventForCall(c);
|
||||
if (hit) {
|
||||
const phase = (hit.event.data as Record<string, unknown> | undefined)?.phase;
|
||||
console.log(
|
||||
` t=${pad(c.t, 7)} ${c.name.padEnd(16)} ← Δ${pad(hit.delta, 4)}ms ${hit.event.type}` +
|
||||
(phase ? ` phase=${phase}` : ''),
|
||||
);
|
||||
} else {
|
||||
console.log(` t=${pad(c.t, 7)} ${c.name.padEnd(16)} ← (no event nearby — external trigger)`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 5. PER-KEY ASSISTANT GROWTH ───────────────────────────────────
|
||||
// For each messagesMap key, find the trailing assistant message and report
|
||||
// the points in time where its cLen / rLen actually changed. If the timeline
|
||||
// shows chunks arriving but the assistant cLen never moves, that's the
|
||||
// signature of "dispatch queue blocked / messageId mismatch".
|
||||
console.log('\n=== PER-KEY ASSISTANT GROWTH ===');
|
||||
const keysEverSeen = new Set<string>();
|
||||
for (const s of timeline) for (const k of Object.keys(s.byKey ?? {})) keysEverSeen.add(k);
|
||||
|
||||
for (const key of keysEverSeen) {
|
||||
console.log(`\n key=${key}`);
|
||||
let lastSig: string | null = null;
|
||||
for (const s of timeline) {
|
||||
const slot = s.byKey?.[key];
|
||||
if (!slot) continue;
|
||||
const last = slot.msgs.at(-1) as ProbeMessageSummary | undefined;
|
||||
if (!last) continue;
|
||||
const sig = `${last.id}|c${last.cLen}|r${last.rLen}|n${slot.n}`;
|
||||
if (sig === lastSig) continue;
|
||||
lastSig = sig;
|
||||
console.log(
|
||||
` t=${pad(s.t, 7)} msgN=${pad(slot.n, 3)} ` +
|
||||
`lastAssistant=${last.id} cLen=${pad(last.cLen, 5)} rLen=${pad(last.rLen, 5)}` +
|
||||
` runOps=${s.runOps}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 6. ROLLBACKS (active-topic msgN / childN / role drops) ─────────
|
||||
console.log('\n=== ROLLBACKS (active-topic msgN / childN / role drops) ===');
|
||||
let prev: ProbeTimelineSample | null = null;
|
||||
const rollbacks: Array<{ t: number; topic: string | null; drops: string[] }> = [];
|
||||
|
||||
const flatten = (s: ProbeTimelineSample) => {
|
||||
if (!s.activeTopic) return [];
|
||||
return Object.entries(s.byKey ?? {})
|
||||
.filter(([k]) => k.includes(s.activeTopic!))
|
||||
.flatMap(([, v]) => v.msgs);
|
||||
};
|
||||
|
||||
for (const s of timeline) {
|
||||
if (s.err) {
|
||||
prev = null;
|
||||
continue;
|
||||
}
|
||||
if (!prev || prev.activeTopic !== s.activeTopic) {
|
||||
prev = s;
|
||||
continue;
|
||||
}
|
||||
const prevMsgs = flatten(prev);
|
||||
const curMsgs = flatten(s);
|
||||
const drops: string[] = [];
|
||||
|
||||
if (curMsgs.length < prevMsgs.length) drops.push(`msgN ${prevMsgs.length}→${curMsgs.length}`);
|
||||
|
||||
let prevChild = 0;
|
||||
let curChild = 0;
|
||||
for (const m of prevMsgs) prevChild += m.chN ?? 0;
|
||||
for (const m of curMsgs) curChild += m.chN ?? 0;
|
||||
if (curChild < prevChild) drops.push(`childN ${prevChild}→${curChild}`);
|
||||
|
||||
const prevById = new Map(prevMsgs.map((m) => [m.id, m]));
|
||||
for (const m of curMsgs) {
|
||||
const pr = prevById.get(m.id);
|
||||
if (!pr) continue;
|
||||
if (m.cLen < pr.cLen) drops.push(`cLen[${m.id}] ${pr.cLen}→${m.cLen}`);
|
||||
if (m.rLen < pr.rLen) drops.push(`rLen[${m.id}] ${pr.rLen}→${m.rLen}`);
|
||||
}
|
||||
|
||||
if (drops.length) rollbacks.push({ t: s.t, topic: s.activeTopic, drops });
|
||||
prev = s;
|
||||
}
|
||||
|
||||
if (rollbacks.length === 0) {
|
||||
console.log(' (none)');
|
||||
} else {
|
||||
for (const r of rollbacks) {
|
||||
const nearEvent = streamEvents
|
||||
.filter((e) => Math.abs(e.t - r.t) <= 300)
|
||||
.map((e) => `${e.type}${(e.data as any)?.phase ? ':' + (e.data as any).phase : ''}`);
|
||||
const nearCall = actionCalls
|
||||
.filter((c) => Math.abs(c.t - r.t) <= 300 && !c.name?.startsWith('MARK:'))
|
||||
.map((c) => c.name);
|
||||
console.log(
|
||||
` t=${pad(r.t, 7)} topic=${r.topic} ${r.drops.join(' | ')}` +
|
||||
(nearEvent.length ? ` near-event:[${nearEvent.join(',')}]` : '') +
|
||||
(nearCall.length ? ` near-call:[${nearCall.join(',')}]` : ''),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,119 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
// Analyze a probe dump captured by probe.js + probe-dump.js.
|
||||
//
|
||||
// node analyze.mjs /tmp/probe.json
|
||||
//
|
||||
// Prints:
|
||||
// 1. EVENTS — user-action markers with their relative timestamps
|
||||
// 2. TIMELINE — periodic samples (~1 per second + event-adjacent samples)
|
||||
// showing every interesting field; columns:
|
||||
// t(ms) | runOps | msgN | childN | content | reasoning | tools | domLen | search | crawl | topic | event
|
||||
// 3. REGRESSIONS — every place a tracked counter *dropped* on the same
|
||||
// topic between adjacent samples. A "true" UI rollback shows up as a
|
||||
// drop in content/reasoning/tools/childN/domLen without a topic change.
|
||||
//
|
||||
// Whitelisted transitions (not flagged):
|
||||
// - topic change → all drops expected (focus moved away)
|
||||
// - reasoning length 0 after content starts → reasoning gets sealed into a
|
||||
// completed sub-block; the parent's running reasoning resets to ''.
|
||||
// - msgN drop when topic transitions from `_new` placeholder to a real id.
|
||||
|
||||
import fs from 'node:fs';
|
||||
|
||||
const file = process.argv[2];
|
||||
if (!file) {
|
||||
console.error('usage: node analyze.mjs <probe.json>');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const raw = JSON.parse(fs.readFileSync(file, 'utf8'));
|
||||
// probe-dump.js wraps the payload in JSON.stringify so agent-browser returns
|
||||
// it as a single quoted string. Unwrap.
|
||||
const data = typeof raw === 'string' ? JSON.parse(raw) : raw;
|
||||
const { events, samples } = data;
|
||||
|
||||
const fmt = {
|
||||
pad(v, n) {
|
||||
return String(v).padStart(n);
|
||||
},
|
||||
};
|
||||
|
||||
console.log('=== EVENTS ===');
|
||||
for (const e of events) console.log(` t=${fmt.pad(e.t, 7)} ${e.name}`);
|
||||
|
||||
console.log(
|
||||
'\n=== TIMELINE (~1s cadence, plus event-adjacent samples) ===\n' +
|
||||
' t(ms) runOps msgN childN content reasoning tools domLen search crawl topic event',
|
||||
);
|
||||
|
||||
let lastSampledAt = -1e9;
|
||||
const eventBuckets = events.map((e) => e.t);
|
||||
for (let i = 0; i < samples.length; i++) {
|
||||
const s = samples[i];
|
||||
const nearEvent = eventBuckets.some((et) => Math.abs(et - s.t) < 110);
|
||||
if (!nearEvent && s.t - lastSampledAt < 1000) continue;
|
||||
lastSampledAt = s.t;
|
||||
|
||||
const ev = events.find((e) => Math.abs(e.t - s.t) < 110);
|
||||
const evMarker = ev ? ` ◀ ${ev.name}` : '';
|
||||
const topicSuffix = s.topicId ? s.topicId.slice(-6) : '(none)';
|
||||
const search = s.ind?.search ?? 0;
|
||||
const crawl = s.ind?.crawl ?? 0;
|
||||
console.log(
|
||||
` ${fmt.pad(s.t, 6)} ` +
|
||||
`${fmt.pad(s.runOps, 6)} ` +
|
||||
`${fmt.pad(s.msgN, 4)} ` +
|
||||
`${fmt.pad(s.childN ?? 0, 5)} ` +
|
||||
`${fmt.pad(s.cT ?? 0, 8)} ` +
|
||||
`${fmt.pad(s.rT ?? 0, 9)} ` +
|
||||
`${fmt.pad(s.toolT ?? 0, 5)} ` +
|
||||
`${fmt.pad(s.domLen ?? 0, 7)} ` +
|
||||
`${fmt.pad(search, 6)} ` +
|
||||
`${fmt.pad(crawl, 5)} ` +
|
||||
`${topicSuffix.padEnd(8)}${evMarker}`,
|
||||
);
|
||||
}
|
||||
|
||||
console.log('\n=== REGRESSIONS (same topic, value dropped) ===');
|
||||
const regressions = [];
|
||||
for (let i = 1; i < samples.length; i++) {
|
||||
const prev = samples[i - 1];
|
||||
const cur = samples[i];
|
||||
if (!cur.topicId || prev.topicId !== cur.topicId) continue;
|
||||
|
||||
const drops = [];
|
||||
if (cur.msgN < prev.msgN) drops.push(`msgN: ${prev.msgN}→${cur.msgN}`);
|
||||
if ((cur.childN ?? 0) < (prev.childN ?? 0)) drops.push(`childN: ${prev.childN}→${cur.childN}`);
|
||||
if ((cur.cT ?? 0) < (prev.cT ?? 0)) drops.push(`content: ${prev.cT}→${cur.cT}`);
|
||||
if ((cur.rT ?? 0) < (prev.rT ?? 0)) drops.push(`reasoning: ${prev.rT}→${cur.rT}`);
|
||||
if ((cur.toolT ?? 0) < (prev.toolT ?? 0)) drops.push(`tools: ${prev.toolT}→${cur.toolT}`);
|
||||
// domLen jitters by a few chars from counter labels — only flag big drops.
|
||||
if ((cur.domLen ?? 0) < (prev.domLen ?? 0) - 100) {
|
||||
drops.push(`domLen: ${prev.domLen}→${cur.domLen}`);
|
||||
}
|
||||
if (drops.length === 0) continue;
|
||||
|
||||
const nearbyEv = events.filter((e) => Math.abs(e.t - cur.t) < 600).map((e) => e.name);
|
||||
regressions.push({ t: cur.t, topic: cur.topicId.slice(-6), drops, nearbyEv });
|
||||
}
|
||||
|
||||
if (regressions.length === 0) {
|
||||
console.log(' (none)');
|
||||
} else {
|
||||
for (const r of regressions) {
|
||||
const evStr = r.nearbyEv.length ? ` near:[${r.nearbyEv.join(',')}]` : '';
|
||||
console.log(` t=${fmt.pad(r.t, 7)} topic=${r.topic} ${r.drops.join(' | ')}${evStr}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n=== SUMMARY ===`);
|
||||
console.log(` samples: ${samples.length}`);
|
||||
console.log(` events: ${events.length}`);
|
||||
console.log(` regressions: ${regressions.length}`);
|
||||
if (samples.length) {
|
||||
const last = samples.at(-1);
|
||||
console.log(
|
||||
` final: msgN=${last.msgN} childN=${last.childN ?? 0} content=${last.cT ?? 0} ` +
|
||||
`reasoning=${last.rT ?? 0} tools=${last.toolT ?? 0} runOps=${last.runOps}`,
|
||||
);
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
// Stop the probe and serialize collected data.
|
||||
//
|
||||
// agent-browser --cdp 9222 eval --stdin < probe-dump.js > /tmp/probe.json
|
||||
//
|
||||
// The whole thing is wrapped in a JSON.stringify so agent-browser returns it
|
||||
// as a single quoted string — the analyzer double-parses to handle that.
|
||||
|
||||
(function () {
|
||||
if (window.__PROBE_TIMER) {
|
||||
clearInterval(window.__PROBE_TIMER);
|
||||
window.__PROBE_TIMER = null;
|
||||
}
|
||||
return JSON.stringify({
|
||||
events: window.__PROBE_EVENTS || [],
|
||||
samples: window.__PROBE_SAMPLES || [],
|
||||
});
|
||||
})();
|
||||
@@ -1,37 +0,0 @@
|
||||
// Stops the events-probe timeline timer and stashes the full capture as a
|
||||
// JSON string on `window.__PROBE_LAST_DUMP_JSON`. `run.ts` wraps the bundle
|
||||
// in an IIFE that returns that global, which `agent-browser eval` prints to
|
||||
// stdout — the runner then persists it under `.agent-gateway/`.
|
||||
|
||||
import type { ProbeDump } from './types';
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
__PROBE_LAST_DUMP_JSON?: string;
|
||||
}
|
||||
}
|
||||
|
||||
const w = window;
|
||||
|
||||
if (w.__PROBE_TIMELINE_TIMER) {
|
||||
clearInterval(w.__PROBE_TIMELINE_TIMER);
|
||||
w.__PROBE_TIMELINE_TIMER = null;
|
||||
}
|
||||
|
||||
const mutations = w.__PROBE_MUTATIONS ?? [];
|
||||
|
||||
const dump: ProbeDump & { mutations: typeof mutations } = {
|
||||
meta: {
|
||||
t0: w.__PROBE_T0 ?? 0,
|
||||
collectedAt: Date.now(),
|
||||
sampleCount: (w.__PROBE_MSG_TIMELINE ?? []).length,
|
||||
eventCount: (w.__PROBE_STREAM_EVENTS ?? []).length,
|
||||
callCount: (w.__PROBE_ACTION_CALLS ?? []).length,
|
||||
},
|
||||
streamEvents: w.__PROBE_STREAM_EVENTS ?? [],
|
||||
actionCalls: w.__PROBE_ACTION_CALLS ?? [],
|
||||
timeline: w.__PROBE_MSG_TIMELINE ?? [],
|
||||
mutations,
|
||||
};
|
||||
|
||||
w.__PROBE_LAST_DUMP_JSON = JSON.stringify(dump);
|
||||
@@ -1,637 +0,0 @@
|
||||
// LobeHub gateway raw-event-stream probe.
|
||||
//
|
||||
// Gateway-mode chats subscribe via WebSocket — NOT via the `/api/agent/stream`
|
||||
// SSE endpoint (that one belongs to the direct/client durable-agent runtime).
|
||||
// `AgentStreamClient` (`packages/agent-gateway-client/src/client.ts`) opens
|
||||
// `new WebSocket('wss://.../ws?operationId=...')`, then parses JSON frames in
|
||||
// its `onmessage` handler and re-emits `agent_event.event` objects to the
|
||||
// chat store.
|
||||
//
|
||||
// To capture the RAW gateway events before the store touches them, we wrap
|
||||
// `window.WebSocket` so that for any socket whose URL contains `operationId=`
|
||||
// we intercept the `onmessage` handler / `addEventListener('message')` and
|
||||
// log every `agent_event` frame.
|
||||
//
|
||||
// We *also* keep the `window.fetch` hook for `/api/agent/stream` so this
|
||||
// probe still works for direct-mode runs — but gateway-mode events come
|
||||
// through the WebSocket path.
|
||||
//
|
||||
// Buffers (read via `dump`):
|
||||
// __PROBE_STREAM_EVENTS — raw events parsed off the wire
|
||||
// __PROBE_ACTION_CALLS — replaceMessages / refreshMessages calls (best-effort)
|
||||
// __PROBE_MSG_TIMELINE — 200ms snapshots of every messagesMap key
|
||||
|
||||
import type {
|
||||
ProbeActionCall,
|
||||
ProbeMessageSummary,
|
||||
ProbeStreamEvent,
|
||||
ProbeTimelineSample,
|
||||
} from './types';
|
||||
|
||||
// Bundled by esbuild as an IIFE. Top-level code runs once on injection.
|
||||
|
||||
const w = window;
|
||||
|
||||
// ── Buffers ─────────────────────────────────────────────────────────
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
__PROBE_MUTATIONS?: Array<{
|
||||
t: number;
|
||||
key: string;
|
||||
n: number;
|
||||
last?: { id: string; role: string; cLen: number; rLen: number; updatedAt?: unknown };
|
||||
prevLast?: { id: string; role: string; cLen: number; rLen: number };
|
||||
delta?: string;
|
||||
}>;
|
||||
__PROBE_STORE_UNSUB?: () => void;
|
||||
}
|
||||
}
|
||||
|
||||
const events: ProbeStreamEvent[] = (w.__PROBE_STREAM_EVENTS ??= []);
|
||||
const calls: ProbeActionCall[] = (w.__PROBE_ACTION_CALLS ??= []);
|
||||
const timeline: ProbeTimelineSample[] = (w.__PROBE_MSG_TIMELINE ??= []);
|
||||
const mutations = (w.__PROBE_MUTATIONS ??= []);
|
||||
events.length = 0;
|
||||
calls.length = 0;
|
||||
timeline.length = 0;
|
||||
mutations.length = 0;
|
||||
|
||||
const t0 = Date.now();
|
||||
w.__PROBE_T0 = t0;
|
||||
const now = (): number => Date.now() - t0;
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
function summarizeData(data: unknown): Record<string, unknown> | unknown {
|
||||
if (!data || typeof data !== 'object') return data;
|
||||
const src = data as Record<string, unknown>;
|
||||
const out: Record<string, unknown> = {};
|
||||
for (const k of Object.keys(src)) {
|
||||
const v = src[k];
|
||||
if (v == null) {
|
||||
out[k] = v;
|
||||
} else if (Array.isArray(v)) {
|
||||
out[k] = `Array(${v.length})`;
|
||||
if (k === 'uiMessages') {
|
||||
out.uiMessagesPreview = v.slice(0, 5).map((m: any) => ({
|
||||
id: (m.id ?? '').slice(-8),
|
||||
role: m.role,
|
||||
cLen: (m.content ?? '').length,
|
||||
children: (m.children ?? []).length,
|
||||
tools: (m.tools ?? []).length,
|
||||
reasoning: (m.reasoning?.content ?? '').length,
|
||||
}));
|
||||
out.uiMessagesTotal = v.length;
|
||||
}
|
||||
} else if (typeof v === 'object') {
|
||||
const obj = v as Record<string, unknown>;
|
||||
out[k] =
|
||||
'Object{' +
|
||||
Object.keys(obj)
|
||||
.slice(0, 6)
|
||||
.map((kk) => kk + (typeof obj[kk] === 'string' ? `=${(obj[kk] as string).length}ch` : ''))
|
||||
.join(',') +
|
||||
'}';
|
||||
} else if (typeof v === 'string') {
|
||||
out[k] = v.length > 100 ? v.slice(0, 100) + `…(${v.length})` : v;
|
||||
} else {
|
||||
out[k] = v;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function summarizeMessages(msgs: any[]): ProbeMessageSummary[] {
|
||||
return (msgs ?? []).slice(0, 80).map((m) => ({
|
||||
id: (m.id ?? '').slice(-8),
|
||||
role: m.role,
|
||||
cLen: (m.content ?? '').length,
|
||||
rLen: (m.reasoning?.content ?? '').length,
|
||||
tools: (m.tools ?? []).length,
|
||||
chN: (m.children ?? []).length,
|
||||
}));
|
||||
}
|
||||
|
||||
function shortStack(): string {
|
||||
const raw = new Error('probe-stack').stack ?? '';
|
||||
return raw
|
||||
.split('\n')
|
||||
.slice(3)
|
||||
.filter((l) => !l.includes('probe-events') && !l.includes('node_modules'))
|
||||
.map((l) => l.trim().replace(/^at\s+/, ''))
|
||||
.slice(0, 6)
|
||||
.join(' ← ');
|
||||
}
|
||||
|
||||
function recordAgentEvent(args: {
|
||||
transport: 'ws' | 'sse';
|
||||
opId: string | null;
|
||||
agentEvent: any;
|
||||
eventId?: string | null;
|
||||
rawLen?: number;
|
||||
}): void {
|
||||
const { transport, opId, agentEvent, eventId, rawLen } = args;
|
||||
if (!agentEvent || typeof agentEvent !== 'object') return;
|
||||
events.push({
|
||||
t: now(),
|
||||
transport,
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
eventId: eventId ?? null,
|
||||
type: agentEvent.type,
|
||||
stepIndex: agentEvent.stepIndex,
|
||||
dataKeys: agentEvent.data ? Object.keys(agentEvent.data) : [],
|
||||
data: summarizeData(agentEvent.data) as Record<string, unknown>,
|
||||
rawLen,
|
||||
});
|
||||
}
|
||||
|
||||
// ── 1. Patch window.WebSocket for gateway WS events ────────────────
|
||||
|
||||
if (!w.__PROBE_ORIG_WEBSOCKET) w.__PROBE_ORIG_WEBSOCKET = w.WebSocket;
|
||||
const OrigWS = w.__PROBE_ORIG_WEBSOCKET;
|
||||
|
||||
function extractOpIdFromWsUrl(url: string | URL): string | null {
|
||||
const m = String(url ?? '').match(/operationId=([^&]+)/);
|
||||
return m ? decodeURIComponent(m[1]) : null;
|
||||
}
|
||||
|
||||
function isGatewayWs(url: string | URL): boolean {
|
||||
return String(url ?? '').includes('operationId=');
|
||||
}
|
||||
|
||||
function handleWsFrame(rawData: unknown, opId: string | null): void {
|
||||
const rawLen = typeof rawData === 'string' ? rawData.length : -1;
|
||||
let parsed: any;
|
||||
try {
|
||||
parsed = typeof rawData === 'string' ? JSON.parse(rawData) : null;
|
||||
} catch {
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'ws',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_PARSE_ERROR_',
|
||||
raw: typeof rawData === 'string' && rawData.length < 400 ? rawData : '(non-string or large)',
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (!parsed) return;
|
||||
|
||||
if (parsed.type === 'agent_event') {
|
||||
recordAgentEvent({
|
||||
transport: 'ws',
|
||||
opId,
|
||||
agentEvent: parsed.event,
|
||||
eventId: parsed.id,
|
||||
rawLen,
|
||||
});
|
||||
} else {
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'ws',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_SERVER_MSG_',
|
||||
serverType: parsed.type,
|
||||
rawLen,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap the constructor. Instance `constructor` will still reflect OrigWS
|
||||
// (we share prototypes), so use the `_WS_OPEN_` sentinel events to confirm
|
||||
// the patch is firing.
|
||||
function PatchedWebSocket(this: WebSocket, url: string | URL, protocols?: string | string[]) {
|
||||
const ws: WebSocket = protocols == null ? new OrigWS(url) : new OrigWS(url, protocols);
|
||||
const opId = extractOpIdFromWsUrl(url);
|
||||
if (!isGatewayWs(url)) return ws;
|
||||
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'ws',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_WS_OPEN_',
|
||||
url: String(url),
|
||||
});
|
||||
|
||||
// One observer listener that always fires, regardless of how the consumer
|
||||
// (AgentStreamClient uses `ws.onmessage = …`) subscribes.
|
||||
ws.addEventListener('message', (e) => {
|
||||
try {
|
||||
handleWsFrame((e as MessageEvent).data, opId);
|
||||
} catch {
|
||||
/* swallow */
|
||||
}
|
||||
});
|
||||
|
||||
ws.addEventListener('close', () => {
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'ws',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_WS_CLOSE_',
|
||||
});
|
||||
});
|
||||
|
||||
return ws;
|
||||
}
|
||||
|
||||
// Preserve prototype + static fields so `instanceof WebSocket` and
|
||||
// `WebSocket.OPEN` constants still work.
|
||||
(PatchedWebSocket as unknown as { prototype: WebSocket }).prototype = OrigWS.prototype;
|
||||
for (const k of Object.keys(OrigWS) as Array<keyof typeof OrigWS>) {
|
||||
try {
|
||||
(PatchedWebSocket as any)[k] = (OrigWS as any)[k];
|
||||
} catch {
|
||||
/* readonly */
|
||||
}
|
||||
}
|
||||
(['CONNECTING', 'OPEN', 'CLOSING', 'CLOSED'] as const).forEach((k) => {
|
||||
(PatchedWebSocket as any)[k] = (OrigWS as any)[k];
|
||||
});
|
||||
w.WebSocket = PatchedWebSocket as unknown as typeof WebSocket;
|
||||
|
||||
// ── 2. Patch window.fetch for `/api/agent/stream` (direct-mode SSE) ─
|
||||
|
||||
if (!w.__PROBE_ORIG_FETCH) w.__PROBE_ORIG_FETCH = w.fetch.bind(w);
|
||||
const origFetch = w.__PROBE_ORIG_FETCH;
|
||||
|
||||
function isAgentStreamUrl(input: RequestInfo | URL): boolean {
|
||||
let url = '';
|
||||
if (typeof input === 'string') url = input;
|
||||
else if (input instanceof URL) url = input.toString();
|
||||
else if (input && typeof (input as Request).url === 'string') url = (input as Request).url;
|
||||
return url.includes('/api/agent/stream');
|
||||
}
|
||||
|
||||
function extractOpIdFromHttpUrl(input: RequestInfo | URL): string | null {
|
||||
const url = typeof input === 'string' ? input : (input as Request | URL).toString();
|
||||
const m = url.match(/operationId=([^&]+)/);
|
||||
return m ? decodeURIComponent(m[1]) : null;
|
||||
}
|
||||
|
||||
function pushFromSSEFrame(rawFrame: string, opId: string | null): void {
|
||||
const lines = rawFrame.split('\n');
|
||||
let dataJson = '';
|
||||
let evtName = 'message';
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('event:')) evtName = line.slice(6).trim();
|
||||
else if (line.startsWith('data:')) dataJson += line.slice(5).trim();
|
||||
}
|
||||
if (!dataJson) return;
|
||||
let parsed: any;
|
||||
try {
|
||||
parsed = JSON.parse(dataJson);
|
||||
} catch {
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'sse',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_PARSE_ERROR_',
|
||||
sseEvent: evtName,
|
||||
raw: dataJson.length > 400 ? dataJson.slice(0, 400) + '…' : dataJson,
|
||||
});
|
||||
return;
|
||||
}
|
||||
recordAgentEvent({
|
||||
transport: 'sse',
|
||||
opId,
|
||||
agentEvent: parsed,
|
||||
eventId: null,
|
||||
rawLen: dataJson.length,
|
||||
});
|
||||
}
|
||||
|
||||
async function teeAndDrain(response: Response, opId: string | null): Promise<Response> {
|
||||
if (!response.body) return response;
|
||||
const [a, b] = response.body.tee();
|
||||
|
||||
void (async () => {
|
||||
const reader = b.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buf = '';
|
||||
try {
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) break;
|
||||
buf += decoder.decode(value, { stream: true });
|
||||
let idx: number;
|
||||
|
||||
while ((idx = buf.indexOf('\n\n')) !== -1) {
|
||||
const frame = buf.slice(0, idx);
|
||||
buf = buf.slice(idx + 2);
|
||||
if (frame.trim()) pushFromSSEFrame(frame, opId);
|
||||
}
|
||||
}
|
||||
if (buf.trim()) pushFromSSEFrame(buf, opId);
|
||||
} catch (e: any) {
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'sse',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_TEE_ERROR_',
|
||||
message: String(e?.message ?? e),
|
||||
});
|
||||
}
|
||||
})();
|
||||
|
||||
return new Response(a, {
|
||||
headers: response.headers,
|
||||
status: response.status,
|
||||
statusText: response.statusText,
|
||||
});
|
||||
}
|
||||
|
||||
w.fetch = async function patchedFetch(input: RequestInfo | URL, init?: RequestInit) {
|
||||
const response = await origFetch(input as any, init);
|
||||
if (!isAgentStreamUrl(input)) return response;
|
||||
const opId = extractOpIdFromHttpUrl(input);
|
||||
const url =
|
||||
typeof input === 'string'
|
||||
? input.split('?')[0]
|
||||
: (input as Request | URL).toString().split('?')[0];
|
||||
events.push({
|
||||
t: now(),
|
||||
transport: 'sse',
|
||||
opIdTail: (opId ?? '').slice(-10),
|
||||
type: '_CONNECTED_',
|
||||
url,
|
||||
status: response.status,
|
||||
});
|
||||
return teeAndDrain(response, opId);
|
||||
} as typeof fetch;
|
||||
|
||||
// ── 3. Wrap store actions (best-effort for "who called replace") ────
|
||||
|
||||
// Side-global stash for the original chat-store actions. Re-installs ALWAYS
|
||||
// rewrap from the originals so updates to the probe body take effect
|
||||
// without a page reload — using only a `__probeWrapped` flag on the chat
|
||||
// state object would freeze the first-installed wrapper across re-installs.
|
||||
declare global {
|
||||
interface Window {
|
||||
__PROBE_ORIG_REFRESH_MESSAGES?: any;
|
||||
__PROBE_ORIG_REPLACE_MESSAGES?: any;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const chat = w.__LOBE_STORES?.chat?.();
|
||||
if (chat) {
|
||||
// First-time install: cache the originals. Re-install: restore from
|
||||
// the cached originals before wrapping again.
|
||||
if (!w.__PROBE_ORIG_REFRESH_MESSAGES) w.__PROBE_ORIG_REFRESH_MESSAGES = chat.refreshMessages;
|
||||
if (!w.__PROBE_ORIG_REPLACE_MESSAGES) w.__PROBE_ORIG_REPLACE_MESSAGES = chat.replaceMessages;
|
||||
const origRefresh = w.__PROBE_ORIG_REFRESH_MESSAGES;
|
||||
const origReplace = w.__PROBE_ORIG_REPLACE_MESSAGES;
|
||||
chat.refreshMessages = origRefresh;
|
||||
chat.replaceMessages = origReplace;
|
||||
|
||||
chat.refreshMessages = async function probeRefresh(this: unknown, ...args: any[]) {
|
||||
calls.push({
|
||||
t: now(),
|
||||
name: 'refreshMessages',
|
||||
args: { context: args[0] ?? null },
|
||||
stack: shortStack(),
|
||||
});
|
||||
return origRefresh.apply(this, args);
|
||||
};
|
||||
chat.replaceMessages = function probeReplace(this: unknown, ...args: any[]) {
|
||||
const msgs = (args[0] as any[]) ?? [];
|
||||
const snapshot = msgs.slice(-2).map((m) => ({
|
||||
id: (m.id ?? '').slice(-8),
|
||||
role: m.role,
|
||||
cLen: (m.content ?? '').length,
|
||||
rLen: (m.reasoning?.content ?? '').length,
|
||||
updatedAt: m.updatedAt,
|
||||
}));
|
||||
calls.push({
|
||||
t: now(),
|
||||
name: 'replaceMessages',
|
||||
args: { count: msgs.length, params: args[1] ?? null, snapshot } as any,
|
||||
stack: shortStack(),
|
||||
});
|
||||
|
||||
// Pair the call with a mutation row so the analyzer can build a
|
||||
// single ordered timeline across replaceMessages + dispatchMessage.
|
||||
const stackTop = shortStack().split(' ← ')[0]?.slice(0, 80);
|
||||
const last = msgs.at(-1);
|
||||
const lastSum = last
|
||||
? {
|
||||
id: (last.id ?? '').slice(-8),
|
||||
role: last.role,
|
||||
cLen: (last.content ?? '').length,
|
||||
rLen: (last.reasoning?.content ?? '').length,
|
||||
updatedAt: last.updatedAt,
|
||||
}
|
||||
: undefined;
|
||||
const params: any = args[1] ?? {};
|
||||
const ctxKey = params.context
|
||||
? `main_${params.context.agentId ?? '?'}_${
|
||||
params.context.topicId ? 'tpc_' + params.context.topicId : 'new'
|
||||
}`.replace('main_tpc_', 'main_') // crude key inference
|
||||
: '(no-ctx)';
|
||||
mutations.push({
|
||||
t: now(),
|
||||
key: ctxKey,
|
||||
n: msgs.length,
|
||||
last: lastSum,
|
||||
delta: `replaceMessages(action=${params.action ?? '-'}) src=${stackTop ?? '-'}`,
|
||||
});
|
||||
|
||||
return origReplace.apply(this, args);
|
||||
};
|
||||
}
|
||||
} catch (e: any) {
|
||||
calls.push({ t: now(), name: '_WRAP_ERROR_', error: String(e?.message ?? e) });
|
||||
}
|
||||
|
||||
// ── 3.5. Mutation log — wrap the TWO ChatStore writers (replaceMessages,
|
||||
// internal_dispatchMessage) to record EVERY dbMessagesMap[key] reference
|
||||
// change with a one-line "before/after last assistant message" delta. This
|
||||
// reveals dispatchMessage-driven collapses that the replaceMessages wrap
|
||||
// alone cannot see.
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
__PROBE_ORIG_DISPATCH_MESSAGE?: any;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const chat = w.__LOBE_STORES?.chat?.();
|
||||
if (chat?.internal_dispatchMessage) {
|
||||
if (!w.__PROBE_ORIG_DISPATCH_MESSAGE)
|
||||
w.__PROBE_ORIG_DISPATCH_MESSAGE = chat.internal_dispatchMessage;
|
||||
const origDispatch = w.__PROBE_ORIG_DISPATCH_MESSAGE;
|
||||
chat.internal_dispatchMessage = origDispatch;
|
||||
|
||||
chat.internal_dispatchMessage = function probeDispatch(this: unknown, payload: any, ctx?: any) {
|
||||
// Snapshot BEFORE — read the would-be target key + last message.
|
||||
const before = (() => {
|
||||
try {
|
||||
const state = w.__LOBE_STORES?.chat?.();
|
||||
if (!state) return null;
|
||||
// Replicate state.internal_getConversationContext logic enough to
|
||||
// resolve a key — but most callers pass operationId on ctx, and
|
||||
// operationId-keyed lookup needs store internals. Easiest: snapshot
|
||||
// ALL keys' last-assistant cLen and compare BEFORE vs AFTER below.
|
||||
const map = state.dbMessagesMap ?? {};
|
||||
const out: Record<string, any> = {};
|
||||
for (const k of Object.keys(map)) {
|
||||
const last = (map[k] ?? []).at(-1);
|
||||
out[k] = last
|
||||
? {
|
||||
id: (last.id ?? '').slice(-8),
|
||||
cLen: (last.content ?? '').length,
|
||||
rLen: (last.reasoning?.content ?? '').length,
|
||||
n: map[k].length,
|
||||
}
|
||||
: { n: 0 };
|
||||
}
|
||||
return out;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})();
|
||||
|
||||
const result = origDispatch.apply(this, [payload, ctx]);
|
||||
|
||||
// Snapshot AFTER — find which key(s) actually changed.
|
||||
try {
|
||||
const state = w.__LOBE_STORES?.chat?.();
|
||||
if (state && before) {
|
||||
const map = state.dbMessagesMap ?? {};
|
||||
for (const k of Object.keys(map)) {
|
||||
const last = (map[k] ?? []).at(-1);
|
||||
const beforeSnap = before[k];
|
||||
const afterSnap = last
|
||||
? {
|
||||
id: (last.id ?? '').slice(-8),
|
||||
cLen: (last.content ?? '').length,
|
||||
rLen: (last.reasoning?.content ?? '').length,
|
||||
n: map[k].length,
|
||||
}
|
||||
: { n: 0 };
|
||||
const changed =
|
||||
!beforeSnap ||
|
||||
beforeSnap.n !== afterSnap.n ||
|
||||
beforeSnap.id !== (afterSnap as any).id ||
|
||||
beforeSnap.cLen !== (afterSnap as any).cLen ||
|
||||
beforeSnap.rLen !== (afterSnap as any).rLen;
|
||||
if (!changed) continue;
|
||||
let delta = '';
|
||||
if (beforeSnap?.id !== undefined && beforeSnap.id !== (afterSnap as any).id)
|
||||
delta += `id:${beforeSnap.id}→${(afterSnap as any).id};`;
|
||||
if (
|
||||
beforeSnap?.cLen !== undefined &&
|
||||
(afterSnap as any).cLen !== undefined &&
|
||||
(afterSnap as any).cLen < beforeSnap.cLen
|
||||
)
|
||||
delta += `cLen↓${beforeSnap.cLen}→${(afterSnap as any).cLen};`;
|
||||
if (
|
||||
beforeSnap?.rLen !== undefined &&
|
||||
(afterSnap as any).rLen !== undefined &&
|
||||
(afterSnap as any).rLen < beforeSnap.rLen
|
||||
)
|
||||
delta += `rLen↓${beforeSnap.rLen}→${(afterSnap as any).rLen};`;
|
||||
if (beforeSnap?.n !== undefined && afterSnap.n < beforeSnap.n)
|
||||
delta += `n↓${beforeSnap.n}→${afterSnap.n};`;
|
||||
mutations.push({
|
||||
t: now(),
|
||||
key: k,
|
||||
n: afterSnap.n,
|
||||
last: (afterSnap as any).id ? (afterSnap as any) : undefined,
|
||||
prevLast: beforeSnap?.id ? beforeSnap : undefined,
|
||||
delta: delta || `dispatch:${payload?.type}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (e: any) {
|
||||
mutations.push({
|
||||
t: now(),
|
||||
key: '_DISPATCH_PROBE_ERROR_',
|
||||
n: -1,
|
||||
delta: String(e?.message ?? e),
|
||||
});
|
||||
}
|
||||
return result;
|
||||
};
|
||||
}
|
||||
} catch (e: any) {
|
||||
calls.push({ t: now(), name: '_DISPATCH_WRAP_ERROR_', error: String(e?.message ?? e) });
|
||||
}
|
||||
|
||||
// ── 4. Periodic per-key timeline snapshots ─────────────────────────
|
||||
|
||||
function captureTimeline(): void {
|
||||
try {
|
||||
const c = w.__LOBE_STORES?.chat?.();
|
||||
if (!c) return;
|
||||
const msgsMap = (c.messagesMap ?? {}) as Record<string, any[]>;
|
||||
const dbMap = (c.dbMessagesMap ?? {}) as Record<string, any[]>;
|
||||
const byKey: ProbeTimelineSample['byKey'] = {};
|
||||
for (const k of Object.keys(msgsMap)) {
|
||||
const display = msgsMap[k] ?? [];
|
||||
const db = dbMap[k] ?? [];
|
||||
if (display.length === 0 && db.length === 0) continue;
|
||||
byKey[k] = {
|
||||
n: display.length,
|
||||
dbN: db.length,
|
||||
msgs: summarizeMessages(display),
|
||||
};
|
||||
}
|
||||
const ops = Object.values((c.operations ?? {}) as Record<string, any>);
|
||||
timeline.push({
|
||||
t: now(),
|
||||
activeTopic: ((c.activeTopicId as string | null) ?? '').slice(-10) || null,
|
||||
keys: Object.keys(byKey),
|
||||
byKey,
|
||||
runOps: ops.filter((o: any) => o.status === 'running').length,
|
||||
});
|
||||
} catch (e: any) {
|
||||
timeline.push({
|
||||
t: now(),
|
||||
activeTopic: null,
|
||||
keys: [],
|
||||
byKey: {},
|
||||
runOps: 0,
|
||||
err: e?.message ?? String(e),
|
||||
});
|
||||
}
|
||||
}
|
||||
captureTimeline();
|
||||
if (w.__PROBE_TIMELINE_TIMER) clearInterval(w.__PROBE_TIMELINE_TIMER);
|
||||
w.__PROBE_TIMELINE_TIMER = setInterval(captureTimeline, 200);
|
||||
|
||||
// ── 5. Tab-switch helpers ──────────────────────────────────────────
|
||||
|
||||
function listTopBarTabs(): HTMLElement[] {
|
||||
return Array.from(
|
||||
document.querySelectorAll<HTMLElement>(
|
||||
'[data-insp-path*="TabItem.tsx"][data-contextmenu-trigger]',
|
||||
),
|
||||
).filter((t) => t.getBoundingClientRect().top < 30);
|
||||
}
|
||||
|
||||
w.__listTabs = () =>
|
||||
listTopBarTabs().map((t, i) => ({
|
||||
i,
|
||||
key: t.getAttribute('data-contextmenu-trigger'),
|
||||
active: t.getAttribute('data-active') === 'true',
|
||||
title: (t.innerText ?? '').slice(0, 60),
|
||||
}));
|
||||
|
||||
w.__clickTabByKey = (key: string) => {
|
||||
const tab = listTopBarTabs().find((t) => t.getAttribute('data-contextmenu-trigger') === key);
|
||||
if (!tab) return 'not found: ' + key;
|
||||
if (tab.getAttribute('data-active') === 'true') return 'already active: ' + key;
|
||||
tab.click();
|
||||
return 'clicked key=' + key;
|
||||
};
|
||||
|
||||
w.__PROBE_EVENT = (name: string) => {
|
||||
calls.push({ t: now(), name: 'MARK:' + name });
|
||||
};
|
||||
|
||||
// `run.ts` wraps the bundle in an IIFE and appends a `return <confirmation>`
|
||||
// after the bundle body — agent-browser then prints the confirmation back to
|
||||
// the operator. Nothing to do here at the end of the module body.
|
||||
@@ -1,204 +0,0 @@
|
||||
// LobeHub chat streaming time-series probe.
|
||||
//
|
||||
// Inject into the renderer (via agent-browser eval) to record store + DOM
|
||||
// snapshots every 200ms during a streaming session. Designed to surface
|
||||
// "UI rolled back to an earlier state" symptoms — especially around
|
||||
// gateway-mode tab switches that happen while the assistant is still writing.
|
||||
//
|
||||
// Usage:
|
||||
// agent-browser --cdp 9222 eval --stdin < probe.js
|
||||
// # ...do test interactions, call window.__PROBE_EVENT('LABEL') to mark moments...
|
||||
// agent-browser --cdp 9222 eval --stdin < probe-dump.js > /tmp/probe.json
|
||||
// node analyze.mjs /tmp/probe.json
|
||||
//
|
||||
// What it captures per sample:
|
||||
// - activeTopicId
|
||||
// - msgN: top-level messages in chat.messagesMap for this topic
|
||||
// - childN: total assistantGroup.children blocks across all msgs (THIS is
|
||||
// where streaming content actually lives — top-level assistantGroup stays empty)
|
||||
// - cT / rT / toolT: totals across messages AND their children
|
||||
// (content, reasoning, tool-call count)
|
||||
// - perMsg: per-message breakdown so regressions can be located precisely
|
||||
// - runOps: number of running operations (execServerAgentRuntime etc.)
|
||||
// - domLen: total innerText length of the rendered chat list area
|
||||
// - ind: visible UI indicators (Search pages, Crawled pages, Deeply Thought, Sending)
|
||||
//
|
||||
// Event markers: window.__PROBE_EVENT('NAME') records {t, name} into
|
||||
// __PROBE_EVENTS, used by the analyzer to align state changes with
|
||||
// user-driven actions (SENT, AWAY_1, BACK_1, ...).
|
||||
|
||||
(function () {
|
||||
if (window.__PROBE_TIMER) clearInterval(window.__PROBE_TIMER);
|
||||
window.__PROBE_SAMPLES = [];
|
||||
window.__PROBE_EVENTS = [];
|
||||
const t0 = Date.now();
|
||||
|
||||
function snapshot() {
|
||||
try {
|
||||
const chat = window.__LOBE_STORES.chat();
|
||||
const topicId = chat.activeTopicId;
|
||||
const idTail = topicId ? topicId.replace('tpc_', '') : null;
|
||||
const keys = Object.keys(chat.messagesMap || {});
|
||||
|
||||
// Collect messages for the active topic. Before a topic is committed,
|
||||
// optimistic messages live under the `<agentScope>_new` key — fall
|
||||
// back to those when no topic is active yet.
|
||||
let msgs = [];
|
||||
if (idTail) {
|
||||
keys.forEach((k) => {
|
||||
if (k.includes(idTail)) msgs = msgs.concat(chat.messagesMap[k] || []);
|
||||
});
|
||||
} else {
|
||||
keys
|
||||
.filter((k) => k.endsWith('_new'))
|
||||
.forEach((k) => {
|
||||
msgs = msgs.concat(chat.messagesMap[k] || []);
|
||||
});
|
||||
}
|
||||
|
||||
// Walk top-level + assistantGroup.children. children carry the actual
|
||||
// streamed content / reasoning / tool calls; the parent assistantGroup
|
||||
// remains a placeholder (cLen=0, rLen=0) for its whole lifetime.
|
||||
let totalContent = 0;
|
||||
let totalReason = 0;
|
||||
let totalTools = 0;
|
||||
let childCount = 0;
|
||||
const perMsg = msgs.map((m) => {
|
||||
const cLen = (m.content || '').length;
|
||||
const rLen = ((m.reasoning && m.reasoning.content) || '').length;
|
||||
const tools = (m.tools || []).length;
|
||||
totalContent += cLen;
|
||||
totalReason += rLen;
|
||||
totalTools += tools;
|
||||
|
||||
const children = m.children || [];
|
||||
let chC = 0;
|
||||
let chR = 0;
|
||||
let chT = 0;
|
||||
children.forEach((c) => {
|
||||
chC += (c.content || '').length;
|
||||
chR += ((c.reasoning && c.reasoning.content) || '').length;
|
||||
chT += (c.tools || []).length;
|
||||
});
|
||||
totalContent += chC;
|
||||
totalReason += chR;
|
||||
totalTools += chT;
|
||||
childCount += children.length;
|
||||
|
||||
return {
|
||||
id: (m.id || '').slice(-8),
|
||||
role: m.role,
|
||||
cLen,
|
||||
rLen,
|
||||
tools,
|
||||
chCount: children.length,
|
||||
chC,
|
||||
chR,
|
||||
chT,
|
||||
};
|
||||
});
|
||||
|
||||
const ops = Object.values(chat.operations || {});
|
||||
const runningOps = ops.filter((o) => o.status === 'running');
|
||||
|
||||
// DOM probe: total rendered text in the chat scroll area (proxy for
|
||||
// "how much is actually visible to the user").
|
||||
const convScroll =
|
||||
document.querySelector(
|
||||
'[data-chat-list], [class*="ChatList"], [class*="ConversationList"]',
|
||||
) ||
|
||||
document.querySelector('main [class*="scroll"]') ||
|
||||
document.querySelector('main');
|
||||
const domTxt = convScroll ? convScroll.innerText || '' : '';
|
||||
|
||||
const bodyTxt = document.body.innerText || '';
|
||||
const searchMatches = (bodyTxt.match(/Search pages?:|Searched the web/g) || []).length;
|
||||
const crawlMatches = (bodyTxt.match(/Crawl(ed|ing) pages?/g) || []).length;
|
||||
|
||||
window.__PROBE_SAMPLES.push({
|
||||
t: Date.now() - t0,
|
||||
topicId,
|
||||
msgN: msgs.length,
|
||||
childN: childCount,
|
||||
cT: totalContent,
|
||||
rT: totalReason,
|
||||
toolT: totalTools,
|
||||
perMsg,
|
||||
runOps: runningOps.length,
|
||||
runOpTypes: runningOps.map((o) => o.type),
|
||||
domLen: domTxt.length,
|
||||
ind: {
|
||||
search: searchMatches,
|
||||
crawl: crawlMatches,
|
||||
sending: bodyTxt.includes('Sending message'),
|
||||
deeplyThinking: bodyTxt.includes('Deeply Thinking'),
|
||||
deeplyThought: bodyTxt.includes('Deeply Thought'),
|
||||
},
|
||||
});
|
||||
} catch (e) {
|
||||
window.__PROBE_SAMPLES.push({ t: Date.now() - t0, err: e.message });
|
||||
}
|
||||
}
|
||||
|
||||
snapshot();
|
||||
window.__PROBE_TIMER = setInterval(snapshot, 200);
|
||||
window.__PROBE_EVENT = function (name) {
|
||||
window.__PROBE_EVENTS.push({ t: Date.now() - t0, name });
|
||||
};
|
||||
|
||||
// Tab-switch helpers installed alongside the probe.
|
||||
//
|
||||
// The Electron tab bar mounts each tab as a div with data-insp-path
|
||||
// ending in `TabItem.tsx:...`. The active tab is marked with
|
||||
// data-active="true". DO NOT search by innerText — the active tab's text
|
||||
// includes a ` · <agent name>` suffix that produces false matches when
|
||||
// your search string happens to overlap with the agent name.
|
||||
function listTabs() {
|
||||
return Array.from(
|
||||
document.querySelectorAll('[data-insp-path*="TabItem.tsx"][data-contextmenu-trigger]'),
|
||||
).filter((t) => t.getBoundingClientRect().top < 30);
|
||||
}
|
||||
function tabKey(el) {
|
||||
// Stable for the tab's lifetime; survives focus changes.
|
||||
return el.getAttribute('data-contextmenu-trigger');
|
||||
}
|
||||
function findActiveTab() {
|
||||
return listTabs().find((t) => t.getAttribute('data-active') === 'true') || null;
|
||||
}
|
||||
|
||||
// Click by stable key captured earlier (preferred for round-trips).
|
||||
window.__clickTabByKey = function (key) {
|
||||
const tab = listTabs().find((t) => tabKey(t) === key);
|
||||
if (!tab) return 'not found: key=' + key;
|
||||
if (tab.getAttribute('data-active') === 'true') return 'already active: ' + key;
|
||||
tab.click();
|
||||
return 'clicked key=' + key;
|
||||
};
|
||||
|
||||
// Click by index in the tab strip (0-based, left-to-right).
|
||||
window.__clickTabByIndex = function (i) {
|
||||
const tabs = listTabs();
|
||||
if (i < 0 || i >= tabs.length) return 'index out of range: ' + i + '/' + tabs.length;
|
||||
const t = tabs[i];
|
||||
if (t.getAttribute('data-active') === 'true') return 'already active: i=' + i;
|
||||
t.click();
|
||||
return 'clicked i=' + i + ' key=' + tabKey(t);
|
||||
};
|
||||
|
||||
// Snapshot all tabs in order: [{key, active, title (first 60 chars of innerText)}]
|
||||
window.__listTabs = function () {
|
||||
return listTabs().map((t, i) => ({
|
||||
i,
|
||||
key: tabKey(t),
|
||||
active: t.getAttribute('data-active') === 'true',
|
||||
title: (t.innerText || '').slice(0, 60),
|
||||
}));
|
||||
};
|
||||
|
||||
window.__activeTabKey = function () {
|
||||
const a = findActiveTab();
|
||||
return a ? tabKey(a) : null;
|
||||
};
|
||||
|
||||
return 'probe installed';
|
||||
})();
|
||||
@@ -1,211 +0,0 @@
|
||||
// CLI for the agent-gateway probe.
|
||||
//
|
||||
// Bundles the TS probes with esbuild, pipes them into `agent-browser eval`,
|
||||
// and persists dumps under `.agent-gateway/` (gitignored) for later use as
|
||||
// streaming-replay test fixtures.
|
||||
//
|
||||
// Commands:
|
||||
// bun run .agents/skills/agent-testing/scripts/agent-gateway/run.ts install
|
||||
// Bundle probe-events.ts and inject into the CDP-attached browser.
|
||||
// Re-installing clears all buffers and re-patches WebSocket / fetch.
|
||||
//
|
||||
// bun run .agents/skills/agent-testing/scripts/agent-gateway/run.ts dump [name]
|
||||
// Stop the timeline timer, fetch the capture as JSON, write it to
|
||||
// `.agent-gateway/<name>-<YYYYMMDD-HHmmss>.json`. `name` defaults to
|
||||
// `dump`. Prints the absolute path written.
|
||||
//
|
||||
// bun run .agents/skills/agent-testing/scripts/agent-gateway/run.ts analyze [path]
|
||||
// Run analyze-events.ts on the dump. `path` defaults to the most
|
||||
// recently modified file in `.agent-gateway/`.
|
||||
//
|
||||
// Optional flags:
|
||||
// --cdp <port> CDP port (default 9222)
|
||||
// --browser <bin> agent-browser binary (default 'agent-browser')
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { mkdirSync, readdirSync, statSync, writeFileSync } from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url));
|
||||
// .agents/skills/agent-testing/scripts/agent-gateway/ → 5 levels up
|
||||
const PROJECT_ROOT = path.resolve(SCRIPT_DIR, '../../../../..');
|
||||
const DUMP_DIR = path.join(PROJECT_ROOT, '.agent-gateway');
|
||||
|
||||
interface Flags {
|
||||
browser: string;
|
||||
cdp: string;
|
||||
positional: string[];
|
||||
}
|
||||
|
||||
function parseFlags(argv: string[]): Flags {
|
||||
const out: Flags = { cdp: '9222', browser: 'agent-browser', positional: [] };
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const a = argv[i];
|
||||
if (a === '--cdp') out.cdp = argv[++i] ?? out.cdp;
|
||||
else if (a === '--browser') out.browser = argv[++i] ?? out.browser;
|
||||
else out.positional.push(a);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
async function bundle(entry: string): Promise<string> {
|
||||
// Bun.build is built into the Bun runtime — no external dep needed.
|
||||
const r = await Bun.build({
|
||||
entrypoints: [path.join(SCRIPT_DIR, entry)],
|
||||
target: 'browser',
|
||||
format: 'esm',
|
||||
minify: false,
|
||||
});
|
||||
if (!r.success) {
|
||||
const msgs = r.logs.map((l) => `${l.level}: ${l.message}`).join('\n');
|
||||
throw new Error(`bundle failed for ${entry}:\n${msgs}`);
|
||||
}
|
||||
return await r.outputs[0].text();
|
||||
}
|
||||
|
||||
function wrapIife(body: string, returnExpr: string): string {
|
||||
// Wrap as an IIFE that swallows the bundled top-level (top-level `const`
|
||||
// declarations get scoped to the IIFE, so re-injection doesn't conflict)
|
||||
// and returns the configured expression — which `agent-browser eval`
|
||||
// captures and prints to stdout.
|
||||
return `(() => {\n${body}\n;return ${returnExpr};\n})()`;
|
||||
}
|
||||
|
||||
function runAgentBrowserEval(flags: Flags, script: string): Promise<string> {
|
||||
return new Promise((resolveP, rejectP) => {
|
||||
const child = spawn(flags.browser, ['--cdp', flags.cdp, 'eval', '--stdin'], {
|
||||
stdio: ['pipe', 'pipe', 'inherit'],
|
||||
});
|
||||
let stdout = '';
|
||||
child.stdout.on('data', (chunk: Buffer) => {
|
||||
stdout += chunk.toString('utf8');
|
||||
});
|
||||
child.on('error', rejectP);
|
||||
child.on('close', (code) => {
|
||||
if (code === 0) resolveP(stdout);
|
||||
else rejectP(new Error(`agent-browser exited ${code}`));
|
||||
});
|
||||
child.stdin.write(script);
|
||||
child.stdin.end();
|
||||
});
|
||||
}
|
||||
|
||||
// agent-browser prints eval results as JSON (string values are quoted).
|
||||
function unquoteAgentBrowserResult(raw: string): string {
|
||||
const trimmed = raw.trim();
|
||||
if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
|
||||
try {
|
||||
return JSON.parse(trimmed) as string;
|
||||
} catch {
|
||||
/* fall through */
|
||||
}
|
||||
}
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
function isoStamp(): string {
|
||||
const d = new Date();
|
||||
const yyyy = d.getFullYear();
|
||||
const mm = String(d.getMonth() + 1).padStart(2, '0');
|
||||
const dd = String(d.getDate()).padStart(2, '0');
|
||||
const hh = String(d.getHours()).padStart(2, '0');
|
||||
const mi = String(d.getMinutes()).padStart(2, '0');
|
||||
const ss = String(d.getSeconds()).padStart(2, '0');
|
||||
return `${yyyy}${mm}${dd}-${hh}${mi}${ss}`;
|
||||
}
|
||||
|
||||
function ensureDumpDir(): void {
|
||||
mkdirSync(DUMP_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
function latestDump(): string | null {
|
||||
ensureDumpDir();
|
||||
const entries = readdirSync(DUMP_DIR)
|
||||
.filter((f) => f.endsWith('.json'))
|
||||
.map((f) => ({ f, mtime: statSync(path.join(DUMP_DIR, f)).mtimeMs }))
|
||||
.sort((a, b) => b.mtime - a.mtime);
|
||||
return entries[0] ? path.join(DUMP_DIR, entries[0].f) : null;
|
||||
}
|
||||
|
||||
// ── Commands ────────────────────────────────────────────────────────
|
||||
|
||||
async function cmdInstall(flags: Flags): Promise<void> {
|
||||
const body = await bundle('probe-events.ts');
|
||||
const installMsg = JSON.stringify(
|
||||
'events probe installed: WebSocket+fetch interception. ' +
|
||||
'WS captures operationId= sockets (gateway), fetch captures /api/agent/stream (direct).',
|
||||
);
|
||||
const script = wrapIife(body, installMsg);
|
||||
const out = await runAgentBrowserEval(flags, script);
|
||||
console.log(unquoteAgentBrowserResult(out));
|
||||
}
|
||||
|
||||
async function cmdDump(flags: Flags): Promise<void> {
|
||||
const name = flags.positional[1] ?? 'dump';
|
||||
const body = await bundle('probe-dump.ts');
|
||||
const script = wrapIife(body, 'window.__PROBE_LAST_DUMP_JSON');
|
||||
const raw = await runAgentBrowserEval(flags, script);
|
||||
const json = unquoteAgentBrowserResult(raw);
|
||||
ensureDumpDir();
|
||||
const filename = `${name}-${isoStamp()}.json`;
|
||||
const dumpPath = path.join(DUMP_DIR, filename);
|
||||
writeFileSync(dumpPath, json, 'utf8');
|
||||
// Validate by parsing the meta header so we error early on bad capture
|
||||
try {
|
||||
const parsed = JSON.parse(json) as {
|
||||
meta?: { eventCount?: number; callCount?: number; sampleCount?: number };
|
||||
};
|
||||
const meta = parsed.meta ?? {};
|
||||
console.log(
|
||||
`wrote ${dumpPath} (${json.length} bytes events=${meta.eventCount ?? '?'} ` +
|
||||
`calls=${meta.callCount ?? '?'} samples=${meta.sampleCount ?? '?'})`,
|
||||
);
|
||||
} catch {
|
||||
console.log(`wrote ${dumpPath} (${json.length} bytes — JSON.parse failed; see file)`);
|
||||
}
|
||||
}
|
||||
|
||||
async function cmdAnalyze(flags: Flags): Promise<void> {
|
||||
const target = flags.positional[1] ?? latestDump();
|
||||
if (!target) {
|
||||
console.error('no dump file found. run `dump` first or pass a path.');
|
||||
process.exit(1);
|
||||
}
|
||||
const child = spawn('bun', ['run', path.join(SCRIPT_DIR, 'analyze-events.ts'), target], {
|
||||
stdio: 'inherit',
|
||||
});
|
||||
await new Promise<void>((resolveP, rejectP) => {
|
||||
child.on('error', rejectP);
|
||||
child.on('close', (code) => (code === 0 ? resolveP() : rejectP(new Error(`exit ${code}`))));
|
||||
});
|
||||
}
|
||||
|
||||
// ── Entry point ─────────────────────────────────────────────────────
|
||||
|
||||
const flags = parseFlags(process.argv.slice(2));
|
||||
const cmd = flags.positional[0];
|
||||
|
||||
const usage = `usage:
|
||||
bun run run.ts install [--cdp 9222]
|
||||
bun run run.ts dump [name] [--cdp 9222]
|
||||
bun run run.ts analyze [path]
|
||||
`;
|
||||
|
||||
if (!cmd) {
|
||||
console.error(usage);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
try {
|
||||
if (cmd === 'install') await cmdInstall(flags);
|
||||
else if (cmd === 'dump') await cmdDump(flags);
|
||||
else if (cmd === 'analyze') await cmdAnalyze(flags);
|
||||
else {
|
||||
console.error(`unknown command: ${cmd}\n\n${usage}`);
|
||||
process.exit(1);
|
||||
}
|
||||
} catch (e: any) {
|
||||
console.error(e?.stack ?? e);
|
||||
process.exit(1);
|
||||
}
|
||||
@@ -1,72 +0,0 @@
|
||||
// Run N round-trip tab switches with event markers timed against the probe.
|
||||
//
|
||||
// agent-browser --cdp 9222 eval --stdin < tab-switch.js
|
||||
//
|
||||
// Captures the currently-active tab as the BACK target and the rightmost
|
||||
// inactive tab as the AWAY target. Both are addressed by their stable
|
||||
// data-contextmenu-trigger key (NOT by visible title — the active tab's
|
||||
// innerText embeds a ` · <agent name>` suffix that breaks text matching).
|
||||
//
|
||||
// Fires the loop in the background and returns immediately so the
|
||||
// agent-browser eval doesn't have to await the full ROUND_TRIPS × DWELL_MS
|
||||
// duration. Wait on the `SWITCH_LOOP_DONE` event before dumping.
|
||||
//
|
||||
// Refuses to launch if a previous loop is still in flight.
|
||||
//
|
||||
// Requires probe.js to have been installed first (provides
|
||||
// window.__PROBE_EVENT / __listTabs / __clickTabByKey / __activeTabKey).
|
||||
|
||||
(function () {
|
||||
const ROUND_TRIPS = 4;
|
||||
const DWELL_MS = 10_000;
|
||||
|
||||
if (!window.__PROBE_EVENT || !window.__listTabs || !window.__clickTabByKey) {
|
||||
return 'probe not installed — eval probe.js first';
|
||||
}
|
||||
if (window.__SWITCH_LOOP_RUNNING) {
|
||||
return 'switch loop already running — wait for SWITCH_LOOP_DONE first';
|
||||
}
|
||||
|
||||
const tabs = window.__listTabs();
|
||||
const activeTab = tabs.find((t) => t.active);
|
||||
if (!activeTab) return 'no active tab — abort';
|
||||
|
||||
// Pick the first inactive tab as AWAY target. With multiple inactive tabs
|
||||
// you'll usually want the one that's stable across the test — feel free
|
||||
// to swap to tabs[tabs.length-1] if you want the rightmost.
|
||||
const inactives = tabs.filter((t) => !t.active);
|
||||
if (inactives.length === 0) return 'no inactive tab to switch to — abort';
|
||||
const awayTab = inactives.at(-1); // rightmost inactive
|
||||
|
||||
const BACK_KEY = activeTab.key;
|
||||
const AWAY_KEY = awayTab.key;
|
||||
|
||||
window.__SWITCH_LOOP_RUNNING = true;
|
||||
window.__PROBE_EVENT('SWITCH_LOOP_CONFIG:back=' + BACK_KEY + ',away=' + AWAY_KEY);
|
||||
|
||||
(async function () {
|
||||
function sleep(ms) {
|
||||
return new Promise((r) => setTimeout(r, ms));
|
||||
}
|
||||
|
||||
try {
|
||||
window.__PROBE_EVENT('SWITCH_LOOP_START');
|
||||
for (let i = 1; i <= ROUND_TRIPS; i++) {
|
||||
window.__PROBE_EVENT('AWAY_' + i);
|
||||
const awayResult = window.__clickTabByKey(AWAY_KEY);
|
||||
window.__PROBE_EVENT('AWAY_' + i + '_RES:' + awayResult.slice(0, 50));
|
||||
await sleep(DWELL_MS);
|
||||
|
||||
window.__PROBE_EVENT('BACK_' + i);
|
||||
const backResult = window.__clickTabByKey(BACK_KEY);
|
||||
window.__PROBE_EVENT('BACK_' + i + '_RES:' + backResult.slice(0, 50));
|
||||
await sleep(DWELL_MS);
|
||||
}
|
||||
window.__PROBE_EVENT('SWITCH_LOOP_DONE');
|
||||
} finally {
|
||||
window.__SWITCH_LOOP_RUNNING = false;
|
||||
}
|
||||
})();
|
||||
|
||||
return 'switch loop kicked off (BACK=' + BACK_KEY + ', AWAY=' + AWAY_KEY + ')';
|
||||
})();
|
||||
@@ -1,113 +0,0 @@
|
||||
// Shared types between the in-browser probe and the Node-side analyzer.
|
||||
// Kept tiny on purpose — anything the analyzer can re-derive is left off.
|
||||
|
||||
export interface ProbeStreamEvent {
|
||||
/** Summarized payload — long strings truncated, arrays printed as Array(N) */
|
||||
data?: Record<string, unknown>;
|
||||
/** Keys present on the event's `data` payload — useful at a glance */
|
||||
dataKeys?: string[];
|
||||
/** ServerMessage.id — gateway WS frames carry an event-id we may resume from */
|
||||
eventId?: string | null;
|
||||
message?: string;
|
||||
/** Last 10 chars of the operationId (full id is excessively long) */
|
||||
opIdTail: string;
|
||||
raw?: string;
|
||||
/** Raw frame byte length, when applicable */
|
||||
rawLen?: number;
|
||||
/** For non-agent_event server frames (auth_success, heartbeat_ack, …) */
|
||||
serverType?: string;
|
||||
sseEvent?: string;
|
||||
status?: number;
|
||||
stepIndex?: number;
|
||||
/** Milliseconds since the probe's t0 (install time). */
|
||||
t: number;
|
||||
/** 'ws' for gateway WebSocket frames, 'sse' for direct /api/agent/stream */
|
||||
transport: 'ws' | 'sse';
|
||||
/** Either the AgentStreamEvent.type, or a probe sentinel like `_WS_OPEN_` */
|
||||
type: string;
|
||||
url?: string;
|
||||
}
|
||||
|
||||
export interface ProbeActionCall {
|
||||
args?: {
|
||||
count?: number;
|
||||
context?: unknown;
|
||||
params?: unknown;
|
||||
};
|
||||
error?: string;
|
||||
/** `replaceMessages` / `refreshMessages` / `MARK:<label>` / `_WRAP_ERROR_` */
|
||||
name: string;
|
||||
stack?: string;
|
||||
t: number;
|
||||
}
|
||||
|
||||
export interface ProbeMessageSummary {
|
||||
/** children.length */
|
||||
chN: number;
|
||||
/** content.length */
|
||||
cLen: number;
|
||||
/** Last 8 chars of the message id */
|
||||
id: string;
|
||||
/** reasoning.content.length */
|
||||
rLen: number;
|
||||
role: string;
|
||||
/** tools.length */
|
||||
tools: number;
|
||||
}
|
||||
|
||||
export interface ProbeTimelineSample {
|
||||
/** Last 10 chars of activeTopicId, or null */
|
||||
activeTopic: string | null;
|
||||
/** Per-key breakdown: display count, db count, message summaries */
|
||||
byKey: Record<
|
||||
string,
|
||||
{
|
||||
n: number;
|
||||
dbN: number;
|
||||
msgs: ProbeMessageSummary[];
|
||||
}
|
||||
>;
|
||||
err?: string;
|
||||
/** All messagesMap keys that have content at this moment */
|
||||
keys: string[];
|
||||
/** Number of operations in 'running' status */
|
||||
runOps: number;
|
||||
t: number;
|
||||
}
|
||||
|
||||
export interface ProbeDumpMeta {
|
||||
callCount: number;
|
||||
/** Date.now() at dump call */
|
||||
collectedAt: number;
|
||||
eventCount: number;
|
||||
sampleCount: number;
|
||||
/** Date.now() at probe install */
|
||||
t0: number;
|
||||
}
|
||||
|
||||
export interface ProbeDump {
|
||||
actionCalls: ProbeActionCall[];
|
||||
meta: ProbeDumpMeta;
|
||||
streamEvents: ProbeStreamEvent[];
|
||||
timeline: ProbeTimelineSample[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Globals the probe attaches to `window`. Keeps `as any` casts at the boundary
|
||||
* instead of sprinkling them through the probe body.
|
||||
*/
|
||||
declare global {
|
||||
interface Window {
|
||||
__clickTabByKey?: (key: string) => string;
|
||||
__listTabs?: () => Array<{ i: number; key: string | null; active: boolean; title: string }>;
|
||||
__LOBE_STORES?: Record<string, () => any>;
|
||||
__PROBE_ACTION_CALLS?: ProbeActionCall[];
|
||||
__PROBE_EVENT?: (label: string) => void;
|
||||
__PROBE_MSG_TIMELINE?: ProbeTimelineSample[];
|
||||
__PROBE_ORIG_FETCH?: typeof fetch;
|
||||
__PROBE_ORIG_WEBSOCKET?: typeof WebSocket;
|
||||
__PROBE_STREAM_EVENTS?: ProbeStreamEvent[];
|
||||
__PROBE_T0?: number;
|
||||
__PROBE_TIMELINE_TIMER?: ReturnType<typeof setInterval> | null;
|
||||
}
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# app-probe.sh — standardized probes for a running LobeHub app (Electron via
|
||||
# CDP, or a web agent-browser session). Use these instead of hand-rolling
|
||||
# `window.__LOBE_STORES` eval snippets — especially the auth check.
|
||||
#
|
||||
# Usage:
|
||||
# app-probe.sh auth # { isSignedIn, userId } from the user store
|
||||
# app-probe.sh route # current SPA route
|
||||
# app-probe.sh ops # running chat operations (type / status / startTime)
|
||||
# app-probe.sh goto <path> # navigate the SPA to a route (full reload), e.g. goto /agent/agt_xxx
|
||||
# app-probe.sh errors-install # install a console.error interceptor
|
||||
# app-probe.sh errors # dump errors captured since errors-install
|
||||
#
|
||||
# Target selection (default: Electron over CDP 9222):
|
||||
# AB_TARGET="--cdp 9222" # Electron (default; CDP_PORT also honored)
|
||||
# AB_TARGET="--session lobehub-dev" # web agent-browser session
|
||||
#
|
||||
# Common routes (desktop SPA): / /agent/<agentId> /agent/<agentId>/<topicId>
|
||||
# /task /task/<taskId> /page /settings /community
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
AB_TARGET="${AB_TARGET:---cdp ${CDP_PORT:-9222}}"
|
||||
|
||||
run_eval() {
|
||||
# shellcheck disable=SC2086
|
||||
agent-browser $AB_TARGET eval --stdin
|
||||
}
|
||||
|
||||
case "${1:-}" in
|
||||
auth)
|
||||
run_eval << 'EVALEOF'
|
||||
(function () {
|
||||
var stores = window.__LOBE_STORES;
|
||||
if (!stores || !stores.user) return JSON.stringify({ ok: false, reason: 'no user store — app not loaded yet?' });
|
||||
var u = stores.user();
|
||||
return JSON.stringify({ ok: !!u.isSignedIn, isSignedIn: !!u.isSignedIn, userId: (u.user && u.user.id) || null });
|
||||
})()
|
||||
EVALEOF
|
||||
;;
|
||||
route)
|
||||
run_eval << 'EVALEOF'
|
||||
location.pathname + location.search + location.hash
|
||||
EVALEOF
|
||||
;;
|
||||
ops)
|
||||
run_eval << 'EVALEOF'
|
||||
(function () {
|
||||
var stores = window.__LOBE_STORES;
|
||||
if (!stores || !stores.chat) return JSON.stringify({ ok: false, reason: 'no chat store — open a conversation first' });
|
||||
var ops = Object.values(stores.chat().operations || {});
|
||||
var running = ops.filter(function (o) { return o.status === 'running'; });
|
||||
return JSON.stringify({
|
||||
ok: true,
|
||||
running: running.map(function (o) { return { startTime: o.metadata && o.metadata.startTime, type: o.type }; }),
|
||||
runningCount: running.length,
|
||||
total: ops.length,
|
||||
});
|
||||
})()
|
||||
EVALEOF
|
||||
;;
|
||||
goto)
|
||||
TARGET_PATH="${2:?Usage: app-probe.sh goto <path>}"
|
||||
# shellcheck disable=SC2086
|
||||
agent-browser $AB_TARGET eval "location.href = '$TARGET_PATH'" > /dev/null
|
||||
sleep 2
|
||||
bash "${BASH_SOURCE[0]}" route
|
||||
;;
|
||||
errors-install)
|
||||
run_eval << 'EVALEOF'
|
||||
(function () {
|
||||
window.__CAPTURED_ERRORS = [];
|
||||
var orig = console.error;
|
||||
console.error = function () {
|
||||
var msg = Array.from(arguments).map(function (a) {
|
||||
if (a instanceof Error) return a.message;
|
||||
return typeof a === 'object' ? JSON.stringify(a) : String(a);
|
||||
}).join(' ');
|
||||
window.__CAPTURED_ERRORS.push(msg);
|
||||
orig.apply(console, arguments);
|
||||
};
|
||||
return 'installed';
|
||||
})()
|
||||
EVALEOF
|
||||
;;
|
||||
errors)
|
||||
run_eval << 'EVALEOF'
|
||||
JSON.stringify(window.__CAPTURED_ERRORS || 'interceptor not installed — run errors-install first')
|
||||
EVALEOF
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $0 {auth|route|ops|goto <path>|errors-install|errors}" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,54 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# capture-app-window.sh — Capture a screenshot of a specific app window
|
||||
#
|
||||
# Uses CGWindowList via Swift to find the window by process name, then
|
||||
# screencapture -l <windowID> to capture only that window.
|
||||
# Falls back to full-screen capture if the window is not found.
|
||||
#
|
||||
# Usage:
|
||||
# ./capture-app-window.sh <process_name> <output_path>
|
||||
#
|
||||
# Arguments:
|
||||
# process_name — The process/owner name as shown in Activity Monitor
|
||||
# (e.g., "Discord", "Slack", "Telegram", "WeChat", "QQ", "Lark")
|
||||
# output_path — Path to save the screenshot (e.g., /tmp/screenshot.png)
|
||||
#
|
||||
# Examples:
|
||||
# ./capture-app-window.sh "Discord" /tmp/discord.png
|
||||
# ./capture-app-window.sh "Slack" /tmp/slack.png
|
||||
# ./capture-app-window.sh "微信" /tmp/wechat.png
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
PROCESS="${1:?Usage: capture-app-window.sh <process_name> <output_path>}"
|
||||
OUTPUT="${2:?Usage: capture-app-window.sh <process_name> <output_path>}"
|
||||
|
||||
# Find the CGWindowID for the target process using Swift + CGWindowList
|
||||
# Pass process name via environment variable (swift -e doesn't support -- args)
|
||||
WINDOW_ID=$(TARGET_PROCESS="$PROCESS" swift -e '
|
||||
import Cocoa
|
||||
import Foundation
|
||||
let target = ProcessInfo.processInfo.environment["TARGET_PROCESS"] ?? ""
|
||||
let windowList = CGWindowListCopyWindowInfo([.optionAll], kCGNullWindowID) as! [[String: Any]]
|
||||
for w in windowList {
|
||||
let owner = w["kCGWindowOwnerName"] as? String ?? ""
|
||||
let layer = w["kCGWindowLayer"] as? Int ?? -1
|
||||
let bounds = w["kCGWindowBounds"] as? [String: Any] ?? [:]
|
||||
let ww = bounds["Width"] as? Double ?? 0
|
||||
let wh = bounds["Height"] as? Double ?? 0
|
||||
let wid = w["kCGWindowNumber"] as? Int ?? 0
|
||||
// Match process name, normal window layer (0), and reasonable size
|
||||
if owner == target && layer == 0 && ww > 200 && wh > 200 {
|
||||
print(wid)
|
||||
break
|
||||
}
|
||||
}
|
||||
' 2>/dev/null || true)
|
||||
|
||||
if [ -n "$WINDOW_ID" ]; then
|
||||
screencapture -l "$WINDOW_ID" -x "$OUTPUT"
|
||||
else
|
||||
echo "[capture] Warning: Could not find window for '$PROCESS', falling back to full screen"
|
||||
screencapture -x "$OUTPUT"
|
||||
fi
|
||||
@@ -1,327 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# electron-dev.sh — Manage Electron dev environment for testing
|
||||
#
|
||||
# Usage:
|
||||
# ./electron-dev.sh start # Kill existing, start fresh, wait until ready
|
||||
# ./electron-dev.sh stop # Kill all Electron-related processes
|
||||
# ./electron-dev.sh status # Check if Electron is running and CDP is reachable
|
||||
# ./electron-dev.sh restart # Stop then start
|
||||
#
|
||||
# Environment variables:
|
||||
# CDP_PORT — Chrome DevTools Protocol port (default: 9222)
|
||||
# ELECTRON_LOG — Log file path (default: /tmp/electron-dev.log)
|
||||
# ELECTRON_WAIT_S — Max seconds to wait for CDP to become reachable (default: 90)
|
||||
# RENDERER_WAIT_S — Max seconds to wait for SPA after CDP is up (default: 60)
|
||||
# FORCE_KILL_USER — When set to 1, silently kill the user's `bun run dev`
|
||||
# Electron without confirmation (default: always confirm-by-action)
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
CDP_PORT="${CDP_PORT:-9222}"
|
||||
ELECTRON_LOG="${ELECTRON_LOG:-/tmp/electron-dev.log}"
|
||||
ELECTRON_WAIT_S="${ELECTRON_WAIT_S:-90}"
|
||||
RENDERER_WAIT_S="${RENDERER_WAIT_S:-60}"
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
|
||||
PIDFILE="/tmp/electron-dev-cdp-${CDP_PORT}.pid"
|
||||
|
||||
# Project-scoped electron path prefix used for pgrep matching. Any Electron
|
||||
# binary from this project (main + helpers, with or without --remote-debugging-port)
|
||||
# starts with this string in its argv[0], so a single substring match catches all.
|
||||
PROJECT_ELECTRON_PATH="${PROJECT_ROOT}/apps/desktop/node_modules/.pnpm/electron@"
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
# Print pid + every descendant pid (DFS via pgrep -P).
|
||||
expand_descendants() {
|
||||
local pid="$1"
|
||||
echo "$pid"
|
||||
local children
|
||||
children=$(pgrep -P "$pid" 2>/dev/null || true)
|
||||
for c in $children; do
|
||||
expand_descendants "$c"
|
||||
done
|
||||
}
|
||||
|
||||
# Find seed PIDs related to this project's Electron dev session.
|
||||
# Matches REGARDLESS of whether --remote-debugging-port was passed, so it also
|
||||
# catches a plain `bun run dev` session the user started outside this script.
|
||||
find_project_pids() {
|
||||
local pids=""
|
||||
|
||||
# 1. Any process whose command line mentions this project's electron path
|
||||
# (covers the main Electron binary AND every Helper subprocess)
|
||||
local electron_pids
|
||||
electron_pids=$(pgrep -f "$PROJECT_ELECTRON_PATH" 2>/dev/null || true)
|
||||
pids="$pids $electron_pids"
|
||||
|
||||
# 2. electron-vite dev server (narrow match to avoid catching unrelated Vite invocations)
|
||||
local vite_pids
|
||||
vite_pids=$(pgrep -f "electron-vite[/.].*\\bdev\\b" 2>/dev/null || true)
|
||||
pids="$pids $vite_pids"
|
||||
|
||||
# 3. The launcher subshell from a previous `start` (saved to pidfile)
|
||||
if [ -f "$PIDFILE" ]; then
|
||||
local saved_pid
|
||||
saved_pid=$(cat "$PIDFILE" 2>/dev/null || true)
|
||||
if [ -n "$saved_pid" ] && kill -0 "$saved_pid" 2>/dev/null; then
|
||||
pids="$pids $saved_pid"
|
||||
fi
|
||||
fi
|
||||
|
||||
# 4. Whatever is currently bound to the CDP port — catches strays whose
|
||||
# binary path doesn't match (e.g. orphaned from a crashed restart)
|
||||
local port_pid
|
||||
port_pid=$(lsof -ti tcp:"$CDP_PORT" -sTCP:LISTEN 2>/dev/null || true)
|
||||
pids="$pids $port_pid"
|
||||
|
||||
# `|| true` because `grep -v '^$'` exits 1 when input has no non-empty
|
||||
# lines, which (with pipefail + set -e) silently kills the caller.
|
||||
echo "$pids" | tr ' ' '\n' | sort -u | grep -v '^$' | tr '\n' ' ' || true
|
||||
}
|
||||
|
||||
# Wait for the CDP HTTP endpoint to respond, with a deadline + early bail-out
|
||||
# if the launcher process died (no point waiting if Electron crashed).
|
||||
wait_for_cdp() {
|
||||
local deadline=$(( $(date +%s) + ELECTRON_WAIT_S ))
|
||||
echo "[electron-dev] Waiting for CDP on port ${CDP_PORT} (up to ${ELECTRON_WAIT_S}s)..."
|
||||
|
||||
while [ "$(date +%s)" -lt "$deadline" ]; do
|
||||
if curl -sf --max-time 2 "http://localhost:${CDP_PORT}/json/version" >/dev/null 2>&1; then
|
||||
echo "[electron-dev] CDP is reachable."
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If our launcher subshell died, abort early so we don't hang the full timeout
|
||||
if [ -f "$PIDFILE" ]; then
|
||||
local saved_pid
|
||||
saved_pid=$(cat "$PIDFILE" 2>/dev/null || true)
|
||||
if [ -n "$saved_pid" ] && ! kill -0 "$saved_pid" 2>/dev/null; then
|
||||
echo "[electron-dev] Launcher PID $saved_pid is gone before CDP came up."
|
||||
echo "[electron-dev] Last 30 lines of $ELECTRON_LOG:"
|
||||
tail -30 "$ELECTRON_LOG" 2>/dev/null || true
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
sleep 2
|
||||
done
|
||||
|
||||
echo "[electron-dev] ERROR: CDP did not respond within ${ELECTRON_WAIT_S}s"
|
||||
echo "[electron-dev] Last 30 lines of $ELECTRON_LOG:"
|
||||
tail -30 "$ELECTRON_LOG" 2>/dev/null || true
|
||||
return 1
|
||||
}
|
||||
|
||||
# After CDP is up, wait until the SPA renders interactive elements.
|
||||
wait_for_renderer() {
|
||||
local deadline=$(( $(date +%s) + RENDERER_WAIT_S ))
|
||||
echo "[electron-dev] Waiting for SPA to load (up to ${RENDERER_WAIT_S}s)..."
|
||||
|
||||
while [ "$(date +%s)" -lt "$deadline" ]; do
|
||||
local snap
|
||||
snap=$(agent-browser --cdp "$CDP_PORT" snapshot -i 2>&1 || true)
|
||||
if echo "$snap" | grep -qE '\b(link|button)\b'; then
|
||||
echo "[electron-dev] Renderer ready."
|
||||
return 0
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
|
||||
echo "[electron-dev] WARNING: Renderer not interactive within ${RENDERER_WAIT_S}s — proceeding anyway."
|
||||
return 0
|
||||
}
|
||||
|
||||
# ── Commands ─────────────────────────────────────────────────────────
|
||||
|
||||
do_stop() {
|
||||
echo "[electron-dev] Stopping Electron dev environment..."
|
||||
|
||||
local seed_pids
|
||||
seed_pids=$(find_project_pids)
|
||||
|
||||
# Expand to include all descendants — catches helpers spawned by the main
|
||||
# process AFTER our pgrep snapshot, and the launcher's child node/electron-vite
|
||||
# process tree.
|
||||
local all_pids=""
|
||||
for pid in $seed_pids; do
|
||||
all_pids="$all_pids $(expand_descendants "$pid")"
|
||||
done
|
||||
all_pids=$(echo "$all_pids" | tr ' ' '\n' | sort -u | grep -v '^$' | tr '\n' ' ' || true)
|
||||
|
||||
if [ -z "$all_pids" ]; then
|
||||
echo "[electron-dev] No project Electron/vite processes found."
|
||||
else
|
||||
local count
|
||||
count=$(echo "$all_pids" | tr ' ' '\n' | grep -c .)
|
||||
echo "[electron-dev] Sending SIGTERM to $count process(es): $all_pids"
|
||||
for pid in $all_pids; do
|
||||
kill "$pid" 2>/dev/null || true
|
||||
done
|
||||
|
||||
# Wait up to 5s for graceful exit
|
||||
local waited=0
|
||||
while [ $waited -lt 5 ]; do
|
||||
local any_alive=0
|
||||
for pid in $all_pids; do
|
||||
if kill -0 "$pid" 2>/dev/null; then any_alive=1; break; fi
|
||||
done
|
||||
[ "$any_alive" = "0" ] && break
|
||||
sleep 1
|
||||
waited=$((waited + 1))
|
||||
done
|
||||
|
||||
# SIGKILL anyone still alive
|
||||
for pid in $all_pids; do
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
echo "[electron-dev] Force-killing PID $pid"
|
||||
kill -9 "$pid" 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Belt-and-suspenders: anything still bound to the CDP port goes away
|
||||
local port_pid
|
||||
port_pid=$(lsof -ti tcp:"$CDP_PORT" -sTCP:LISTEN 2>/dev/null || true)
|
||||
if [ -n "$port_pid" ]; then
|
||||
echo "[electron-dev] Port $CDP_PORT still bound by PID $port_pid; force-killing"
|
||||
# shellcheck disable=SC2086
|
||||
kill -9 $port_pid 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Also re-sweep the project's electron processes — sometimes the OS spawns
|
||||
# new helpers during shutdown that didn't exist when we first enumerated.
|
||||
local stragglers
|
||||
stragglers=$(pgrep -f "$PROJECT_ELECTRON_PATH" 2>/dev/null || true)
|
||||
if [ -n "$stragglers" ]; then
|
||||
echo "[electron-dev] Cleaning up stragglers: $stragglers"
|
||||
for pid in $stragglers; do
|
||||
kill -9 "$pid" 2>/dev/null || true
|
||||
done
|
||||
fi
|
||||
|
||||
# Close any agent-browser sessions connected to this port
|
||||
agent-browser --cdp "$CDP_PORT" close --all 2>/dev/null || true
|
||||
|
||||
rm -f "$PIDFILE"
|
||||
echo "[electron-dev] Stopped."
|
||||
}
|
||||
|
||||
do_status() {
|
||||
local pids
|
||||
pids=$(find_project_pids)
|
||||
|
||||
if [ -z "$pids" ]; then
|
||||
echo "[electron-dev] No project Electron processes found."
|
||||
return 1
|
||||
fi
|
||||
|
||||
echo "[electron-dev] Project processes: $pids"
|
||||
|
||||
if curl -sf --max-time 2 "http://localhost:${CDP_PORT}/json/version" >/dev/null 2>&1; then
|
||||
local url
|
||||
url=$(agent-browser --cdp "$CDP_PORT" get url 2>&1 | tail -1 || echo "?")
|
||||
echo "[electron-dev] CDP port ${CDP_PORT} is reachable. URL: $url"
|
||||
return 0
|
||||
else
|
||||
echo "[electron-dev] CDP port ${CDP_PORT} is NOT reachable (no --remote-debugging-port, or still loading)."
|
||||
return 2
|
||||
fi
|
||||
}
|
||||
|
||||
do_start() {
|
||||
# Already up and CDP is reachable → nothing to do
|
||||
if curl -sf --max-time 2 "http://localhost:${CDP_PORT}/json/version" >/dev/null 2>&1; then
|
||||
echo "[electron-dev] CDP already reachable on port $CDP_PORT. Skipping start."
|
||||
echo "[electron-dev] Use 'restart' to force a fresh session."
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Detect the user's existing dev session (or stale processes) BEFORE killing
|
||||
local existing
|
||||
existing=$(find_project_pids)
|
||||
if [ -n "$existing" ]; then
|
||||
echo "[electron-dev] Existing project Electron/vite processes detected:"
|
||||
echo "$existing" | tr ' ' '\n' | sed 's/^/[electron-dev] PID /'
|
||||
echo "[electron-dev] Tearing them down so we can start a CDP-enabled session..."
|
||||
fi
|
||||
|
||||
do_stop
|
||||
|
||||
# Wait for port + user-data-dir locks to release. Without this, the new
|
||||
# Electron may fail with "user data directory in use" or fail to bind CDP.
|
||||
local waited=0
|
||||
while [ $waited -lt 10 ]; do
|
||||
if ! lsof -i tcp:"$CDP_PORT" >/dev/null 2>&1 \
|
||||
&& ! pgrep -f "$PROJECT_ELECTRON_PATH" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
[ $waited -eq 0 ] && echo "[electron-dev] Waiting for port + Electron locks to release..."
|
||||
sleep 1
|
||||
waited=$((waited + 1))
|
||||
done
|
||||
|
||||
echo "[electron-dev] Starting Electron dev server..."
|
||||
echo "[electron-dev] Project: $PROJECT_ROOT"
|
||||
echo "[electron-dev] CDP port: $CDP_PORT"
|
||||
echo "[electron-dev] Log: $ELECTRON_LOG"
|
||||
|
||||
: > "$ELECTRON_LOG" # Truncate log
|
||||
|
||||
# Launch in a new session (setsid) so the whole process tree shares a PGID
|
||||
# we can later signal in one shot. `setsid bash -c '... exec ...' &` keeps
|
||||
# the bash shell as the session leader; its PID is what we save.
|
||||
# macOS doesn't ship setsid by default — fall back to plain bash; cleanup
|
||||
# still works via `expand_descendants` walking the process tree.
|
||||
local launch_cmd="
|
||||
cd '$PROJECT_ROOT/apps/desktop'
|
||||
exec npx electron-vite dev -- --remote-debugging-port=$CDP_PORT
|
||||
"
|
||||
if command -v setsid >/dev/null 2>&1; then
|
||||
setsid bash -c "$launch_cmd" >> "$ELECTRON_LOG" 2>&1 < /dev/null &
|
||||
else
|
||||
bash -c "$launch_cmd" >> "$ELECTRON_LOG" 2>&1 < /dev/null &
|
||||
fi
|
||||
local launcher_pid=$!
|
||||
echo "$launcher_pid" > "$PIDFILE"
|
||||
echo "[electron-dev] Launcher PID (session leader): $launcher_pid"
|
||||
|
||||
if ! wait_for_cdp; then
|
||||
echo "[electron-dev] Failed to bring up CDP. Cleaning up..."
|
||||
do_stop
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! wait_for_renderer; then
|
||||
echo "[electron-dev] Renderer not interactive — you may need to wait more."
|
||||
fi
|
||||
|
||||
echo "[electron-dev] Ready! Use: agent-browser --cdp $CDP_PORT snapshot -i"
|
||||
}
|
||||
|
||||
do_restart() {
|
||||
do_stop
|
||||
sleep 1
|
||||
do_start
|
||||
}
|
||||
|
||||
# ── Main ─────────────────────────────────────────────────────────────
|
||||
|
||||
case "${1:-help}" in
|
||||
start) do_start ;;
|
||||
stop) do_stop ;;
|
||||
status) do_status ;;
|
||||
restart) do_restart ;;
|
||||
*)
|
||||
echo "Usage: $0 {start|stop|status|restart}"
|
||||
echo ""
|
||||
echo " start — Start Electron dev with CDP. Detects + tears down any"
|
||||
echo " existing project Electron (e.g. \`bun run dev\`) first."
|
||||
echo " stop — Kill all project Electron/vite processes (main + helpers"
|
||||
echo " + descendants), with SIGTERM → 5s wait → SIGKILL fallback."
|
||||
echo " status — Check if Electron is running and CDP is reachable."
|
||||
echo " restart — Stop then start."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -1,189 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# record-app-screen.sh — Record the Electron app window (video + screenshots)
|
||||
#
|
||||
# Captures screenshots via agent-browser (CDP), then assembles into video on stop.
|
||||
# Works on any screen (including external monitors) since it uses CDP, not screen capture.
|
||||
#
|
||||
# Usage:
|
||||
# ./record-app-screen.sh start [output_name] # Begin recording
|
||||
# ./record-app-screen.sh stop # Stop and save
|
||||
# ./record-app-screen.sh status # Check recording state
|
||||
#
|
||||
# Outputs to .records/ directory:
|
||||
# .records/<name>.mp4 — Video assembled from screenshots (~2 fps)
|
||||
# .records/<name>/ — Screenshots every SCREENSHOT_INTERVAL seconds
|
||||
#
|
||||
# Prerequisites:
|
||||
# - ffmpeg installed (bun add -g ffmpeg-static, or brew install ffmpeg)
|
||||
# - agent-browser CLI installed
|
||||
# - Electron app already running with CDP enabled
|
||||
#
|
||||
# Environment variables:
|
||||
# CDP_PORT — Chrome DevTools Protocol port (default: 9222)
|
||||
# SCREENSHOT_INTERVAL — Seconds between gallery screenshots (default: 3)
|
||||
# VIDEO_FRAME_INTERVAL — Seconds between video frames (default: 0.5)
|
||||
#
|
||||
# Examples:
|
||||
# ./electron-dev.sh start
|
||||
# ./record-app-screen.sh start gateway-demo
|
||||
# # ... run automation via agent-browser ...
|
||||
# ./record-app-screen.sh stop
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_DIR="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
|
||||
|
||||
RECORDS_DIR="$PROJECT_DIR/.records"
|
||||
PID_FILE="/tmp/record-app-screen.pids"
|
||||
STATE_FILE="/tmp/record-app-screen.state"
|
||||
|
||||
CDP_PORT="${CDP_PORT:-9222}"
|
||||
SCREENSHOT_INTERVAL="${SCREENSHOT_INTERVAL:-3}"
|
||||
VIDEO_FRAME_INTERVAL="${VIDEO_FRAME_INTERVAL:-0.5}"
|
||||
|
||||
AB="agent-browser --cdp $CDP_PORT"
|
||||
|
||||
# ─── Commands ───
|
||||
|
||||
cmd_start() {
|
||||
local output_name="${1:-recording-$(date +%Y%m%d-%H%M%S)}"
|
||||
local output_video="$RECORDS_DIR/${output_name}.mp4"
|
||||
local screenshot_dir="$RECORDS_DIR/${output_name}"
|
||||
local frames_dir
|
||||
frames_dir=$(mktemp -d /tmp/record-frames-XXXXXX)
|
||||
|
||||
if [ -f "$PID_FILE" ]; then
|
||||
echo "[record] A recording is already active. Run '$0 stop' first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$RECORDS_DIR" "$screenshot_dir"
|
||||
|
||||
# Video frames loop (~2 fps via agent-browser CDP screenshots)
|
||||
(
|
||||
local idx=0
|
||||
while true; do
|
||||
local fname
|
||||
fname=$(printf "%s/frame_%06d.png" "$frames_dir" "$idx")
|
||||
$AB screenshot "$fname" 2>/dev/null || true
|
||||
idx=$((idx + 1))
|
||||
sleep "$VIDEO_FRAME_INTERVAL"
|
||||
done
|
||||
) &
|
||||
local frames_pid=$!
|
||||
|
||||
# Gallery screenshots loop (every N seconds for human review)
|
||||
(
|
||||
local idx=0
|
||||
while true; do
|
||||
local fname
|
||||
fname=$(printf "%s/%04d.png" "$screenshot_dir" "$idx")
|
||||
$AB screenshot "$fname" 2>/dev/null || true
|
||||
idx=$((idx + 1))
|
||||
sleep "$SCREENSHOT_INTERVAL"
|
||||
done
|
||||
) &
|
||||
local screenshot_pid=$!
|
||||
|
||||
# Save state
|
||||
echo "$frames_pid $screenshot_pid" > "$PID_FILE"
|
||||
echo "$output_video $frames_dir $screenshot_dir" > "$STATE_FILE"
|
||||
|
||||
echo "[record] Started!"
|
||||
echo " Video frames: every ${VIDEO_FRAME_INTERVAL}s (PID $frames_pid)"
|
||||
echo " Screenshots: every ${SCREENSHOT_INTERVAL}s → $screenshot_dir/"
|
||||
echo " Stop with: $0 stop"
|
||||
}
|
||||
|
||||
cmd_stop() {
|
||||
if [ ! -f "$PID_FILE" ] || [ ! -f "$STATE_FILE" ]; then
|
||||
echo "[record] No active recording found."
|
||||
return 0
|
||||
fi
|
||||
|
||||
local frames_pid screenshot_pid
|
||||
read -r frames_pid screenshot_pid < "$PID_FILE"
|
||||
|
||||
local output_video frames_dir screenshot_dir
|
||||
read -r output_video frames_dir screenshot_dir < "$STATE_FILE"
|
||||
|
||||
# Stop both capture loops
|
||||
kill "$frames_pid" 2>/dev/null || true
|
||||
kill "$screenshot_pid" 2>/dev/null || true
|
||||
wait "$frames_pid" 2>/dev/null || true
|
||||
wait "$screenshot_pid" 2>/dev/null || true
|
||||
|
||||
# Assemble frames into video
|
||||
local frame_count
|
||||
frame_count=$(ls -1 "$frames_dir"/frame_*.png 2>/dev/null | wc -l | tr -d ' ')
|
||||
|
||||
if [ "$frame_count" -gt 0 ]; then
|
||||
echo "[record] Assembling $frame_count frames into video..."
|
||||
ffmpeg -y -framerate 2 -i "$frames_dir/frame_%06d.png" \
|
||||
-c:v libx264 -crf 23 -pix_fmt yuv420p -an \
|
||||
"$output_video" > /tmp/ffmpeg-assemble.log 2>&1
|
||||
|
||||
if [ ! -s "$output_video" ]; then
|
||||
echo " [warn] Video assembly failed. Check /tmp/ffmpeg-assemble.log"
|
||||
echo " Frames preserved in: $frames_dir/"
|
||||
fi
|
||||
else
|
||||
echo " [warn] No frames captured."
|
||||
fi
|
||||
|
||||
rm -rf "$frames_dir" 2>/dev/null
|
||||
rm -f "$PID_FILE" "$STATE_FILE"
|
||||
|
||||
local video_size screenshot_count
|
||||
video_size=$(ls -lh "$output_video" 2>/dev/null | awk '{print $5}' || echo "?")
|
||||
screenshot_count=$(ls -1 "$screenshot_dir"/*.png 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
||||
|
||||
echo "[record] Stopped!"
|
||||
echo " Video: $output_video ($video_size)"
|
||||
echo " Screenshots: ${screenshot_count} files in $screenshot_dir/"
|
||||
echo " Play: open $output_video"
|
||||
}
|
||||
|
||||
cmd_status() {
|
||||
if [ ! -f "$PID_FILE" ]; then
|
||||
echo "[record] No active recording."
|
||||
return 0
|
||||
fi
|
||||
|
||||
local frames_pid screenshot_pid
|
||||
read -r frames_pid screenshot_pid < "$PID_FILE"
|
||||
|
||||
local frames_ok="no" screenshot_ok="no"
|
||||
kill -0 "$frames_pid" 2>/dev/null && frames_ok="yes"
|
||||
kill -0 "$screenshot_pid" 2>/dev/null && screenshot_ok="yes"
|
||||
|
||||
if [ -f "$STATE_FILE" ]; then
|
||||
local output_video frames_dir screenshot_dir
|
||||
read -r output_video frames_dir screenshot_dir < "$STATE_FILE"
|
||||
local frame_count ss_count
|
||||
frame_count=$(ls -1 "$frames_dir"/frame_*.png 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
||||
ss_count=$(ls -1 "$screenshot_dir"/*.png 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
||||
echo "[record] Active recording"
|
||||
echo " Frames: $frame_count captured (running: $frames_ok)"
|
||||
echo " Screenshots: $ss_count captured (running: $screenshot_ok)"
|
||||
echo " Output: $output_video"
|
||||
fi
|
||||
}
|
||||
|
||||
# ─── Main ───
|
||||
|
||||
case "${1:-}" in
|
||||
start) shift; cmd_start "$@" ;;
|
||||
stop) cmd_stop ;;
|
||||
status) cmd_status ;;
|
||||
*)
|
||||
echo "Usage: $0 {start [name] | stop | status}"
|
||||
echo ""
|
||||
echo " start [name] Start recording (default: recording-YYYYMMDD-HHMMSS)"
|
||||
echo " stop Stop recording and save outputs"
|
||||
echo " status Check if recording is active"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -1,353 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# record-electron-demo.sh — Record an automated demo of the Electron app
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/record-electron-demo.sh [script.sh] [output.mp4]
|
||||
#
|
||||
# script.sh — A shell script containing agent-browser commands to automate.
|
||||
# It receives the CDP port as $1. Defaults to a built-in queue-edit demo.
|
||||
# output.mp4 — Output file path. Defaults to /tmp/electron-demo.mp4
|
||||
#
|
||||
# Prerequisites:
|
||||
# - agent-browser CLI installed globally
|
||||
# - ffmpeg installed (brew install ffmpeg)
|
||||
# - Electron app NOT already running (script manages lifecycle)
|
||||
#
|
||||
# Examples:
|
||||
# # Run built-in demo
|
||||
# ./scripts/record-electron-demo.sh
|
||||
#
|
||||
# # Run custom automation script
|
||||
# ./scripts/record-electron-demo.sh ./my-demo.sh /tmp/my-demo.mp4
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
CDP_PORT=9222
|
||||
DEMO_SCRIPT="${1:-}"
|
||||
OUTPUT="${2:-/tmp/electron-demo.mp4}"
|
||||
ELECTRON_LOG="/tmp/electron-dev.log"
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
||||
RECORD_PID=""
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
cleanup() {
|
||||
echo "[cleanup] Stopping all processes..."
|
||||
[ -n "$RECORD_PID" ] && kill -INT "$RECORD_PID" 2>/dev/null && sleep 2
|
||||
pkill -f "electron-vite" 2>/dev/null || true
|
||||
pkill -f "Electron" 2>/dev/null || true
|
||||
pkill -f "agent-browser" 2>/dev/null || true
|
||||
echo "[cleanup] Done."
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
wait_for_electron() {
|
||||
echo "[wait] Waiting for Electron to start..."
|
||||
for i in $(seq 1 24); do
|
||||
sleep 5
|
||||
if strings "$ELECTRON_LOG" 2>/dev/null | grep -q "starting electron"; then
|
||||
echo "[wait] Electron process ready."
|
||||
return 0
|
||||
fi
|
||||
echo "[wait] Still waiting... (${i}/24)"
|
||||
done
|
||||
echo "[error] Electron failed to start within 120s"
|
||||
exit 1
|
||||
}
|
||||
|
||||
wait_for_renderer() {
|
||||
echo "[wait] Waiting for renderer to load..."
|
||||
sleep 15
|
||||
agent-browser --cdp "$CDP_PORT" wait 3000
|
||||
|
||||
# Poll until interactive elements appear (SPA may take extra time)
|
||||
for i in $(seq 1 12); do
|
||||
local snap
|
||||
snap=$(agent-browser --cdp "$CDP_PORT" snapshot -i 2>&1)
|
||||
if echo "$snap" | grep -q 'link "'; then
|
||||
echo "[wait] Renderer ready (interactive elements found)."
|
||||
return 0
|
||||
fi
|
||||
echo "[wait] SPA still loading... (${i}/12)"
|
||||
sleep 5
|
||||
done
|
||||
echo "[warn] Timed out waiting for interactive elements, proceeding anyway."
|
||||
}
|
||||
|
||||
get_window_and_screen_info() {
|
||||
# Returns: window_x window_y window_w window_h screen_index
|
||||
# Uses Swift to find the Electron window bounds and which screen it's on
|
||||
swift -e '
|
||||
import Cocoa
|
||||
let windowList = CGWindowListCopyWindowInfo([.optionAll], kCGNullWindowID) as! [[String: Any]]
|
||||
for w in windowList {
|
||||
let owner = w["kCGWindowOwnerName"] as? String ?? ""
|
||||
let name = w["kCGWindowName"] as? String ?? ""
|
||||
let layer = w["kCGWindowLayer"] as? Int ?? -1
|
||||
let bounds = w["kCGWindowBounds"] as? [String: Any] ?? [:]
|
||||
let wx = bounds["X"] as? Double ?? 0
|
||||
let wy = bounds["Y"] as? Double ?? 0
|
||||
let ww = bounds["Width"] as? Double ?? 0
|
||||
let wh = bounds["Height"] as? Double ?? 0
|
||||
if (owner == "Electron" || owner == "LobeHub") && layer == 0 && name == "LobeHub" && ww > 200 && wh > 200 {
|
||||
// Find which screen this window is on
|
||||
let screens = NSScreen.screens
|
||||
var screenIdx = 0
|
||||
let windowCenter = NSPoint(x: wx + ww / 2, y: wy + wh / 2)
|
||||
for (i, screen) in screens.enumerated() {
|
||||
let frame = screen.frame
|
||||
// Convert CG coords (top-left origin) to NSScreen coords (bottom-left origin)
|
||||
let mainHeight = screens[0].frame.height
|
||||
let screenTop = mainHeight - frame.origin.y - frame.height
|
||||
let screenBottom = screenTop + frame.height
|
||||
let screenLeft = frame.origin.x
|
||||
let screenRight = screenLeft + frame.width
|
||||
if windowCenter.x >= screenLeft && windowCenter.x <= screenRight &&
|
||||
windowCenter.y >= screenTop && windowCenter.y <= screenBottom {
|
||||
screenIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
// Compute window position relative to the screen it is on
|
||||
let screen = screens[screenIdx]
|
||||
let mainHeight = screens[0].frame.height
|
||||
let screenTop = mainHeight - screen.frame.origin.y - screen.frame.height
|
||||
let relX = wx - screen.frame.origin.x
|
||||
let relY = wy - screenTop
|
||||
let scale = Int(screen.backingScaleFactor)
|
||||
print("\(Int(relX)) \(Int(relY)) \(Int(ww)) \(Int(wh)) \(screenIdx) \(scale)")
|
||||
break
|
||||
}
|
||||
}
|
||||
'
|
||||
}
|
||||
|
||||
start_recording() {
|
||||
local rel_x=$1 rel_y=$2 w=$3 h=$4 screen_idx=$5 scale=$6
|
||||
|
||||
# ffmpeg avfoundation device index for screens
|
||||
# List devices and find the one matching our screen index
|
||||
local device_idx
|
||||
device_idx=$(ffmpeg -f avfoundation -list_devices true -i "" 2>&1 \
|
||||
| grep "Capture screen ${screen_idx}" \
|
||||
| grep -oE '\[[0-9]+\]' | tr -d '[]' || true)
|
||||
|
||||
if [ -z "$device_idx" ]; then
|
||||
echo "[warn] Could not find capture device for screen $screen_idx, trying default (3)"
|
||||
device_idx=3
|
||||
fi
|
||||
|
||||
# Scale coordinates to native resolution
|
||||
local cx=$((rel_x * scale))
|
||||
local cy=$((rel_y * scale))
|
||||
local cw=$((w * scale))
|
||||
local ch=$((h * scale))
|
||||
|
||||
echo "[record] Window: ${rel_x},${rel_y} ${w}x${h} on screen ${screen_idx} (scale=${scale})"
|
||||
echo "[record] Crop: ${cx},${cy} ${cw}x${ch}, device: ${device_idx}"
|
||||
echo "[record] Output: $OUTPUT"
|
||||
|
||||
ffmpeg -y \
|
||||
-f avfoundation -framerate 30 -capture_cursor 1 -i "${device_idx}:" \
|
||||
-vf "crop=${cw}:${ch}:${cx}:${cy},scale=${w}:${h}" \
|
||||
-c:v libx264 -crf 23 -preset fast -an \
|
||||
"$OUTPUT" \
|
||||
> /tmp/ffmpeg-record.log 2>&1 &
|
||||
RECORD_PID=$!
|
||||
sleep 2
|
||||
|
||||
if ! kill -0 "$RECORD_PID" 2>/dev/null; then
|
||||
echo "[error] ffmpeg failed to start. Log:"
|
||||
cat /tmp/ffmpeg-record.log
|
||||
RECORD_PID=""
|
||||
return 1
|
||||
fi
|
||||
echo "[record] Recording started (PID=$RECORD_PID)"
|
||||
}
|
||||
|
||||
stop_recording() {
|
||||
if [ -n "$RECORD_PID" ]; then
|
||||
echo "[record] Stopping recording..."
|
||||
kill -INT "$RECORD_PID" 2>/dev/null || true
|
||||
wait "$RECORD_PID" 2>/dev/null || true
|
||||
RECORD_PID=""
|
||||
echo "[record] Saved to $OUTPUT"
|
||||
ls -lh "$OUTPUT"
|
||||
fi
|
||||
}
|
||||
|
||||
# ── Built-in demo: Queue Edit ────────────────────────────────────────
|
||||
|
||||
find_input_ref() {
|
||||
local port=$1
|
||||
agent-browser --cdp "$port" snapshot -i -C 2>&1 \
|
||||
| grep "editable" \
|
||||
| grep -oE 'ref=e[0-9]+' \
|
||||
| head -1 \
|
||||
| sed 's/ref=//'
|
||||
}
|
||||
|
||||
builtin_demo() {
|
||||
local port=$1
|
||||
|
||||
echo "[demo] Step 1: Navigate to first available agent"
|
||||
local snapshot agent_ref
|
||||
snapshot=$(agent-browser --cdp "$port" snapshot -i 2>&1)
|
||||
# Try Lobe AI first, then fall back to any agent link in the sidebar
|
||||
agent_ref=$(echo "$snapshot" | grep -oE 'link "Lobe AI" \[ref=e[0-9]+\]' | grep -oE 'e[0-9]+' || true)
|
||||
if [ -z "$agent_ref" ]; then
|
||||
# Pick the first agent-like link (skip nav links)
|
||||
agent_ref=$(echo "$snapshot" | grep 'link "' | grep -vE '"Home"|"Pages"|"Settings"|"Search"|"Resources"|"Marketplace"' | head -1 | grep -oE 'ref=e[0-9]+' | sed 's/ref=//' || true)
|
||||
fi
|
||||
if [ -z "$agent_ref" ]; then
|
||||
echo "[error] No agent link found in snapshot"
|
||||
echo "$snapshot" | head -30
|
||||
return 1
|
||||
fi
|
||||
echo "[demo] Clicking agent ref: @$agent_ref"
|
||||
agent-browser --cdp "$port" click "@$agent_ref"
|
||||
sleep 3
|
||||
|
||||
echo "[demo] Step 2: Send first message (triggers AI generation)"
|
||||
local input_ref
|
||||
input_ref=$(find_input_ref "$port")
|
||||
agent-browser --cdp "$port" click "@$input_ref"
|
||||
agent-browser --cdp "$port" type "@$input_ref" "Write a 3000 word essay about the complete history of space exploration from Sputnik to the James Webb Space Telescope"
|
||||
sleep 1
|
||||
agent-browser --cdp "$port" press Enter
|
||||
sleep 3
|
||||
|
||||
echo "[demo] Step 3: Queue message 1"
|
||||
input_ref=$(find_input_ref "$port")
|
||||
agent-browser --cdp "$port" click "@$input_ref"
|
||||
agent-browser --cdp "$port" type "@$input_ref" "This message should be edited"
|
||||
sleep 1
|
||||
agent-browser --cdp "$port" press Enter
|
||||
sleep 1
|
||||
|
||||
echo "[demo] Step 4: Queue message 2"
|
||||
input_ref=$(find_input_ref "$port")
|
||||
agent-browser --cdp "$port" click "@$input_ref"
|
||||
agent-browser --cdp "$port" type "@$input_ref" "Another queued message"
|
||||
sleep 1
|
||||
agent-browser --cdp "$port" press Enter
|
||||
sleep 1
|
||||
|
||||
echo "[demo] Step 5: Verify queue has messages"
|
||||
local queue_count
|
||||
queue_count=$(agent-browser --cdp "$port" eval --stdin << 'EVALEOF'
|
||||
(function() {
|
||||
var chat = window.__LOBE_STORES.chat();
|
||||
var total = 0;
|
||||
Object.keys(chat.queuedMessages).forEach(function(k) {
|
||||
total += chat.queuedMessages[k].length;
|
||||
});
|
||||
return String(total);
|
||||
})()
|
||||
EVALEOF
|
||||
)
|
||||
echo "[demo] Queue count: $queue_count"
|
||||
|
||||
if [ "$queue_count" = "0" ] || [ "$queue_count" = '"0"' ]; then
|
||||
echo "[demo] Queue was already drained. Retrying..."
|
||||
input_ref=$(find_input_ref "$port")
|
||||
agent-browser --cdp "$port" click "@$input_ref"
|
||||
agent-browser --cdp "$port" type "@$input_ref" "Now write another 3000 word essay about artificial intelligence from Turing to transformers covering every major breakthrough"
|
||||
sleep 1
|
||||
agent-browser --cdp "$port" press Enter
|
||||
sleep 2
|
||||
input_ref=$(find_input_ref "$port")
|
||||
agent-browser --cdp "$port" click "@$input_ref"
|
||||
agent-browser --cdp "$port" type "@$input_ref" "This message should be edited"
|
||||
sleep 1
|
||||
agent-browser --cdp "$port" press Enter
|
||||
sleep 1
|
||||
input_ref=$(find_input_ref "$port")
|
||||
agent-browser --cdp "$port" click "@$input_ref"
|
||||
agent-browser --cdp "$port" type "@$input_ref" "Another queued message"
|
||||
sleep 1
|
||||
agent-browser --cdp "$port" press Enter
|
||||
sleep 1
|
||||
fi
|
||||
|
||||
echo "[demo] Step 6: Scroll to show queue tray"
|
||||
agent-browser --cdp "$port" scroll down 5000
|
||||
sleep 2
|
||||
|
||||
echo "[demo] Step 7: Click edit button on first queued message"
|
||||
agent-browser --cdp "$port" eval --stdin << 'EVALEOF'
|
||||
(function() {
|
||||
var chat = window.__LOBE_STORES.chat();
|
||||
var keys = Object.keys(chat.queuedMessages);
|
||||
for (var k = 0; k < keys.length; k++) {
|
||||
var queue = chat.queuedMessages[keys[k]];
|
||||
if (queue.length > 0) {
|
||||
var targetText = queue[0].content;
|
||||
var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);
|
||||
while (walker.nextNode()) {
|
||||
var node = walker.currentNode;
|
||||
if (node.textContent.trim() === targetText) {
|
||||
var row = node.parentElement.parentElement;
|
||||
var buttons = row.querySelectorAll('[role="button"]');
|
||||
if (buttons.length >= 1) {
|
||||
buttons[0].click();
|
||||
return 'clicked edit on: ' + targetText;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 'edit button not found';
|
||||
})()
|
||||
EVALEOF
|
||||
sleep 3
|
||||
|
||||
echo "[demo] Step 8: Show result — content restored to input"
|
||||
sleep 3
|
||||
|
||||
echo "[demo] Complete!"
|
||||
}
|
||||
|
||||
# ── Main ─────────────────────────────────────────────────────────────
|
||||
|
||||
echo "=== Electron Demo Recorder ==="
|
||||
|
||||
# 1. Kill existing instances
|
||||
echo "[setup] Cleaning up existing processes..."
|
||||
pkill -f "Electron" 2>/dev/null || true
|
||||
pkill -f "electron-vite" 2>/dev/null || true
|
||||
pkill -f "agent-browser" 2>/dev/null || true
|
||||
sleep 3
|
||||
|
||||
# 2. Start Electron
|
||||
echo "[setup] Starting Electron..."
|
||||
cd "$PROJECT_ROOT/apps/desktop"
|
||||
ELECTRON_ENABLE_LOGGING=1 npx electron-vite dev -- --remote-debugging-port="$CDP_PORT" > "$ELECTRON_LOG" 2>&1 &
|
||||
|
||||
wait_for_electron
|
||||
wait_for_renderer
|
||||
|
||||
# 3. Get window position and start recording
|
||||
WIN_INFO=$(get_window_and_screen_info)
|
||||
if [ -z "$WIN_INFO" ]; then
|
||||
echo "[error] Could not find Electron window"
|
||||
exit 1
|
||||
fi
|
||||
read -r WIN_X WIN_Y WIN_W WIN_H SCREEN_IDX SCALE <<< "$WIN_INFO"
|
||||
start_recording "$WIN_X" "$WIN_Y" "$WIN_W" "$WIN_H" "$SCREEN_IDX" "$SCALE"
|
||||
|
||||
# 4. Run demo script
|
||||
if [ -n "$DEMO_SCRIPT" ] && [ -f "$DEMO_SCRIPT" ]; then
|
||||
echo "[demo] Running custom script: $DEMO_SCRIPT"
|
||||
bash "$DEMO_SCRIPT" "$CDP_PORT"
|
||||
else
|
||||
echo "[demo] Running built-in queue-edit demo"
|
||||
builtin_demo "$CDP_PORT"
|
||||
fi
|
||||
|
||||
# 5. Stop recording
|
||||
stop_recording
|
||||
|
||||
echo "=== Done! Output: $OUTPUT ==="
|
||||
@@ -1,61 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# record-gif.sh — capture a frame sequence via agent-browser (CDP) and
|
||||
# synthesize a GIF for embedding in a test report.
|
||||
#
|
||||
# Use this whenever the asserted behavior is about CHANGE OVER TIME —
|
||||
# streaming output, a ticking timer, loading states, animations. A static
|
||||
# screenshot cannot prove those; a GIF can. Cloud-portable: frames come from
|
||||
# CDP rendering, no OS-level screen capture.
|
||||
#
|
||||
# Usage:
|
||||
# record-gif.sh <output.gif> <duration_seconds> [fps]
|
||||
#
|
||||
# AB_TARGET="--cdp 9222" # Electron (default; CDP_PORT honored)
|
||||
# AB_TARGET="--session lobehub-dev" # web agent-browser session
|
||||
# GIF_WIDTH=960 # output width (px), default 960
|
||||
#
|
||||
# Requires ffmpeg (`brew install ffmpeg`). Effective fps is capped by
|
||||
# screenshot latency (~0.3-0.5s per frame); 1-2 fps is the realistic range.
|
||||
#
|
||||
# Example — record a 12s run and embed it in the report:
|
||||
# ./record-gif.sh "$DIR/assets/case2-tray-running.gif" 12 2 &
|
||||
# GIF_PID=$!
|
||||
# # ... trigger the streaming behavior ...
|
||||
# wait $GIF_PID
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
OUT="${1:?Usage: record-gif.sh <output.gif> <duration_seconds> [fps]}"
|
||||
DUR="${2:?Usage: record-gif.sh <output.gif> <duration_seconds> [fps]}"
|
||||
FPS="${3:-2}"
|
||||
AB_TARGET="${AB_TARGET:---cdp ${CDP_PORT:-9222}}"
|
||||
GIF_WIDTH="${GIF_WIDTH:-960}"
|
||||
|
||||
command -v ffmpeg > /dev/null || {
|
||||
echo "ffmpeg not found — install with: brew install ffmpeg" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
TMP=$(mktemp -d)
|
||||
trap 'rm -rf "$TMP"' EXIT
|
||||
|
||||
FRAMES=$((DUR * FPS))
|
||||
INTERVAL=$(python3 -c "print(1 / $FPS)")
|
||||
|
||||
for i in $(seq -f '%04g' 1 "$FRAMES"); do
|
||||
# shellcheck disable=SC2086
|
||||
agent-browser $AB_TARGET screenshot "$TMP/frame-$i.png" > /dev/null 2>&1 || true
|
||||
sleep "$INTERVAL"
|
||||
done
|
||||
|
||||
CAPTURED=$(find "$TMP" -name 'frame-*.png' | wc -l | tr -d ' ')
|
||||
[ "$CAPTURED" -gt 0 ] || {
|
||||
echo "no frames captured — is the app reachable via $AB_TARGET?" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
ffmpeg -y -loglevel error -framerate "$FPS" -pattern_type glob -i "$TMP/frame-*.png" \
|
||||
-vf "fps=$FPS,scale=$GIF_WIDTH:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse" \
|
||||
"$OUT"
|
||||
|
||||
echo "$OUT ($CAPTURED frames @ ${FPS}fps)"
|
||||
@@ -1,88 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# report-init.sh — scaffold a structured test report under .records/reports/.
|
||||
#
|
||||
# Format spec and evidence rules: ../references/report.md
|
||||
#
|
||||
# Usage:
|
||||
# report-init.sh <slug> [title]
|
||||
#
|
||||
# Prints the report directory path (capture it: DIR=$(report-init.sh my-test)).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SLUG="${1:?Usage: report-init.sh <slug> [title]}"
|
||||
TITLE="${2:-$SLUG}"
|
||||
|
||||
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
|
||||
TS="$(date +%Y%m%d-%H%M%S)"
|
||||
DIR="$REPO_ROOT/.records/reports/$TS-$SLUG"
|
||||
mkdir -p "$DIR/assets"
|
||||
|
||||
BRANCH=$(git -C "$REPO_ROOT" branch --show-current 2> /dev/null || echo "unknown")
|
||||
COMMIT=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2> /dev/null || echo "unknown")
|
||||
DATE_HUMAN=$(date '+%Y-%m-%d %H:%M')
|
||||
DATE_ISO=$(date '+%Y-%m-%dT%H:%M:%S%z')
|
||||
|
||||
cat > "$DIR/report.md" << EOF
|
||||
# 测试报告:$TITLE
|
||||
|
||||
## 范围
|
||||
|
||||
<!-- 测试目标 / 变更范围 / 重点风险 -->
|
||||
|
||||
- 分支:\`$BRANCH\`
|
||||
- 当前提交:\`$COMMIT\`
|
||||
- 日期:$DATE_HUMAN
|
||||
- 表面:<!-- CLI / Electron + CDP / Web / Bot:<platform> -->
|
||||
- 测试页 / 入口:<!-- e.g. /settings or http://localhost:3010 -->
|
||||
- 重点:<!-- 本轮最关心的体验、功能或回归点 -->
|
||||
|
||||
## 用例
|
||||
|
||||
| # | 用例 | 结果 | 关键现象 | 证据 |
|
||||
| - | ---- | ---- | -------- | ---- |
|
||||
| 1 | | 待测 | |  |
|
||||
|
||||
## 结论
|
||||
|
||||
整体结论:\`pending\`。
|
||||
|
||||
<!-- 用 1-2 段概括用户最需要知道的结果;失败和阻塞必须明确说明影响。 -->
|
||||
|
||||
仍需处理 / 跟进:
|
||||
|
||||
- <!-- TODO -->
|
||||
|
||||
## 本轮验证
|
||||
|
||||
<!-- 如有自动化或命令行验证,保留精简命令与结果;没有则写“未运行额外自动化验证”。 -->
|
||||
|
||||
\`\`\`bash
|
||||
# command
|
||||
\`\`\`
|
||||
|
||||
结果:
|
||||
|
||||
- <!-- TODO -->
|
||||
|
||||
## 评分
|
||||
|
||||
- 通过:0
|
||||
- 失败:0
|
||||
- 阻塞:0
|
||||
- 评分:— / 100
|
||||
EOF
|
||||
|
||||
cat > "$DIR/result.json" << EOF
|
||||
{
|
||||
"title": "$TITLE",
|
||||
"createdAt": "$DATE_ISO",
|
||||
"branch": "$BRANCH",
|
||||
"commit": "$COMMIT",
|
||||
"surfaces": [],
|
||||
"cases": [],
|
||||
"summary": { "total": 0, "passed": 0, "failed": 0, "blocked": 0, "verdict": "pending" }
|
||||
}
|
||||
EOF
|
||||
|
||||
echo "$DIR"
|
||||
@@ -1,174 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# setup-auth.sh — one-stop auth setup & check for local agent testing.
|
||||
#
|
||||
# Auth is the gate for all automated testing: prepare it BEFORE writing any
|
||||
# test step. Background and failure modes: ../references/auth.md
|
||||
#
|
||||
# Usage:
|
||||
# setup-auth.sh status # check server + CLI + web auth readiness
|
||||
# setup-auth.sh cli # interactive CLI device-code login (run by a human)
|
||||
# setup-auth.sh web # stdin = Cookie header -> inject into agent-browser session
|
||||
# setup-auth.sh web-verify # live-check the agent-browser session is authenticated
|
||||
#
|
||||
# Env:
|
||||
# SERVER_URL (default http://localhost:3010) dev server under test
|
||||
# SESSION (default lobehub-dev) agent-browser session name
|
||||
# AUTH_DIR (default ~/.lobehub-agent-testing) where web state is persisted
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SERVER_URL="${SERVER_URL:-http://localhost:3010}"
|
||||
SESSION="${SESSION:-lobehub-dev}"
|
||||
AUTH_DIR="${AUTH_DIR:-$HOME/.lobehub-agent-testing}"
|
||||
STATE_FILE="$AUTH_DIR/web-state.json"
|
||||
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
|
||||
CLI_HOME="$REPO_ROOT/apps/cli/.lobehub-dev"
|
||||
|
||||
ok() { printf ' \033[32m✔\033[0m %s\n' "$1"; }
|
||||
bad() { printf ' \033[31m✘\033[0m %s\n' "$1"; }
|
||||
note() { printf ' %s\n' "$1"; }
|
||||
|
||||
check_server() {
|
||||
local code
|
||||
code=$(curl -s -o /dev/null -w '%{http_code}' "$SERVER_URL/" 2> /dev/null || true)
|
||||
if [[ "$code" =~ ^[23] ]]; then
|
||||
ok "dev server reachable at $SERVER_URL"
|
||||
else
|
||||
bad "dev server NOT reachable at $SERVER_URL (http_code='$code')"
|
||||
note "start it: pnpm run dev:next (see references/dev-server.md)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_cli() {
|
||||
if [[ -f "$CLI_HOME/settings.json" ]] && grep -q "$SERVER_URL" "$CLI_HOME/settings.json"; then
|
||||
ok "CLI logged in to $SERVER_URL (creds: apps/cli/.lobehub-dev)"
|
||||
else
|
||||
bad "CLI not logged in to $SERVER_URL"
|
||||
note "ask the user to run:"
|
||||
note "cd apps/cli && LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server $SERVER_URL"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_web() {
|
||||
if [[ -f "$STATE_FILE" ]]; then
|
||||
ok "web auth state saved ($STATE_FILE)"
|
||||
note "live-verify: $0 web-verify"
|
||||
else
|
||||
bad "no web auth state for agent-browser"
|
||||
note "copy the Cookie header from Chrome DevTools (Network tab), then:"
|
||||
note "pbpaste | $0 web (see references/auth.md)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_electron() {
|
||||
local cdp_port="${CDP_PORT:-9222}"
|
||||
if ! curl -s -o /dev/null --max-time 2 "http://localhost:$cdp_port/json/version" 2> /dev/null; then
|
||||
note "electron: not running (CDP $cdp_port unreachable) — start with electron-dev.sh; check skipped"
|
||||
return 0
|
||||
fi
|
||||
local probe result
|
||||
probe="$(dirname "${BASH_SOURCE[0]}")/app-probe.sh"
|
||||
result=$(bash "$probe" auth 2> /dev/null || true)
|
||||
# agent-browser eval returns the JSON string with escaped quotes — normalize.
|
||||
result="${result//\\/}"
|
||||
if [[ "$result" == *'"isSignedIn":true'* ]]; then
|
||||
ok "electron app signed in ($result)"
|
||||
else
|
||||
bad "electron app NOT signed in ($result)"
|
||||
note "log in once manually inside the app (state persists across restarts)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
cmd_status() {
|
||||
echo "agent-testing auth status (SERVER_URL=$SERVER_URL):"
|
||||
local rc=0
|
||||
check_server || rc=1
|
||||
check_cli || rc=1
|
||||
check_web || rc=1
|
||||
check_electron || rc=1
|
||||
if [[ $rc -eq 0 ]]; then
|
||||
echo "all green — safe to start automated testing."
|
||||
else
|
||||
echo "auth NOT ready — fix the ✘ items before writing any test step."
|
||||
fi
|
||||
return $rc
|
||||
}
|
||||
|
||||
cmd_cli() {
|
||||
echo "Starting CLI device-code login against $SERVER_URL ..."
|
||||
echo "(opens a browser authorization — must be run by a human in a terminal)"
|
||||
cd "$REPO_ROOT/apps/cli"
|
||||
LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts login --server "$SERVER_URL"
|
||||
}
|
||||
|
||||
# Build a Playwright storageState file from a raw Cookie header on stdin,
|
||||
# keeping only the better-auth cookies. See references/auth.md for why the
|
||||
# header must come from a Network request (HttpOnly) and why httpOnly=false.
|
||||
cmd_web() {
|
||||
mkdir -p "$AUTH_DIR"
|
||||
python3 - "$STATE_FILE" << 'PY'
|
||||
import json, sys, time
|
||||
|
||||
raw = sys.stdin.read().strip()
|
||||
if raw.lower().startswith("cookie:"):
|
||||
raw = raw.split(":", 1)[1].strip()
|
||||
|
||||
WANTED = {"better-auth.session_token", "better-auth.state"}
|
||||
exp = int(time.time()) + 30 * 24 * 3600 # 30 days
|
||||
|
||||
cookies = []
|
||||
for pair in raw.split("; "):
|
||||
if "=" not in pair:
|
||||
continue
|
||||
name, _, value = pair.partition("=")
|
||||
if name not in WANTED:
|
||||
continue
|
||||
cookies.append({
|
||||
"name": name,
|
||||
"value": value,
|
||||
"domain": "localhost",
|
||||
"path": "/",
|
||||
"expires": exp,
|
||||
"httpOnly": False,
|
||||
"secure": False,
|
||||
"sameSite": "Lax",
|
||||
})
|
||||
|
||||
if not cookies:
|
||||
sys.stderr.write("no better-auth cookies found in input — paste the raw Cookie header from a Network request\n")
|
||||
sys.exit(1)
|
||||
|
||||
with open(sys.argv[1], "w") as f:
|
||||
json.dump({"cookies": cookies, "origins": []}, f, indent=2)
|
||||
print(f"wrote {len(cookies)} cookie(s) to {sys.argv[1]}")
|
||||
PY
|
||||
agent-browser --session "$SESSION" state load "$STATE_FILE"
|
||||
cmd_web_verify
|
||||
}
|
||||
|
||||
cmd_web_verify() {
|
||||
agent-browser --session "$SESSION" open "$SERVER_URL/" > /dev/null
|
||||
local url
|
||||
url=$(agent-browser --session "$SESSION" get url)
|
||||
if [[ "$url" == *"/signin"* || "$url" == *"/login"* ]]; then
|
||||
bad "agent-browser session '$SESSION' NOT authenticated (landed on $url)"
|
||||
note "re-copy the Cookie header and re-run: pbpaste | $0 web"
|
||||
return 1
|
||||
fi
|
||||
ok "agent-browser session '$SESSION' authenticated (at $url)"
|
||||
}
|
||||
|
||||
case "${1:-status}" in
|
||||
status) cmd_status ;;
|
||||
cli) cmd_cli ;;
|
||||
web) cmd_web ;;
|
||||
web-verify) cmd_web_verify ;;
|
||||
*)
|
||||
echo "Usage: $0 {status|cli|web|web-verify}" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,154 +0,0 @@
|
||||
# Electron (LobeHub Desktop) UI Testing
|
||||
|
||||
Default surface for verifying **pure frontend changes** (components, store logic, styles, interactions) in the primary product shape. Drives the Electron renderer over CDP with `agent-browser` — see [../references/agent-browser.md](../references/agent-browser.md) for the full command reference.
|
||||
|
||||
**Auth**: the Electron app keeps its own persistent login state — log in once manually in the app; sessions survive restarts. Run `../scripts/setup-auth.sh status` before testing (see [../references/auth.md](../references/auth.md)).
|
||||
|
||||
**Linux / headless (cloud)**: Electron itself runs on Linux, but it has no true headless mode — it needs a display server. In a headless environment wrap the launch with `xvfb-run` (virtual framebuffer). Everything CDP-based keeps working under Xvfb: the `agent-browser --cdp 9222` connection, snapshots, eval, and `agent-browser screenshot` (captured from the renderer via CDP, not the OS screen). What does NOT work on Linux: `capture-app-window.sh` (macOS `screencapture`), osascript, and the ffmpeg recording scripts in their current form.
|
||||
|
||||
### Setup / Teardown
|
||||
|
||||
Use the `electron-dev.sh` script to manage the Electron dev environment. It handles process lifecycle, waits for SPA readiness, and reliably kills all child processes (main + helpers + vite).
|
||||
|
||||
```bash
|
||||
SCRIPT=".agents/skills/agent-testing/scripts/electron-dev.sh"
|
||||
|
||||
# Start Electron dev with CDP (idempotent — skips if already running)
|
||||
$SCRIPT start
|
||||
|
||||
# Check if Electron is running and CDP is reachable
|
||||
$SCRIPT status
|
||||
|
||||
# Kill all Electron-related processes (main + helper + vite)
|
||||
$SCRIPT stop
|
||||
|
||||
# Force fresh restart
|
||||
$SCRIPT restart
|
||||
```
|
||||
|
||||
After `start` succeeds, connect with: `agent-browser --cdp 9222 snapshot -i`
|
||||
|
||||
**Always run `$SCRIPT stop` when done testing** — `pkill -f "Electron"` alone won't catch all helper processes.
|
||||
|
||||
#### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| ----------------- | ----------------------- | ---------------------------------------- |
|
||||
| `CDP_PORT` | `9222` | Chrome DevTools Protocol port |
|
||||
| `ELECTRON_LOG` | `/tmp/electron-dev.log` | Electron process log |
|
||||
| `ELECTRON_WAIT_S` | `60` | Max seconds to wait for Electron process |
|
||||
| `RENDERER_WAIT_S` | `60` | Max seconds to wait for SPA to load |
|
||||
|
||||
### LobeHub Probes & Quick Navigation
|
||||
|
||||
`scripts/app-probe.sh` is the standard fast path into app state — **use it
|
||||
instead of hand-rolling `__LOBE_STORES` eval snippets** for these common needs:
|
||||
|
||||
```bash
|
||||
PROBE=".agents/skills/agent-testing/scripts/app-probe.sh"
|
||||
|
||||
$PROBE auth # login check (Step 0.3) → { isSignedIn, userId }
|
||||
$PROBE route # current SPA route
|
||||
$PROBE ops # running chat operations (type / startTime)
|
||||
$PROBE goto /settings # jump the SPA straight to a route (full reload)
|
||||
$PROBE errors-install # install console.error interceptor
|
||||
$PROBE errors # dump captured errors
|
||||
```
|
||||
|
||||
`goto` lets a test enter the state under test directly instead of clicking
|
||||
through the UI. Common desktop routes:
|
||||
|
||||
| Route | Where it lands |
|
||||
| ----------------------------- | ------------------------------------ |
|
||||
| `/` | Home (has a chat input) |
|
||||
| `/agent/<agentId>` | Agent conversation (latest topic) |
|
||||
| `/agent/<agentId>/<topicId>` | Specific topic in a conversation |
|
||||
| `/task` · `/task/<taskId>` | Task list / task detail |
|
||||
| `/page` | Documents (文稿) |
|
||||
| `/settings` | Settings |
|
||||
| `/community` | Discover / community |
|
||||
|
||||
Targets default to Electron (`--cdp 9222`); set `AB_TARGET="--session <name>"`
|
||||
for web sessions. For deeper or one-off state inspection, fall back to raw
|
||||
eval below.
|
||||
|
||||
### LobeHub-Specific Patterns
|
||||
|
||||
#### Access Zustand Store State
|
||||
|
||||
```bash
|
||||
agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
|
||||
(function() {
|
||||
var chat = window.__LOBE_STORES.chat();
|
||||
var ops = Object.values(chat.operations);
|
||||
return JSON.stringify({
|
||||
ops: ops.map(function(o) { return { type: o.type, status: o.status }; }),
|
||||
activeAgent: chat.activeAgentId,
|
||||
activeTopic: chat.activeTopicId,
|
||||
});
|
||||
})()
|
||||
EVALEOF
|
||||
```
|
||||
|
||||
#### Find and Use the Chat Input
|
||||
|
||||
```bash
|
||||
# The chat input is contenteditable — must use -C flag
|
||||
agent-browser --cdp 9222 snapshot -i -C 2>&1 | grep "editable"
|
||||
|
||||
agent-browser --cdp 9222 click @e48
|
||||
agent-browser --cdp 9222 type @e48 "Hello world"
|
||||
agent-browser --cdp 9222 press Enter
|
||||
```
|
||||
|
||||
#### Wait for Agent to Complete
|
||||
|
||||
```bash
|
||||
agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
|
||||
(function() {
|
||||
var chat = window.__LOBE_STORES.chat();
|
||||
var ops = Object.values(chat.operations);
|
||||
var running = ops.filter(function(o) { return o.status === 'running'; });
|
||||
return running.length === 0 ? 'done' : 'running: ' + running.length;
|
||||
})()
|
||||
EVALEOF
|
||||
```
|
||||
|
||||
#### Install Error Interceptor
|
||||
|
||||
```bash
|
||||
agent-browser --cdp 9222 eval --stdin << 'EVALEOF'
|
||||
(function() {
|
||||
window.__CAPTURED_ERRORS = [];
|
||||
var orig = console.error;
|
||||
console.error = function() {
|
||||
var msg = Array.from(arguments).map(function(a) {
|
||||
if (a instanceof Error) return a.message;
|
||||
return typeof a === 'object' ? JSON.stringify(a) : String(a);
|
||||
}).join(' ');
|
||||
window.__CAPTURED_ERRORS.push(msg);
|
||||
orig.apply(console, arguments);
|
||||
};
|
||||
return 'installed';
|
||||
})()
|
||||
EVALEOF
|
||||
|
||||
# Later, check captured errors:
|
||||
agent-browser --cdp 9222 eval "JSON.stringify(window.__CAPTURED_ERRORS)"
|
||||
```
|
||||
|
||||
## Electron Gotchas
|
||||
|
||||
- **Always use `electron-dev.sh stop` to clean up** — `pkill -f "Electron"` only kills the main process; helper processes (GPU, renderer, network) survive. The script finds and kills all of them via PID matching against the project's electron binary path.
|
||||
- **`npx electron-vite dev` must run from `apps/desktop/`** — running from project root fails silently. The `electron-dev.sh` script handles this automatically.
|
||||
- **Dev build auto-opens DevTools, which hijacks the CDP target** — `agent-browser --cdp 9222` may attach to the DevTools page (`devtools://…`) instead of the app (`app://renderer/`). Symptom: `get url` returns a `devtools://` URL. Fix: close the DevTools target and reconnect:
|
||||
|
||||
```bash
|
||||
DT_ID=$(curl -s http://localhost:9222/json/list | python3 -c "import json,sys; ts=json.load(sys.stdin); print(next(t['id'] for t in ts if t['type']=='page' and t['url'].startswith('devtools://')))")
|
||||
curl -s "http://localhost:9222/json/close/$DT_ID" > /dev/null
|
||||
agent-browser close --all && agent-browser --cdp 9222 get url # expect app://renderer/
|
||||
```
|
||||
|
||||
- **Don't resize the Electron window after load** — resizing triggers full SPA reload
|
||||
- **Store is at `window.__LOBE_STORES`** not `window.__ZUSTAND_STORES__`
|
||||
- **Streaming / ticking UI needs GIF evidence** — see `scripts/record-gif.sh`; a static screenshot cannot prove time-based behavior.
|
||||
@@ -1,69 +0,0 @@
|
||||
# Web (Full-Stack) Testing
|
||||
|
||||
Default surface for **full-stack changes** — a new/changed API plus the UI that
|
||||
consumes it. The browser is the one surface where network requests and UI state
|
||||
are observable together, so you can assert both sides of the contract in a
|
||||
single run.
|
||||
|
||||
For pure-frontend changes prefer [electron.md](./electron.md); for
|
||||
backend-only changes prefer [../cli/index.md](../cli/index.md).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Local dev server running — [../references/dev-server.md](../references/dev-server.md)
|
||||
- Web auth injected into agent-browser — [../references/auth.md](../references/auth.md):
|
||||
|
||||
```bash
|
||||
pbpaste | ./.agents/skills/agent-testing/scripts/setup-auth.sh web # after copying the Cookie header
|
||||
```
|
||||
|
||||
## Option A — agent-browser with injected auth (recommended)
|
||||
|
||||
```bash
|
||||
SESSION=lobehub-dev
|
||||
|
||||
agent-browser --session $SESSION open "http://localhost:3010/"
|
||||
agent-browser --session $SESSION snapshot -i
|
||||
# interact via refs — full command reference: ../references/agent-browser.md
|
||||
```
|
||||
|
||||
### Watch the API while driving the UI
|
||||
|
||||
```bash
|
||||
# After triggering the UI action under test:
|
||||
agent-browser --session $SESSION network requests --type xhr,fetch
|
||||
agent-browser --session $SESSION network requests --method POST
|
||||
|
||||
# Record a full HAR for the report
|
||||
agent-browser --session $SESSION network har start
|
||||
# ... drive the scenario ...
|
||||
agent-browser --session $SESSION network har stop ./capture.har
|
||||
```
|
||||
|
||||
Assert both layers: the request/response shape (network) and the rendered
|
||||
result (snapshot/screenshot). Both belong in the report as evidence.
|
||||
|
||||
## Option B — real Chrome with remote debugging
|
||||
|
||||
For flows that need a real, visible browser (e.g. exercising the login UI
|
||||
itself):
|
||||
|
||||
```bash
|
||||
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
|
||||
--remote-debugging-port=9222 \
|
||||
--user-data-dir=/tmp/chrome-test-profile \
|
||||
"<URL>" &
|
||||
sleep 5
|
||||
agent-browser --cdp 9222 snapshot -i
|
||||
|
||||
# Or auto-discover running Chrome with remote debugging
|
||||
agent-browser --auto-connect snapshot -i
|
||||
```
|
||||
|
||||
## Option C — Debug Proxy (local frontend, production backend)
|
||||
|
||||
`bun run dev:spa` prints a **Debug Proxy** URL
|
||||
(`https://app.lobehub.com/_dangerous_local_dev_proxy?debug-host=…`) that loads
|
||||
your local Vite SPA inside the online environment — HMR against real server
|
||||
config. Useful for verifying frontend behavior against production data, **not**
|
||||
for testing backend changes (the backend is production, not your branch).
|
||||
@@ -1,221 +0,0 @@
|
||||
---
|
||||
name: agent-tracing
|
||||
description: 'Agent tracing CLI for execution snapshots. Use for agent-tracing, traces, snapshots, LLM call inspection, context engine data, agent step analysis, or execution debugging.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Agent Tracing CLI Guide
|
||||
|
||||
`@lobechat/agent-tracing` is a zero-config local dev tool that records agent execution snapshots to disk and provides a CLI to inspect them.
|
||||
|
||||
## How It Works
|
||||
|
||||
In `NODE_ENV=development`, `AgentRuntimeService.executeStep()` automatically records each step to `.agent-tracing/` as partial snapshots. When the operation completes, the partial is finalized into a complete `ExecutionSnapshot` JSON file.
|
||||
|
||||
**Data flow**: executeStep loop -> build `StepPresentationData` -> write partial snapshot to disk -> on completion, finalize to `.agent-tracing/{timestamp}_{traceId}.json`
|
||||
|
||||
**Context engine capture**: In `RuntimeExecutors.ts`, the `call_llm` executor calls `ctx.tracingContextEngine(input, output)` after `serverMessagesEngine()` processes messages. `AgentRuntimeService.executeStep` buffers the call per step and forwards it to `OperationTraceRecorder.appendStep` as the typed `contextEngine` field. CE flows through this side channel rather than the `events` array so its heavy payload (agentDocuments, systemRole, …) never enters the Redis state pipeline (LOBE-9110).
|
||||
|
||||
## Package Location
|
||||
|
||||
```
|
||||
packages/agent-tracing/
|
||||
src/
|
||||
types.ts # ExecutionSnapshot, StepSnapshot, SnapshotSummary
|
||||
store/
|
||||
types.ts # ISnapshotStore interface
|
||||
file-store.ts # FileSnapshotStore (.agent-tracing/*.json)
|
||||
recorder/
|
||||
index.ts # appendStepToPartial(), finalizeSnapshot()
|
||||
viewer/
|
||||
index.ts # Terminal rendering: renderSnapshot, renderStepDetail, renderMessageDetail, renderSummaryTable, renderPayload, renderPayloadTools, renderMemory
|
||||
cli/
|
||||
index.ts # CLI entry point (#!/usr/bin/env bun)
|
||||
inspect.ts # Inspect command (default)
|
||||
partial.ts # Partial snapshot commands (list, inspect, clean)
|
||||
index.ts # Barrel exports
|
||||
```
|
||||
|
||||
## Data Storage
|
||||
|
||||
- Completed snapshots: `.agent-tracing/{ISO-timestamp}_{traceId-short}.json`
|
||||
- Latest symlink: `.agent-tracing/latest.json`
|
||||
- In-progress partials: `.agent-tracing/_partial/{operationId}.json`
|
||||
- `FileSnapshotStore` resolves from `process.cwd()` — **run CLI from the repo root**
|
||||
|
||||
## CLI Commands
|
||||
|
||||
All commands run from the **repo root**:
|
||||
|
||||
```bash
|
||||
# View latest trace (tree overview, `inspect` is the default command)
|
||||
agent-tracing
|
||||
agent-tracing inspect
|
||||
agent-tracing inspect <traceId>
|
||||
agent-tracing inspect latest
|
||||
|
||||
# List recent snapshots
|
||||
agent-tracing list
|
||||
agent-tracing list -l 20
|
||||
|
||||
# Inspect specific step (-s is short for --step)
|
||||
agent-tracing inspect <traceId> -s 0
|
||||
|
||||
# View messages (-m is short for --messages)
|
||||
agent-tracing inspect <traceId> -s 0 -m
|
||||
|
||||
# View full content of a specific message (by index shown in -m output)
|
||||
agent-tracing inspect <traceId> -s 0 --msg 2
|
||||
agent-tracing inspect <traceId> -s 0 --msg-input 1
|
||||
|
||||
# View tool call/result details (-t is short for --tools)
|
||||
agent-tracing inspect <traceId> -s 1 -t
|
||||
|
||||
# View raw events (-e is short for --events)
|
||||
agent-tracing inspect <traceId> -s 0 -e
|
||||
|
||||
# View runtime context (-c is short for --context)
|
||||
agent-tracing inspect <traceId> -s 0 -c
|
||||
|
||||
# View context engine input overview (-p is short for --payload)
|
||||
agent-tracing inspect <traceId> -p
|
||||
agent-tracing inspect <traceId> -s 0 -p
|
||||
|
||||
# View available tools in payload (-T is short for --payload-tools)
|
||||
agent-tracing inspect <traceId> -T
|
||||
agent-tracing inspect <traceId> -s 0 -T
|
||||
|
||||
# View user memory (-M is short for --memory)
|
||||
agent-tracing inspect <traceId> -M
|
||||
agent-tracing inspect <traceId> -s 0 -M
|
||||
|
||||
# Raw JSON output (-j is short for --json)
|
||||
agent-tracing inspect <traceId> -j
|
||||
agent-tracing inspect <traceId> -s 0 -j
|
||||
|
||||
# List in-progress partial snapshots
|
||||
agent-tracing partial list
|
||||
|
||||
# Inspect a partial (use `inspect` directly — all flags work with partial IDs)
|
||||
agent-tracing inspect <partialOperationId>
|
||||
agent-tracing inspect <partialOperationId> -T
|
||||
agent-tracing inspect <partialOperationId> -p
|
||||
|
||||
# Clean up stale partial snapshots
|
||||
agent-tracing partial clean
|
||||
```
|
||||
|
||||
## Inspect Flag Reference
|
||||
|
||||
| Flag | Short | Description | Default Step |
|
||||
| ----------------- | ----- | ------------------------------------------------------------------------------------------------- | ------------ |
|
||||
| `--step <n>` | `-s` | Target a specific step | — |
|
||||
| `--messages` | `-m` | Messages context (CE input → params → LLM payload) | — |
|
||||
| `--tools` | `-t` | Tool calls & results (what agent invoked) | — |
|
||||
| `--events` | `-e` | Raw events (llm_start, llm_result, etc.) | — |
|
||||
| `--context` | `-c` | Runtime context & payload (raw) | — |
|
||||
| `--system-role` | `-r` | Full system role content | 0 |
|
||||
| `--env` | | Environment context | 0 |
|
||||
| `--payload` | `-p` | Context engine input overview (model, knowledge, tools summary, memory summary, platform context) | 0 |
|
||||
| `--payload-tools` | `-T` | Available tools detail (plugin manifests + LLM function definitions) | 0 |
|
||||
| `--memory` | `-M` | Full user memory (persona, identity, contexts, preferences, experiences) | 0 |
|
||||
| `--diff <n>` | `-d` | Diff against step N (use with `-r` or `--env`) | — |
|
||||
| `--msg <n>` | | Full content of message N from Final LLM Payload | — |
|
||||
| `--msg-input <n>` | | Full content of message N from Context Engine Input | — |
|
||||
| `--json` | `-j` | Output as JSON (combinable with any flag above) | — |
|
||||
|
||||
Flags marked "Default Step: 0" auto-select step 0 if `--step` is not provided. All flags support `latest` or omitted traceId.
|
||||
|
||||
## Typical Debug Workflow
|
||||
|
||||
```bash
|
||||
# 1. Trigger an agent operation in the dev UI
|
||||
|
||||
# 2. See the overview
|
||||
agent-tracing inspect
|
||||
|
||||
# 3. List all traces, get traceId
|
||||
agent-tracing list
|
||||
|
||||
# 4. Quick overview of what was fed into context engine
|
||||
agent-tracing inspect -p
|
||||
|
||||
# 5. Inspect a specific step's messages to see what was sent to the LLM
|
||||
agent-tracing inspect TRACE_ID -s 0 -m
|
||||
|
||||
# 6. Drill into a truncated message for full content
|
||||
agent-tracing inspect TRACE_ID -s 0 --msg 2
|
||||
|
||||
# 7. Check available tools vs actual tool calls
|
||||
agent-tracing inspect -T # available tools
|
||||
agent-tracing inspect -s 1 -t # actual tool calls & results
|
||||
|
||||
# 8. Inspect user memory injected into the conversation
|
||||
agent-tracing inspect -M
|
||||
|
||||
# 9. Diff system role between steps (multi-step agents)
|
||||
agent-tracing inspect TRACE_ID -r -d 2
|
||||
```
|
||||
|
||||
## Key Types
|
||||
|
||||
```typescript
|
||||
interface ExecutionSnapshot {
|
||||
traceId: string;
|
||||
operationId: string;
|
||||
model?: string;
|
||||
provider?: string;
|
||||
startedAt: number;
|
||||
completedAt?: number;
|
||||
completionReason?:
|
||||
| 'done'
|
||||
| 'error'
|
||||
| 'interrupted'
|
||||
| 'max_steps'
|
||||
| 'cost_limit'
|
||||
| 'waiting_for_human';
|
||||
totalSteps: number;
|
||||
totalTokens: number;
|
||||
totalCost: number;
|
||||
error?: { type: string; message: string };
|
||||
steps: StepSnapshot[];
|
||||
}
|
||||
|
||||
interface StepSnapshot {
|
||||
stepIndex: number;
|
||||
stepType: 'call_llm' | 'call_tool';
|
||||
executionTimeMs: number;
|
||||
content?: string; // LLM output
|
||||
reasoning?: string; // Reasoning/thinking
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
toolsCalling?: Array<{ apiName: string; identifier: string; arguments?: string }>;
|
||||
toolsResult?: Array<{
|
||||
apiName: string;
|
||||
identifier: string;
|
||||
isSuccess?: boolean;
|
||||
output?: string;
|
||||
}>;
|
||||
messages?: any[]; // DB messages before step
|
||||
context?: { phase: string; payload?: unknown; stepContext?: unknown };
|
||||
events?: Array<{ type: string; [key: string]: unknown }>;
|
||||
contextEngine?: {
|
||||
input?: unknown; // contextEngineInput minus messages + toolsConfig (reconstructible from baseline)
|
||||
output?: unknown; // processed messages array (final LLM payload)
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
## --messages Output Structure
|
||||
|
||||
When using `--messages`, the output shows three sections (if context engine data is available):
|
||||
|
||||
1. **Context Engine Input** — DB messages passed to the engine, with `[0]`, `[1]`, ... indices. Use `--msg-input N` to view full content.
|
||||
2. **Context Engine Params** — systemRole, model, provider, knowledge, tools, userMemory, etc.
|
||||
3. **Final LLM Payload** — Processed messages after context engine (system date injection, user memory, history truncation, etc.), with `[0]`, `[1]`, ... indices. Use `--msg N` to view full content.
|
||||
|
||||
## Integration Points
|
||||
|
||||
- **Recording**: `apps/server/src/services/agentRuntime/AgentRuntimeService.ts` — in the `executeStep()` method, after building `stepPresentationData`, writes partial snapshot in dev mode
|
||||
- **Context engine capture**: `apps/server/src/modules/AgentRuntime/RuntimeExecutors.ts` — in `call_llm` executor, after `serverMessagesEngine()` returns, calls `ctx.tracingContextEngine(input, output)`. `AgentRuntimeService.executeStep` buffers it per step and passes it to `traceRecorder.appendStep` as the typed `contextEngine` field (kept off the `events` array to stay out of Redis state).
|
||||
- **Store**: `FileSnapshotStore` reads/writes to `.agent-tracing/` relative to `process.cwd()`
|
||||
@@ -1,130 +0,0 @@
|
||||
---
|
||||
name: builtin-tool
|
||||
description: 'Build LobeHub builtin tool packages. Use when adding agent-callable tools, manifests, executors, runtimes, inspectors, renders, placeholders, streaming, interventions, portals, or tool registries.'
|
||||
---
|
||||
|
||||
# Builtin Tool Authoring Guide
|
||||
|
||||
A builtin tool is a package the agent runtime can call. It ships **five faces**:
|
||||
|
||||
| Face | Lives in | Audience |
|
||||
| -------------------- | -------------------------------------------------------------------------------------- | ------------------------------------- |
|
||||
| **Manifest + types** | `src/{manifest,types,systemRole}.ts` | The LLM (tool spec + system prompt) |
|
||||
| **ExecutionRuntime** | `src/ExecutionRuntime/` | Server / desktop / any runtime caller |
|
||||
| **Executor** | `src/client/executor/` | Frontend (wraps stores/services) |
|
||||
| **Client UI** | `src/client/{Inspector,Render,…}/` | Chat UI |
|
||||
| **Registry wiring** | `packages/builtin-tools/src/*.ts` + `src/store/tool/slices/builtin/executors/index.ts` | Framework |
|
||||
|
||||
---
|
||||
|
||||
## Read These First
|
||||
|
||||
| Question | Doc |
|
||||
| ------------------------------------------------------------------------------------ | --------------------------------------------- |
|
||||
| Where do files live? What does each face do? Wiring? | [architecture.md](references/architecture.md) |
|
||||
| How do I name the tool, design APIs, write the manifest, executor, ExecutionRuntime? | [tool-design.md](references/tool-design.md) |
|
||||
| How do I build Inspector / Render / Placeholder / Streaming / Intervention / Portal? | [ui/](references/ui/README.md) |
|
||||
|
||||
---
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
- Creating a new `packages/builtin-tool-<name>/` package
|
||||
- Adding a new API method to an existing builtin tool
|
||||
- Building or restyling any of the 6 client surfaces for a tool
|
||||
- Wiring a tool into the central registries
|
||||
- Debugging "tool not found / API not found / render not showing / placeholder stuck" errors
|
||||
|
||||
---
|
||||
|
||||
## Top-Level Design Principles
|
||||
|
||||
1. **`lobe-<domain>` identifier is permanent.** It's stored in message history. Renames need `@deprecated` aliases (see `packages/builtin-tools/src/inspectors.ts:88-89`). Get it right the first time.
|
||||
2. **ApiName is an `as const` object**, not a TS enum. It doubles as the runtime list `BaseExecutor` iterates over.
|
||||
3. **Three result fields, three audiences:**
|
||||
- `content: string` → the LLM reads it
|
||||
- `state: Record<…>` → the UI's `pluginState`; **result-domain only**, never echo all params back
|
||||
- `error: { type, message, body? }` → both LLM and UI; `type` is a stable code
|
||||
4. **Split execution from frontend wiring.**
|
||||
- `src/ExecutionRuntime/` — pure runtime, no React, no Zustand, accepts services via constructor. **The default place for new logic.**
|
||||
- `src/client/executor/` — `BaseExecutor` subclass that calls `ExecutionRuntime` (or stores/services directly when frontend-only).
|
||||
5. **UI defaults to "do nothing".** Inspector is required (the header strip). Render/Placeholder/Streaming/Intervention/Portal are added **only when there's something specific to show** — empty registries are fine.
|
||||
6. **Style with `createStaticStyles + cssVar.*`** (zero-runtime). Fall back to `createStyles + token` only when you genuinely need runtime values. Use `@lobehub/ui` components, not raw antd.
|
||||
7. **i18n keys live in `src/locales/default/plugin.ts`.** Inspector titles must come from `t('builtins.<identifier>.apiName.<api>')` so something renders while args stream.
|
||||
|
||||
---
|
||||
|
||||
## Package Layout (preferred, post-2026 convention)
|
||||
|
||||
```
|
||||
packages/builtin-tool-<name>/
|
||||
├── package.json
|
||||
└── src/
|
||||
├── index.ts # exports manifest + types + systemRole + Identifier (no React, no stores)
|
||||
├── manifest.ts # BuiltinToolManifest with JSON Schema for every API
|
||||
├── types.ts # ApiName const + Params/State interfaces per API
|
||||
├── systemRole.ts # System prompt teaching the model when/how to use the APIs
|
||||
├── ExecutionRuntime/ # ✅ Default home for runtime logic (server- or anywhere-callable)
|
||||
│ └── index.ts
|
||||
└── client/
|
||||
├── index.ts # Re-exports for the registries
|
||||
├── executor/ # ✅ Frontend executor — extends BaseExecutor, often delegates to ExecutionRuntime
|
||||
│ └── index.ts
|
||||
├── Inspector/ # required — header chip per API
|
||||
├── Render/ # optional — rich result card
|
||||
├── Placeholder/ # optional — skeleton during streaming/execution
|
||||
├── Streaming/ # optional — live output renderer (e.g. RunCommand, WriteFile)
|
||||
├── Intervention/ # optional — approval / edit-before-run UI
|
||||
├── Portal/ # optional — full-screen detail view
|
||||
└── components/ # shared subcomponents used by the surfaces above
|
||||
```
|
||||
|
||||
**Older packages** (`builtin-tool-task`, `builtin-tool-calculator`, etc.) still have `src/executor/` as a sibling of `src/client/`. That's grandfathered; **don't relocate without a deliberate refactor**. New packages and new APIs added to existing packages should follow the layout above.
|
||||
|
||||
`package.json` exports map:
|
||||
|
||||
```json
|
||||
"exports": {
|
||||
".": "./src/index.ts",
|
||||
"./client": "./src/client/index.ts",
|
||||
"./executor": "./src/client/executor/index.ts",
|
||||
"./executionRuntime": "./src/ExecutionRuntime/index.ts"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Authoring Checklist
|
||||
|
||||
Before opening the PR:
|
||||
|
||||
- [ ] Identifier follows `lobe-<domain>` and is **stable** (lives in message history).
|
||||
- [ ] Every `<Name>ApiName` value has: a manifest `api[]` entry, an executor method, an Inspector, an i18n `apiName.*` key.
|
||||
- [ ] `Params` interfaces match the JSON Schema; `State` interfaces match what the executor returns and what the UI surfaces read.
|
||||
- [ ] System prompt disambiguates confusable APIs and points to batch variants.
|
||||
- [ ] Runtime logic lives in `ExecutionRuntime/`; the `client/executor/` only wires stores/services and delegates.
|
||||
- [ ] Executor returns `{ success, content, state, error? }` via a single `toResult()` funnel — `content` always non-empty (default to `error.message`).
|
||||
- [ ] Inspector handles `isArgumentsStreaming`, `isLoading`, `partialArgs`, missing `pluginState`.
|
||||
- [ ] Render returns `null` until it has data; only created for APIs with rich results.
|
||||
- [ ] Placeholder added if the API has a perceivable execution lag (search, list, crawl).
|
||||
- [ ] Streaming added for APIs that emit incremental output (run command, write file, code execution).
|
||||
- [ ] Intervention added if `humanIntervention` is set in the manifest.
|
||||
- [ ] All registry files updated (see [architecture.md → Registry wiring](references/architecture.md#registry-wiring)).
|
||||
- [ ] i18n keys in `src/locales/default/plugin.ts` plus dev seeds in `en-US`/`zh-CN`.
|
||||
- [ ] `bunx vitest run --silent='passed-only' 'packages/builtin-tool-<name>'` passes.
|
||||
- [ ] `bun run type-check` passes.
|
||||
|
||||
---
|
||||
|
||||
## Reference Tools
|
||||
|
||||
Pick the closest neighbor and copy:
|
||||
|
||||
| If your tool is… | Read first |
|
||||
| ----------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
|
||||
| Pure-compute, no UI state | `packages/builtin-tool-calculator/` — `ExecutionRuntime` reuses executor (mathjs/nerdamer work everywhere) |
|
||||
| CRUD over a domain entity | `packages/builtin-tool-task/` — full Inspector + Render set, batch variants |
|
||||
| Heavy UI (Inspector/Render/Placeholder/Portal) | `packages/builtin-tool-web-browsing/` — search-style result UI, Portal for detail view |
|
||||
| Desktop / filesystem with all surfaces (incl. Streaming + Intervention) | `packages/builtin-tool-local-system/` — `ExecutionRuntime` injects an `ILocalSystemService`, executor calls it |
|
||||
| Server-side pure (no client executor) | `packages/builtin-tool-web-browsing/` — only `ExecutionRuntime` is exported; the chat client doesn't run it |
|
||||
| Needs human approval before running | `packages/builtin-tool-local-system/src/client/Intervention/` — per-API approval components |
|
||||
@@ -1,315 +0,0 @@
|
||||
# Builtin Tool Architecture
|
||||
|
||||
## The Five Faces
|
||||
|
||||
A builtin tool ships five distinct faces, each compiled into a different bundle:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ ./ │
|
||||
│ Manifest + Types + systemRole │
|
||||
│ ─ Pure data, no React, no Node-only deps. │
|
||||
│ ─ Imported by: server (LLM tool spec), client (registries), │
|
||||
│ anyone who needs to know "what tools exist". │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ ./executionRuntime │
|
||||
│ src/ExecutionRuntime/index.ts │
|
||||
│ ─ Pure runtime logic. Accepts services via constructor — │
|
||||
│ never imports concrete services or stores directly. │
|
||||
│ ─ Imported by: server (BuiltinServerRuntimeOutput), tests, │
|
||||
│ and the client executor as a delegate. │
|
||||
│ ─ Returns: BuiltinServerRuntimeOutput { content, state, … } │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ ./executor │
|
||||
│ src/client/executor/index.ts │
|
||||
│ ─ BaseExecutor subclass. Wires Zustand stores and frontend │
|
||||
│ services into ExecutionRuntime, then funnels through │
|
||||
│ toResult() into BuiltinToolResult { content, state, error, │
|
||||
│ success }. │
|
||||
│ ─ Imported by: src/store/tool/slices/builtin/executors/ │
|
||||
│ index.ts (registered as a singleton). │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ ./client │
|
||||
│ src/client/{Inspector,Render,Placeholder,Streaming, │
|
||||
│ Intervention,Portal,components}/ │
|
||||
│ ─ React 'use client' surfaces. Read args + pluginState. │
|
||||
│ ─ Imported by: packages/builtin-tools/src/{inspectors, │
|
||||
│ renders,placeholders,streamings,interventions,portals}.ts. │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Registry wiring │
|
||||
│ packages/builtin-tools/src/*.ts │
|
||||
│ src/store/tool/slices/builtin/executors/index.ts │
|
||||
│ ─ Aggregator maps: identifier → { apiName → component }. │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
The split exists so:
|
||||
|
||||
- Server bundles import only `./` and `./executionRuntime` and never touch React.
|
||||
- Frontend bundles import `./client` and never touch Node-only services.
|
||||
- The runtime is testable without React or Electron present.
|
||||
|
||||
---
|
||||
|
||||
## Why ExecutionRuntime is the Default Home for Logic
|
||||
|
||||
**Old pattern (grandfathered):** business logic in `src/executor/` directly. Examples: `builtin-tool-task`, older tools. Works, but the executor mixes runtime logic with frontend service plumbing — hard to reuse on the server.
|
||||
|
||||
**New pattern (preferred):** business logic in `src/ExecutionRuntime/`, frontend wiring in `src/client/executor/`. Examples: `builtin-tool-local-system`, `builtin-tool-web-browsing`, `builtin-tool-calculator`.
|
||||
|
||||
```
|
||||
ExecutionRuntime
|
||||
├─ accepts services via constructor (or `static create(opts)`)
|
||||
├─ returns BuiltinServerRuntimeOutput (content + state + success)
|
||||
└─ no React, no Zustand, no `@/services/...` direct imports
|
||||
|
||||
client/executor
|
||||
├─ extends BaseExecutor<typeof <Name>ApiName>
|
||||
├─ holds a `runtime = new <Name>ExecutionRuntime(realService)` instance
|
||||
├─ each ApiName method:
|
||||
│ 1. resolve scope / pull defaults from BuiltinToolContext
|
||||
│ 2. call runtime.<method>(args)
|
||||
│ 3. funnel through toResult() → BuiltinToolResult
|
||||
└─ exported singleton: export const <name>Executor = new <Name>Executor()
|
||||
```
|
||||
|
||||
### Service injection
|
||||
|
||||
`ExecutionRuntime` should declare a TypeScript interface for the services it needs and accept the implementation via constructor. Server callers wire in real implementations; tests wire in mocks. Example from `local-system`:
|
||||
|
||||
```ts
|
||||
export interface ILocalSystemService {
|
||||
readLocalFile: (params: any) => Promise<any>;
|
||||
writeFile: (params: any) => Promise<any>;
|
||||
/* … */
|
||||
}
|
||||
|
||||
export class LocalSystemExecutionRuntime extends ComputerRuntime {
|
||||
constructor(private service: ILocalSystemService) {
|
||||
super();
|
||||
}
|
||||
/* methods delegate to this.service.* */
|
||||
}
|
||||
```
|
||||
|
||||
The `client/executor` instantiates it once with the real service:
|
||||
|
||||
```ts
|
||||
import { localFileService } from '@/services/electron/localFileService';
|
||||
import { LocalSystemExecutionRuntime } from '../../ExecutionRuntime';
|
||||
|
||||
class LocalSystemExecutor extends BaseExecutor<typeof LocalSystemApiEnum> {
|
||||
private runtime = new LocalSystemExecutionRuntime(localFileService);
|
||||
/* … */
|
||||
}
|
||||
```
|
||||
|
||||
### When ExecutionRuntime is the only thing you ship
|
||||
|
||||
Some tools are server-only — there's no frontend executor. `builtin-tool-web-browsing` is the canonical example: only `./` and `./executionRuntime` are exported, no `./executor`, and the runtime is constructed by the server-side `ToolExecutionService`. Skip `client/executor/` entirely for those.
|
||||
|
||||
### When the executor reuses the runtime as-is
|
||||
|
||||
Pure-compute tools (`builtin-tool-calculator`) often have an executor whose ApiName methods call `executor.calculate(args)` and an `ExecutionRuntime` whose methods call `calculatorExecutor.calculate(args)` — same logic, two thin wrappers. That's fine; the duplication buys you the bundle split.
|
||||
|
||||
---
|
||||
|
||||
## The Result Contract
|
||||
|
||||
### `BuiltinServerRuntimeOutput` (what ExecutionRuntime returns)
|
||||
|
||||
```ts
|
||||
{
|
||||
content: string; // the LLM-facing text — never undefined; default to error message
|
||||
state?: any; // result-domain object the UI reads as pluginState
|
||||
success: boolean; // mandatory
|
||||
error?: any; // raw error; the executor will repackage
|
||||
}
|
||||
```
|
||||
|
||||
### `BuiltinToolResult` (what the executor returns to the runtime)
|
||||
|
||||
```ts
|
||||
{
|
||||
success: boolean;
|
||||
content?: string;
|
||||
state?: any;
|
||||
error?: { type: string; message: string; body?: any };
|
||||
metadata?: Record<string, any>; // rare; e.g. { agentCouncil: true }
|
||||
stop?: boolean; // rare; halt the orchestration step
|
||||
}
|
||||
```
|
||||
|
||||
### The `toResult` funnel (mandatory)
|
||||
|
||||
Every executor method returns through a single `toResult()` to enforce two invariants:
|
||||
|
||||
1. **`content` is never undefined.** A missing content collapses downstream into `''`, leaving the Debug pane blank while `pluginState` was already saved. See the `globLocalFiles` regression in `local-system/src/client/executor/index.ts:60-84`.
|
||||
2. **`state` survives failures.** Renderers can keep showing partial output even when `success: false`.
|
||||
|
||||
```ts
|
||||
private toResult(output: BuiltinServerRuntimeOutput): BuiltinToolResult {
|
||||
const errorMessage = typeof output.error?.message === 'string' ? output.error.message : undefined;
|
||||
const safeContent = output.content || errorMessage || 'Tool execution failed';
|
||||
|
||||
if (!output.success) {
|
||||
return {
|
||||
success: false,
|
||||
content: safeContent,
|
||||
state: output.state,
|
||||
error: output.error
|
||||
? { type: 'PluginServerError', message: errorMessage ?? safeContent, body: output.error }
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
return { success: true, content: safeContent, state: output.state };
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `BaseExecutor` — How Method Dispatch Works
|
||||
|
||||
`BaseExecutor.invoke(apiName, params, ctx)` does:
|
||||
|
||||
```ts
|
||||
if (!this.hasApi(apiName)) return { error: { type: 'ApiNotFound', … }, success: false };
|
||||
return (this as any)[apiName](params, ctx); // method name MUST equal apiName value
|
||||
```
|
||||
|
||||
So:
|
||||
|
||||
- **Method names must equal `<Name>ApiName` values, exactly.** A typo silently routes to "ApiNotFound".
|
||||
- **Methods must be class fields, not class methods**, because `this` is lost when registry calls `executor.invoke(apiName, params, ctx)`. Always declare as `methodName = async (…) => { … }`.
|
||||
- **Always destructure `apiEnum` and `identifier` as `readonly` instance fields**, not getters — `BaseExecutor.hasApi/getApiNames` reads them synchronously.
|
||||
|
||||
---
|
||||
|
||||
## `BuiltinToolContext` — What the Executor Receives
|
||||
|
||||
The runtime hands every executor method an optional `BuiltinToolContext` as the second argument:
|
||||
|
||||
| Field | Use |
|
||||
| ----------------------------- | -------------------------------------------------------------- |
|
||||
| `agentId` | Default agent for "current agent" semantics (e.g. `listTasks`) |
|
||||
| `groupId` | Group chat scope |
|
||||
| `topicId` | Current topic — needed when creating messages/operations |
|
||||
| `taskId` | Current task identifier — fallback for "implicit" param |
|
||||
| `documentId` | Current page/document scope |
|
||||
| `messageId` | The tool message being created (for state attachments) |
|
||||
| `sourceMessageId` | The user message that triggered this tool turn |
|
||||
| `operationId` | Operation lineage (use for cancellation, tracing) |
|
||||
| `scope` | `'task' \| 'agent' \| …` — toggles default behaviors |
|
||||
| `signal: AbortSignal` | Honor for long-running ops |
|
||||
| `stepContext` | Cross-message runtime state (lobe-agent todos, etc.) |
|
||||
| `registerAfterCompletion(cb)` | Defer side-effects past message-update race |
|
||||
| `groupOrchestration` | Group orchestration callbacks |
|
||||
|
||||
**Use rule:** read with `?.`, fall back to explicit params, **never silently override** an explicit param with a context value.
|
||||
|
||||
---
|
||||
|
||||
## i18n Integration
|
||||
|
||||
Source of truth: `src/locales/default/plugin.ts`. Keys follow `builtins.<identifier>.<topic>.<…>`:
|
||||
|
||||
| Key | Use |
|
||||
| ------------------------------------- | ------------------------------------------------------------ |
|
||||
| `builtins.<identifier>.title` | Display title (overrides `manifest.meta.title` when present) |
|
||||
| `builtins.<identifier>.apiName.<api>` | Inspector header label (one per ApiName) |
|
||||
| `builtins.<identifier>.inspector.<…>` | Extra Inspector strings ("no results", chips, counters) |
|
||||
| `builtins.<identifier>.<feature>.<…>` | Render / Intervention strings, free-form per tool |
|
||||
|
||||
For dev preview, also seed `locales/zh-CN/plugin.json` and `locales/en-US/plugin.json`. Run `pnpm i18n` before opening a PR — it's slow, so do it once at the end. (See the **i18n** skill for the full workflow.)
|
||||
|
||||
---
|
||||
|
||||
## Registry Wiring
|
||||
|
||||
Five core files plus optional ones. Miss any and you'll see "tool not found", a missing chip, a blank result card, a stuck spinner, or an approval dialog that never appears.
|
||||
|
||||
| File | Add what |
|
||||
| -------------------------------------------------- | ----------------------------------------------------------------------------------------- |
|
||||
| **Required** | |
|
||||
| `packages/builtin-tools/src/index.ts` | Import `<Name>Manifest`; push entry to `builtinTools`. Set `hidden`/`discoverable` flags. |
|
||||
| `packages/builtin-tools/src/identifiers.ts` | Add `<Name>Manifest.identifier` to `builtinToolIdentifiers`. |
|
||||
| `packages/builtin-tools/src/inspectors.ts` | Import `<Name>Inspectors, <Name>Manifest`; add to `BuiltinToolInspectors`. |
|
||||
| `src/store/tool/slices/builtin/executors/index.ts` | Import `<name>Executor`; add to `registerExecutors([…])`. |
|
||||
| **Conditional — add only if the surface exists** | |
|
||||
| `packages/builtin-tools/src/renders.ts` | Add to `BuiltinToolsRenders` if any API has a Render. |
|
||||
| `packages/builtin-tools/src/placeholders.ts` | Add to `BuiltinToolPlaceholders` if any API has a Placeholder. |
|
||||
| `packages/builtin-tools/src/streamings.ts` | Add to `BuiltinToolStreamings` if any API has a Streaming renderer. |
|
||||
| `packages/builtin-tools/src/interventions.ts` | Add to `BuiltinToolInterventions` if any API has an Intervention component. |
|
||||
| `packages/builtin-tools/src/portals.ts` | Add to `BuiltinToolsPortals` if the tool has a Portal. |
|
||||
| `packages/builtin-tools/src/displayControls.ts` | Add if Render must show/hide based on result content (rare; see ClaudeCode/Codex). |
|
||||
|
||||
### Optional flags in `packages/builtin-tools/src/index.ts`
|
||||
|
||||
```ts
|
||||
{
|
||||
identifier: TaskManifest.identifier,
|
||||
manifest: TaskManifest,
|
||||
type: 'builtin',
|
||||
hidden: true, // hide from chat-input Tools popover
|
||||
discoverable: false, // exclude from agent builder / skill discovery
|
||||
}
|
||||
```
|
||||
|
||||
Lists in the same file you may need to touch:
|
||||
|
||||
- `defaultToolIds` — added to the agent's tool list by default
|
||||
- `alwaysOnToolIds` — forced on regardless of user selection (use sparingly)
|
||||
- `runtimeManagedToolIds` — enable state controlled by runtime, not user UI; **must mirror the rules map** in `apps/server/src/modules/Mecha/AgentToolsEngine/index.ts` and `src/helpers/toolEngineering/index.ts`
|
||||
|
||||
---
|
||||
|
||||
## File-Map at a Glance
|
||||
|
||||
```
|
||||
packages/builtin-tool-<name>/
|
||||
├── package.json # exports: ., ./client, ./executor, ./executionRuntime
|
||||
└── src/
|
||||
├── index.ts # export Manifest, Identifier, types, systemPrompt
|
||||
├── manifest.ts # BuiltinToolManifest + Identifier const
|
||||
├── types.ts # ApiName + Params/State per API
|
||||
├── systemRole.ts # System prompt (multiple variants OK: systemRole.desktop.ts)
|
||||
├── ExecutionRuntime/
|
||||
│ └── index.ts # <Name>ExecutionRuntime — pure runtime, service injection
|
||||
└── client/
|
||||
├── index.ts # exports for the registries
|
||||
├── executor/
|
||||
│ └── index.ts # <Name>Executor extends BaseExecutor; export <name>Executor
|
||||
├── Inspector/
|
||||
│ ├── index.ts # <Name>Inspectors record
|
||||
│ └── <ApiName>/index.tsx # one folder per API (or .tsx file when trivial)
|
||||
├── Render/
|
||||
│ ├── index.ts # <Name>Renders record
|
||||
│ └── <ApiName>/ # rich renders → folder with subcomponents
|
||||
├── Placeholder/
|
||||
│ ├── index.ts
|
||||
│ └── <ApiName>.tsx # usually a single skeleton file
|
||||
├── Streaming/
|
||||
│ ├── index.ts
|
||||
│ └── <ApiName>/ # live-output renderer
|
||||
├── Intervention/
|
||||
│ ├── index.ts
|
||||
│ └── <ApiName>/ # approval / edit-before-run UI
|
||||
├── Portal/
|
||||
│ ├── index.tsx # routing component (switch on apiName)
|
||||
│ └── <ApiName>/ # full-screen detail view
|
||||
└── components/ # FileItem, EngineAvatar, etc. — shared subcomponents
|
||||
```
|
||||
|
||||
Skip every `client/<surface>/` directory you don't need — empty registries are fine.
|
||||
@@ -1,478 +0,0 @@
|
||||
# Tool Design (Naming, Manifest, Executor, Runtime)
|
||||
|
||||
This doc covers everything that **isn't UI**: the tool's identifier, API surface, manifest, types, system prompt, ExecutionRuntime, and the executor that wires it into the frontend.
|
||||
|
||||
For UI surfaces (Inspector / Render / Placeholder / Streaming / Intervention / Portal), see [ui/](ui/README.md).
|
||||
For where files live and how registries work, see [architecture.md](architecture.md).
|
||||
|
||||
---
|
||||
|
||||
## 1. Naming
|
||||
|
||||
| Thing | Convention | Example |
|
||||
| ----------------------- | -------------------------------------------------------------- | ------------------------------------------------------------ |
|
||||
| Package directory | `packages/builtin-tool-<kebab>/` | `builtin-tool-task` |
|
||||
| npm name | `@lobechat/builtin-tool-<kebab>` | `@lobechat/builtin-tool-task` |
|
||||
| Tool `identifier` | `lobe-<kebab-domain>` — **persisted in message history** | `lobe-task`, `lobe-calculator`, `lobe-knowledge-base` |
|
||||
| Identifier const | `<Name>Identifier` exported from `manifest.ts` (or `types.ts`) | `export const TaskIdentifier = 'lobe-task'` |
|
||||
| API name const | `<Name>ApiName` — `as const` object, **camelCase verbs** | `createTask`, `listTasks`, `runTask` |
|
||||
| Executor class | `<Name>Executor extends BaseExecutor<typeof <Name>ApiName>` | `TaskExecutor` |
|
||||
| Executor singleton | `<name>Executor` (camelCase) | `export const taskExecutor = new TaskExecutor()` |
|
||||
| ExecutionRuntime class | `<Name>ExecutionRuntime` | `LocalSystemExecutionRuntime`, `WebBrowsingExecutionRuntime` |
|
||||
| Inspector / Render etc. | `<ApiName>Inspector` / `<ApiName>Render` | `CreateTaskInspector`, `SearchInspector` |
|
||||
|
||||
### Identifier rules
|
||||
|
||||
- **`lobe-` prefix is mandatory** — many switches in the codebase key off it.
|
||||
- Pick a **domain noun**, not a verb (`lobe-task`, not `lobe-task-manager`).
|
||||
- The identifier is **persisted in message history** — renaming after release means the `@deprecated` alias trick (register the legacy identifier as a second key in `inspectors.ts` / `renders.ts` pointing at the new module). Get it right the first time.
|
||||
|
||||
### ApiName rules
|
||||
|
||||
- Verb + noun, camelCase: `createTask`, `viewTask`, `runTasks`.
|
||||
- **Plural variant for batch** (`createTasks`, `runTasks`) — describe in the manifest description that it's preferred over multiple single calls. The system prompt should also push the batch form.
|
||||
- Reserve **clear separation between mutating verbs** (`updateTaskStatus`, `editTask`) and **execution verbs** (`runTask`). The system prompt must warn the model when these are confusable — see `task` for the canonical "do NOT use updateTaskStatus(running) to start a task" warning.
|
||||
- Read-only verbs: `list*`, `view*`, `get*`, `search*`. Mutating: `create*`, `edit*`, `update*`, `delete*`. Triggers/effects: `run*`, `execute*`, `submit*`.
|
||||
|
||||
---
|
||||
|
||||
## 2. `types.ts` — ApiName + Params/State
|
||||
|
||||
Define `<Name>ApiName` as `as const` so it doubles as a runtime enum (used by `BaseExecutor`) and a literal type. Then declare `Params` and `State` per API.
|
||||
|
||||
```ts
|
||||
export const TaskIdentifier = 'lobe-task';
|
||||
|
||||
export const TaskApiName = {
|
||||
createTask: 'createTask',
|
||||
createTasks: 'createTasks',
|
||||
listTasks: 'listTasks',
|
||||
/* …one entry per API, group logically (CRUD then run-style) */
|
||||
} as const;
|
||||
|
||||
export type TaskApiNameType = (typeof TaskApiName)[keyof typeof TaskApiName];
|
||||
|
||||
// One block per API
|
||||
export interface CreateTaskParams {
|
||||
name: string;
|
||||
instruction: string; /* … */
|
||||
}
|
||||
export interface CreateTaskState {
|
||||
identifier?: string;
|
||||
success: boolean;
|
||||
}
|
||||
|
||||
export interface CreateTasksParams {
|
||||
tasks: CreateTaskParams[];
|
||||
}
|
||||
export interface CreateTasksItemResult {
|
||||
error?: string;
|
||||
identifier?: string;
|
||||
name: string;
|
||||
success: boolean;
|
||||
}
|
||||
export interface CreateTasksState {
|
||||
failed: number;
|
||||
results: CreateTasksItemResult[];
|
||||
succeeded: number;
|
||||
}
|
||||
```
|
||||
|
||||
**The result-domain rule for `State`** (memory: "pluginState is result-domain, not call-domain"):
|
||||
|
||||
- Include only fields the UI **renders after the call returns** — ids the LLM didn't have when calling, counts, summary numbers, server-assigned status.
|
||||
- **Don't echo all params.** The Inspector/Render gets `args` for free.
|
||||
- Keep batch results as `{ succeeded, failed, results }` so the Render can show a one-line summary plus a detail list.
|
||||
|
||||
---
|
||||
|
||||
## 3. `manifest.ts` — JSON Schema for the LLM
|
||||
|
||||
```ts
|
||||
import type { BuiltinToolManifest } from '@lobechat/types';
|
||||
|
||||
import { systemPrompt } from './systemRole';
|
||||
import { TaskApiName, TaskIdentifier } from './types';
|
||||
|
||||
export const TaskManifest: BuiltinToolManifest = {
|
||||
identifier: TaskIdentifier,
|
||||
type: 'builtin',
|
||||
systemRole: systemPrompt,
|
||||
meta: {
|
||||
avatar: '📋',
|
||||
title: 'Task Tools',
|
||||
description: 'Create, list, edit, delete tasks with dependencies',
|
||||
readme: 'Optional long description shown in tool detail pages',
|
||||
},
|
||||
api: [
|
||||
{
|
||||
name: TaskApiName.createTask,
|
||||
description:
|
||||
'Create a new task. Optionally attach as a subtask via parentIdentifier. ' +
|
||||
'Prefer createTasks when planning a batch.',
|
||||
parameters: {
|
||||
type: 'object',
|
||||
required: ['name', 'instruction'],
|
||||
properties: {
|
||||
name: { type: 'string', description: 'Short, descriptive name.' },
|
||||
instruction: {
|
||||
type: 'string',
|
||||
description: 'Detailed instruction for what the task should accomplish.',
|
||||
},
|
||||
parentIdentifier: {
|
||||
type: 'string',
|
||||
description:
|
||||
'Identifier of the parent task (e.g. "TASK-1"). If provided, the new task becomes a subtask.',
|
||||
},
|
||||
priority: {
|
||||
type: 'number',
|
||||
description: 'Priority level: 0=none, 1=urgent, 2=high, 3=normal, 4=low. Default is 0.',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
/* …one entry per ApiName */
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
### Manifest writing checklist
|
||||
|
||||
- **Every API in `<Name>ApiName` has exactly one entry in `api[]`.** Easy to drift after a refactor.
|
||||
- **`description` on each API is the model's only docs.** Make it long enough for the LLM to pick the right tool. Mention edge cases ("If you provide any filter, omitted filters are not applied implicitly"), defaults, and the relationship to sibling APIs ("To START a task, use runTask — updateTaskStatus only flips a flag").
|
||||
- **`parameters` is JSON Schema** (`LobeChatPluginApi`). Use `enum`, `required`, `items`, `oneOf`, `additionalProperties: false` etc. — these survive into the LLM's tool spec.
|
||||
- **Use `additionalProperties: false`** on parameter objects so the model can't sneak unknown fields past validation.
|
||||
- **Number parameters with semantic values** (`priority: 0=none, 1=urgent, …`) should describe the mapping in the description. Don't rely on `enum` alone for numbers — the model often fills the wrong one.
|
||||
- **`enum` arrays for known string sets** (statuses, categories, engines). Spread from a constants module (`enum: [...TASK_STATUSES]`) so the manifest stays in sync.
|
||||
|
||||
### Optional manifest fields
|
||||
|
||||
```ts
|
||||
{
|
||||
/* Where this tool can run.
|
||||
'client' → Agent Gateway dispatches to the desktop client (filesystem, Electron only)
|
||||
'server' → ToolExecutionService runs it on the server
|
||||
omitted → server only */
|
||||
executors: ['client', 'server'],
|
||||
|
||||
/* Default human intervention policy for all APIs that don't specify one.
|
||||
Pair with an Intervention component (see ui/intervention.md). */
|
||||
humanIntervention: 'never' | 'always' | { /* extended config */ },
|
||||
}
|
||||
```
|
||||
|
||||
Per-API `humanIntervention` and `renderDisplayControl` go inside each `api[]` entry.
|
||||
|
||||
---
|
||||
|
||||
## 4. `systemRole.ts` — Operator Instructions for the Model
|
||||
|
||||
This is appended to the agent system prompt whenever the tool is enabled. Treat it as a **how-to-use guide for the LLM**, not marketing copy.
|
||||
|
||||
```ts
|
||||
export const systemPrompt = `You have access to Task management tools. Use them to:
|
||||
|
||||
- **createTask**: Create a new task. Use parentIdentifier to make it a subtask.
|
||||
- **createTasks**: Prefer this over multiple createTask calls when planning a batch
|
||||
(e.g. all subtasks under one parent, or all chapters of an outline).
|
||||
- **runTask**: Actually START a task — kicks off the agent in a new (or continued)
|
||||
topic. Do NOT use updateTaskStatus(running) to start a task; that only flips a
|
||||
flag without executing. The task must have an assigneeAgentId.
|
||||
- **updateTaskStatus**: Change a task's status (completed/cancelled/paused/failed).
|
||||
If you mark a task as failed, include an error message explaining why.
|
||||
- ...
|
||||
|
||||
When planning work:
|
||||
1. Create tasks for each major piece (use parentIdentifier to organize as subtasks).
|
||||
2. Use editTask with addDependencies to control execution order.
|
||||
3. Use updateTaskStatus to mark the current task completed when done.`;
|
||||
```
|
||||
|
||||
### Patterns that work well
|
||||
|
||||
- **Bulleted list, bold the API name, one line per API.** The model picks tools by skimming.
|
||||
- **Disambiguate confusable APIs explicitly** (`runTask` vs `updateTaskStatus`).
|
||||
- **Push toward batched APIs** ("Prefer this when…").
|
||||
- **End with a numbered workflow** if the tool has a typical sequence.
|
||||
- **For tools with multiple environments** (e.g. desktop vs cloud), keep variants in `systemRole.ts` and `systemRole.desktop.ts` and pick at the manifest level. See `builtin-tool-local-system`.
|
||||
|
||||
### Dynamic system prompts
|
||||
|
||||
If the prompt depends on runtime state (current date, available models), export a function and call it in the manifest:
|
||||
|
||||
```ts
|
||||
// systemRole.ts
|
||||
export const systemPrompt = (today: string) => `Today is ${today}. You have web search tools…`;
|
||||
|
||||
// manifest.ts
|
||||
import dayjs from 'dayjs';
|
||||
systemRole: systemPrompt(dayjs(new Date()).format('YYYY-MM-DD')),
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. `ExecutionRuntime/index.ts` — Pure Runtime
|
||||
|
||||
This is **the default home for new tool logic** going forward. The runtime is a class that:
|
||||
|
||||
- Has no React, no Zustand, no `@/services/...` direct imports.
|
||||
- Receives services as **constructor injection** (or as method args).
|
||||
- Returns `BuiltinServerRuntimeOutput` from each method.
|
||||
- Is unit-testable by passing in mocks.
|
||||
|
||||
### Pattern A: Inject a service interface
|
||||
|
||||
Use when the runtime calls out to IPC, network, or DB.
|
||||
|
||||
```ts
|
||||
// ExecutionRuntime/index.ts
|
||||
import type { BuiltinServerRuntimeOutput } from '@lobechat/types';
|
||||
|
||||
export interface IWebBrowsingService {
|
||||
search: (q: SearchQuery) => Promise<UniformSearchResponse>;
|
||||
crawlPages: (urls: string[]) => Promise<CrawlResults>;
|
||||
}
|
||||
|
||||
export interface WebBrowsingRuntimeOptions {
|
||||
searchService: IWebBrowsingService;
|
||||
documentService?: WebBrowsingDocumentService;
|
||||
agentId?: string;
|
||||
topicId?: string;
|
||||
}
|
||||
|
||||
export class WebBrowsingExecutionRuntime {
|
||||
constructor(private opts: WebBrowsingRuntimeOptions) {}
|
||||
|
||||
async search(
|
||||
args: SearchQuery,
|
||||
options?: { signal?: AbortSignal },
|
||||
): Promise<BuiltinServerRuntimeOutput> {
|
||||
try {
|
||||
const data = await this.opts.searchService.search(args, options);
|
||||
if (data.errorDetail) {
|
||||
return {
|
||||
success: false,
|
||||
content: data.errorDetail,
|
||||
error: { message: data.errorDetail },
|
||||
state: data,
|
||||
};
|
||||
}
|
||||
return {
|
||||
success: true,
|
||||
content: searchResultsPrompt(data.results.slice(0, 10)),
|
||||
state: data,
|
||||
};
|
||||
} catch (e) {
|
||||
return { success: false, content: (e as Error).message, error: e };
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Pattern B: Reuse the executor
|
||||
|
||||
Use when the same logic runs in browser and Node (e.g. mathjs, nerdamer). The runtime is a thin wrapper that imports the executor and re-types the state per API. See `builtin-tool-calculator/src/ExecutionRuntime/index.ts` for the canonical example.
|
||||
|
||||
### Pattern C: Extend a shared base
|
||||
|
||||
When you're implementing a domain that already has a base runtime (file ops via `ComputerRuntime`), extend and only override `callService` + result normalization. See `builtin-tool-local-system/src/ExecutionRuntime/index.ts`.
|
||||
|
||||
### Runtime contract
|
||||
|
||||
Every method returns:
|
||||
|
||||
```ts
|
||||
{
|
||||
content: string; // LLM-facing — never undefined; default to error message
|
||||
state?: any; // result-domain — what the UI's pluginState becomes
|
||||
success: boolean; // mandatory
|
||||
error?: any; // raw error object; the executor will repackage
|
||||
}
|
||||
```
|
||||
|
||||
Use `@lobechat/prompts` formatters (`searchResultsPrompt`, `crawlResultsPrompt`, `formatTaskCreated`, etc.) to produce structured `content`. They emit XML/markdown that's already tuned for token efficiency.
|
||||
|
||||
---
|
||||
|
||||
## 6. `client/executor/index.ts` — Frontend Wiring
|
||||
|
||||
The executor's job is to **resolve frontend defaults** (current agent, current task, scope) and **call the runtime**. It then funnels through `toResult()` into the `BuiltinToolResult` shape.
|
||||
|
||||
```ts
|
||||
import { BaseExecutor, type BuiltinToolContext, type BuiltinToolResult } from '@lobechat/types';
|
||||
import debug from 'debug';
|
||||
|
||||
import { taskService } from '@/services/task';
|
||||
import { getTaskStoreState } from '@/store/task';
|
||||
|
||||
import { TaskIdentifier } from '../../manifest';
|
||||
import { TaskApiName, type CreateTaskParams } from '../../types';
|
||||
|
||||
const log = debug('lobe-task:executor');
|
||||
|
||||
class TaskExecutor extends BaseExecutor<typeof TaskApiName> {
|
||||
readonly identifier = TaskIdentifier;
|
||||
protected readonly apiEnum = TaskApiName;
|
||||
|
||||
// ⚠ class FIELD, not a method — preserves `this` when invoked via registry
|
||||
createTask = async (
|
||||
params: CreateTaskParams,
|
||||
ctx?: BuiltinToolContext,
|
||||
): Promise<BuiltinToolResult> => {
|
||||
try {
|
||||
log('createTask params=%o', params);
|
||||
const task = await getTaskStoreState().createTask({
|
||||
name: params.name,
|
||||
instruction: params.instruction,
|
||||
// Default assignee from context — never silently override an explicit value
|
||||
assigneeAgentId:
|
||||
params.assigneeAgentId ?? (ctx?.scope === 'task' ? undefined : ctx?.agentId),
|
||||
parentTaskId: params.parentIdentifier?.trim() || undefined,
|
||||
priority: params.priority,
|
||||
});
|
||||
|
||||
if (!task) return this.errorResult('Failed to create task', 'CreateFailed');
|
||||
|
||||
return {
|
||||
success: true,
|
||||
content: formatTaskCreated({ identifier: task.identifier, name: task.name /* … */ }),
|
||||
state: { identifier: task.identifier, success: true },
|
||||
};
|
||||
} catch (error) {
|
||||
return this.errorResult(error, 'CreateTaskFailed');
|
||||
}
|
||||
};
|
||||
|
||||
private errorResult(err: unknown, type: string): BuiltinToolResult {
|
||||
const message = err instanceof Error ? err.message : String(err) || 'Unknown error';
|
||||
return { success: false, content: `Failed: ${message}`, error: { type, message } };
|
||||
}
|
||||
}
|
||||
|
||||
export const taskExecutor = new TaskExecutor();
|
||||
```
|
||||
|
||||
### Hard rules
|
||||
|
||||
1. **Methods are class fields** (`name = async (…) => {…}`), not class methods. The registry calls `(executor as any)[apiName](params, ctx)`; arrow-function fields keep `this` bound.
|
||||
2. **`identifier` and `apiEnum` are `readonly` instance fields**, not getters — `BaseExecutor.hasApi/getApiNames` reads them synchronously at registration time.
|
||||
3. **Default missing params from `ctx`**, but never silently override explicit values. Use `params.foo ?? ctx?.foo`, not `ctx?.foo ?? params.foo`.
|
||||
4. **One funnel for all returns.** Either always return through `toResult(runtime.x())` (when delegating) or through `errorResult(…)` for the catch arm. Never inline `{ success: false, content: '' }` — `content: ''` collapses the Debug pane to blank.
|
||||
5. **`debug('lobe-<name>:executor')`.** Match the namespace to the identifier minus `lobe-` when convenient.
|
||||
6. **Singleton export.** `export const <name>Executor = new <Name>Executor()` — the registry imports the instance, not the class.
|
||||
|
||||
### When the executor delegates to ExecutionRuntime
|
||||
|
||||
```ts
|
||||
class LocalSystemExecutor extends BaseExecutor<typeof LocalSystemApiEnum> {
|
||||
readonly identifier = LocalSystemIdentifier;
|
||||
protected readonly apiEnum = LocalSystemApiEnum;
|
||||
private runtime = new LocalSystemExecutionRuntime(localFileService);
|
||||
|
||||
readLocalFile = async (params: LocalReadFileParams): Promise<BuiltinToolResult> => {
|
||||
try {
|
||||
const result = await this.runtime.readFile({
|
||||
path: params.path,
|
||||
startLine: params.loc?.[0],
|
||||
endLine: params.loc?.[1],
|
||||
});
|
||||
return this.toResult(result);
|
||||
} catch (error) {
|
||||
return this.errorResult(error);
|
||||
}
|
||||
};
|
||||
|
||||
private toResult(out: BuiltinServerRuntimeOutput): BuiltinToolResult {
|
||||
const errMsg = typeof out.error?.message === 'string' ? out.error.message : undefined;
|
||||
const safe = out.content || errMsg || 'Tool execution failed';
|
||||
if (!out.success) {
|
||||
return {
|
||||
success: false,
|
||||
content: safe,
|
||||
state: out.state, // ← preserve partial state on failure
|
||||
error: out.error
|
||||
? { type: 'PluginServerError', message: errMsg ?? safe, body: out.error }
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
return { success: true, content: safe, state: out.state };
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `toResult` funnel is **mandatory**: it enforces never-undefined `content` and partial-state preservation. Both invariants caught real production bugs (`globLocalFiles` Response empty, `editLocalFile` partial state lost).
|
||||
|
||||
---
|
||||
|
||||
## 7. `index.ts` — Package Entry Point
|
||||
|
||||
Keep it pure data + the manifest. **No React, no stores, no Node-only imports.**
|
||||
|
||||
```ts
|
||||
export { TaskIdentifier, TaskManifest } from './manifest';
|
||||
export { systemPrompt } from './systemRole';
|
||||
export {
|
||||
TaskApiName,
|
||||
type TaskApiNameType,
|
||||
type CreateTaskParams,
|
||||
type CreateTaskState,
|
||||
/* …all Params/State types */
|
||||
} from './types';
|
||||
|
||||
// Optional helpers used by both the runtime and the UI
|
||||
export { TASK_STATUSES, UNFINISHED_TASK_STATUSES } from './constants';
|
||||
```
|
||||
|
||||
This entry is what `packages/builtin-tools/src/index.ts` and `identifiers.ts` import — it must be importable from server bundles.
|
||||
|
||||
---
|
||||
|
||||
## 8. `package.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"dependencies": {
|
||||
"@lobechat/prompts": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@lobechat/types": "workspace:*"
|
||||
},
|
||||
"exports": {
|
||||
".": "./src/index.ts",
|
||||
"./client": "./src/client/index.ts",
|
||||
"./executor": "./src/client/executor/index.ts",
|
||||
"./executionRuntime": "./src/ExecutionRuntime/index.ts"
|
||||
},
|
||||
"main": "./src/index.ts",
|
||||
"name": "@lobechat/builtin-tool-<name>",
|
||||
"peerDependencies": {
|
||||
"@lobehub/ui": "^5",
|
||||
"antd": "^6",
|
||||
"antd-style": "*",
|
||||
"lucide-react": "*",
|
||||
"react": "*",
|
||||
"react-i18next": "*"
|
||||
},
|
||||
"private": true,
|
||||
"version": "1.0.0"
|
||||
}
|
||||
```
|
||||
|
||||
**Why peer not direct deps for client libs:** the `./` and `./executionRuntime` entry points must be importable from server code. Listing React etc. as peer deps prevents bundlers from following them when only the runtime is consumed.
|
||||
|
||||
**Skip `./executor`** if the package has no frontend executor (server-only tools like `builtin-tool-web-browsing`).
|
||||
|
||||
---
|
||||
|
||||
## 9. Common Pitfalls
|
||||
|
||||
| Symptom | Likely cause |
|
||||
| ------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| "ApiNotFound" at runtime | Method name in executor doesn't match `ApiName` value (typo, wrong case) |
|
||||
| Method works once, then "this is undefined" | Method declared as `async fn() {}` instead of `fn = async () => {}` — `this` lost when registry invokes |
|
||||
| Debug "Response" pane blank but `pluginState` populated | Returning `content: ''` or letting `output.content` be undefined — use the `toResult` funnel |
|
||||
| Partial result vanishes on failure | `toResult` discarded `state` when `success: false`; preserve it |
|
||||
| Tool shows up but doesn't run on desktop | `executors` in manifest doesn't include `'client'` (or vice versa for server-only) |
|
||||
| Same tool registered twice / legacy identifier ghost | Identifier collision; check `@deprecated` aliases in `inspectors.ts`/`renders.ts` |
|
||||
| Manifest test fails after adding API | Forgot to add the corresponding i18n `apiName.<api>` key |
|
||||
| TypeScript error on `BaseExecutor<typeof X>` | `X` declared with `enum` instead of `as const` object — must be the const-object form |
|
||||
@@ -1,36 +0,0 @@
|
||||
# Tool UI Surfaces
|
||||
|
||||
A builtin tool can ship up to **six client-side surfaces**, each with a different role in the chat UI. Only `Inspector` is required; the other five are added on demand and registered in their own central files.
|
||||
|
||||
| Surface | Required? | When the chat shows it | Registered in |
|
||||
| ------------ | --------- | --------------------------------------------------------------------- | --------------------------------------------- |
|
||||
| Inspector | ✅ Always | Header strip of every tool call (one-line chip) | `packages/builtin-tools/src/inspectors.ts` |
|
||||
| Render | Optional | Rich result card below the header, after the call returns | `packages/builtin-tools/src/renders.ts` |
|
||||
| Placeholder | Optional | Skeleton between "args streaming complete" and "result arrives" | `packages/builtin-tools/src/placeholders.ts` |
|
||||
| Streaming | Optional | Live output during execution (e.g. command stdout) | `packages/builtin-tools/src/streamings.ts` |
|
||||
| Intervention | Optional | Approval / edit-before-run dialog (when `humanIntervention` triggers) | `packages/builtin-tools/src/interventions.ts` |
|
||||
| Portal | Optional | Full-screen detail view (right-side or modal) | `packages/builtin-tools/src/portals.ts` |
|
||||
|
||||
The two reference tools to read end-to-end:
|
||||
|
||||
- **`builtin-tool-web-browsing/src/client/`** — Inspector + Render + Placeholder + Portal (no Intervention/Streaming).
|
||||
- **`builtin-tool-local-system/src/client/`** — all six surfaces, including `components/` for shared building blocks.
|
||||
|
||||
---
|
||||
|
||||
## Files in this folder
|
||||
|
||||
Read **principles** and **shared-rules** first — they apply to every surface. Then jump to the surface you're building.
|
||||
|
||||
| File | What it covers |
|
||||
| ---------------------------------- | ----------------------------------------------------------------------- |
|
||||
| [principles.md](principles.md) | Design principles — when each surface exists and how far to take it |
|
||||
| [shared-rules.md](shared-rules.md) | Cross-surface rules: component skeleton, styling, single-layer surfaces |
|
||||
| [inspector.md](inspector.md) | Inspector — header chip (required) |
|
||||
| [render.md](render.md) | Render — rich result card |
|
||||
| [placeholder.md](placeholder.md) | Placeholder — skeleton between args and result |
|
||||
| [streaming.md](streaming.md) | Streaming — live output during execution |
|
||||
| [intervention.md](intervention.md) | Intervention — approval / edit-before-run |
|
||||
| [portal.md](portal.md) | Portal — full-screen detail view |
|
||||
| [composition.md](composition.md) | Shared subcomponents (`client/components/`) + package public API |
|
||||
| [diagnostics.md](diagnostics.md) | Symptom → surface quick-lookup |
|
||||
@@ -1,51 +0,0 @@
|
||||
# Composition — Shared Components & Package API
|
||||
|
||||
## `client/components/` — Shared Subcomponents
|
||||
|
||||
Cross-cutting building blocks used by multiple surfaces live here, not duplicated in each surface folder.
|
||||
|
||||
Examples from `web-browsing/src/client/components/`:
|
||||
|
||||
- `CategoryAvatar.tsx` — search category icon
|
||||
- `EngineAvatar.tsx` — search engine logo (used in Inspector chip + Render list + Portal header)
|
||||
- `SearchBar.tsx` — editable query bar (used in Render and Portal)
|
||||
|
||||
Examples from `local-system/src/client/components/`:
|
||||
|
||||
- `FileItem.tsx` — single file row (used in ListFiles Render, SearchFiles Render, MoveLocalFiles Render)
|
||||
- `FilePathDisplay.tsx` — path with truncation (used everywhere)
|
||||
|
||||
### Rules
|
||||
|
||||
- Live under `client/components/`, exported via `client/components/index.ts`.
|
||||
- Re-export from `client/index.ts` only if other packages need them; otherwise keep internal.
|
||||
- Keep them dumb — props in, JSX out, no store reads. The store reads belong in the surface that composes them.
|
||||
|
||||
---
|
||||
|
||||
## `client/index.ts` — Package Public API
|
||||
|
||||
Re-exports everything the registries need plus useful types/manifest:
|
||||
|
||||
```ts
|
||||
// Inspector — required
|
||||
export { TaskInspectors } from './Inspector';
|
||||
|
||||
// Render — only if any API has one
|
||||
export { TaskRenders, CreateTaskRender, RunTasksRender } from './Render';
|
||||
|
||||
// Placeholder / Streaming / Intervention — only if used
|
||||
export { LocalSystemListFilesPlaceholder, LocalSystemSearchFilesPlaceholder } from './Placeholder';
|
||||
export { LocalSystemStreamings } from './Streaming';
|
||||
export { LocalSystemInterventions } from './Intervention';
|
||||
|
||||
// Portal — single export per tool
|
||||
export { default as WebBrowsingPortal } from './Portal';
|
||||
|
||||
// Reusable components if other packages need them
|
||||
export { CategoryAvatar, EngineAvatar, SearchBar } from './components';
|
||||
|
||||
// Re-export manifest, identifier, types for convenience
|
||||
export { TaskManifest, TaskIdentifier } from '../manifest';
|
||||
export * from '../types';
|
||||
```
|
||||
@@ -1,15 +0,0 @@
|
||||
# Diagnostic Quick-Lookup
|
||||
|
||||
| Symptom | Surface to check |
|
||||
| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| No header at all on the tool call | Inspector missing from `client/Inspector/index.ts` registry |
|
||||
| Header shows the API name but no chips | Inspector missing `args?.X \|\| partialArgs?.X` fallback |
|
||||
| Header doesn't pulse during loading | Missing `shinyTextStyles.shinyText` on `isArgumentsStreaming \|\| isLoading` |
|
||||
| Empty result card under header | Render returned `<div />` instead of `null` when no data |
|
||||
| Render looks "complex" / card-in-card | Filled container (`colorFillQuaternary`) wrapping more filled boxes — flatten to single-layer, see [shared-rules.md](shared-rules.md) |
|
||||
| Layout jump when result arrives | Placeholder dimensions don't match Render dimensions |
|
||||
| Approval dialog never appears | Manifest missing `humanIntervention`, or Intervention not in registry |
|
||||
| Approval click doesn't wait for inline edit | Missing `registerBeforeApprove(id, flushFn)` |
|
||||
| Portal opens but blank | Switch in `Portal/index.tsx` doesn't cover the apiName |
|
||||
| Strings show as `builtins.lobe-foo.apiName.bar` | Missing i18n key in `src/locales/default/plugin.ts` (or not seeded in dev locale files) |
|
||||
| Wrong color shade on `<Text type="secondary">` | `type='secondary'` is lighter than `colorTextSecondary` — pass via `style={{ color: cssVar.colorTextSecondary }}` |
|
||||
@@ -1,118 +0,0 @@
|
||||
# Inspector — Header Chip (required)
|
||||
|
||||
**Lifecycle:** Inspector renders for **every phase** of a tool call: while args are streaming in, while the executor is running, and after results come back. It's the only surface that's always visible.
|
||||
|
||||
**Goal:** keep it to a single line. Show what's happening with as much context as is currently available.
|
||||
|
||||
## Props (`BuiltinInspectorProps<Args, State>`)
|
||||
|
||||
```ts
|
||||
interface BuiltinInspectorProps<Arguments = any, State = any> {
|
||||
apiName: string;
|
||||
args: Arguments; // final args (only after the assistant stops streaming)
|
||||
identifier: string;
|
||||
isArgumentsStreaming?: boolean; // args still arriving
|
||||
isLoading?: boolean; // args complete, executor running
|
||||
partialArgs?: Arguments; // partial JSON during streaming
|
||||
pluginState?: State; // executor's `state` after success
|
||||
result?: { content: string | null; error?: any };
|
||||
}
|
||||
```
|
||||
|
||||
## State machine
|
||||
|
||||
| Phase | What's available | What to show |
|
||||
| ----------------------------------- | ---------------------------------------------------------- | ---------------------------------------------------------- |
|
||||
| Args streaming, no useful field yet | `isArgumentsStreaming === true`, `partialArgs.X` undefined | Just the API title with `shinyTextStyles.shinyText` |
|
||||
| Args streaming, key field arrived | `partialArgs.X` populated | Title + key field chip, still pulse-animated |
|
||||
| Args complete, executor running | `args` populated, `isLoading === true` | Same as above, still pulse-animated |
|
||||
| Result arrived | `pluginState` populated, `isLoading === false` | Title + chips + result summary (count, identifier, status) |
|
||||
|
||||
## Canonical example — Search
|
||||
|
||||
`packages/builtin-tool-web-browsing/src/client/Inspector/Search/index.tsx`:
|
||||
|
||||
```tsx
|
||||
'use client';
|
||||
|
||||
import type { BuiltinInspectorProps, SearchQuery, UniformSearchResponse } from '@lobechat/types';
|
||||
import { Text } from '@lobehub/ui';
|
||||
import { cssVar, cx } from 'antd-style';
|
||||
import { memo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
import { highlightTextStyles, inspectorTextStyles, shinyTextStyles } from '@/styles';
|
||||
|
||||
export const SearchInspector = memo<BuiltinInspectorProps<SearchQuery, UniformSearchResponse>>(
|
||||
({ args, partialArgs, isArgumentsStreaming, isLoading, pluginState }) => {
|
||||
const { t } = useTranslation('plugin');
|
||||
|
||||
const query = args?.query || partialArgs?.query || '';
|
||||
const resultCount = pluginState?.results?.length ?? 0;
|
||||
const hasResults = resultCount > 0;
|
||||
|
||||
if (isArgumentsStreaming && !query) {
|
||||
return (
|
||||
<div className={cx(inspectorTextStyles.root, shinyTextStyles.shinyText)}>
|
||||
<span>{t('builtins.lobe-web-browsing.apiName.search')}</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cx(
|
||||
inspectorTextStyles.root,
|
||||
(isArgumentsStreaming || isLoading) && shinyTextStyles.shinyText,
|
||||
)}
|
||||
>
|
||||
<span>{t('builtins.lobe-web-browsing.apiName.search')}: </span>
|
||||
{query && <span className={highlightTextStyles.primary}>{query}</span>}
|
||||
{!isLoading &&
|
||||
!isArgumentsStreaming &&
|
||||
pluginState?.results &&
|
||||
(hasResults ? (
|
||||
<span style={{ marginInlineStart: 4 }}>({resultCount})</span>
|
||||
) : (
|
||||
<Text as="span" color={cssVar.colorTextDescription} fontSize={12}>
|
||||
({t('builtins.lobe-web-browsing.inspector.noResults')})
|
||||
</Text>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
},
|
||||
);
|
||||
SearchInspector.displayName = 'SearchInspector';
|
||||
export default SearchInspector;
|
||||
```
|
||||
|
||||
## Inspector rules
|
||||
|
||||
- Wrap the whole row with `inspectorTextStyles.root` (provides correct flex / line-height baseline).
|
||||
- Pulse with `shinyTextStyles.shinyText` whenever `isArgumentsStreaming || isLoading`.
|
||||
- Show the i18n title first so the row is non-empty during the earliest streaming phase.
|
||||
- Read both `args?.X` and `partialArgs?.X` together — `args` is final, `partialArgs` is in-stream.
|
||||
- Use chips/tags for distinct facets (identifier, name, parent, status, count). Each chip should clip with `text-overflow: ellipsis` and have a `max-width` so long values don't blow out the chat bubble.
|
||||
- Append `pluginState`-derived suffixes only **after** loading finishes — count or "(no results)" should not appear while still searching.
|
||||
- **Switch copy by phase.** If the verb implies an ongoing action ("Creating", "Searching", "Listing"), define `<api>.loading` and `<api>.completed` keys and select via `isArgumentsStreaming || isLoading ? loadingKey : completedKey`. Inspector chips persist in chat history — leaving "Creating task" frozen on a finished call reads as if the tool is still running. Read-only labels that are already noun-form ("View task") can keep a single key. See `CallSubAgentInspector` for the canonical two-key pattern.
|
||||
|
||||
## Inspector registry — `client/Inspector/index.ts`
|
||||
|
||||
```ts
|
||||
import type { BuiltinInspector } from '@lobechat/types';
|
||||
|
||||
import { TaskApiName } from '../../types';
|
||||
import { CreateTaskInspector } from './CreateTask';
|
||||
import { ListTasksInspector } from './ListTasks';
|
||||
/* … */
|
||||
|
||||
export const TaskInspectors: Record<string, BuiltinInspector> = {
|
||||
[TaskApiName.createTask]: CreateTaskInspector as BuiltinInspector,
|
||||
[TaskApiName.listTasks]: ListTasksInspector as BuiltinInspector,
|
||||
/* one entry per ApiName */
|
||||
};
|
||||
|
||||
export { CreateTaskInspector } from './CreateTask';
|
||||
export { ListTasksInspector } from './ListTasks';
|
||||
/* re-export each */
|
||||
```
|
||||
@@ -1,88 +0,0 @@
|
||||
# Intervention — Approval / Edit-Before-Run (optional)
|
||||
|
||||
**Lifecycle:** rendered **before the executor runs** for APIs whose manifest sets `humanIntervention`. The user sees a preview of the args, can edit them, then approves or skips/cancels.
|
||||
|
||||
**Add for** destructive or sensitive ops: shell commands, file writes, file moves, payments, message broadcasts.
|
||||
|
||||
## Props (`BuiltinInterventionProps<Args>`)
|
||||
|
||||
```ts
|
||||
interface BuiltinInterventionProps<Arguments = any> {
|
||||
apiName?: string;
|
||||
args: Arguments;
|
||||
identifier?: string;
|
||||
interactionMode?: 'approval' | 'custom';
|
||||
messageId: string;
|
||||
|
||||
/** Called when the user edits the args; the approve action awaits this. */
|
||||
onArgsChange?: (args: Arguments) => void | Promise<void>;
|
||||
|
||||
/** Called on approve / skip / cancel. */
|
||||
onInteractionAction?: (
|
||||
action:
|
||||
| { type: 'submit'; payload: Record<string, unknown> }
|
||||
| { type: 'skip'; payload?: Record<string, unknown>; reason?: string }
|
||||
| { type: 'cancel'; payload?: Record<string, unknown> },
|
||||
) => Promise<void>;
|
||||
|
||||
/** Register a callback to flush pending saves before approval. Returns cleanup. */
|
||||
registerBeforeApprove?: (id: string, callback: () => void | Promise<void>) => () => void;
|
||||
}
|
||||
```
|
||||
|
||||
## Canonical example — RunCommand Intervention
|
||||
|
||||
`packages/builtin-tool-local-system/src/client/Intervention/RunCommand/index.tsx`:
|
||||
|
||||
```tsx
|
||||
import type { RunCommandParams } from '@lobechat/electron-client-ipc';
|
||||
import type { BuiltinInterventionProps } from '@lobechat/types';
|
||||
import { Flexbox, Highlighter, Text } from '@lobehub/ui';
|
||||
import { memo } from 'react';
|
||||
|
||||
const RunCommand = memo<BuiltinInterventionProps<RunCommandParams>>(({ args }) => {
|
||||
const { description, command, timeout } = args;
|
||||
return (
|
||||
<Flexbox gap={8}>
|
||||
<Flexbox horizontal justify="space-between">
|
||||
{description && <Text>{description}</Text>}
|
||||
{timeout && (
|
||||
<Text style={{ fontSize: 12 }} type="secondary">
|
||||
timeout: {formatTimeout(timeout)}
|
||||
</Text>
|
||||
)}
|
||||
</Flexbox>
|
||||
{command && (
|
||||
<Highlighter wrap language="sh" showLanguage={false} variant="outlined">
|
||||
{command}
|
||||
</Highlighter>
|
||||
)}
|
||||
</Flexbox>
|
||||
);
|
||||
});
|
||||
export default RunCommand;
|
||||
```
|
||||
|
||||
## Intervention rules
|
||||
|
||||
- **Show a preview, not a form by default.** Editing UI is opt-in via `onArgsChange` and is usually inline (click to edit a code block, etc.).
|
||||
- For args with debounced edit state (text fields), use `registerBeforeApprove(id, flushFn)` so the approve action waits for the debounce to flush. Always return the cleanup function.
|
||||
- Call `onInteractionAction({ type: 'submit', payload })` when the user approves; `'skip'` if they skip with a reason; `'cancel'` if they cancel the whole turn.
|
||||
- Add a corresponding `interventionAudit.ts` in the package root if the tool needs scope/path validation before approval (see `local-system/src/interventionAudit.ts`).
|
||||
|
||||
## Intervention registry — `client/Intervention/index.ts`
|
||||
|
||||
```ts
|
||||
import { LocalSystemApiName } from '../..';
|
||||
import EditLocalFile from './EditLocalFile';
|
||||
import RunCommand from './RunCommand';
|
||||
import WriteFile from './WriteFile';
|
||||
/* … */
|
||||
|
||||
export const LocalSystemInterventions = {
|
||||
[LocalSystemApiName.editLocalFile]: EditLocalFile,
|
||||
[LocalSystemApiName.runCommand]: RunCommand,
|
||||
[LocalSystemApiName.writeLocalFile]: WriteFile,
|
||||
/* one entry per API that needs approval */
|
||||
};
|
||||
```
|
||||
@@ -1,93 +0,0 @@
|
||||
# Placeholder — Skeleton Between Args and Result (optional)
|
||||
|
||||
**Lifecycle:** rendered when the args have finished streaming but the executor hasn't returned yet. Disappears when `pluginState` arrives. Bridges the moment of perceived lag.
|
||||
|
||||
**Add for** APIs with noticeable execution time: web search, network crawl, file list, large grep. **Skip for** instant ops (status flips, calculator).
|
||||
|
||||
## Props (`BuiltinPlaceholderProps<Args>`)
|
||||
|
||||
```ts
|
||||
interface BuiltinPlaceholderProps<T extends Record<string, any> = any> {
|
||||
apiName: string;
|
||||
args?: T;
|
||||
identifier: string;
|
||||
}
|
||||
```
|
||||
|
||||
No `pluginState` — Placeholder lives entirely in the "executing" gap.
|
||||
|
||||
## Canonical example — Search Placeholder
|
||||
|
||||
`packages/builtin-tool-web-browsing/src/client/Placeholder/Search.tsx`:
|
||||
|
||||
```tsx
|
||||
import type { BuiltinPlaceholderProps, SearchQuery } from '@lobechat/types';
|
||||
import { Flexbox, Icon, Skeleton } from '@lobehub/ui';
|
||||
import { createStaticStyles, cx } from 'antd-style';
|
||||
import { SearchIcon } from 'lucide-react';
|
||||
import { memo } from 'react';
|
||||
|
||||
import { useIsMobile } from '@/hooks/useIsMobile';
|
||||
import { shinyTextStyles } from '@/styles';
|
||||
|
||||
const styles = createStaticStyles(({ css, cssVar }) => ({
|
||||
query: cx(
|
||||
css`
|
||||
padding: 4px 8px;
|
||||
border-radius: 8px;
|
||||
font-size: 12px;
|
||||
color: ${cssVar.colorTextSecondary};
|
||||
&:hover {
|
||||
background: ${cssVar.colorFillTertiary};
|
||||
}
|
||||
`,
|
||||
shinyTextStyles.shinyText,
|
||||
),
|
||||
}));
|
||||
|
||||
export const Search = memo<BuiltinPlaceholderProps<SearchQuery>>(({ args }) => {
|
||||
const { query } = args || {};
|
||||
const isMobile = useIsMobile();
|
||||
|
||||
return (
|
||||
<Flexbox gap={8}>
|
||||
<Flexbox horizontal={!isMobile} gap={isMobile ? 8 : 40}>
|
||||
<Flexbox horizontal align="center" className={styles.query} gap={8}>
|
||||
<Icon icon={SearchIcon} />
|
||||
{query ? query : <Skeleton.Block active style={{ height: 20, width: 40 }} />}
|
||||
</Flexbox>
|
||||
<Skeleton.Block active style={{ height: 20, width: 40 }} />
|
||||
</Flexbox>
|
||||
<Flexbox horizontal gap={12}>
|
||||
{[1, 2, 3, 4, 5].map((id) => (
|
||||
<Skeleton.Button active key={id} style={{ borderRadius: 8, height: 80, width: 160 }} />
|
||||
))}
|
||||
</Flexbox>
|
||||
</Flexbox>
|
||||
);
|
||||
});
|
||||
```
|
||||
|
||||
## Placeholder rules
|
||||
|
||||
- **Mirror the eventual Render's layout.** When the result arrives the Placeholder unmounts and the Render mounts; if they share dimensions, the chat doesn't jump.
|
||||
- Use `Skeleton.Block` / `Skeleton.Button` from `@lobehub/ui` for placeholder shapes.
|
||||
- Embed any args you have (e.g. the query text) — context helps the user know what's loading.
|
||||
- Pulse with `shinyTextStyles.shinyText` if the Placeholder includes literal text.
|
||||
|
||||
## Placeholder registry — `client/Placeholder/index.ts`
|
||||
|
||||
```ts
|
||||
import { WebBrowsingApiName } from '../../types';
|
||||
import CrawlMultiPages from './CrawlMultiPages';
|
||||
import CrawlSinglePage from './CrawlSinglePage';
|
||||
import { Search } from './Search';
|
||||
|
||||
export const WebBrowsingPlaceholders = {
|
||||
[WebBrowsingApiName.crawlMultiPages]: CrawlMultiPages,
|
||||
[WebBrowsingApiName.crawlSinglePage]: CrawlSinglePage,
|
||||
[WebBrowsingApiName.search]: Search,
|
||||
};
|
||||
|
||||
export { CrawlMultiPages, CrawlSinglePage, Search };
|
||||
```
|
||||
@@ -1,71 +0,0 @@
|
||||
# Portal — Full-Screen Detail View (optional)
|
||||
|
||||
**Lifecycle:** rendered when the user opens the tool message in a side panel or full-screen modal. One Portal per **tool**, not per API — the Portal switches on `apiName` internally.
|
||||
|
||||
**Add for** tools whose results deserve a deep-dive view: search results with editable filters, page content with reader mode, code interpreter sessions.
|
||||
|
||||
## Props (`BuiltinPortalProps<Args, State>`)
|
||||
|
||||
```ts
|
||||
interface BuiltinPortalProps<Arguments = Record<string, any>, State = any> {
|
||||
apiName?: string;
|
||||
arguments: Arguments;
|
||||
identifier: string;
|
||||
messageId: string;
|
||||
state: State;
|
||||
}
|
||||
```
|
||||
|
||||
## Canonical example — Web-Browsing Portal
|
||||
|
||||
`packages/builtin-tool-web-browsing/src/client/Portal/index.tsx`:
|
||||
|
||||
```tsx
|
||||
import type { BuiltinPortalProps, CrawlPluginState, SearchQuery } from '@lobechat/types';
|
||||
import { memo } from 'react';
|
||||
|
||||
import { WebBrowsingApiName } from '../../types';
|
||||
import PageContent from './PageContent';
|
||||
import PageContents from './PageContents';
|
||||
import Search from './Search';
|
||||
|
||||
const Portal = memo<BuiltinPortalProps>(({ arguments: args, messageId, state, apiName }) => {
|
||||
switch (apiName) {
|
||||
case WebBrowsingApiName.search:
|
||||
return <Search messageId={messageId} query={args as SearchQuery} response={state} />;
|
||||
|
||||
case WebBrowsingApiName.crawlSinglePage: {
|
||||
const result = (state as CrawlPluginState).results.find((r) => r.originalUrl === args.url);
|
||||
return <PageContent messageId={messageId} result={result} />;
|
||||
}
|
||||
|
||||
case WebBrowsingApiName.crawlMultiPages:
|
||||
return (
|
||||
<PageContents
|
||||
messageId={messageId}
|
||||
results={(state as CrawlPluginState).results}
|
||||
urls={args.urls}
|
||||
/>
|
||||
);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
export default Portal;
|
||||
```
|
||||
|
||||
## Portal rules
|
||||
|
||||
- One Portal per tool — the file is the routing layer, subcomponents implement each API's view.
|
||||
- Portals can read the chat store directly to detect "still streaming" and render a Skeleton internally (see `Search/index.tsx:20-46`).
|
||||
- Layout assumes more space than the Render — use `Flexbox` with `height={'100%'}` and structure for a side panel viewport.
|
||||
|
||||
## Portal registry — `packages/builtin-tools/src/portals.ts`
|
||||
|
||||
```ts
|
||||
import { WebBrowsingManifest, WebBrowsingPortal } from '@lobechat/builtin-tool-web-browsing/client';
|
||||
import { type BuiltinPortal } from '@lobechat/types';
|
||||
|
||||
export const BuiltinToolsPortals: Record<string, BuiltinPortal> = {
|
||||
[WebBrowsingManifest.identifier]: WebBrowsingPortal as BuiltinPortal,
|
||||
};
|
||||
```
|
||||
@@ -1,19 +0,0 @@
|
||||
# Tool Render 设计原则(中文草案)
|
||||
|
||||
这些原则用于判断一个 builtin tool 的 Inspector / Render / Placeholder / Streaming / Intervention / Portal 应该做什么,以及做到什么程度。
|
||||
|
||||
1. **先保证折叠态可读。** 每个 API 都必须有 Inspector;用户不展开也应该能看懂 “正在做什么 / 对什么做 / 当前结果是什么”。Inspector 不应该只展示函数名和原始参数。
|
||||
2. **Inspector 是一句话,不是详情页。** 优先表达动作、关键对象、数量、状态,例如 “分析图片 3 张”“搜索 12 个结果”“读取 config.json”。长文本、列表和结构化结果放到 Render 或 Portal。
|
||||
3. **Inspector 要覆盖执行生命周期。** `args` 还在 streaming、工具执行中、执行完成、执行失败时都应该有稳定展示;必要时同时读取 `args`、`partialArgs` 和 `pluginState`,避免出现空白、跳变或只显示半截参数。
|
||||
4. **文案要随状态切换时态。** 同一个动作在 loading 与 completed 两个阶段必须用不同的措辞:执行中用现在进行时(“正在创建任务 / Creating task / 正在搜索”),执行完成后切到完成态(“已创建任务 / Task created / 已找到 N 条”)。Inspector chip 会一直留在聊天记录里 —— 如果一直挂着 “正在 xxx”,几小时后回看历史时会读起来像还在跑。约定的 i18n 形式是 `<api>.loading` / `<api>.completed` 一对键(见 `lobe-agent.apiName.callSubAgent.{loading,completed}` 与 `lobe-claude-code.task.{create,list,update,get}.{loading,completed}`),渲染时按 `isArgumentsStreaming || isLoading` 决定取哪一个。只读 / 查询类(“查看任务” 这种本来就是名词性的)可以共用一个键。
|
||||
5. **只有结构化结果才需要 Render。** 如果工具结果只是自然语言总结,通常不需要 Render;如果结果包含列表、媒体、文件、表格、代码、diff、地图、时间线、权限请求等结构,就应该提供 Render。
|
||||
6. **Render 要帮助用户检查结果,而不是复述参数。** Render 的主体应该围绕工具产物组织:可预览、可比较、可筛选、可定位。参数只作为上下文辅助出现,不要把 Render 做成一块更大的 args dump。
|
||||
7. **参数和结果要一起参与渲染。** 好的 Tool UI 通常同时用 `args` 解释意图,用 `pluginState` 展示真实执行结果;但 `pluginState` 只放结果域数据,不要反向塞入可以从 `args` 推导出的内容。
|
||||
8. **慢操作要有 Placeholder。** 如果工具通常需要等待网络、文件系统、模型或外部进程,Placeholder 应该先占住最终 Render 的版式,让用户知道即将看到什么,而不是只显示一个泛化 loading。
|
||||
9. **Streaming 只用于连续产物。** 搜索列表、日志、长文本、文件分析、分阶段计划适合 Streaming;一次性小结果不需要强行做 Streaming。Streaming UI 要能渐进追加,并且完成后自然过渡到最终 Render。
|
||||
10. **有风险的动作必须 Intervention。** 写文件、删除、发送、安装、执行命令、外部可见操作、权限敏感操作,都应该在执行前给出可理解的确认界面;确认文案要说明影响范围,而不是只问 “是否继续”。
|
||||
11. **错误、空态和截断都是正式状态。** Render 不能在失败、无结果、超长结果时退化成空白。错误要说明发生在哪一步;空态要告诉用户没有产物;超长内容要明确 “展示前 N 项 / 还有 N 项”。
|
||||
12. **信息密度要克制。** 默认展示最有判断价值的部分:标题、来源、状态、摘要、少量关键字段。大对象、长列表、原文、调试数据放进可展开区域或 Portal,避免把聊天流撑成后台管理页。
|
||||
13. **视觉上融入聊天流。** Tool UI 应该使用 `@lobehub/ui` / base-ui、`Flexbox`、`createStaticStyles` 和 `cssVar.*`,遵循现有间距、圆角、颜色、字号;不要为单个工具发明一套独立视觉语言。具体的样式约定见 [shared-rules.md](shared-rules.md)。
|
||||
14. **Devtools fixture 是验收入口。** 新增或修改 Tool UI 时,应在 `/devtools` 里准备覆盖典型态、loading/streaming、空态、错误态、长内容态的 fixture;一个 API 如果在真实聊天里会出现,就不应该在 devtools 中缺席。
|
||||
15. **先做用户会看的 UI,再做调试 UI。** Raw JSON、trace、schema、内部 id 可以存在,但应默认收起或放到调试区;主界面先回答用户最关心的问题:工具做了什么,结果值不值得信任,下一步能做什么。
|
||||
@@ -1,101 +0,0 @@
|
||||
# Render — Rich Result Card (optional)
|
||||
|
||||
**Lifecycle:** rendered **once the result arrives** (after Placeholder/Streaming hand off). Sits below the Inspector header.
|
||||
|
||||
**Skip if** the API is read-only or the result is just text — the framework already shows the executor's `content` string. Add a Render only when there's a structured artifact worth seeing: a card, a chart, a diff, a list of files.
|
||||
|
||||
## Props (`BuiltinRenderProps<Args, State, Content>`)
|
||||
|
||||
```ts
|
||||
interface BuiltinRenderProps<Arguments = any, State = any, Content = any> {
|
||||
apiName?: string;
|
||||
args: Arguments; // final params from the LLM
|
||||
content: Content; // executor's content string (or parsed)
|
||||
identifier?: string;
|
||||
messageId: string; // for store lookups
|
||||
pluginError?: any; // from BuiltinToolResult.error
|
||||
pluginState?: State; // executor's state
|
||||
toolCallId?: string;
|
||||
}
|
||||
```
|
||||
|
||||
## Two patterns
|
||||
|
||||
**Pattern A — Single-file Render** (web-browsing CrawlSinglePage):
|
||||
|
||||
```tsx
|
||||
// client/Render/CrawlSinglePage.tsx
|
||||
import type { BuiltinRenderProps, CrawlPluginState, CrawlSinglePageQuery } from '@lobechat/types';
|
||||
import { memo } from 'react';
|
||||
|
||||
import PageContent from './PageContent';
|
||||
|
||||
const CrawlSinglePage = memo<BuiltinRenderProps<CrawlSinglePageQuery, CrawlPluginState>>(
|
||||
({ messageId, pluginState, args }) => (
|
||||
<PageContent messageId={messageId} results={pluginState?.results} urls={[args?.url]} />
|
||||
),
|
||||
);
|
||||
export default CrawlSinglePage;
|
||||
```
|
||||
|
||||
**Pattern B — Folder with subcomponents** (web-browsing Search):
|
||||
|
||||
```
|
||||
client/Render/Search/
|
||||
├── index.tsx # composes the subcomponents, handles error states
|
||||
├── ConfigForm.tsx # appears when pluginError.type === 'PluginSettingsInvalid'
|
||||
├── SearchQuery.tsx # editable query header
|
||||
└── SearchResult.tsx # result list
|
||||
```
|
||||
|
||||
Use Pattern B when the Render has internal state (editing mode, expanded items), error variants, or is large enough to benefit from splitting.
|
||||
|
||||
## Error handling in Render
|
||||
|
||||
Renders are the canonical place to surface `pluginError` because the chat doesn't auto-render typed errors:
|
||||
|
||||
```tsx
|
||||
if (pluginError) {
|
||||
if (pluginError?.type === 'PluginSettingsInvalid') {
|
||||
return <ConfigForm id={messageId} provider={pluginError.body?.provider} />;
|
||||
}
|
||||
return (
|
||||
<Alert
|
||||
title={pluginError?.message}
|
||||
type="error"
|
||||
extra={<Highlighter language="json">{JSON.stringify(pluginError.body, null, 2)}</Highlighter>}
|
||||
/>
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
## Render rules
|
||||
|
||||
- **Return `null`** if there's nothing useful to draw yet (avoids empty cards during stream).
|
||||
- Use `pluginState` for server-truth (ids, counts, server-assigned status) and `args` for what the LLM asked. **Combine — neither alone is enough.**
|
||||
- For lists, summarize with a header line and show top N items with a "+N more" tail rather than rendering everything.
|
||||
- **Keep the Render single-layer** — the tool card is already your surface, so don't open with your own filled container and then nest more filled boxes inside it. See [shared-rules.md](shared-rules.md) → "Stay single-layer".
|
||||
- For modals from a Render, use `@lobehub/ui/base-ui` (`createModal`, `useModalContext`, `confirmModal`) — see the **modal** skill.
|
||||
|
||||
## Render registry — `client/Render/index.ts`
|
||||
|
||||
```ts
|
||||
import type { BuiltinRender } from '@lobechat/types';
|
||||
|
||||
import { TaskApiName } from '../../types';
|
||||
import CreateTaskRender from './CreateTask';
|
||||
import RunTasksRender from './RunTasks';
|
||||
|
||||
export const TaskRenders: Record<string, BuiltinRender> = {
|
||||
[TaskApiName.createTask]: CreateTaskRender as BuiltinRender,
|
||||
[TaskApiName.runTasks]: RunTasksRender as BuiltinRender,
|
||||
/* only the APIs with rich result UI — others fall back to text content */
|
||||
};
|
||||
|
||||
export { default as CreateTaskRender } from './CreateTask';
|
||||
export { default as RunTasksRender } from './RunTasks';
|
||||
```
|
||||
|
||||
## Render display control (rare)
|
||||
|
||||
If the Render should hide for certain results (e.g. ClaudeCode's TodoWrite hides when the agent is mid-stream), add a `RenderDisplayControl` to `packages/builtin-tools/src/displayControls.ts`. See `ClaudeCodeRenderDisplayControls` for the pattern.
|
||||
@@ -1,89 +0,0 @@
|
||||
# Shared Style Rules
|
||||
|
||||
These apply across every surface.
|
||||
|
||||
## The component skeleton
|
||||
|
||||
Every surface file is the same shape, so internalize it once instead of re-deriving it per rule. The skeleton below bakes in five mechanical conventions — copy it and fill the body:
|
||||
|
||||
```tsx
|
||||
'use client'; // (a) leaves of the chat tree must not block server rendering
|
||||
|
||||
import type { BuiltinInspectorProps, SearchQuery, UniformSearchResponse } from '@lobechat/types';
|
||||
import { memo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
// (b) type with BuiltinXProps<Args, State> — never widen to `any`.
|
||||
// Args = the JSON Schema params, State = the executor's `state` field;
|
||||
// they should match <Name>Params / <Name>State from types.ts.
|
||||
export const SearchInspector = memo<BuiltinInspectorProps<SearchQuery, UniformSearchResponse>>(
|
||||
({ args, pluginState }) => {
|
||||
const { t } = useTranslation('plugin'); // (c) all strings from the `plugin` namespace
|
||||
|
||||
// (d) cross-cutting state (loading, streaming buffer) comes from the store,
|
||||
// not props — props only carry args/state/messageId.
|
||||
// const buffer = useChatStore((s) => chatToolSelectors.streamingBuffer(messageId)(s));
|
||||
|
||||
return <span>{t('builtins.<identifier>.apiName.search')}</span>;
|
||||
},
|
||||
);
|
||||
SearchInspector.displayName = 'SearchInspector'; // (e) always memo + displayName
|
||||
export default SearchInspector;
|
||||
```
|
||||
|
||||
- **(c)** Default an Inspector to `t('builtins.<identifier>.apiName.<api>')` so the row is non-empty while args stream in.
|
||||
- **(d)** Read the store via Zustand selectors inside the component; see [streaming.md](streaming.md) for the buffer selector.
|
||||
|
||||
## Styling: `createStaticStyles + cssVar.*`, `@lobehub/ui` over `antd`
|
||||
|
||||
Zero-runtime CSS-in-JS — styles compile once and read CSS variables at runtime:
|
||||
|
||||
```tsx
|
||||
import { createStaticStyles, cssVar } from 'antd-style';
|
||||
|
||||
const styles = createStaticStyles(({ css, cssVar }) => ({
|
||||
chip: css`
|
||||
padding-block: 2px;
|
||||
padding-inline: 8px;
|
||||
border-radius: 999px;
|
||||
color: ${cssVar.colorText};
|
||||
background: ${cssVar.colorFillTertiary};
|
||||
`,
|
||||
}));
|
||||
```
|
||||
|
||||
- Fall back to `createStyles + token` only when you need runtime token computation (rare). Inline `style={{ color: cssVar.colorTextSecondary }}` is fine for one-off dynamic values.
|
||||
- Components come from `@lobehub/ui` (`Block`, `Text`, `Flexbox`, `Highlighter`, `Alert`, `Tooltip`, `Skeleton`), not raw `antd`. Modals come from `@lobehub/ui/base-ui` (`createModal`, `useModalContext`, `confirmModal`) — see the **modal** skill.
|
||||
- Note: `<Text type='secondary'>` is a lighter shade than `colorTextSecondary`. For that exact token color, write `<Text style={{ color: cssVar.colorTextSecondary }}>`.
|
||||
|
||||
## Stay single-layer — don't nest filled cards
|
||||
|
||||
The framework already wraps every Render / Intervention in a tool card, so that card **is** your surface. A Render that opens with its own `background: ${cssVar.colorFillQuaternary}` container is already one card deep; put another filled box inside it (`colorBgContainer` / `colorFillTertiary`) and you get the card-in-card look that reads as "complex" — two or three stacked fills for what is really a flat list of fields.
|
||||
|
||||
- **The outermost wrapper carries no fill.** Use a flat container with only `padding-block: 4px` for breathing room; let the tool card provide the card. (See `Agent/index.tsx`'s `container`.)
|
||||
- **At most one filled box, and only to delineate real content** — a Markdown preview, a diff, a code/result block. Labels, key–value fields, question/answer text, chips: render flat on the surface, separated by spacing or a hairline divider (`height: 1px; background: ${cssVar.colorFillSecondary}`), not by wrapping each in its own box.
|
||||
- **A box on a flat surface needs a visible fill.** Once the outer fill is gone, an inner `colorBgContainer` box can vanish against the tool card (same color). Use `colorFillTertiary` for the one content box so it still reads as delineated.
|
||||
- Don't wrap a single value in a box just to give it padding — that's the redundant-nesting smell (a `detailCard` around a `value` box around one string).
|
||||
|
||||
```tsx
|
||||
// ❌ card-in-card: filled container wrapping a filled preview box
|
||||
container: css`
|
||||
padding: 12px;
|
||||
background: ${cssVar.colorFillQuaternary};
|
||||
`,
|
||||
previewBox: css`
|
||||
background: ${cssVar.colorBgContainer};
|
||||
`,
|
||||
|
||||
// ✅ single-layer: flat container, one visible content box
|
||||
container: css`
|
||||
padding-block: 4px;
|
||||
`,
|
||||
previewBox: css`
|
||||
background: ${cssVar.colorFillTertiary};
|
||||
`,
|
||||
```
|
||||
|
||||
For the common "icon + file/title header, then one content box" shape, reuse `ToolResultCard` from `@lobechat/shared-tool-ui/components` instead of rebuilding it — it's already single-layer (flat wrapper, one `colorFillTertiary` content box) and is what CC `Read` / `Grep` / `Glob` / `Write` / `WebSearch` / `WebFetch` render through.
|
||||
|
||||
The exception is a deliberate **panel** pattern — an `<Block variant="outlined">` with a header bar + list rows (CC `TodoWrite` / `Task`). There the single outlined block is the panel and the header fill is a header bar, not a nested card. One structured panel is fine; stacked decorative fills are not.
|
||||
@@ -1,83 +0,0 @@
|
||||
# Streaming — Live Output During Execution (optional)
|
||||
|
||||
**Lifecycle:** rendered **while the executor is still running** for APIs that emit incremental output. The component is responsible for fetching the in-flight stream from the chat store and rendering it.
|
||||
|
||||
**Add for** long-running ops with continuous output: shell command execution (stdout/stderr), file write progress, code interpreter cells.
|
||||
|
||||
## Props (`BuiltinStreamingProps<Args>`)
|
||||
|
||||
```ts
|
||||
interface BuiltinStreamingProps<Arguments = any> {
|
||||
apiName: string;
|
||||
args: Arguments;
|
||||
identifier: string;
|
||||
messageId: string; // use to fetch the streaming buffer from store
|
||||
toolCallId: string;
|
||||
}
|
||||
```
|
||||
|
||||
Note there's **no `state` or `result` prop** — the Streaming component is for the in-flight phase. It pulls the live buffer from the store itself (typically via `chatToolSelectors.streamingContent(messageId)` or similar).
|
||||
|
||||
## Canonical example — RunCommandStreaming
|
||||
|
||||
`packages/builtin-tool-local-system/src/client/Streaming/RunCommand/index.tsx`:
|
||||
|
||||
```tsx
|
||||
'use client';
|
||||
|
||||
import type { BuiltinStreamingProps } from '@lobechat/types';
|
||||
import { Highlighter } from '@lobehub/ui';
|
||||
import { memo } from 'react';
|
||||
|
||||
interface RunCommandParams {
|
||||
command?: string;
|
||||
description?: string;
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
export const RunCommandStreaming = memo<BuiltinStreamingProps<RunCommandParams>>(({ args }) => {
|
||||
const { command } = args || {};
|
||||
if (!command) return null;
|
||||
|
||||
return (
|
||||
<Highlighter
|
||||
animated
|
||||
wrap
|
||||
language="sh"
|
||||
showLanguage={false}
|
||||
style={{ padding: '4px 8px' }}
|
||||
variant="outlined"
|
||||
>
|
||||
{command}
|
||||
</Highlighter>
|
||||
);
|
||||
});
|
||||
RunCommandStreaming.displayName = 'RunCommandStreaming';
|
||||
```
|
||||
|
||||
For real-time output beyond just the command (stderr/stdout streaming), pull from the chat store:
|
||||
|
||||
```tsx
|
||||
const buffer = useChatStore((state) =>
|
||||
chatToolSelectors.streamingBuffer(messageId, toolCallId)(state),
|
||||
);
|
||||
```
|
||||
|
||||
## Streaming rules
|
||||
|
||||
- Render `null` until you have something to display (avoids flash).
|
||||
- For terminal-style output, use `Highlighter` with `animated` to show typing-like effect.
|
||||
- The Streaming component must **unmount cleanly** when execution ends — typically the framework swaps it out for the Render automatically.
|
||||
|
||||
## Streaming registry — `client/Streaming/index.ts`
|
||||
|
||||
```ts
|
||||
import { LocalSystemApiName } from '../..';
|
||||
import { RunCommandStreaming } from './RunCommand';
|
||||
import { WriteFileStreaming } from './WriteFile';
|
||||
|
||||
export const LocalSystemStreamings = {
|
||||
[LocalSystemApiName.runCommand]: RunCommandStreaming,
|
||||
[LocalSystemApiName.writeLocalFile]: WriteFileStreaming,
|
||||
};
|
||||
```
|
||||
@@ -1,147 +0,0 @@
|
||||
---
|
||||
name: chat-sdk
|
||||
description: 'Build multi-platform chat bots with the chat SDK. Use for Slack, Teams, Google Chat, Discord, GitHub, Linear bots, webhooks, mentions, slash commands, cards, modals, or streaming responses.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Chat SDK
|
||||
|
||||
Unified TypeScript SDK for building chat bots across Slack, Teams, Google Chat, Discord, GitHub, and Linear. Write bot logic once, deploy everywhere.
|
||||
|
||||
## Critical: Read the bundled docs
|
||||
|
||||
The `chat` package ships with full documentation in `node_modules/chat/docs/` and TypeScript source types. **Always read these before writing code:**
|
||||
|
||||
```
|
||||
node_modules/chat/docs/ # Full documentation (MDX files)
|
||||
node_modules/chat/dist/ # Built types (.d.ts files)
|
||||
```
|
||||
|
||||
Key docs to read based on task:
|
||||
|
||||
- `docs/getting-started.mdx` — setup guides
|
||||
- `docs/usage.mdx` — event handlers, threads, messages, channels
|
||||
- `docs/streaming.mdx` — AI streaming with AI SDK
|
||||
- `docs/cards.mdx` — JSX interactive cards
|
||||
- `docs/actions.mdx` — button/dropdown handlers
|
||||
- `docs/modals.mdx` — form dialogs (Slack only)
|
||||
- `docs/adapters/*.mdx` — platform-specific adapter setup
|
||||
- `docs/state/*.mdx` — state adapter config (Redis, ioredis, memory)
|
||||
|
||||
Also read the TypeScript types from `node_modules/chat/dist/` to understand the full API surface.
|
||||
|
||||
## Quick start
|
||||
|
||||
```typescript
|
||||
import { Chat } from 'chat';
|
||||
import { createSlackAdapter } from '@chat-adapter/slack';
|
||||
import { createRedisState } from '@chat-adapter/state-redis';
|
||||
|
||||
const bot = new Chat({
|
||||
userName: 'mybot',
|
||||
adapters: {
|
||||
slack: createSlackAdapter({
|
||||
botToken: process.env.SLACK_BOT_TOKEN!,
|
||||
signingSecret: process.env.SLACK_SIGNING_SECRET!,
|
||||
}),
|
||||
},
|
||||
state: createRedisState({ url: process.env.REDIS_URL! }),
|
||||
});
|
||||
|
||||
bot.onNewMention(async (thread) => {
|
||||
await thread.subscribe();
|
||||
await thread.post("Hello! I'm listening to this thread.");
|
||||
});
|
||||
|
||||
bot.onSubscribedMessage(async (thread, message) => {
|
||||
await thread.post(`You said: ${message.text}`);
|
||||
});
|
||||
```
|
||||
|
||||
## Core concepts
|
||||
|
||||
- **Chat** — main entry point, coordinates adapters and routes events
|
||||
- **Adapters** — platform-specific (Slack, Teams, GChat, Discord, GitHub, Linear)
|
||||
- **State** — pluggable persistence (Redis for prod, memory for dev)
|
||||
- **Thread** — conversation thread with `post()`, `subscribe()`, `startTyping()`
|
||||
- **Message** — normalized format with `text`, `formatted` (mdast AST), `raw`
|
||||
- **Channel** — container for threads, supports listing and posting
|
||||
|
||||
## Event handlers
|
||||
|
||||
| Handler | Trigger |
|
||||
| -------------------------- | ------------------------------------------------- |
|
||||
| `onNewMention` | Bot @-mentioned in unsubscribed thread |
|
||||
| `onSubscribedMessage` | Any message in subscribed thread |
|
||||
| `onNewMessage(regex)` | Messages matching pattern in unsubscribed threads |
|
||||
| `onSlashCommand("/cmd")` | Slash command invocations |
|
||||
| `onReaction(emojis)` | Emoji reactions added/removed |
|
||||
| `onAction(actionId)` | Button clicks and dropdown selections |
|
||||
| `onAssistantThreadStarted` | Slack Assistants API thread opened |
|
||||
| `onAppHomeOpened` | Slack App Home tab opened |
|
||||
|
||||
## Streaming
|
||||
|
||||
Pass any `AsyncIterable<string>` to `thread.post()`. Works with AI SDK's `textStream`:
|
||||
|
||||
```typescript
|
||||
import { ToolLoopAgent } from 'ai';
|
||||
const agent = new ToolLoopAgent({ model: 'anthropic/claude-4.5-sonnet' });
|
||||
|
||||
bot.onNewMention(async (thread, message) => {
|
||||
const result = await agent.stream({ prompt: message.text });
|
||||
await thread.post(result.textStream);
|
||||
});
|
||||
```
|
||||
|
||||
## Cards (JSX)
|
||||
|
||||
Set `jsxImportSource: "chat"` in tsconfig. Components: `Card`, `CardText`, `Button`, `Actions`, `Fields`, `Field`, `Select`, `SelectOption`, `Image`, `Divider`, `LinkButton`, `Section`, `RadioSelect`.
|
||||
|
||||
```tsx
|
||||
await thread.post(
|
||||
<Card title="Order #1234">
|
||||
<CardText>Your order has been received!</CardText>
|
||||
<Actions>
|
||||
<Button id="approve" style="primary">
|
||||
Approve
|
||||
</Button>
|
||||
<Button id="reject" style="danger">
|
||||
Reject
|
||||
</Button>
|
||||
</Actions>
|
||||
</Card>,
|
||||
);
|
||||
```
|
||||
|
||||
## Packages
|
||||
|
||||
| Package | Purpose |
|
||||
| ----------------------------- | ----------------------------- |
|
||||
| `chat` | Core SDK |
|
||||
| `@chat-adapter/slack` | Slack |
|
||||
| `@chat-adapter/teams` | Microsoft Teams |
|
||||
| `@chat-adapter/gchat` | Google Chat |
|
||||
| `@chat-adapter/discord` | Discord |
|
||||
| `@chat-adapter/github` | GitHub Issues |
|
||||
| `@chat-adapter/linear` | Linear Issues |
|
||||
| `@chat-adapter/state-redis` | Redis state (production) |
|
||||
| `@chat-adapter/state-ioredis` | ioredis state (alternative) |
|
||||
| `@chat-adapter/state-memory` | In-memory state (development) |
|
||||
|
||||
## Changesets (Release Flow)
|
||||
|
||||
This monorepo uses [Changesets](https://github.com/changesets/changesets) for versioning and changelogs. Every PR that changes a package's behavior must include a changeset.
|
||||
|
||||
```bash
|
||||
pnpm changeset
|
||||
# → select affected package(s) (e.g. @chat-adapter/slack, chat)
|
||||
# → choose bump type: patch (fixes), minor (features), major (breaking)
|
||||
# → write a short summary for the CHANGELOG
|
||||
```
|
||||
|
||||
This creates a file in `.changeset/` — commit it with the PR. When merged to `main`, the Changesets GitHub Action opens a "Version Packages" PR to bump versions and update CHANGELOGs. Merging that PR publishes to npm.
|
||||
|
||||
## Webhook setup
|
||||
|
||||
Each adapter exposes a webhook handler via `bot.webhooks.{platform}`. Wire these to your HTTP framework's routes (e.g. Next.js API routes, Hono, Express).
|
||||
@@ -1,296 +0,0 @@
|
||||
---
|
||||
name: cli
|
||||
description: LobeHub CLI (@lobehub/cli) development guide — commands, subcommands, architecture.
|
||||
disable-model-invocation: true
|
||||
---
|
||||
|
||||
# LobeHub CLI Development Guide
|
||||
|
||||
## Overview
|
||||
|
||||
LobeHub CLI (`@lobehub/cli`) is a command-line tool for managing and interacting with LobeHub services. Built with Commander.js + TypeScript.
|
||||
|
||||
- **Package**: `apps/cli/`
|
||||
- **Entry**: `apps/cli/src/index.ts`
|
||||
- **Binaries**: `lh`, `lobe`, `lobehub` (all aliases for the same CLI)
|
||||
- **Build**: tsup
|
||||
- **Runtime**: Node.js / Bun
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
apps/cli/src/
|
||||
├── index.ts # Entry point, registers all commands
|
||||
├── api/
|
||||
│ ├── client.ts # tRPC client (type-safe backend API)
|
||||
│ └── http.ts # Raw HTTP utilities
|
||||
├── auth/
|
||||
│ ├── credentials.ts # Encrypted credential storage (AES-256-GCM)
|
||||
│ ├── refresh.ts # Token auto-refresh
|
||||
│ └── resolveToken.ts # Token resolution (flag > stored)
|
||||
├── commands/ # All CLI commands (one file per command group)
|
||||
│ ├── agent.ts # Agent CRUD + run
|
||||
│ ├── config.ts # whoami, usage
|
||||
│ ├── connect.ts # Device gateway connection + daemon
|
||||
│ ├── doc.ts # Document management
|
||||
│ ├── file.ts # File management
|
||||
│ ├── generate/ # Content generation (text/image/video/tts/asr)
|
||||
│ ├── kb.ts # Knowledge base management
|
||||
│ ├── login.ts # OIDC Device Code Flow auth
|
||||
│ ├── logout.ts # Clear credentials
|
||||
│ ├── memory.ts # User memory management
|
||||
│ ├── message.ts # Message management
|
||||
│ ├── model.ts # AI model management
|
||||
│ ├── plugin.ts # Plugin management
|
||||
│ ├── provider.ts # AI provider management
|
||||
│ ├── search.ts # Global search
|
||||
│ ├── skill.ts # Agent skill management
|
||||
│ ├── status.ts # Gateway connectivity check
|
||||
│ └── topic.ts # Conversation topic management
|
||||
├── daemon/
|
||||
│ └── manager.ts # Background daemon process management
|
||||
├── tools/
|
||||
│ ├── shell.ts # Shell command execution (for gateway)
|
||||
│ └── file.ts # File operations (for gateway)
|
||||
├── settings/
|
||||
│ └── index.ts # Persistent settings (~/.lobehub/)
|
||||
├── utils/
|
||||
│ ├── logger.ts # Logging (verbose mode)
|
||||
│ ├── format.ts # Table output, JSON, timeAgo, truncate
|
||||
│ └── agentStream.ts # SSE streaming for agent runs
|
||||
└── constants/
|
||||
└── urls.ts # Official server & gateway URLs
|
||||
```
|
||||
|
||||
## Command Groups
|
||||
|
||||
| Command | Alias | Description |
|
||||
| ------------- | ----- | ----------------------------------------------------------- |
|
||||
| `lh login` | - | Authenticate via OIDC Device Code Flow |
|
||||
| `lh logout` | - | Clear stored credentials |
|
||||
| `lh connect` | - | Device gateway connection & daemon management |
|
||||
| `lh status` | - | Quick gateway connectivity check |
|
||||
| `lh agent` | - | Agent CRUD, run, status |
|
||||
| `lh generate` | `gen` | Content generation (text, image, video, tts, asr, download) |
|
||||
| `lh doc` | - | Document CRUD, batch-create, parse, topic linking |
|
||||
| `lh file` | - | File list, view, delete, recent |
|
||||
| `lh kb` | - | Knowledge base CRUD, folders, docs, upload, tree view |
|
||||
| `lh memory` | - | User memory CRUD + extraction |
|
||||
| `lh message` | - | Message list, search, delete, count, heatmap |
|
||||
| `lh topic` | - | Topic CRUD + search + recent |
|
||||
| `lh skill` | - | Skill CRUD + import (GitHub/URL/market) |
|
||||
| `lh model` | - | Model CRUD, toggle, batch-toggle, clear |
|
||||
| `lh provider` | - | Provider CRUD, config, test, toggle |
|
||||
| `lh plugin` | - | Plugin install, uninstall, update |
|
||||
| `lh search` | - | Global search across all types |
|
||||
| `lh whoami` | - | Current user info |
|
||||
| `lh usage` | - | Monthly/daily usage statistics |
|
||||
|
||||
## Adding a New Command
|
||||
|
||||
### 1. Create Command File
|
||||
|
||||
Create `apps/cli/src/commands/<name>.ts`:
|
||||
|
||||
```typescript
|
||||
import type { Command } from 'commander';
|
||||
import { getTrpcClient } from '../api/client';
|
||||
import { outputJson, printTable, truncate } from '../utils/format';
|
||||
|
||||
export function register<Name>Command(program: Command) {
|
||||
const cmd = program.command('<name>').description('...');
|
||||
|
||||
// Subcommands
|
||||
cmd
|
||||
.command('list')
|
||||
.description('List items')
|
||||
.option('-L, --limit <n>', 'Maximum number of items', '30')
|
||||
.option('--json [fields]', 'Output JSON, optionally specify fields')
|
||||
.action(async (options) => {
|
||||
const client = await getTrpcClient();
|
||||
const result = await client.<router>.<procedure>.query({ ... });
|
||||
// Handle output
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Register in Entry Point
|
||||
|
||||
In `apps/cli/src/index.ts`:
|
||||
|
||||
```typescript
|
||||
import { registerNewCommand } from './commands/new';
|
||||
// ...
|
||||
registerNewCommand(program);
|
||||
```
|
||||
|
||||
### 3. Add Tests
|
||||
|
||||
Create `apps/cli/src/commands/<name>.test.ts` alongside the command file.
|
||||
|
||||
## Conventions
|
||||
|
||||
### Output Patterns
|
||||
|
||||
All list/view commands follow consistent patterns:
|
||||
|
||||
- `--json [fields]` - JSON output with optional field filtering
|
||||
- `--yes` - Skip confirmation for destructive ops
|
||||
- `-L, --limit <n>` - Pagination limit (default: 30)
|
||||
- `-v, --verbose` - Verbose logging
|
||||
|
||||
### Table Output
|
||||
|
||||
```typescript
|
||||
const rows = items.map((item) => [item.id, truncate(item.title, 40), timeAgo(item.updatedAt)]);
|
||||
printTable(rows, ['ID', 'TITLE', 'UPDATED']);
|
||||
```
|
||||
|
||||
### JSON Output
|
||||
|
||||
```typescript
|
||||
if (options.json !== undefined) {
|
||||
const fields = typeof options.json === 'string' ? options.json : undefined;
|
||||
outputJson(items, fields);
|
||||
return;
|
||||
}
|
||||
```
|
||||
|
||||
### Authentication
|
||||
|
||||
Commands that need auth use `getTrpcClient()` which auto-resolves tokens:
|
||||
|
||||
```typescript
|
||||
const client = await getTrpcClient();
|
||||
// client.router.procedure.query/mutate(...)
|
||||
```
|
||||
|
||||
### Confirmation Prompts
|
||||
|
||||
```typescript
|
||||
import { confirm } from '../utils/format';
|
||||
if (!options.yes) {
|
||||
const ok = await confirm('Are you sure?');
|
||||
if (!ok) return;
|
||||
}
|
||||
```
|
||||
|
||||
## Storage Locations
|
||||
|
||||
| File | Path | Purpose |
|
||||
| ------------- | ----------------------------- | ------------------------------ |
|
||||
| Credentials | `~/.lobehub/credentials.json` | Encrypted tokens (AES-256-GCM) |
|
||||
| Settings | `~/.lobehub/settings.json` | Custom server/gateway URLs |
|
||||
| Daemon PID | `~/.lobehub/daemon.pid` | Background process PID |
|
||||
| Daemon Status | `~/.lobehub/daemon.status` | Connection status JSON |
|
||||
| Daemon Log | `~/.lobehub/daemon.log` | Daemon output log |
|
||||
|
||||
The base directory (`~/.lobehub/`) can be overridden with the `LOBEHUB_CLI_HOME` env var (e.g. `LOBEHUB_CLI_HOME=.lobehub-dev` for dev mode isolation).
|
||||
|
||||
## Key Dependencies
|
||||
|
||||
- `commander` - CLI framework
|
||||
- `@trpc/client` + `superjson` - Type-safe API client
|
||||
- `@lobechat/device-gateway-client` - WebSocket gateway connection
|
||||
- `@lobechat/local-file-shell` - Local shell/file tool execution
|
||||
- `picocolors` - Terminal colors
|
||||
- `ws` - WebSocket
|
||||
- `diff` - Text diffing
|
||||
- `fast-glob` - File pattern matching
|
||||
|
||||
## Development
|
||||
|
||||
### Running in Dev Mode
|
||||
|
||||
Dev mode uses `LOBEHUB_CLI_HOME=.lobehub-dev` to isolate credentials from the global `~/.lobehub/` directory, so dev and production configs never conflict.
|
||||
|
||||
```bash
|
||||
# Run a command in dev mode (from apps/cli/)
|
||||
cd apps/cli && bun run dev -- <command>
|
||||
|
||||
# This is equivalent to:
|
||||
LOBEHUB_CLI_HOME=.lobehub-dev bun src/index.ts <command>
|
||||
```
|
||||
|
||||
### Connecting to Local Dev Server
|
||||
|
||||
To test CLI against a local dev server (e.g. `localhost:3011`):
|
||||
|
||||
**Step 1: Start the local server**
|
||||
|
||||
```bash
|
||||
# From cloud repo root
|
||||
bun run dev
|
||||
# Server starts on http://localhost:3011 (or configured port)
|
||||
```
|
||||
|
||||
**Step 2: Login to local server via Device Code Flow**
|
||||
|
||||
```bash
|
||||
cd apps/cli && bun run dev -- login --server http://localhost:3011
|
||||
```
|
||||
|
||||
This will:
|
||||
|
||||
1. Call `POST http://localhost:3011/oidc/device/auth` to get a device code
|
||||
2. Print a URL like `http://localhost:3011/oidc/device?user_code=XXXX-YYYY`
|
||||
3. Open the URL in your browser — log in and authorize
|
||||
4. Save credentials to `apps/cli/.lobehub-dev/credentials.json`
|
||||
5. Save server URL to `apps/cli/.lobehub-dev/settings.json`
|
||||
|
||||
After login, all subsequent `bun run dev -- <command>` calls will use the local server.
|
||||
|
||||
**Step 3: Run commands against local server**
|
||||
|
||||
```bash
|
||||
cd apps/cli && bun run dev -- task list
|
||||
cd apps/cli && bun run dev -- task create -i "Test task" -n "My Task"
|
||||
cd apps/cli && bun run dev -- agent list
|
||||
```
|
||||
|
||||
**Troubleshooting:**
|
||||
|
||||
- If login returns `invalid_grant`, make sure the local OIDC provider is properly configured (check `OIDC_*` env vars in `.env`)
|
||||
- If you get `UNAUTHORIZED` on API calls, your token may have expired — run `bun run dev -- login --server http://localhost:3011` again
|
||||
- Dev credentials are stored in `apps/cli/.lobehub-dev/` (gitignored), not in `~/.lobehub/`
|
||||
|
||||
### Switching Between Local and Production
|
||||
|
||||
```bash
|
||||
# Dev mode (local server) — uses .lobehub-dev/
|
||||
cd apps/cli && bun run dev -- <command>
|
||||
|
||||
# Production (app.lobehub.com) — uses ~/.lobehub/
|
||||
lh <command>
|
||||
```
|
||||
|
||||
The two environments are completely isolated by different credential directories.
|
||||
|
||||
### Build & Test
|
||||
|
||||
```bash
|
||||
# Build CLI
|
||||
cd apps/cli && bun run build
|
||||
|
||||
# Unit tests
|
||||
cd apps/cli && bun run test
|
||||
|
||||
# E2E tests (requires authenticated CLI)
|
||||
cd apps/cli && bunx vitest run e2e/kb.e2e.test.ts
|
||||
|
||||
# Link globally for testing (installs lh/lobe/lobehub commands)
|
||||
cd apps/cli && bun run cli:link
|
||||
```
|
||||
|
||||
## Detailed Command References
|
||||
|
||||
See `references/` for each command group:
|
||||
|
||||
- **Agent**: `references/agent.md` (CRUD, run, status)
|
||||
- **Content Generation**: `references/generate.md` (text, image, video, tts, asr, download)
|
||||
- **Knowledge & Files**: `references/knowledge.md` (kb, file, doc)
|
||||
- **Conversation**: `references/conversation.md` (topic, message)
|
||||
- **Memory**: `references/memory.md` (memory management, extraction)
|
||||
- **Skills & Plugins**: `references/skills-plugins.md` (skill, plugin)
|
||||
- **Models & Providers**: `references/models-providers.md` (model, provider)
|
||||
- **Search & Config**: `references/search-config.md` (search, whoami, usage)
|
||||
@@ -1,144 +0,0 @@
|
||||
# Agent Commands
|
||||
|
||||
Manage AI agents: create, edit, delete, list, run, and check status.
|
||||
|
||||
**Source**: `apps/cli/src/commands/agent.ts`
|
||||
|
||||
## `lh agent list`
|
||||
|
||||
List all agents.
|
||||
|
||||
```bash
|
||||
lh agent list [-L [-k [--json [fields]] < n > ] < keyword > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ------------------------- | -------------------------------------- | ------- |
|
||||
| `-L, --limit <n>` | Maximum items | `30` |
|
||||
| `-k, --keyword <keyword>` | Filter by keyword | - |
|
||||
| `--json [fields]` | JSON output with optional field filter | - |
|
||||
|
||||
**Table columns**: ID, TITLE, DESCRIPTION, MODEL
|
||||
|
||||
---
|
||||
|
||||
## `lh agent view <agentId>`
|
||||
|
||||
View agent configuration details.
|
||||
|
||||
```bash
|
||||
lh agent view [fields]] < agentId > [--json
|
||||
```
|
||||
|
||||
**Displays**: Title, description, model, provider, system role, plugins, tools.
|
||||
|
||||
---
|
||||
|
||||
## `lh agent create`
|
||||
|
||||
Create a new agent.
|
||||
|
||||
```bash
|
||||
lh agent create [options]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| --------------------------- | -------------- | -------- |
|
||||
| `-t, --title <title>` | Agent title | No |
|
||||
| `-d, --description <desc>` | Description | No |
|
||||
| `-m, --model <model>` | Model ID | No |
|
||||
| `-p, --provider <provider>` | Provider ID | No |
|
||||
| `-s, --system-role <role>` | System prompt | No |
|
||||
| `--group <groupId>` | Agent group ID | No |
|
||||
|
||||
**Output**: Created agent ID and session ID.
|
||||
|
||||
---
|
||||
|
||||
## `lh agent edit <agentId>`
|
||||
|
||||
Update an existing agent. Same options as `create`, all optional. Only specified fields are updated.
|
||||
|
||||
```bash
|
||||
lh agent edit [-m [-s ... < agentId > [-t < title > ] < model > ] < role > ]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh agent delete <agentId>`
|
||||
|
||||
Delete an agent.
|
||||
|
||||
```bash
|
||||
lh agent delete < agentId > [--yes]
|
||||
```
|
||||
|
||||
Requires confirmation unless `--yes` is provided.
|
||||
|
||||
---
|
||||
|
||||
## `lh agent duplicate <agentId>`
|
||||
|
||||
Duplicate an existing agent.
|
||||
|
||||
```bash
|
||||
lh agent duplicate < agentId > [-t < title > ]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| --------------------- | ------------------------------------ |
|
||||
| `-t, --title <title>` | Optional new title for the duplicate |
|
||||
|
||||
**Output**: New agent ID.
|
||||
|
||||
---
|
||||
|
||||
## `lh agent run`
|
||||
|
||||
Start an agent execution (streaming SSE).
|
||||
|
||||
```bash
|
||||
lh agent run [options]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| --------------------- | -------------------------------------------- |
|
||||
| `-a, --agent-id <id>` | Agent ID to run |
|
||||
| `-s, --slug <slug>` | Agent slug (alternative to ID) |
|
||||
| `-p, --prompt <text>` | User prompt |
|
||||
| `-t, --topic-id <id>` | Reuse existing topic |
|
||||
| `--no-auto-start` | Don't auto-start the agent |
|
||||
| `--json` | Output full JSON event stream |
|
||||
| `-v, --verbose` | Show detailed tool call info |
|
||||
| `--replay <file>` | Replay events from saved JSON file (offline) |
|
||||
|
||||
### Streaming Behavior
|
||||
|
||||
Uses `utils/agentStream.ts` to handle Server-Sent Events:
|
||||
|
||||
1. Sends agent run request to backend
|
||||
2. Streams SSE events in real-time
|
||||
3. Displays: text chunks, tool call status, operation progress
|
||||
4. Shows final token usage and cost summary
|
||||
|
||||
### Replay Mode
|
||||
|
||||
`--replay <file>` reads a saved JSON event stream for offline debugging without server connection.
|
||||
|
||||
---
|
||||
|
||||
## `lh agent status <operationId>`
|
||||
|
||||
Check agent operation status.
|
||||
|
||||
```bash
|
||||
lh agent status [fields]] [--history] [--history-limit < operationId > [--json < n > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| --------------------- | -------------------- | ------- |
|
||||
| `--json [fields]` | JSON output | - |
|
||||
| `--history` | Include step history | `false` |
|
||||
| `--history-limit <n>` | Max history entries | `10` |
|
||||
|
||||
**Displays**: Status (running/completed/failed), steps count, tokens used, cost, error info, timestamps.
|
||||
@@ -1,122 +0,0 @@
|
||||
# Conversation Commands (Topic & Message)
|
||||
|
||||
## Topic Management (`lh topic`)
|
||||
|
||||
Manage conversation topics (threads).
|
||||
|
||||
**Source**: `apps/cli/src/commands/topic.ts`
|
||||
|
||||
### `lh topic list`
|
||||
|
||||
```bash
|
||||
lh topic list [--agent-id [-L [--page [--json [fields]] < id > ] < n > ] < n > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ----------------- | --------------- | ------- |
|
||||
| `--agent-id <id>` | Filter by agent | - |
|
||||
| `-L, --limit <n>` | Page size | `30` |
|
||||
| `--page <n>` | Page number | `1` |
|
||||
|
||||
**Table columns**: ID, TITLE, FAV, UPDATED
|
||||
|
||||
### `lh topic search <keywords>`
|
||||
|
||||
```bash
|
||||
lh topic search [--json [fields]] < keywords > [--agent-id < id > ]
|
||||
```
|
||||
|
||||
### `lh topic create`
|
||||
|
||||
```bash
|
||||
lh topic create -t [--favorite] < title > [--agent-id < id > ]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| --------------------- | -------------------- | -------- |
|
||||
| `-t, --title <title>` | Topic title | Yes |
|
||||
| `--agent-id <id>` | Associate with agent | No |
|
||||
| `--favorite` | Mark as favorite | No |
|
||||
|
||||
### `lh topic edit <id>`
|
||||
|
||||
```bash
|
||||
lh topic edit [--favorite] [--no-favorite] < id > [-t < title > ]
|
||||
```
|
||||
|
||||
### `lh topic delete <ids...>`
|
||||
|
||||
```bash
|
||||
lh topic delete [--yes] < id1 > [id2...]
|
||||
```
|
||||
|
||||
### `lh topic recent`
|
||||
|
||||
```bash
|
||||
lh topic recent [-L [--json [fields]] < n > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ----------------- | --------------- | ------- |
|
||||
| `-L, --limit <n>` | Number of items | `10` |
|
||||
|
||||
---
|
||||
|
||||
## Message Management (`lh message`)
|
||||
|
||||
Manage chat messages within topics.
|
||||
|
||||
**Source**: `apps/cli/src/commands/message.ts`
|
||||
|
||||
### `lh message list`
|
||||
|
||||
```bash
|
||||
lh message list [options] [--json [fields]]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ----------------- | ----------------------- | ------- |
|
||||
| `--topic-id <id>` | Filter by topic | - |
|
||||
| `--agent-id <id>` | Filter by agent | - |
|
||||
| `-L, --limit <n>` | Page size | `30` |
|
||||
| `--page <n>` | Page number | `1` |
|
||||
| `--user` | Only show user messages | - |
|
||||
|
||||
**Table columns**: ID, ROLE, CONTENT, CREATED
|
||||
|
||||
**Note**: When `--topic-id` or `--agent-id` is provided, uses `message.getMessages`; otherwise uses `message.listAll`.
|
||||
|
||||
### `lh message search <keywords>`
|
||||
|
||||
```bash
|
||||
lh message search [fields]] < keywords > [--json
|
||||
```
|
||||
|
||||
Full-text search across all messages.
|
||||
|
||||
### `lh message delete <ids...>`
|
||||
|
||||
```bash
|
||||
lh message delete [--yes] < id1 > [id2...]
|
||||
```
|
||||
|
||||
### `lh message count`
|
||||
|
||||
```bash
|
||||
lh message count [--start [--end [--json] < date > ] < date > ]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| ---------------- | ------------------------------------------ |
|
||||
| `--start <date>` | Start date (ISO format, e.g. `2024-01-01`) |
|
||||
| `--end <date>` | End date (ISO format) |
|
||||
|
||||
**Output**: Total message count for the specified period.
|
||||
|
||||
### `lh message heatmap`
|
||||
|
||||
```bash
|
||||
lh message heatmap [--json]
|
||||
```
|
||||
|
||||
**Output**: Activity heatmap data showing message frequency over time.
|
||||
@@ -1,271 +0,0 @@
|
||||
# Content Generation Commands
|
||||
|
||||
Generate text, images, videos, speech, and transcriptions.
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/`
|
||||
|
||||
## Command Structure
|
||||
|
||||
```
|
||||
lh generate (alias: gen)
|
||||
├── text <prompt> # Text generation
|
||||
├── image <prompt> # Image generation
|
||||
├── video <prompt> # Video generation
|
||||
├── tts <text> # Text-to-speech
|
||||
├── asr <audioFile> # Audio-to-text (speech recognition)
|
||||
├── download <generationId> <asyncTaskId> # Wait & download generation result
|
||||
├── status <generationId> <asyncTaskId> # Check async task status
|
||||
└── list # List generation topics
|
||||
```
|
||||
|
||||
> ⚠️ **Important**: `status` and `download` require an `asyncTaskId` (UUID format, e.g.
|
||||
> `7ad0eb13-e9a5-4403-8070-1f7fe95b2f95`), **not** the generation ID (`gen_xxx`).
|
||||
> The asyncTaskId is printed after "→ Task" in the `video` / `image` command output.
|
||||
|
||||
---
|
||||
|
||||
## `lh generate text <prompt>` / `lh gen text <prompt>`
|
||||
|
||||
Generate text completion.
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/text.ts`
|
||||
|
||||
```bash
|
||||
lh gen text "Explain quantum computing" [options]
|
||||
echo "context" | lh gen text "summarize" --pipe
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| --------------------------- | ---------------------------------- | -------------------- |
|
||||
| `-m, --model <model>` | Model ID | `openai/gpt-4o-mini` |
|
||||
| `-p, --provider <provider>` | Provider name | - |
|
||||
| `-s, --system <prompt>` | System prompt | - |
|
||||
| `--temperature <n>` | Temperature (0-2) | - |
|
||||
| `--max-tokens <n>` | Maximum output tokens | - |
|
||||
| `--stream` | Enable streaming output | `false` |
|
||||
| `--json` | Output full JSON response | `false` |
|
||||
| `--pipe` | Read additional context from stdin | `false` |
|
||||
|
||||
### Pipe Mode
|
||||
|
||||
When `--pipe` is used, reads stdin and prepends it to the prompt. Useful for piping file contents:
|
||||
|
||||
```bash
|
||||
cat README.md | lh gen text "summarize this" --pipe
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh generate image <prompt>` / `lh gen image <prompt>`
|
||||
|
||||
Generate images from text prompt. This is an async operation — the command submits the task and returns a generation ID + async task ID for tracking.
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/image.ts`
|
||||
|
||||
```bash
|
||||
lh gen image "A sunset over mountains" [options]
|
||||
lh gen image "A cute cat" --model dall-e-3 --provider openai --json
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| --------------------------- | ---------------- | ---------- |
|
||||
| `-m, --model <model>` | Model ID | `dall-e-3` |
|
||||
| `-p, --provider <provider>` | Provider name | `openai` |
|
||||
| `-n, --num <n>` | Number of images | `1` |
|
||||
| `--width <px>` | Width in pixels | - |
|
||||
| `--height <px>` | Height in pixels | - |
|
||||
| `--steps <n>` | Number of steps | - |
|
||||
| `--seed <n>` | Random seed | - |
|
||||
| `--json` | Output raw JSON | `false` |
|
||||
|
||||
**Output** (non-JSON):
|
||||
|
||||
```
|
||||
✓ Image generation started
|
||||
Batch ID: gb_xxx
|
||||
1 image(s) queued
|
||||
Generation gen_xxx → Task 7ad0eb13-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
This is the asyncTaskId — use this for status/download
|
||||
|
||||
Use "lh generate status <generationId> <asyncTaskId>" to check progress.
|
||||
```
|
||||
|
||||
**Typical workflow**:
|
||||
|
||||
```bash
|
||||
# 1. Submit generation — note down BOTH IDs from the output
|
||||
lh gen image "A cute cat"
|
||||
# Generation gen_abc123 → Task 7ad0eb13-e9a5-4403-8070-1f7fe95b2f95
|
||||
|
||||
# 2. Wait & download using generationId + asyncTaskId (the UUID)
|
||||
lh gen download gen_abc123 7ad0eb13-e9a5-4403-8070-1f7fe95b2f95 -o cat.png
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh generate video <prompt>` / `lh gen video <prompt>`
|
||||
|
||||
Generate video from text prompt. This is an async operation.
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/video.ts`
|
||||
|
||||
```bash
|
||||
lh gen video "A cat playing piano" -m < model > -p < provider > [options]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| --------------------------- | ------------------------ | -------- |
|
||||
| `-m, --model <model>` | Model ID | Yes |
|
||||
| `-p, --provider <provider>` | Provider name | Yes |
|
||||
| `--aspect-ratio <ratio>` | Aspect ratio (e.g. 16:9) | No |
|
||||
| `--duration <sec>` | Duration in seconds | No |
|
||||
| `--resolution <res>` | Resolution (e.g. 720p) | No |
|
||||
| `--seed <n>` | Random seed | No |
|
||||
| `--json` | Output raw JSON | No |
|
||||
|
||||
**Note**: Unlike image, video requires `-m` and `-p` (no defaults). Use `lh model list <provider> --type video` to find available video models.
|
||||
|
||||
**Output** (non-JSON):
|
||||
|
||||
```
|
||||
✓ Video generation started
|
||||
Batch ID: gb_xxx
|
||||
Generation gen_xxx → Task 7ad0eb13-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
This is the asyncTaskId — use this for status/download
|
||||
|
||||
Use "lh generate status <generationId> <asyncTaskId>" to check progress.
|
||||
```
|
||||
|
||||
**Typical workflow**:
|
||||
|
||||
```bash
|
||||
# 1. Find available video models for a provider
|
||||
lh model list volcengine --json | grep -i seedance
|
||||
|
||||
# 2. Submit generation — note down BOTH IDs from the output
|
||||
lh gen video "A cat on a runway" -m doubao-seedance-2-0-260128 -p volcengine \
|
||||
--aspect-ratio 9:16 --duration 5 --resolution 1080p
|
||||
# Generation gen_abc123 → Task 7ad0eb13-e9a5-4403-8070-1f7fe95b2f95
|
||||
|
||||
# 3. Wait & download using generationId + asyncTaskId (the UUID)
|
||||
lh gen download gen_abc123 7ad0eb13-e9a5-4403-8070-1f7fe95b2f95 -o result.mp4 --timeout 600
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh generate tts <text>` / `lh gen tts <text>`
|
||||
|
||||
Text-to-speech generation.
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/tts.ts`
|
||||
|
||||
```bash
|
||||
lh gen tts "Hello, world!" [options]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh generate asr <audioFile>` / `lh gen asr <audioFile>`
|
||||
|
||||
Audio-to-text transcription (Automatic Speech Recognition).
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/asr.ts`
|
||||
|
||||
```bash
|
||||
lh gen asr recording.wav [options]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh generate download <generationId> <asyncTaskId>`
|
||||
|
||||
Wait for an async generation task to complete and download the result file.
|
||||
|
||||
**Source**: `apps/cli/src/commands/generate/index.ts`
|
||||
|
||||
> ⚠️ `<asyncTaskId>` is the UUID printed after "→ Task" in the video/image output.
|
||||
> Do **not** pass the generation ID (`gen_xxx`) here — that will cause a server error.
|
||||
|
||||
```bash
|
||||
lh gen download <generationId> <asyncTaskId> [-o output.png]
|
||||
lh gen download gen_xxx 7ad0eb13-xxxx-xxxx-xxxx-xxxxxxxxxxxx -o ~/Desktop/result.mp4 --timeout 600
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| --------------------- | ---------------------------------------- | ---------------------- |
|
||||
| `-o, --output <path>` | Output file path (auto-detect extension) | `<generationId>.<ext>` |
|
||||
| `--interval <sec>` | Polling interval in seconds | `5` |
|
||||
| `--timeout <sec>` | Timeout in seconds (0 = no timeout) | `300` |
|
||||
|
||||
**Behavior**:
|
||||
|
||||
1. Polls `generation.getGenerationStatus` at the specified interval
|
||||
2. Shows live progress: `⋯ Status: processing... (42s)`
|
||||
3. On success: downloads asset URL to local file
|
||||
4. On error / wrong ID: displays a clear message pointing to the correct ID format
|
||||
5. On timeout: suggests using `lh gen status` to check later
|
||||
|
||||
---
|
||||
|
||||
## `lh generate status <generationId> <asyncTaskId>`
|
||||
|
||||
Check the status of an async generation task.
|
||||
|
||||
> ⚠️ `<asyncTaskId>` is the UUID printed after "→ Task" in the video/image output.
|
||||
> Do **not** pass the generation ID (`gen_xxx`) here — that will cause a server error.
|
||||
|
||||
```bash
|
||||
lh gen status <generationId> <asyncTaskId> [--json]
|
||||
lh gen status gen_xxx 7ad0eb13-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| -------- | ------------------------ |
|
||||
| `--json` | Output raw JSON response |
|
||||
|
||||
**Displays**:
|
||||
|
||||
- Status (color-coded): `success` (green), `error` (red), `processing` (yellow), `pending` (cyan)
|
||||
- Error message (if failed)
|
||||
- Asset URL and thumbnail URL (if completed)
|
||||
|
||||
---
|
||||
|
||||
## `lh generate list`
|
||||
|
||||
List all generation topics.
|
||||
|
||||
```bash
|
||||
lh gen list [--json [fields]]
|
||||
```
|
||||
|
||||
**Table columns**: ID, TITLE, TYPE, UPDATED
|
||||
|
||||
---
|
||||
|
||||
## Backend Architecture
|
||||
|
||||
Image and video generation use an async task pattern:
|
||||
|
||||
1. **Create topic** → `generationTopic.createTopic`
|
||||
2. **Submit generation** → `image.createImage` / `video.createVideo`
|
||||
- Creates batch + generation + asyncTask records in a DB transaction
|
||||
- Triggers async background task (image via `createAsyncCaller`, video via `initModelRuntimeFromDB`)
|
||||
- Returns `{ data: { batch, generations }, success }` with `asyncTaskId` in each generation
|
||||
3. **Poll status** → `generation.getGenerationStatus`
|
||||
- Input: `{ generationId, asyncTaskId }` — both are required, and `asyncTaskId` must be the
|
||||
UUID from the `async_tasks` table, not `gen_xxx`
|
||||
- Returns `{ status, error, generation }` (generation includes asset URLs on success)
|
||||
- Before querying, calls `checkTimeoutTasks` which marks tasks as `error` if they have been
|
||||
`pending` or `processing` for more than \~5 minutes (`ASYNC_TASK_TIMEOUT = 298s`)
|
||||
|
||||
**Server routes**:
|
||||
|
||||
- `apps/server/src/routers/lambda/image/index.ts` — image creation (uses `authedProcedure` + `serverDatabase`)
|
||||
- `apps/server/src/routers/lambda/video/index.ts` — video creation (uses `authedProcedure` + `serverDatabase`)
|
||||
- `apps/server/src/routers/lambda/generation.ts` — status checking
|
||||
- `packages/database/src/models/asyncTask.ts` — `AsyncTaskModel` including `checkTimeoutTasks`
|
||||
|
||||
**Note**: Image/video routes do NOT use the `keyVaults` middleware — they read API keys from the database via `initModelRuntimeFromDB` or `createAsyncCaller`.
|
||||
@@ -1,281 +0,0 @@
|
||||
# Knowledge Base, File & Document Commands
|
||||
|
||||
## Knowledge Base (`lh kb`)
|
||||
|
||||
Manage knowledge bases for RAG (Retrieval-Augmented Generation). Supports directory tree structure with folders, documents, and file uploads.
|
||||
|
||||
**Source**: `apps/cli/src/commands/kb.ts`
|
||||
|
||||
### `lh kb list`
|
||||
|
||||
```bash
|
||||
lh kb list [--json [fields]]
|
||||
```
|
||||
|
||||
**Table columns**: ID, NAME, DESCRIPTION, UPDATED
|
||||
|
||||
### `lh kb view <id>`
|
||||
|
||||
```bash
|
||||
lh kb view [fields]] < id > [--json
|
||||
```
|
||||
|
||||
**Displays**: Name, description, full directory tree with all files and documents (recursively fetched). Shows indented tree structure with item type (File/Doc), file type, and size.
|
||||
|
||||
**API**: Uses `file.getKnowledgeItems` to recursively fetch items. Folders (`custom/folder` fileType) are traversed in parallel via `Promise.all` for performance.
|
||||
|
||||
### `lh kb create`
|
||||
|
||||
```bash
|
||||
lh kb create -n [--avatar < name > [-d < desc > ] < url > ]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| -------------------------- | ------------------- | -------- |
|
||||
| `-n, --name <name>` | Knowledge base name | Yes |
|
||||
| `-d, --description <desc>` | Description | No |
|
||||
| `--avatar <url>` | Avatar URL | No |
|
||||
|
||||
**Output**: Created KB ID. Note: backend returns ID as a string directly (not an object).
|
||||
|
||||
### `lh kb edit <id>`
|
||||
|
||||
```bash
|
||||
lh kb edit [-d [--avatar < id > [-n < name > ] < desc > ] < url > ]
|
||||
```
|
||||
|
||||
Requires at least one change flag. Errors if none specified.
|
||||
|
||||
### `lh kb delete <id>`
|
||||
|
||||
```bash
|
||||
lh kb delete [--yes] < id > [--remove-files]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| ---------------- | ---------------------------- |
|
||||
| `--remove-files` | Also delete associated files |
|
||||
| `--yes` | Skip confirmation |
|
||||
|
||||
### `lh kb add-files <knowledgeBaseId>`
|
||||
|
||||
```bash
|
||||
lh kb add-files <kbId> --ids <fileId1> <fileId2> ...
|
||||
```
|
||||
|
||||
Link existing files to a knowledge base.
|
||||
|
||||
### `lh kb remove-files <knowledgeBaseId>`
|
||||
|
||||
```bash
|
||||
lh kb remove-files <kbId> --ids <fileId1> <fileId2> ... [--yes]
|
||||
```
|
||||
|
||||
Unlink files from a knowledge base.
|
||||
|
||||
### `lh kb mkdir <knowledgeBaseId>`
|
||||
|
||||
```bash
|
||||
lh kb mkdir < kbId > -n < name > [--parent < folderId > ]
|
||||
```
|
||||
|
||||
Create a folder in a knowledge base. Uses `document.createDocument` with `fileType: 'custom/folder'`.
|
||||
|
||||
| Option | Description | Required |
|
||||
| --------------------- | ---------------- | -------- |
|
||||
| `-n, --name <name>` | Folder name | Yes |
|
||||
| `--parent <parentId>` | Parent folder ID | No |
|
||||
|
||||
### `lh kb create-doc <knowledgeBaseId>`
|
||||
|
||||
```bash
|
||||
lh kb create-doc [--parent < kbId > -t < title > [-c < content > ] < folderId > ]
|
||||
```
|
||||
|
||||
Create a document in a knowledge base. Uses `document.createDocument` with `fileType: 'custom/document'`.
|
||||
|
||||
| Option | Description | Required |
|
||||
| ---------------------- | ---------------- | -------- |
|
||||
| `-t, --title <title>` | Document title | Yes |
|
||||
| `-c, --content <text>` | Document content | No |
|
||||
| `--parent <parentId>` | Parent folder ID | No |
|
||||
|
||||
### `lh kb move <id>`
|
||||
|
||||
```bash
|
||||
lh kb move < id > --type < file | doc > [--parent < folderId > ]
|
||||
```
|
||||
|
||||
Move a file or document to a different folder (or to root if `--parent` is omitted).
|
||||
|
||||
| Option | Description | Default |
|
||||
| --------------------- | -------------------------------- | ------- |
|
||||
| `--type <type>` | Item type: `file` or `doc` | `file` |
|
||||
| `--parent <parentId>` | Target folder ID (omit for root) | - |
|
||||
|
||||
Uses `document.updateDocument` for docs, `file.updateFile` for files.
|
||||
|
||||
### `lh kb upload <knowledgeBaseId> <filePath>`
|
||||
|
||||
```bash
|
||||
lh kb upload <kbId> <filePath> [--parent <folderId>]
|
||||
```
|
||||
|
||||
Upload a local file to a knowledge base via S3 presigned URL.
|
||||
|
||||
| Option | Description |
|
||||
| --------------------- | ---------------- |
|
||||
| `--parent <parentId>` | Parent folder ID |
|
||||
|
||||
**Flow**: Compute SHA-256 hash → get presigned URL via `upload.createS3PreSignedUrl` → PUT to S3 → create file record via `file.createFile`.
|
||||
|
||||
---
|
||||
|
||||
## File Management (`lh file`)
|
||||
|
||||
Manage uploaded files.
|
||||
|
||||
**Source**: `apps/cli/src/commands/file.ts`
|
||||
|
||||
### `lh file list`
|
||||
|
||||
```bash
|
||||
lh file list [--kb-id [-L [--json [fields]] < id > ] < n > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ----------------- | ------------------------ | ------- |
|
||||
| `--kb-id <id>` | Filter by knowledge base | - |
|
||||
| `-L, --limit <n>` | Maximum items | `30` |
|
||||
|
||||
**Table columns**: ID, NAME, TYPE, SIZE, UPDATED
|
||||
|
||||
### `lh file view <id>`
|
||||
|
||||
```bash
|
||||
lh file view [fields]] < id > [--json
|
||||
```
|
||||
|
||||
**Displays**: Name, type, size, chunking status, embedding status.
|
||||
|
||||
### `lh file delete <ids...>`
|
||||
|
||||
```bash
|
||||
lh file delete [--yes] < id1 > [id2...]
|
||||
```
|
||||
|
||||
Supports deleting multiple files at once.
|
||||
|
||||
### `lh file recent`
|
||||
|
||||
```bash
|
||||
lh file recent [-L [--json [fields]] < n > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ----------------- | --------------- | ------- |
|
||||
| `-L, --limit <n>` | Number of items | `10` |
|
||||
|
||||
---
|
||||
|
||||
## Document Management (`lh doc`)
|
||||
|
||||
Manage text documents (notes, wiki pages).
|
||||
|
||||
**Source**: `apps/cli/src/commands/doc.ts`
|
||||
|
||||
### `lh doc list`
|
||||
|
||||
```bash
|
||||
lh doc list [-L [--file-type [--source-type [--json [fields]] < n > ] < type > ] < type > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ---------------------- | --------------------------------------------- | ------- |
|
||||
| `-L, --limit <n>` | Maximum items | `30` |
|
||||
| `--file-type <type>` | Filter by file type | - |
|
||||
| `--source-type <type>` | Filter by source type (file, web, api, topic) | - |
|
||||
|
||||
**Table columns**: ID, TITLE, TYPE, UPDATED
|
||||
|
||||
### `lh doc view <id>`
|
||||
|
||||
```bash
|
||||
lh doc view [fields]] < id > [--json
|
||||
```
|
||||
|
||||
**Displays**: Title, type, KB association, updated time, full content.
|
||||
|
||||
### `lh doc create`
|
||||
|
||||
```bash
|
||||
lh doc create -t [-F [--parent [--slug [--kb [--file-type < title > [-b < body > ] < path > ] < id > ] < slug > ] < id > ] < type > ]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| ------------------------ | ----------------------------------------------- | -------- |
|
||||
| `-t, --title <title>` | Document title | Yes |
|
||||
| `-b, --body <content>` | Document body text | No |
|
||||
| `-F, --body-file <path>` | Read body from file | No |
|
||||
| `--parent <id>` | Parent document ID | No |
|
||||
| `--slug <slug>` | Custom URL slug | No |
|
||||
| `--kb <id>` | Knowledge base ID to associate with | No |
|
||||
| `--file-type <type>` | File type (e.g. custom/document, custom/folder) | No |
|
||||
|
||||
`-b` and `-F` are mutually exclusive; `-F` reads the file content as the body.
|
||||
|
||||
### `lh doc batch-create <file>`
|
||||
|
||||
Batch create documents from a JSON file. The file must contain a non-empty array of document objects.
|
||||
|
||||
```bash
|
||||
lh doc batch-create documents.json
|
||||
```
|
||||
|
||||
Each object in the array can have: `title`, `content`, `fileType`, `knowledgeBaseId`, `parentId`, `slug`.
|
||||
|
||||
### `lh doc edit <id>`
|
||||
|
||||
```bash
|
||||
lh doc edit [-b [-F [--parent [--file-type < id > [-t < title > ] < body > ] < path > ] < id > ] < type > ]
|
||||
```
|
||||
|
||||
### `lh doc delete <ids...>`
|
||||
|
||||
```bash
|
||||
lh doc delete [--yes] < id1 > [id2...]
|
||||
```
|
||||
|
||||
### `lh doc parse <fileId>`
|
||||
|
||||
Parse an uploaded file into a document.
|
||||
|
||||
```bash
|
||||
lh doc parse [--json [fields]] < fileId > [--with-pages]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| -------------- | ----------------------- |
|
||||
| `--with-pages` | Preserve page structure |
|
||||
|
||||
**Output**: Parsed title and content preview.
|
||||
|
||||
### `lh doc link-topic <docId> <topicId>`
|
||||
|
||||
Associate a document with a topic. Creates a linked copy via the notebook router.
|
||||
|
||||
```bash
|
||||
lh doc link-topic <docId> <topicId>
|
||||
```
|
||||
|
||||
### `lh doc topic-docs <topicId>`
|
||||
|
||||
List documents associated with a topic.
|
||||
|
||||
```bash
|
||||
lh doc topic-docs [--json [fields]] < topicId > [--type < type > ]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| --------------- | ------------------------------------------------ |
|
||||
| `--type <type>` | Filter by type (article, markdown, note, report) |
|
||||
@@ -1,138 +0,0 @@
|
||||
# Memory Commands
|
||||
|
||||
Manage user memories - the AI's long-term knowledge about users.
|
||||
|
||||
**Source**: `apps/cli/src/commands/memory.ts`
|
||||
|
||||
## Memory Categories
|
||||
|
||||
| Category | Description |
|
||||
| ------------ | ----------------------------------------- |
|
||||
| `identity` | User's name, role, relationships |
|
||||
| `activity` | Recent activities and their status |
|
||||
| `context` | Ongoing contexts, projects, goals |
|
||||
| `experience` | Past experiences and key learnings |
|
||||
| `preference` | User preferences, directives, suggestions |
|
||||
|
||||
---
|
||||
|
||||
## `lh memory list [category]`
|
||||
|
||||
List memory entries, optionally filtered by category.
|
||||
|
||||
```bash
|
||||
lh memory list # All categories
|
||||
lh memory list identity # Only identity memories
|
||||
lh memory list preference # Only preferences
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| ----------------- | ----------- |
|
||||
| `--json [fields]` | JSON output |
|
||||
|
||||
**Output**: Grouped by category, showing type/status and descriptions.
|
||||
|
||||
---
|
||||
|
||||
## `lh memory create`
|
||||
|
||||
Create a new identity memory entry.
|
||||
|
||||
```bash
|
||||
lh memory create [options]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| -------------------------- | ------------------------ |
|
||||
| `--type <type>` | Memory type |
|
||||
| `--role <role>` | User's role |
|
||||
| `--relationship <rel>` | Relationship description |
|
||||
| `-d, --description <desc>` | Description |
|
||||
| `--labels <labels...>` | Extracted labels |
|
||||
|
||||
---
|
||||
|
||||
## `lh memory edit <category> <id>`
|
||||
|
||||
Edit a memory entry. Options vary by category:
|
||||
|
||||
```bash
|
||||
lh memory edit identity < id > [options]
|
||||
lh memory edit activity < id > [options]
|
||||
lh memory edit context < id > [options]
|
||||
lh memory edit experience < id > [options]
|
||||
lh memory edit preference < id > [options]
|
||||
```
|
||||
|
||||
### Category-specific Options
|
||||
|
||||
**identity**:
|
||||
|
||||
- `--type <type>`, `--role <role>`, `--relationship <rel>`
|
||||
|
||||
**activity**:
|
||||
|
||||
- `--narrative <text>`, `--notes <text>`, `--status <status>`
|
||||
|
||||
**context**:
|
||||
|
||||
- `--title <title>`, `--description <desc>`, `--status <status>`
|
||||
|
||||
**experience**:
|
||||
|
||||
- `--situation <text>`, `--action <text>`, `--key-learning <text>`
|
||||
|
||||
**preference**:
|
||||
|
||||
- `--directives <text>`, `--suggestions <text>`
|
||||
|
||||
---
|
||||
|
||||
## `lh memory delete <category> <id>`
|
||||
|
||||
```bash
|
||||
lh memory delete identity < id > [--yes]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `lh memory persona`
|
||||
|
||||
Display the compiled memory persona summary.
|
||||
|
||||
```bash
|
||||
lh memory persona [--json [fields]]
|
||||
```
|
||||
|
||||
**Output**: Summarized user profile built from all memory categories.
|
||||
|
||||
---
|
||||
|
||||
## `lh memory extract`
|
||||
|
||||
Trigger async memory extraction from chat history.
|
||||
|
||||
```bash
|
||||
lh memory extract [--from [--to < date > ] < date > ]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| --------------- | ----------------------- |
|
||||
| `--from <date>` | Start date (ISO format) |
|
||||
| `--to <date>` | End date (ISO format) |
|
||||
|
||||
Starts a background task that analyzes chat history and creates new memory entries.
|
||||
|
||||
---
|
||||
|
||||
## `lh memory extract-status`
|
||||
|
||||
Check the status of a memory extraction task.
|
||||
|
||||
```bash
|
||||
lh memory extract-status [--task-id [--json [fields]] < id > ]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| ---------------- | ------------------- |
|
||||
| `--task-id <id>` | Check specific task |
|
||||
@@ -1,186 +0,0 @@
|
||||
# Model & Provider Commands
|
||||
|
||||
## Model Management (`lh model`)
|
||||
|
||||
Manage AI models within providers.
|
||||
|
||||
**Source**: `apps/cli/src/commands/model.ts`
|
||||
|
||||
### `lh model list <providerId>`
|
||||
|
||||
List models for a specific provider.
|
||||
|
||||
```bash
|
||||
lh model list openai
|
||||
lh model list openai --type image --enabled
|
||||
lh model list lobehub --type video --json
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ----------------- | -------------------------------------------------------------------------------------- | ------- |
|
||||
| `-L, --limit <n>` | Maximum items | `50` |
|
||||
| `--enabled` | Only show enabled models | `false` |
|
||||
| `--type <type>` | Filter by model type (`chat\|embedding\|tts\|stt\|image\|video\|text2music\|realtime`) | - |
|
||||
| `--json [fields]` | Output JSON, optionally specify fields | - |
|
||||
|
||||
**Table columns**: ID, NAME, ENABLED, TYPE
|
||||
|
||||
**Backend**: `aiModel.getAiProviderModelList` → `AiInfraRepos.getAiProviderModelList` (supports `type` filter at repository level)
|
||||
|
||||
### `lh model view <id>`
|
||||
|
||||
```bash
|
||||
lh model view [fields]] < modelId > [--json
|
||||
```
|
||||
|
||||
**Displays**: Name, provider, type, enabled status, capabilities.
|
||||
|
||||
### `lh model create`
|
||||
|
||||
```bash
|
||||
lh model create --id [--type < id > --provider < providerId > [--display-name < name > ] < type > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ------------------------- | ------------ | -------- |
|
||||
| `--id <id>` | Model ID | Required |
|
||||
| `--provider <providerId>` | Provider ID | Required |
|
||||
| `--display-name <name>` | Display name | - |
|
||||
| `--type <type>` | Model type | `chat` |
|
||||
|
||||
### `lh model edit <id>`
|
||||
|
||||
```bash
|
||||
lh model edit [--type < modelId > --provider < providerId > [--display-name < name > ] < type > ]
|
||||
```
|
||||
|
||||
### `lh model toggle <id>`
|
||||
|
||||
Enable or disable a model.
|
||||
|
||||
```bash
|
||||
lh model toggle < modelId > --provider < providerId > --enable
|
||||
lh model toggle < modelId > --provider < providerId > --disable
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| ------------------------- | ----------------- | ------------ |
|
||||
| `--provider <providerId>` | Provider ID | Yes |
|
||||
| `--enable` | Enable the model | One required |
|
||||
| `--disable` | Disable the model | One required |
|
||||
|
||||
### `lh model batch-toggle <ids...>`
|
||||
|
||||
Enable or disable multiple models at once.
|
||||
|
||||
```bash
|
||||
lh model batch-toggle model1 model2 model3 --provider openai --enable
|
||||
```
|
||||
|
||||
### `lh model delete <id>`
|
||||
|
||||
```bash
|
||||
lh model delete < modelId > --provider < providerId > [--yes]
|
||||
```
|
||||
|
||||
### `lh model clear`
|
||||
|
||||
Clear all models (or only remote/fetched models) for a provider.
|
||||
|
||||
```bash
|
||||
lh model clear --provider [--yes] < providerId > [--remote]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Provider Management (`lh provider`)
|
||||
|
||||
Manage AI service providers.
|
||||
|
||||
**Source**: `apps/cli/src/commands/provider.ts`
|
||||
|
||||
### `lh provider list`
|
||||
|
||||
```bash
|
||||
lh provider list [--json [fields]]
|
||||
```
|
||||
|
||||
**Table columns**: ID, NAME, ENABLED, SOURCE
|
||||
|
||||
### `lh provider view <id>`
|
||||
|
||||
```bash
|
||||
lh provider view [fields]] < providerId > [--json
|
||||
```
|
||||
|
||||
**Displays**: Name, enabled status, source, configuration.
|
||||
|
||||
### `lh provider create`
|
||||
|
||||
```bash
|
||||
lh provider create --id [-d [--logo [--sdk-type < id > -n < name > [-s < source > ] < desc > ] < url > ] < type > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| -------------------------- | ------------------------------------------------- | -------- |
|
||||
| `--id <id>` | Provider ID | Required |
|
||||
| `-n, --name <name>` | Provider name | Required |
|
||||
| `-s, --source <source>` | Source type (`builtin` or `custom`) | `custom` |
|
||||
| `-d, --description <desc>` | Provider description | - |
|
||||
| `--logo <logo>` | Provider logo URL | - |
|
||||
| `--sdk-type <sdkType>` | SDK type (openai, anthropic, azure, bedrock, ...) | - |
|
||||
|
||||
### `lh provider edit <id>`
|
||||
|
||||
```bash
|
||||
lh provider edit [-d [--logo [--sdk-type < providerId > [-n < name > ] < desc > ] < url > ] < type > ]
|
||||
```
|
||||
|
||||
Requires at least one change flag.
|
||||
|
||||
### `lh provider config <id>`
|
||||
|
||||
Configure provider settings (API key, base URL, etc.).
|
||||
|
||||
```bash
|
||||
lh provider config openai --api-key sk-xxx
|
||||
lh provider config openai --base-url https://custom-endpoint.com
|
||||
lh provider config openai --show
|
||||
lh provider config openai --show --json
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| ------------------------ | --------------------------------- |
|
||||
| `--api-key <key>` | Set API key |
|
||||
| `--base-url <url>` | Set base URL |
|
||||
| `--check-model <model>` | Set connectivity check model |
|
||||
| `--enable-response-api` | Enable Response API mode (OpenAI) |
|
||||
| `--disable-response-api` | Disable Response API mode |
|
||||
| `--fetch-on-client` | Enable fetching models on client |
|
||||
| `--no-fetch-on-client` | Disable fetching models on client |
|
||||
| `--show` | Show current config |
|
||||
| `--json [fields]` | Output JSON (with --show) |
|
||||
|
||||
**Important**: The `lobehub` provider is platform-managed. Attempting to set `--api-key` or `--base-url` on it will be rejected with an error message.
|
||||
|
||||
### `lh provider test <id>`
|
||||
|
||||
Test provider connectivity.
|
||||
|
||||
```bash
|
||||
lh provider test openai
|
||||
lh provider test openai -m gpt-4o --json
|
||||
```
|
||||
|
||||
### `lh provider toggle <id>`
|
||||
|
||||
```bash
|
||||
lh provider toggle < providerId > --enable
|
||||
lh provider toggle < providerId > --disable
|
||||
```
|
||||
|
||||
### `lh provider delete <id>`
|
||||
|
||||
```bash
|
||||
lh provider delete < providerId > [--yes]
|
||||
```
|
||||
@@ -1,94 +0,0 @@
|
||||
# Search & Configuration Commands
|
||||
|
||||
## Global Search (`lh search`)
|
||||
|
||||
Search across all LobeHub resource types.
|
||||
|
||||
**Source**: `apps/cli/src/commands/search.ts`
|
||||
|
||||
### `lh search <query>`
|
||||
|
||||
```bash
|
||||
lh search "meeting notes" [-t [-L [--json [fields]] < type > ] < n > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ------------------- | ----------------------- | --------- |
|
||||
| `-t, --type <type>` | Filter by resource type | All types |
|
||||
| `-L, --limit <n>` | Results per type | `10` |
|
||||
|
||||
### Searchable Types
|
||||
|
||||
| Type | Description |
|
||||
| ---------------- | ---------------------------- |
|
||||
| `agent` | AI agents |
|
||||
| `topic` | Conversation topics |
|
||||
| `file` | Uploaded files |
|
||||
| `folder` | File folders |
|
||||
| `message` | Chat messages |
|
||||
| `page` | Documents/pages |
|
||||
| `memory` | User memories |
|
||||
| `mcp` | MCP servers |
|
||||
| `plugin` | Installed plugins |
|
||||
| `communityAgent` | Community marketplace agents |
|
||||
| `knowledgeBase` | Knowledge bases |
|
||||
|
||||
**Output**: Results grouped by type, showing ID, title/name, description.
|
||||
|
||||
---
|
||||
|
||||
## User Configuration (`lh whoami` / `lh usage`)
|
||||
|
||||
**Source**: `apps/cli/src/commands/config.ts`
|
||||
|
||||
### `lh whoami`
|
||||
|
||||
Display current authenticated user information.
|
||||
|
||||
```bash
|
||||
lh whoami [--json [fields]]
|
||||
```
|
||||
|
||||
**Displays**: Name, username, email, user ID, subscription plan.
|
||||
|
||||
### `lh usage`
|
||||
|
||||
Display usage statistics.
|
||||
|
||||
```bash
|
||||
lh usage [--month [--daily] [--json [fields]] < YYYY-MM > ]
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
| ------------------- | -------------- | ----------------------- |
|
||||
| `--month <YYYY-MM>` | Month to query | Current month |
|
||||
| `--daily` | Group by day | `false` (monthly total) |
|
||||
|
||||
**Output**: Token usage, costs, and model breakdown for the specified period.
|
||||
|
||||
---
|
||||
|
||||
## Global Options
|
||||
|
||||
These options are available across most commands:
|
||||
|
||||
| Option | Description |
|
||||
| ----------------- | ---------------------------------------------------------------------- |
|
||||
| `--json [fields]` | Output as JSON; optionally filter to specific fields (comma-separated) |
|
||||
| `--yes` | Skip confirmation prompts for destructive operations |
|
||||
| `-L, --limit <n>` | Pagination limit for list commands |
|
||||
| `-v, --verbose` | Enable verbose/debug logging |
|
||||
| `--help` | Show command help |
|
||||
| `--version` | Show CLI version |
|
||||
|
||||
### JSON Field Filtering
|
||||
|
||||
The `--json` option supports field selection:
|
||||
|
||||
```bash
|
||||
# Full JSON output
|
||||
lh agent list --json
|
||||
|
||||
# Only specific fields
|
||||
lh agent list --json "id,title,model"
|
||||
```
|
||||
@@ -1,149 +0,0 @@
|
||||
# Skill & Plugin Commands
|
||||
|
||||
## Skill Management (`lh skill`)
|
||||
|
||||
Manage agent skills (custom instructions and capabilities).
|
||||
|
||||
**Source**: `apps/cli/src/commands/skill.ts`
|
||||
|
||||
### `lh skill list`
|
||||
|
||||
```bash
|
||||
lh skill list [--source [--json [fields]] < source > ]
|
||||
```
|
||||
|
||||
| Option | Description |
|
||||
| ------------------- | ----------------------------------- |
|
||||
| `--source <source>` | Filter: `builtin`, `market`, `user` |
|
||||
|
||||
**Table columns**: ID, NAME, DESCRIPTION, SOURCE, IDENTIFIER
|
||||
|
||||
### `lh skill view <id>`
|
||||
|
||||
```bash
|
||||
lh skill view [fields]] < id > [--json
|
||||
```
|
||||
|
||||
**Displays**: Name, description, source, identifier, content.
|
||||
|
||||
### `lh skill create`
|
||||
|
||||
```bash
|
||||
lh skill create -n < name > -d < desc > -c < content > [-i < identifier > ]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| -------------------------- | ----------------------------------- | -------- |
|
||||
| `-n, --name <name>` | Skill name | Yes |
|
||||
| `-d, --description <desc>` | Description | Yes |
|
||||
| `-c, --content <content>` | Skill content (prompt/instructions) | Yes |
|
||||
| `-i, --identifier <id>` | Custom identifier | No |
|
||||
|
||||
### `lh skill edit <id>`
|
||||
|
||||
```bash
|
||||
lh skill edit [-n [-d < id > [-c < content > ] < name > ] < desc > ]
|
||||
```
|
||||
|
||||
### `lh skill delete <id>`
|
||||
|
||||
```bash
|
||||
lh skill delete < id > [--yes]
|
||||
```
|
||||
|
||||
### `lh skill search <query>`
|
||||
|
||||
```bash
|
||||
lh skill search [fields]] < query > [--json
|
||||
```
|
||||
|
||||
### `lh skill install <source>` (alias: `lh skill i`)
|
||||
|
||||
Install a skill. Auto-detects source type from the input:
|
||||
|
||||
```bash
|
||||
# GitHub (URL or owner/repo shorthand)
|
||||
lh skill install lobehub/skill-repo
|
||||
lh skill install https://github.com/lobehub/skill-repo
|
||||
lh skill install lobehub/skill-repo --branch dev
|
||||
|
||||
# ZIP URL
|
||||
lh skill install https://example.com/skill.zip
|
||||
|
||||
# Marketplace identifier
|
||||
lh skill install my-cool-skill
|
||||
lh skill i my-cool-skill
|
||||
```
|
||||
|
||||
| Option | Description | Notes |
|
||||
| ------------------- | ------------------------- | -------- |
|
||||
| `--branch <branch>` | Branch name (GitHub only) | Optional |
|
||||
|
||||
**Detection rules**:
|
||||
|
||||
- `https://github.com/...` or `owner/repo` → GitHub
|
||||
- Other `https://...` URLs → ZIP URL
|
||||
- Everything else → marketplace identifier
|
||||
|
||||
### Resource Commands
|
||||
|
||||
#### `lh skill resources <id>`
|
||||
|
||||
List files/resources within a skill.
|
||||
|
||||
```bash
|
||||
lh skill resources [fields]] < id > [--json
|
||||
```
|
||||
|
||||
**Displays**: Path, type, size.
|
||||
|
||||
#### `lh skill read-resource <id> <path>`
|
||||
|
||||
Read a specific resource file from a skill.
|
||||
|
||||
```bash
|
||||
lh skill read-resource <skillId> <path>
|
||||
```
|
||||
|
||||
**Output**: File content or JSON metadata.
|
||||
|
||||
---
|
||||
|
||||
## Plugin Management (`lh plugin`)
|
||||
|
||||
Install and manage plugins (external tool integrations).
|
||||
|
||||
**Source**: `apps/cli/src/commands/plugin.ts`
|
||||
|
||||
### `lh plugin list`
|
||||
|
||||
```bash
|
||||
lh plugin list [--json [fields]]
|
||||
```
|
||||
|
||||
**Table columns**: ID, IDENTIFIER, TYPE, TITLE
|
||||
|
||||
### `lh plugin install`
|
||||
|
||||
```bash
|
||||
lh plugin install -i [--settings < identifier > --manifest < json > [--type < type > ] < json > ]
|
||||
```
|
||||
|
||||
| Option | Description | Required |
|
||||
| ----------------------- | -------------------------- | ---------------------- |
|
||||
| `-i, --identifier <id>` | Plugin identifier | Yes |
|
||||
| `--manifest <json>` | Plugin manifest JSON | Yes |
|
||||
| `--type <type>` | `plugin` or `customPlugin` | No (default: `plugin`) |
|
||||
| `--settings <json>` | Plugin settings JSON | No |
|
||||
|
||||
### `lh plugin uninstall <id>`
|
||||
|
||||
```bash
|
||||
lh plugin uninstall < id > [--yes]
|
||||
```
|
||||
|
||||
### `lh plugin update <id>`
|
||||
|
||||
```bash
|
||||
lh plugin update [--settings < id > [--manifest < json > ] < json > ]
|
||||
```
|
||||
@@ -1,614 +0,0 @@
|
||||
---
|
||||
name: data-fetching-architecture
|
||||
description: 'LobeHub data-fetching pipeline guide. Use for service layer, Zustand store, SWR, lambdaClient, useClientDataSWR, useFetchXxx hooks, or migrating useEffect fetches.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# LobeHub Data Fetching Architecture
|
||||
|
||||
> **Related:** `store-data-structures` covers List vs Detail data shape rationale (Map vs Array).
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```text
|
||||
┌─────────────┐
|
||||
│ Component │
|
||||
└──────┬──────┘
|
||||
│ 1. Call useFetchXxx hook from store
|
||||
↓
|
||||
┌──────────────────┐
|
||||
│ Zustand Store │
|
||||
│ (State + Hook) │
|
||||
└──────┬───────────┘
|
||||
│ 2. useClientDataSWR calls service
|
||||
↓
|
||||
┌──────────────────┐
|
||||
│ Service Layer │
|
||||
│ (xxxService) │
|
||||
└──────┬───────────┘
|
||||
│ 3. Call lambdaClient
|
||||
↓
|
||||
┌──────────────────┐
|
||||
│ lambdaClient │
|
||||
│ (TRPC Client) │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## Core Principles
|
||||
|
||||
### ✅ DO
|
||||
|
||||
1. **Use Service Layer** for all API calls
|
||||
2. **Use Store SWR Hooks** for data fetching (not useEffect)
|
||||
3. **Use proper data structures** — see `store-data-structures` skill for List vs Detail patterns
|
||||
4. **Use lambdaClient.mutate** for write operations (create/update/delete)
|
||||
5. **Use lambdaClient.query** only inside service methods
|
||||
6. **Naming convention** — read hooks are `useFetchXxx`, cache invalidation helpers are `refreshXxx` (e.g. `useFetchBenchmarks` / `refreshBenchmarks`). Mutations then chain `refreshXxx()` after the service call.
|
||||
|
||||
### ❌ DON'T
|
||||
|
||||
1. **Never use useEffect** for data fetching
|
||||
2. **Never call lambdaClient** directly in components or stores
|
||||
3. **Never use useState** for server data
|
||||
4. **Never mix data structure patterns** — follow `store-data-structures` skill
|
||||
|
||||
---
|
||||
|
||||
## Layer 1: Service Layer
|
||||
|
||||
### Purpose
|
||||
|
||||
- Encapsulate all API calls to lambdaClient
|
||||
- Provide clean, typed interfaces
|
||||
- Single source of truth for API operations
|
||||
|
||||
### Service Structure
|
||||
|
||||
```typescript
|
||||
// src/services/agentEval.ts
|
||||
class AgentEvalService {
|
||||
// Query methods - READ operations
|
||||
async listBenchmarks() {
|
||||
return lambdaClient.agentEval.listBenchmarks.query();
|
||||
}
|
||||
|
||||
async getBenchmark(id: string) {
|
||||
return lambdaClient.agentEval.getBenchmark.query({ id });
|
||||
}
|
||||
|
||||
// Mutation methods - WRITE operations
|
||||
async createBenchmark(params: CreateBenchmarkParams) {
|
||||
return lambdaClient.agentEval.createBenchmark.mutate(params);
|
||||
}
|
||||
|
||||
async updateBenchmark(params: UpdateBenchmarkParams) {
|
||||
return lambdaClient.agentEval.updateBenchmark.mutate(params);
|
||||
}
|
||||
|
||||
async deleteBenchmark(id: string) {
|
||||
return lambdaClient.agentEval.deleteBenchmark.mutate({ id });
|
||||
}
|
||||
}
|
||||
|
||||
export const agentEvalService = new AgentEvalService();
|
||||
```
|
||||
|
||||
### Service Guidelines
|
||||
|
||||
1. **One service per domain** (e.g., agentEval, ragEval, aiAgent)
|
||||
2. **Export singleton instance** (`export const xxxService = new XxxService()`)
|
||||
3. **Method names match operations** (list, get, create, update, delete)
|
||||
4. **Clear parameter types** (use interfaces for complex params)
|
||||
|
||||
---
|
||||
|
||||
## Layer 2: Store with SWR Hooks
|
||||
|
||||
### Purpose
|
||||
|
||||
- Manage client-side state
|
||||
- Provide SWR hooks for data fetching
|
||||
- Handle cache invalidation
|
||||
|
||||
### State Structure
|
||||
|
||||
```typescript
|
||||
// src/store/eval/slices/benchmark/initialState.ts
|
||||
export interface BenchmarkSliceState {
|
||||
// List data - simple array
|
||||
benchmarkList: AgentEvalBenchmarkListItem[];
|
||||
benchmarkListInit: boolean;
|
||||
|
||||
// Detail data - map for caching
|
||||
benchmarkDetailMap: Record<string, AgentEvalBenchmark>;
|
||||
loadingBenchmarkDetailIds: string[];
|
||||
|
||||
// Mutation states
|
||||
isCreatingBenchmark: boolean;
|
||||
isUpdatingBenchmark: boolean;
|
||||
isDeletingBenchmark: boolean;
|
||||
}
|
||||
```
|
||||
|
||||
> For complete initialState, reducer, and internal dispatch patterns, see the `store-data-structures` skill.
|
||||
|
||||
### Actions
|
||||
|
||||
```typescript
|
||||
// src/store/eval/slices/benchmark/action.ts
|
||||
const FETCH_BENCHMARKS_KEY = 'FETCH_BENCHMARKS';
|
||||
const FETCH_BENCHMARK_DETAIL_KEY = 'FETCH_BENCHMARK_DETAIL';
|
||||
|
||||
export interface BenchmarkAction {
|
||||
// SWR Hooks - for data fetching
|
||||
useFetchBenchmarks: () => SWRResponse;
|
||||
useFetchBenchmarkDetail: (id?: string) => SWRResponse;
|
||||
|
||||
// Refresh methods - for cache invalidation
|
||||
refreshBenchmarks: () => Promise<void>;
|
||||
refreshBenchmarkDetail: (id: string) => Promise<void>;
|
||||
|
||||
// Mutation actions
|
||||
createBenchmark: (params: CreateParams) => Promise<any>;
|
||||
updateBenchmark: (params: UpdateParams) => Promise<void>;
|
||||
deleteBenchmark: (id: string) => Promise<void>;
|
||||
|
||||
// Internal methods - not for direct UI use
|
||||
internal_dispatchBenchmarkDetail: (payload: BenchmarkDetailDispatch) => void;
|
||||
internal_updateBenchmarkDetailLoading: (id: string, loading: boolean) => void;
|
||||
}
|
||||
|
||||
export const createBenchmarkSlice: StateCreator<EvalStore, any, [], BenchmarkAction> = (
|
||||
set,
|
||||
get,
|
||||
) => ({
|
||||
// Fetch list — simple array stored in benchmarkList
|
||||
useFetchBenchmarks: () =>
|
||||
useClientDataSWR(FETCH_BENCHMARKS_KEY, () => agentEvalService.listBenchmarks(), {
|
||||
onSuccess: (data) => {
|
||||
set({ benchmarkList: data, benchmarkListInit: true }, false, 'useFetchBenchmarks/success');
|
||||
},
|
||||
}),
|
||||
|
||||
// Fetch detail — null key disables the request when id is missing
|
||||
useFetchBenchmarkDetail: (id) =>
|
||||
useClientDataSWR(
|
||||
id ? [FETCH_BENCHMARK_DETAIL_KEY, id] : null,
|
||||
() => agentEvalService.getBenchmark(id!),
|
||||
{
|
||||
onSuccess: (data) => {
|
||||
get().internal_dispatchBenchmarkDetail({
|
||||
type: 'setBenchmarkDetail',
|
||||
id: id!,
|
||||
value: data,
|
||||
});
|
||||
get().internal_updateBenchmarkDetailLoading(id!, false);
|
||||
},
|
||||
},
|
||||
),
|
||||
|
||||
// Refresh methods
|
||||
refreshBenchmarks: () => mutate(FETCH_BENCHMARKS_KEY),
|
||||
refreshBenchmarkDetail: (id) => mutate([FETCH_BENCHMARK_DETAIL_KEY, id]),
|
||||
|
||||
// CREATE — refresh list after creation
|
||||
createBenchmark: async (params) => {
|
||||
set({ isCreatingBenchmark: true }, false, 'createBenchmark/start');
|
||||
try {
|
||||
const result = await agentEvalService.createBenchmark(params);
|
||||
await get().refreshBenchmarks();
|
||||
return result;
|
||||
} finally {
|
||||
set({ isCreatingBenchmark: false }, false, 'createBenchmark/end');
|
||||
}
|
||||
},
|
||||
|
||||
// UPDATE — optimistic update + refresh
|
||||
updateBenchmark: async (params) => {
|
||||
const { id } = params;
|
||||
|
||||
// 1. Optimistic update
|
||||
get().internal_dispatchBenchmarkDetail({
|
||||
type: 'updateBenchmarkDetail',
|
||||
id,
|
||||
value: params,
|
||||
});
|
||||
// 2. Set loading
|
||||
get().internal_updateBenchmarkDetailLoading(id, true);
|
||||
|
||||
try {
|
||||
// 3. Call service
|
||||
await agentEvalService.updateBenchmark(params);
|
||||
// 4. Refresh from server
|
||||
await get().refreshBenchmarks();
|
||||
await get().refreshBenchmarkDetail(id);
|
||||
} finally {
|
||||
get().internal_updateBenchmarkDetailLoading(id, false);
|
||||
}
|
||||
},
|
||||
|
||||
// DELETE — optimistic update + refresh
|
||||
deleteBenchmark: async (id) => {
|
||||
get().internal_dispatchBenchmarkDetail({ type: 'deleteBenchmarkDetail', id });
|
||||
get().internal_updateBenchmarkDetailLoading(id, true);
|
||||
|
||||
try {
|
||||
await agentEvalService.deleteBenchmark(id);
|
||||
await get().refreshBenchmarks();
|
||||
} finally {
|
||||
get().internal_updateBenchmarkDetailLoading(id, false);
|
||||
}
|
||||
},
|
||||
|
||||
// Internal — dispatch to reducer (for detail map)
|
||||
internal_dispatchBenchmarkDetail: (payload) => {
|
||||
const currentMap = get().benchmarkDetailMap;
|
||||
const nextMap = benchmarkDetailReducer(currentMap, payload);
|
||||
|
||||
// Skip set when nothing changed — avoids unnecessary re-renders
|
||||
if (isEqual(nextMap, currentMap)) return;
|
||||
set({ benchmarkDetailMap: nextMap }, false, `dispatchBenchmarkDetail/${payload.type}`);
|
||||
},
|
||||
|
||||
// Internal — update loading state for specific detail
|
||||
internal_updateBenchmarkDetailLoading: (id, loading) => {
|
||||
set(
|
||||
(state) => ({
|
||||
loadingBenchmarkDetailIds: loading
|
||||
? [...state.loadingBenchmarkDetailIds, id]
|
||||
: state.loadingBenchmarkDetailIds.filter((i) => i !== id),
|
||||
}),
|
||||
false,
|
||||
'updateBenchmarkDetailLoading',
|
||||
);
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Store Guidelines
|
||||
|
||||
1. **SWR keys as constants** at top of file
|
||||
2. **useClientDataSWR** for all data fetching (never useEffect)
|
||||
3. **onSuccess callback** updates store state
|
||||
4. **Refresh methods** use `mutate()` to invalidate cache
|
||||
5. **Loading states** in initialState, updated in onSuccess
|
||||
6. **Mutations** call service, then refresh relevant cache
|
||||
|
||||
---
|
||||
|
||||
## Layer 3: Component Usage
|
||||
|
||||
### Fetching List Data
|
||||
|
||||
```tsx
|
||||
// ✅ CORRECT
|
||||
const BenchmarkList = () => {
|
||||
// 1. Get the hook from store
|
||||
const useFetchBenchmarks = useEvalStore((s) => s.useFetchBenchmarks);
|
||||
|
||||
// 2. Get list data
|
||||
const benchmarks = useEvalStore((s) => s.benchmarkList);
|
||||
const isInit = useEvalStore((s) => s.benchmarkListInit);
|
||||
|
||||
// 3. Call the hook (SWR handles the data fetching)
|
||||
useFetchBenchmarks();
|
||||
|
||||
// 4. Use the data
|
||||
if (!isInit) return <Loading />;
|
||||
return (
|
||||
<div>
|
||||
<h2>Total: {benchmarks.length}</h2>
|
||||
{benchmarks.map((b) => (
|
||||
<BenchmarkCard key={b.id} {...b} />
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
```
|
||||
|
||||
### Fetching Detail Data
|
||||
|
||||
```tsx
|
||||
// ✅ CORRECT
|
||||
const BenchmarkDetail = () => {
|
||||
const { benchmarkId } = useParams<{ benchmarkId: string }>();
|
||||
|
||||
const useFetchBenchmarkDetail = useEvalStore((s) => s.useFetchBenchmarkDetail);
|
||||
|
||||
// Detail from map
|
||||
const benchmark = useEvalStore((s) =>
|
||||
benchmarkId ? s.benchmarkDetailMap[benchmarkId] : undefined,
|
||||
);
|
||||
|
||||
// Per-item loading
|
||||
const isLoading = useEvalStore((s) =>
|
||||
benchmarkId ? s.loadingBenchmarkDetailIds.includes(benchmarkId) : false,
|
||||
);
|
||||
|
||||
useFetchBenchmarkDetail(benchmarkId);
|
||||
|
||||
if (!benchmark) return <Loading />;
|
||||
return (
|
||||
<div>
|
||||
<h1>{benchmark.name}</h1>
|
||||
<p>{benchmark.description}</p>
|
||||
{isLoading && <Spinner />}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
```
|
||||
|
||||
### Using Selectors (Recommended)
|
||||
|
||||
```typescript
|
||||
// src/store/eval/slices/benchmark/selectors.ts
|
||||
export const benchmarkSelectors = {
|
||||
getBenchmarkDetail: (id: string) => (s: EvalStore) => s.benchmarkDetailMap[id],
|
||||
isLoadingBenchmarkDetail: (id: string) => (s: EvalStore) =>
|
||||
s.loadingBenchmarkDetailIds.includes(id),
|
||||
};
|
||||
|
||||
// Component with selectors
|
||||
const BenchmarkDetail = () => {
|
||||
const { benchmarkId } = useParams();
|
||||
const useFetchBenchmarkDetail = useEvalStore((s) => s.useFetchBenchmarkDetail);
|
||||
const benchmark = useEvalStore(benchmarkSelectors.getBenchmarkDetail(benchmarkId!));
|
||||
|
||||
useFetchBenchmarkDetail(benchmarkId);
|
||||
|
||||
return <div>{benchmark && <h1>{benchmark.name}</h1>}</div>;
|
||||
};
|
||||
```
|
||||
|
||||
### Anti-pattern
|
||||
|
||||
```tsx
|
||||
// ❌ WRONG — Don't use useEffect for data fetching
|
||||
const BenchmarkList = () => {
|
||||
const [data, setData] = useState([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
setLoading(true);
|
||||
lambdaClient.agentEval.listBenchmarks
|
||||
.query()
|
||||
.then(setData)
|
||||
.finally(() => setLoading(false));
|
||||
}, []);
|
||||
|
||||
return <div>...</div>;
|
||||
};
|
||||
```
|
||||
|
||||
### Mutations in Components
|
||||
|
||||
```tsx
|
||||
// Create — global mutation flag drives form loading
|
||||
const CreateBenchmarkModal = () => {
|
||||
const createBenchmark = useEvalStore((s) => s.createBenchmark);
|
||||
const isCreating = useEvalStore((s) => s.isCreatingBenchmark);
|
||||
|
||||
const handleSubmit = async (values) => {
|
||||
try {
|
||||
// Optimistic update + refresh happen inside createBenchmark
|
||||
await createBenchmark(values);
|
||||
message.success('Created successfully');
|
||||
onClose();
|
||||
} catch (error) {
|
||||
message.error('Failed to create');
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Form onSubmit={handleSubmit} loading={isCreating}>
|
||||
...
|
||||
</Form>
|
||||
);
|
||||
};
|
||||
|
||||
// Update / delete — per-item loading so only the row being mutated spins
|
||||
const BenchmarkItem = ({ id }: { id: string }) => {
|
||||
const updateBenchmark = useEvalStore((s) => s.updateBenchmark);
|
||||
const deleteBenchmark = useEvalStore((s) => s.deleteBenchmark);
|
||||
const isLoading = useEvalStore(benchmarkSelectors.isLoadingBenchmarkDetail(id));
|
||||
|
||||
const handleUpdate = async (data) => {
|
||||
await updateBenchmark({ id, ...data });
|
||||
};
|
||||
|
||||
const handleDelete = async () => {
|
||||
await deleteBenchmark(id);
|
||||
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
{isLoading && <Spinner />}
|
||||
<button onClick={handleUpdate}>Update</button>
|
||||
<button onClick={handleDelete}>Delete</button>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
```
|
||||
|
||||
**Why two patterns:** create has no id yet, so a single `isCreatingXxx` flag is enough. Update/delete target a specific row, so global flags would freeze unrelated rows — keep per-item state in `loadingXxxIds`.
|
||||
|
||||
---
|
||||
|
||||
## Need a fuller worked example?
|
||||
|
||||
The canonical `Benchmark` example above is the one to copy for a flat list + detail map. If you need to maintain a list **keyed by a parent id** (e.g. `datasetMap[benchmarkId]` because the same shape appears under multiple parents), read [`references/walkthrough.md`](./references/walkthrough.md) — it walks through the full 6 steps (service → reducer → slice → store wiring → selectors → component) for that variant.
|
||||
|
||||
---
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Pattern 1: Pagination
|
||||
|
||||
Cache key array must include every parameter that should trigger a refetch.
|
||||
|
||||
```typescript
|
||||
useFetchTestCases: (params: { datasetId: string; limit: number; offset: number }) =>
|
||||
useClientDataSWR(
|
||||
params.datasetId ? [FETCH_TEST_CASES_KEY, params.datasetId, params.limit, params.offset] : null,
|
||||
() => agentEvalService.listTestCases(params),
|
||||
{
|
||||
onSuccess: (data) =>
|
||||
set({
|
||||
testCaseList: data.data,
|
||||
testCaseTotal: data.total,
|
||||
isLoadingTestCases: false,
|
||||
}),
|
||||
},
|
||||
);
|
||||
```
|
||||
|
||||
### Pattern 2: Dependent Fetching
|
||||
|
||||
Both hooks run in parallel — SWR dedupes, no manual sequencing needed.
|
||||
|
||||
```tsx
|
||||
const BenchmarkDetail = () => {
|
||||
const { benchmarkId } = useParams();
|
||||
const useFetchBenchmarkDetail = useEvalStore((s) => s.useFetchBenchmarkDetail);
|
||||
const useFetchDatasets = useEvalStore((s) => s.useFetchDatasets);
|
||||
|
||||
useFetchBenchmarkDetail(benchmarkId);
|
||||
useFetchDatasets(benchmarkId);
|
||||
|
||||
return <div>...</div>;
|
||||
};
|
||||
```
|
||||
|
||||
### Pattern 3: Conditional Fetching
|
||||
|
||||
Pass `undefined` to disable the hook entirely.
|
||||
|
||||
```tsx
|
||||
// only fetch when modal is open AND id present
|
||||
useFetchDatasetDetail(open && datasetId ? datasetId : undefined);
|
||||
```
|
||||
|
||||
### Pattern 4: Cross-domain Refresh
|
||||
|
||||
```typescript
|
||||
deleteBenchmark: async (id) => {
|
||||
await agentEvalService.deleteBenchmark(id);
|
||||
await get().refreshBenchmarks();
|
||||
await get().refreshDatasets(id); // related cache invalidated too
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Migration Guide: useEffect → Store SWR
|
||||
|
||||
### Before (❌ Wrong)
|
||||
|
||||
```tsx
|
||||
const TestCaseList = ({ datasetId }: Props) => {
|
||||
const [data, setData] = useState<any[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
setLoading(true);
|
||||
lambdaClient.agentEval.listTestCases
|
||||
.query({ datasetId })
|
||||
.then((r) => setData(r.data))
|
||||
.finally(() => setLoading(false));
|
||||
}, [datasetId]);
|
||||
|
||||
return <Table data={data} loading={loading} />;
|
||||
};
|
||||
```
|
||||
|
||||
### After (✅ Correct)
|
||||
|
||||
```typescript
|
||||
// 1. Add service method
|
||||
class AgentEvalService {
|
||||
async listTestCases(params: { datasetId: string }) {
|
||||
return lambdaClient.agentEval.listTestCases.query(params);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Add store slice hook
|
||||
export const createTestCaseSlice: StateCreator<...> = (set) => ({
|
||||
useFetchTestCases: (params) =>
|
||||
useClientDataSWR(
|
||||
params.datasetId ? [FETCH_TEST_CASES_KEY, params.datasetId] : null,
|
||||
() => agentEvalService.listTestCases(params),
|
||||
{
|
||||
onSuccess: (data) =>
|
||||
set({ testCaseList: data.data, isLoadingTestCases: false }),
|
||||
},
|
||||
),
|
||||
});
|
||||
|
||||
// 3. Component reads from store
|
||||
const TestCaseList = ({ datasetId }: Props) => {
|
||||
const useFetchTestCases = useEvalStore((s) => s.useFetchTestCases);
|
||||
const data = useEvalStore((s) => s.testCaseList);
|
||||
const loading = useEvalStore((s) => s.isLoadingTestCases);
|
||||
|
||||
useFetchTestCases({ datasetId });
|
||||
|
||||
return <Table data={data} loading={loading} />;
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Symptom | Check |
|
||||
| --------------------------- | ------------------------------------------------------------------- |
|
||||
| Data never loads | Hook called? Key not `null`/`undefined`? Network tab shows request? |
|
||||
| Stale data after mutation | Did `refreshXxx` run? Cache key matches what the hook uses? |
|
||||
| Loading state stuck `true` | `onSuccess` writes loading=false? Promise rejected silently? |
|
||||
| Detail map missing an entry | Reducer dispatch ran? `isEqual` short-circuited on stale data? |
|
||||
|
||||
---
|
||||
|
||||
## Summary Checklist
|
||||
|
||||
When adding new data fetching:
|
||||
|
||||
### Step 1: Types & State
|
||||
|
||||
See `store-data-structures` for details.
|
||||
|
||||
- [ ] Define types in `@lobechat/types`: Detail type + List item type
|
||||
- [ ] State structure: `xxxList: XxxListItem[]`, `xxxDetailMap: Record<string, Xxx>`, `loadingXxxDetailIds: string[]`
|
||||
- [ ] Reducer if optimistic updates are needed
|
||||
|
||||
### Step 2: Service Layer
|
||||
|
||||
- [ ] Create service in `src/services/xxxService.ts`
|
||||
- [ ] Methods: `listXxx()`, `getXxx(id)`, `createXxx()`, `updateXxx()`, `deleteXxx()`
|
||||
|
||||
### Step 3: Store Actions
|
||||
|
||||
- [ ] `initialState.ts` with state structure
|
||||
- [ ] `action.ts` with:
|
||||
- [ ] `useFetchXxxList()`, `useFetchXxxDetail(id)` — SWR hooks
|
||||
- [ ] `refreshXxxList()`, `refreshXxxDetail(id)` — cache invalidation
|
||||
- [ ] CRUD methods calling service
|
||||
- [ ] `internal_dispatch`, `internal_updateLoading` if using reducer
|
||||
- [ ] `selectors.ts` (optional but recommended)
|
||||
- [ ] Integrate slice into main store + initialState
|
||||
|
||||
### Step 4: Component Usage
|
||||
|
||||
- [ ] Use store hooks (NOT useEffect)
|
||||
- [ ] List pages: access `xxxList` array
|
||||
- [ ] Detail pages: access `xxxDetailMap[id]`
|
||||
- [ ] Use loading states for UI feedback
|
||||
|
||||
**Mental model:** Types → Service → Reducer → Slice → Component 🎯
|
||||
|
||||
---
|
||||
|
||||
## Related Skills
|
||||
|
||||
- **`store-data-structures`** — How to structure List and Detail data in stores
|
||||
- **`zustand`** — General Zustand patterns and best practices
|
||||
@@ -1,244 +0,0 @@
|
||||
# Walkthrough: Adding a New Feature End-to-End
|
||||
|
||||
This is a worked example of the canonical 6-step recipe applied to a new entity (`Dataset`), showing a variant of the main skill's pattern: **a list keyed by a parent id** (`datasetMap[benchmarkId]`), useful when the same shape appears under different parents.
|
||||
|
||||
If you only need the canonical (single-array) pattern, the main `SKILL.md` already shows it for `Benchmark`. Read this file when you need the parent-keyed Map variant, or when you want a checklist-style walkthrough.
|
||||
|
||||
## Step 1: Add Service methods
|
||||
|
||||
```typescript
|
||||
class AgentEvalService {
|
||||
async listDatasets(benchmarkId: string) {
|
||||
return lambdaClient.agentEval.listDatasets.query({ benchmarkId });
|
||||
}
|
||||
async getDataset(id: string) {
|
||||
return lambdaClient.agentEval.getDataset.query({ id });
|
||||
}
|
||||
async createDataset(params: CreateDatasetParams) {
|
||||
return lambdaClient.agentEval.createDataset.mutate(params);
|
||||
}
|
||||
// updateDataset / deleteDataset follow the same shape
|
||||
}
|
||||
```
|
||||
|
||||
## Step 2: Reducer (optimistic updates)
|
||||
|
||||
```typescript
|
||||
// src/store/eval/slices/dataset/reducer.ts
|
||||
export type DatasetDispatch =
|
||||
| { type: 'addDataset'; value: Dataset }
|
||||
| { type: 'updateDataset'; id: string; value: Partial<Dataset> }
|
||||
| { type: 'deleteDataset'; id: string };
|
||||
|
||||
export const datasetReducer = (state: Dataset[] = [], payload: DatasetDispatch): Dataset[] =>
|
||||
produce(state, (draft) => {
|
||||
switch (payload.type) {
|
||||
case 'addDataset':
|
||||
draft.unshift(payload.value);
|
||||
break;
|
||||
case 'updateDataset': {
|
||||
const i = draft.findIndex((item) => item.id === payload.id);
|
||||
if (i !== -1) draft[i] = { ...draft[i], ...payload.value };
|
||||
break;
|
||||
}
|
||||
case 'deleteDataset': {
|
||||
const i = draft.findIndex((item) => item.id === payload.id);
|
||||
if (i !== -1) draft.splice(i, 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
## Step 3: Store slice
|
||||
|
||||
```typescript
|
||||
// src/store/eval/slices/dataset/initialState.ts
|
||||
export interface DatasetData {
|
||||
currentPage: number;
|
||||
hasMore: boolean;
|
||||
isLoading: boolean;
|
||||
items: Dataset[];
|
||||
pageSize: number;
|
||||
total: number;
|
||||
}
|
||||
|
||||
export interface DatasetSliceState {
|
||||
// Map keyed by benchmarkId — multiple parent contexts share the slice
|
||||
datasetMap: Record<string, DatasetData>;
|
||||
// Single item for modal display
|
||||
datasetDetail: Dataset | null;
|
||||
isLoadingDatasetDetail: boolean;
|
||||
loadingDatasetIds: string[];
|
||||
}
|
||||
|
||||
export const datasetInitialState: DatasetSliceState = {
|
||||
datasetMap: {},
|
||||
datasetDetail: null,
|
||||
isLoadingDatasetDetail: false,
|
||||
loadingDatasetIds: [],
|
||||
};
|
||||
```
|
||||
|
||||
```typescript
|
||||
// src/store/eval/slices/dataset/action.ts
|
||||
const FETCH_DATASETS_KEY = 'FETCH_DATASETS';
|
||||
const FETCH_DATASET_DETAIL_KEY = 'FETCH_DATASET_DETAIL';
|
||||
|
||||
export const createDatasetSlice: StateCreator<EvalStore, any, [], DatasetAction> = (set, get) => ({
|
||||
// Cache key includes benchmarkId so each parent has its own SWR entry
|
||||
useFetchDatasets: (benchmarkId) =>
|
||||
useClientDataSWR(
|
||||
benchmarkId ? [FETCH_DATASETS_KEY, benchmarkId] : null,
|
||||
() => agentEvalService.listDatasets(benchmarkId!),
|
||||
{
|
||||
onSuccess: (data) => {
|
||||
set({
|
||||
datasetMap: {
|
||||
...get().datasetMap,
|
||||
[benchmarkId!]: {
|
||||
currentPage: 1,
|
||||
hasMore: false,
|
||||
isLoading: false,
|
||||
items: data,
|
||||
pageSize: data.length,
|
||||
total: data.length,
|
||||
},
|
||||
},
|
||||
});
|
||||
},
|
||||
},
|
||||
),
|
||||
|
||||
useFetchDatasetDetail: (id) =>
|
||||
useClientDataSWR(
|
||||
id ? [FETCH_DATASET_DETAIL_KEY, id] : null,
|
||||
() => agentEvalService.getDataset(id!),
|
||||
{
|
||||
onSuccess: (data) => set({ datasetDetail: data, isLoadingDatasetDetail: false }),
|
||||
},
|
||||
),
|
||||
|
||||
refreshDatasets: (benchmarkId) => mutate([FETCH_DATASETS_KEY, benchmarkId]),
|
||||
refreshDatasetDetail: (id) => mutate([FETCH_DATASET_DETAIL_KEY, id]),
|
||||
|
||||
// CREATE with optimistic update — note the temp id pattern
|
||||
createDataset: async (params) => {
|
||||
const tmpId = Date.now().toString();
|
||||
const { benchmarkId } = params;
|
||||
|
||||
get().internal_dispatchDataset(
|
||||
{ type: 'addDataset', value: { ...params, id: tmpId, createdAt: Date.now() } as any },
|
||||
benchmarkId,
|
||||
);
|
||||
get().internal_updateDatasetLoading(tmpId, true);
|
||||
|
||||
try {
|
||||
const result = await agentEvalService.createDataset(params);
|
||||
await get().refreshDatasets(benchmarkId);
|
||||
return result;
|
||||
} finally {
|
||||
get().internal_updateDatasetLoading(tmpId, false);
|
||||
}
|
||||
},
|
||||
|
||||
// UPDATE / DELETE follow the same optimistic + refresh pattern as BenchmarkSlice
|
||||
// (see the main SKILL.md)
|
||||
|
||||
// Internal — dispatch reducer scoped to a parent
|
||||
internal_dispatchDataset: (payload, benchmarkId) => {
|
||||
const currentData = get().datasetMap[benchmarkId];
|
||||
const nextItems = datasetReducer(currentData?.items, payload);
|
||||
|
||||
// Skip set when nothing changed — avoids unnecessary re-renders
|
||||
if (isEqual(nextItems, currentData?.items)) return;
|
||||
|
||||
set({
|
||||
datasetMap: {
|
||||
...get().datasetMap,
|
||||
[benchmarkId]: {
|
||||
...currentData,
|
||||
currentPage: currentData?.currentPage ?? 1,
|
||||
hasMore: currentData?.hasMore ?? false,
|
||||
isLoading: false,
|
||||
items: nextItems,
|
||||
pageSize: currentData?.pageSize ?? nextItems.length,
|
||||
total: currentData?.total ?? nextItems.length,
|
||||
},
|
||||
},
|
||||
});
|
||||
},
|
||||
|
||||
internal_updateDatasetLoading: (id, loading) => {
|
||||
set((state) => ({
|
||||
loadingDatasetIds: loading
|
||||
? [...state.loadingDatasetIds, id]
|
||||
: state.loadingDatasetIds.filter((i) => i !== id),
|
||||
}));
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
## Step 4: Wire into the store
|
||||
|
||||
```typescript
|
||||
// src/store/eval/store.ts
|
||||
export type EvalStore = EvalStoreState & BenchmarkAction & DatasetAction & RunAction;
|
||||
|
||||
const createStore: StateCreator<EvalStore, [['zustand/devtools', never]]> = (set, get, store) => ({
|
||||
...initialState,
|
||||
...createBenchmarkSlice(set, get, store),
|
||||
...createDatasetSlice(set, get, store),
|
||||
...createRunSlice(set, get, store),
|
||||
});
|
||||
|
||||
// src/store/eval/initialState.ts
|
||||
export const initialState: EvalStoreState = {
|
||||
...benchmarkInitialState,
|
||||
...datasetInitialState,
|
||||
...runInitialState,
|
||||
};
|
||||
```
|
||||
|
||||
## Step 5: Selectors (optional but recommended)
|
||||
|
||||
```typescript
|
||||
export const datasetSelectors = {
|
||||
getDatasetData: (benchmarkId: string) => (s: EvalStore) => s.datasetMap[benchmarkId],
|
||||
getDatasets: (benchmarkId: string) => (s: EvalStore) => s.datasetMap[benchmarkId]?.items ?? [],
|
||||
isLoadingDataset: (id: string) => (s: EvalStore) => s.loadingDatasetIds.includes(id),
|
||||
};
|
||||
```
|
||||
|
||||
## Step 6: Use in component
|
||||
|
||||
```tsx
|
||||
// List scoped to a parent
|
||||
const DatasetList = ({ benchmarkId }: { benchmarkId: string }) => {
|
||||
const useFetchDatasets = useEvalStore((s) => s.useFetchDatasets);
|
||||
const datasets = useEvalStore(datasetSelectors.getDatasets(benchmarkId));
|
||||
const datasetData = useEvalStore(datasetSelectors.getDatasetData(benchmarkId));
|
||||
|
||||
useFetchDatasets(benchmarkId);
|
||||
|
||||
if (datasetData?.isLoading) return <Loading />;
|
||||
return (
|
||||
<div>
|
||||
<h2>Total: {datasetData?.total ?? 0}</h2>
|
||||
<List data={datasets} />
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
// Single item for modal — conditional fetching pattern
|
||||
const DatasetImportModal = ({ open, datasetId }: Props) => {
|
||||
const useFetchDatasetDetail = useEvalStore((s) => s.useFetchDatasetDetail);
|
||||
const dataset = useEvalStore((s) => s.datasetDetail);
|
||||
const isLoading = useEvalStore((s) => s.isLoadingDatasetDetail);
|
||||
|
||||
// Only fetch when modal is open AND id present
|
||||
useFetchDatasetDetail(open && datasetId ? datasetId : undefined);
|
||||
|
||||
return <Modal open={open}>{isLoading ? <Loading /> : <div>{dataset?.name}</div>}</Modal>;
|
||||
};
|
||||
```
|
||||
@@ -1,167 +0,0 @@
|
||||
---
|
||||
name: db-migrations
|
||||
description: 'Use for Drizzle migrations: schema/table/column changes, migration generation or regeneration, sequence conflicts after rebase, idempotent SQL review, or migration renames.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Database Migrations Guide
|
||||
|
||||
## Development-stage schema changes
|
||||
|
||||
Schema changes churn during feature development. When the schema changes before the migration has shipped, do not hand-edit the existing migration SQL to chase the new schema shape. Delete the draft migration artifacts added by this branch (SQL file, matching snapshot, and matching journal entry), then run the generator again and re-apply the normal migration review steps below.
|
||||
|
||||
For example, if this branch's draft migration is `0110_add_verify_tables_and_ai_infra_id`:
|
||||
|
||||
```bash
|
||||
# 1. Delete the draft SQL and its snapshot
|
||||
rm packages/database/migrations/0110_add_verify_tables_and_ai_infra_id.sql
|
||||
rm packages/database/migrations/meta/0110_snapshot.json
|
||||
|
||||
# 2. Remove the matching 0110 entry from the journal's "entries" array
|
||||
# packages/database/migrations/meta/_journal.json
|
||||
|
||||
# 3. Regenerate from the current schema
|
||||
bun run db:generate
|
||||
```
|
||||
|
||||
This keeps the generated SQL, snapshot, and journal aligned with the actual schema. Manual SQL edits are reserved for review-time hardening such as idempotent clauses, custom extension SQL, and meaningful filename/tag updates.
|
||||
|
||||
Before release, if a feature branch accumulated multiple development-only migrations, consolidate them into one migration when possible. Production does not need to replay every intermediate draft shape, and fewer migrations reduce deploy-time risk.
|
||||
|
||||
For example, if this branch added `0110`, `0111`, and `0112`, delete all three drafts and regenerate a single migration:
|
||||
|
||||
```bash
|
||||
# 1. Delete every draft SQL and snapshot this branch added
|
||||
rm packages/database/migrations/011{0,1,2}_*.sql
|
||||
rm packages/database/migrations/meta/011{0,1,2}_snapshot.json
|
||||
|
||||
# 2. Remove the 0110/0111/0112 entries from the journal's "entries" array
|
||||
# packages/database/migrations/meta/_journal.json
|
||||
|
||||
# 3. Regenerate one migration covering the full schema delta
|
||||
bun run db:generate
|
||||
```
|
||||
|
||||
Do not make a migration compatible with earlier development-only versions of the same branch. While the migration has not shipped, there is no production history to preserve. Fix local/dev databases directly with whatever SQL is simplest (drop the draft table, rename a column, delete draft rows), then regenerate the branch migration from the current schema.
|
||||
|
||||
For example, if an earlier draft on this branch created `signup_attempt_id` and you have since renamed it to `user_signup_log_id`, do not add a compatibility `ALTER ... RENAME` to the migration. Just fix the dev DB directly (see the `access-pg` skill for the `bun -e` + `pg` pattern), then regenerate:
|
||||
|
||||
```bash
|
||||
# Fix the dev DB to match the new schema (simplest SQL wins)
|
||||
set -a && source .env && set +a && bun -e '
|
||||
import pg from "pg";
|
||||
const client = new pg.Client({ connectionString: process.env.DATABASE_URL });
|
||||
await client.connect();
|
||||
await client.query("ALTER TABLE user_signup_logs DROP COLUMN signup_attempt_id");
|
||||
await client.end();
|
||||
'
|
||||
|
||||
# Regenerate so the migration reflects only the final shape
|
||||
bun run db:generate
|
||||
```
|
||||
|
||||
After a migration has reached production or the target default branch, treat it as immutable: add a follow-up migration instead of rewriting it.
|
||||
|
||||
## Rebase conflicts
|
||||
|
||||
When a rebase conflicts in migration files, keep the upstream/default-branch migrations and remove all migrations introduced by the current feature branch. Complete the rebase, then regenerate this branch's migration from the rebased schema. This avoids merging two independent snapshots or hand-splicing journal entries.
|
||||
|
||||
## Step 1: Generate Migrations
|
||||
|
||||
```bash
|
||||
bun run db:generate
|
||||
```
|
||||
|
||||
This generates:
|
||||
|
||||
- `packages/database/migrations/0046_meaningless_file_name.sql`
|
||||
|
||||
And updates:
|
||||
|
||||
- `packages/database/migrations/meta/_journal.json`
|
||||
- `packages/database/src/core/migrations.json`
|
||||
- `docs/development/database-schema.dbml`
|
||||
|
||||
## Custom Migrations (e.g. CREATE EXTENSION)
|
||||
|
||||
For migrations that don't involve Drizzle schema changes (e.g. enabling PostgreSQL extensions), use the `--custom` flag:
|
||||
|
||||
```bash
|
||||
bunx drizzle-kit generate --custom --name=enable_pg_search
|
||||
```
|
||||
|
||||
This generates an empty SQL file and properly updates `_journal.json` and snapshot. Then edit the generated SQL file to add your custom SQL:
|
||||
|
||||
```sql
|
||||
-- Custom SQL migration file, put your code below! --
|
||||
CREATE EXTENSION IF NOT EXISTS pg_search;
|
||||
```
|
||||
|
||||
**Do NOT manually create migration files or edit `_journal.json`** — always use `drizzle-kit generate` to ensure correct journal entries and snapshots.
|
||||
|
||||
## Step 2: Optimize Migration SQL Filename
|
||||
|
||||
Rename auto-generated filename to be meaningful:
|
||||
|
||||
`0046_meaningless_file_name.sql` → `0046_user_add_avatar_column.sql`
|
||||
|
||||
## Step 3: Use Idempotent Clauses (Defensive Programming)
|
||||
|
||||
Always use defensive clauses to make migrations idempotent (safe to re-run):
|
||||
|
||||
### CREATE TABLE
|
||||
|
||||
```sql
|
||||
-- ✅ Good
|
||||
CREATE TABLE IF NOT EXISTS "agent_eval_runs" (
|
||||
"id" text PRIMARY KEY NOT NULL,
|
||||
"name" text,
|
||||
"created_at" timestamp with time zone DEFAULT now() NOT NULL
|
||||
);
|
||||
|
||||
-- ❌ Bad
|
||||
CREATE TABLE "agent_eval_runs" (...);
|
||||
```
|
||||
|
||||
### ALTER TABLE - Columns
|
||||
|
||||
```sql
|
||||
-- ✅ Good
|
||||
ALTER TABLE "users" ADD COLUMN IF NOT EXISTS "avatar" text;
|
||||
ALTER TABLE "posts" DROP COLUMN IF EXISTS "deprecated_field";
|
||||
|
||||
-- ❌ Bad
|
||||
ALTER TABLE "users" ADD COLUMN "avatar" text;
|
||||
```
|
||||
|
||||
### ALTER TABLE - Foreign Key Constraints
|
||||
|
||||
PostgreSQL has no `ADD CONSTRAINT IF NOT EXISTS`. Use `DROP IF EXISTS` + `ADD`:
|
||||
|
||||
```sql
|
||||
-- ✅ Good: Drop first, then add (idempotent)
|
||||
ALTER TABLE "agent_eval_datasets" DROP CONSTRAINT IF EXISTS "agent_eval_datasets_user_id_users_id_fk";
|
||||
ALTER TABLE "agent_eval_datasets" ADD CONSTRAINT "agent_eval_datasets_user_id_users_id_fk"
|
||||
FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;
|
||||
|
||||
-- ❌ Bad: Will fail if constraint already exists
|
||||
ALTER TABLE "agent_eval_datasets" ADD CONSTRAINT "agent_eval_datasets_user_id_users_id_fk"
|
||||
FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;
|
||||
```
|
||||
|
||||
### DROP TABLE / INDEX
|
||||
|
||||
```sql
|
||||
-- ✅ Good
|
||||
DROP TABLE IF EXISTS "old_table";
|
||||
CREATE INDEX IF NOT EXISTS "users_email_idx" ON "users" ("email");
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS "users_email_unique" ON "users" USING btree ("email");
|
||||
|
||||
-- ❌ Bad
|
||||
DROP TABLE "old_table";
|
||||
CREATE INDEX "users_email_idx" ON "users" ("email");
|
||||
```
|
||||
|
||||
## Step 4: Update Journal Tag
|
||||
|
||||
After renaming the migration SQL file in Step 2, update the `tag` field in `packages/database/migrations/meta/_journal.json` to match the new filename (without `.sql` extension).
|
||||
@@ -1,66 +0,0 @@
|
||||
---
|
||||
name: debug-package
|
||||
description: 'LobeHub debug package and log namespace guide. Use when adding debug() logging, choosing lobe-* namespaces, troubleshooting DEBUG output, localStorage.debug, or log format specifiers.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Debug Package Usage Guide
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```typescript
|
||||
import debug from 'debug';
|
||||
|
||||
// Format: lobe-[module]:[submodule]
|
||||
const log = debug('lobe-server:market');
|
||||
|
||||
log('Simple message');
|
||||
log('With variable: %O', object);
|
||||
log('Formatted number: %d', number);
|
||||
```
|
||||
|
||||
## Namespace Conventions
|
||||
|
||||
- Desktop: `lobe-desktop:[module]`
|
||||
- Server: `lobe-server:[module]`
|
||||
- Client: `lobe-client:[module]`
|
||||
- Router: `lobe-[type]-router:[module]`
|
||||
|
||||
## Format Specifiers
|
||||
|
||||
- `%O` - Object expanded (recommended for complex objects)
|
||||
- `%o` - Object
|
||||
- `%s` - String
|
||||
- `%d` - Number
|
||||
|
||||
## Enable Debug Output
|
||||
|
||||
### Browser
|
||||
|
||||
```javascript
|
||||
localStorage.debug = 'lobe-*';
|
||||
```
|
||||
|
||||
### Node.js
|
||||
|
||||
```bash
|
||||
DEBUG=lobe-* npm run dev
|
||||
DEBUG=lobe-* pnpm dev
|
||||
```
|
||||
|
||||
### Electron
|
||||
|
||||
```typescript
|
||||
process.env.DEBUG = 'lobe-*';
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```typescript
|
||||
// apps/server/src/routers/edge/market/index.ts
|
||||
import debug from 'debug';
|
||||
|
||||
const log = debug('lobe-edge-router:market');
|
||||
|
||||
log('getAgent input: %O', input);
|
||||
```
|
||||
@@ -1,89 +0,0 @@
|
||||
---
|
||||
name: desktop
|
||||
description: Electron desktop development guide — IPC handlers, controllers, preload scripts, window/menu management.
|
||||
disable-model-invocation: true
|
||||
---
|
||||
|
||||
# Desktop Development Guide
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
LobeHub desktop is built on Electron with main-renderer architecture:
|
||||
|
||||
1. **Main Process** (`apps/desktop/src/main`): App lifecycle, system APIs, window management
|
||||
2. **Renderer Process**: Reuses web code from `src/`
|
||||
3. **Preload Scripts** (`apps/desktop/src/preload`): Securely expose main process to renderer
|
||||
|
||||
## Adding New Desktop Features
|
||||
|
||||
### 1. Create Controller
|
||||
|
||||
Location: `apps/desktop/src/main/controllers/`
|
||||
|
||||
```typescript
|
||||
import { ControllerModule, IpcMethod } from '@/controllers';
|
||||
|
||||
export default class NewFeatureCtr extends ControllerModule {
|
||||
static override readonly groupName = 'newFeature';
|
||||
|
||||
@IpcMethod()
|
||||
async doSomething(params: SomeParams): Promise<SomeResult> {
|
||||
// Implementation
|
||||
return { success: true };
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Register in `apps/desktop/src/main/controllers/registry.ts`.
|
||||
|
||||
### 2. Define IPC Types
|
||||
|
||||
Location: `packages/electron-client-ipc/src/types.ts`
|
||||
|
||||
```typescript
|
||||
export interface SomeParams {
|
||||
/* ... */
|
||||
}
|
||||
export interface SomeResult {
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Create Renderer Service
|
||||
|
||||
Location: `src/services/electron/`
|
||||
|
||||
```typescript
|
||||
import { ensureElectronIpc } from '@/utils/electron/ipc';
|
||||
|
||||
const ipc = ensureElectronIpc();
|
||||
|
||||
export const newFeatureService = async (params: SomeParams) => {
|
||||
return ipc.newFeature.doSomething(params);
|
||||
};
|
||||
```
|
||||
|
||||
### 4. Implement Store Action
|
||||
|
||||
Location: `src/store/`
|
||||
|
||||
### 5. Add Tests
|
||||
|
||||
Location: `apps/desktop/src/main/controllers/__tests__/`
|
||||
|
||||
## Detailed Guides
|
||||
|
||||
See `references/` for specific topics:
|
||||
|
||||
- **Feature implementation**: `references/feature-implementation.md`
|
||||
- **Local tools workflow**: `references/local-tools.md`
|
||||
- **Menu configuration**: `references/menu-config.md`
|
||||
- **Window management**: `references/window-management.md`
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Security**: Validate inputs, limit exposed APIs
|
||||
2. **Performance**: Use async methods, batch data transfers
|
||||
3. **UX**: Add progress indicators, provide error feedback
|
||||
4. **Code organization**: Follow existing patterns, add documentation
|
||||
@@ -1,103 +0,0 @@
|
||||
# Desktop Feature Implementation Guide
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```plaintext
|
||||
Main Process Renderer Process
|
||||
┌──────────────────┐ ┌──────────────────┐
|
||||
│ Controller │◄──IPC───►│ Service Layer │
|
||||
│ (IPC Handler) │ │ │
|
||||
└──────────────────┘ └──────────────────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
┌──────────────────┐ ┌──────────────────┐
|
||||
│ System APIs │ │ Store Actions │
|
||||
│ (fs, network) │ │ (UI State) │
|
||||
└──────────────────┘ └──────────────────┘
|
||||
```
|
||||
|
||||
## Step-by-Step Implementation
|
||||
|
||||
### 1. Create Controller
|
||||
|
||||
```typescript
|
||||
// apps/desktop/src/main/controllers/NotificationCtr.ts
|
||||
import type {
|
||||
ShowDesktopNotificationParams,
|
||||
DesktopNotificationResult,
|
||||
} from '@lobechat/electron-client-ipc';
|
||||
import { Notification } from 'electron';
|
||||
import { ControllerModule, IpcMethod } from '@/controllers';
|
||||
|
||||
export default class NotificationCtr extends ControllerModule {
|
||||
static override readonly groupName = 'notification';
|
||||
|
||||
@IpcMethod()
|
||||
async showDesktopNotification(
|
||||
params: ShowDesktopNotificationParams,
|
||||
): Promise<DesktopNotificationResult> {
|
||||
if (!Notification.isSupported()) {
|
||||
return { error: 'Notifications not supported', success: false };
|
||||
}
|
||||
|
||||
try {
|
||||
const notification = new Notification({ body: params.body, title: params.title });
|
||||
notification.show();
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
console.error('[NotificationCtr] Failed:', error);
|
||||
return { error: error instanceof Error ? error.message : 'Unknown error', success: false };
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Define IPC Types
|
||||
|
||||
```typescript
|
||||
// packages/electron-client-ipc/src/types.ts
|
||||
export interface ShowDesktopNotificationParams {
|
||||
title: string;
|
||||
body: string;
|
||||
}
|
||||
|
||||
export interface DesktopNotificationResult {
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Create Service Layer
|
||||
|
||||
```typescript
|
||||
// src/services/electron/notificationService.ts
|
||||
import type { ShowDesktopNotificationParams } from '@lobechat/electron-client-ipc';
|
||||
import { ensureElectronIpc } from '@/utils/electron/ipc';
|
||||
|
||||
const ipc = ensureElectronIpc();
|
||||
|
||||
export const notificationService = {
|
||||
show: (params: ShowDesktopNotificationParams) => ipc.notification.showDesktopNotification(params),
|
||||
};
|
||||
```
|
||||
|
||||
### 4. Implement Store Action
|
||||
|
||||
```typescript
|
||||
// src/store/.../actions.ts
|
||||
showNotification: async (title: string, body: string) => {
|
||||
if (!isElectron) return;
|
||||
|
||||
const result = await notificationService.show({ title, body });
|
||||
if (!result.success) {
|
||||
console.error('Notification failed:', result.error);
|
||||
}
|
||||
},
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Security**: Validate inputs, limit exposed APIs
|
||||
2. **Performance**: Use async methods for heavy operations
|
||||
3. **Error handling**: Always return structured results
|
||||
4. **UX**: Provide loading states and error feedback
|
||||
@@ -1,133 +0,0 @@
|
||||
# Desktop Local Tools Implementation
|
||||
|
||||
## Workflow Overview
|
||||
|
||||
1. Define tool interface (Manifest)
|
||||
2. Define related types
|
||||
3. Implement Store Action
|
||||
4. Implement Service Layer
|
||||
5. Implement Controller (IPC Handler)
|
||||
6. Update Agent documentation
|
||||
|
||||
## Step 1: Define Tool Interface (Manifest)
|
||||
|
||||
Location: `src/tools/[tool_category]/index.ts`
|
||||
|
||||
```typescript
|
||||
// src/tools/local-files/index.ts
|
||||
export const LocalFilesApiName = {
|
||||
RenameFile: 'renameFile',
|
||||
MoveFile: 'moveFile',
|
||||
} as const;
|
||||
|
||||
export const LocalFilesManifest = {
|
||||
api: [
|
||||
{
|
||||
name: LocalFilesApiName.RenameFile,
|
||||
description: 'Rename a local file',
|
||||
parameters: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
oldPath: { type: 'string', description: 'Current file path' },
|
||||
newName: { type: 'string', description: 'New file name' },
|
||||
},
|
||||
required: ['oldPath', 'newName'],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
```
|
||||
|
||||
## Step 2: Define Types
|
||||
|
||||
```typescript
|
||||
// packages/electron-client-ipc/src/types.ts
|
||||
export interface RenameLocalFileParams {
|
||||
oldPath: string;
|
||||
newName: string;
|
||||
}
|
||||
|
||||
// src/tools/local-files/type.ts
|
||||
export interface LocalRenameFileState {
|
||||
success: boolean;
|
||||
error?: string;
|
||||
oldPath: string;
|
||||
newPath: string;
|
||||
}
|
||||
```
|
||||
|
||||
## Step 3: Implement Store Action
|
||||
|
||||
```typescript
|
||||
// src/store/chat/slices/builtinTool/actions/localFile.ts
|
||||
renameLocalFile: async (id: string, params: RenameLocalFileParams) => {
|
||||
const { toggleLocalFileLoading, updatePluginState, internal_updateMessageContent } = get();
|
||||
|
||||
toggleLocalFileLoading(id, true);
|
||||
|
||||
try {
|
||||
const result = await localFileService.renameFile(params);
|
||||
|
||||
if (result.success) {
|
||||
updatePluginState(id, { success: true, ...result });
|
||||
internal_updateMessageContent(id, JSON.stringify({ success: true }));
|
||||
} else {
|
||||
updatePluginState(id, { success: false, error: result.error });
|
||||
internal_updateMessageContent(id, JSON.stringify({ error: result.error }));
|
||||
}
|
||||
|
||||
return result.success;
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
updatePluginState(id, { success: false, error: e.message });
|
||||
return false;
|
||||
} finally {
|
||||
toggleLocalFileLoading(id, false);
|
||||
}
|
||||
},
|
||||
```
|
||||
|
||||
## Step 4: Implement Service Layer
|
||||
|
||||
```typescript
|
||||
// src/services/electron/localFileService.ts
|
||||
import { ensureElectronIpc } from '@/utils/electron/ipc';
|
||||
|
||||
const ipc = ensureElectronIpc();
|
||||
|
||||
export const localFileService = {
|
||||
renameFile: (params: RenameLocalFileParams) => ipc.localFiles.renameFile(params),
|
||||
};
|
||||
```
|
||||
|
||||
## Step 5: Implement Controller
|
||||
|
||||
```typescript
|
||||
// apps/desktop/src/main/controllers/LocalFileCtr.ts
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
import { ControllerModule, IpcMethod } from '@/controllers';
|
||||
|
||||
export default class LocalFileCtr extends ControllerModule {
|
||||
static override readonly groupName = 'localFiles';
|
||||
|
||||
@IpcMethod()
|
||||
async renameFile(params: RenameLocalFileParams) {
|
||||
const { oldPath, newName } = params;
|
||||
const newPath = path.join(path.dirname(oldPath), newName);
|
||||
|
||||
try {
|
||||
await fs.rename(oldPath, newPath);
|
||||
return { success: true, newPath };
|
||||
} catch (error) {
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Step 6: Update Agent Documentation
|
||||
|
||||
Location: `src/tools/[tool_category]/systemRole.ts`
|
||||
|
||||
Add tool description to `<core_capabilities>` and usage guidelines to `<tool_usage_guidelines>`.
|
||||
@@ -1,107 +0,0 @@
|
||||
# Desktop Menu Configuration Guide
|
||||
|
||||
## Menu Types
|
||||
|
||||
1. **App Menu**: Top of window (macOS) or title bar (Windows/Linux)
|
||||
2. **Context Menu**: Right-click menus
|
||||
3. **Tray Menu**: System tray icon menus
|
||||
|
||||
## File Structure
|
||||
|
||||
```plaintext
|
||||
apps/desktop/src/main/
|
||||
├── menus/
|
||||
│ ├── appMenu.ts # App menu config
|
||||
│ ├── contextMenu.ts # Context menu config
|
||||
│ └── factory.ts # Menu factory functions
|
||||
├── controllers/
|
||||
│ ├── MenuCtr.ts # Menu controller
|
||||
│ └── TrayMenuCtr.ts # Tray menu controller
|
||||
```
|
||||
|
||||
## App Menu Configuration
|
||||
|
||||
```typescript
|
||||
// apps/desktop/src/main/menus/appMenu.ts
|
||||
import { BrowserWindow, Menu, MenuItemConstructorOptions } from 'electron';
|
||||
|
||||
export const createAppMenu = (win: BrowserWindow) => {
|
||||
const template: MenuItemConstructorOptions[] = [
|
||||
{
|
||||
label: 'File',
|
||||
submenu: [
|
||||
{
|
||||
label: 'New',
|
||||
accelerator: 'CmdOrCtrl+N',
|
||||
click: () => {
|
||||
/* ... */
|
||||
},
|
||||
},
|
||||
{ type: 'separator' },
|
||||
{ role: 'quit' },
|
||||
],
|
||||
},
|
||||
// ...
|
||||
];
|
||||
|
||||
return Menu.buildFromTemplate(template);
|
||||
};
|
||||
|
||||
// Register in MenuCtr.ts
|
||||
Menu.setApplicationMenu(menu);
|
||||
```
|
||||
|
||||
## Context Menu
|
||||
|
||||
```typescript
|
||||
export const createContextMenu = () => {
|
||||
const template = [
|
||||
{ label: 'Copy', role: 'copy' },
|
||||
{ label: 'Paste', role: 'paste' },
|
||||
];
|
||||
return Menu.buildFromTemplate(template);
|
||||
};
|
||||
|
||||
// Show on right-click
|
||||
const menu = createContextMenu();
|
||||
menu.popup();
|
||||
```
|
||||
|
||||
## Tray Menu
|
||||
|
||||
```typescript
|
||||
// TrayMenuCtr.ts
|
||||
this.tray = new Tray(trayIconPath);
|
||||
const contextMenu = Menu.buildFromTemplate([
|
||||
{ label: 'Show Window', click: this.showMainWindow },
|
||||
{ type: 'separator' },
|
||||
{ label: 'Quit', click: () => app.quit() },
|
||||
]);
|
||||
this.tray.setContextMenu(contextMenu);
|
||||
```
|
||||
|
||||
## i18n Support
|
||||
|
||||
```typescript
|
||||
import { i18n } from '../locales';
|
||||
|
||||
const template = [
|
||||
{
|
||||
label: i18n.t('menu.file'),
|
||||
submenu: [{ label: i18n.t('menu.new'), click: createNew }],
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. Use standard roles (`role: 'copy'`) for native behavior
|
||||
2. Use `CmdOrCtrl` for cross-platform shortcuts
|
||||
3. Use `{ type: 'separator' }` to group related items
|
||||
4. Handle platform differences with `process.platform`
|
||||
|
||||
```typescript
|
||||
if (process.platform === 'darwin') {
|
||||
template.unshift({ role: 'appMenu' });
|
||||
}
|
||||
```
|
||||
@@ -1,147 +0,0 @@
|
||||
# Desktop Window Management Guide
|
||||
|
||||
## Window Management Overview
|
||||
|
||||
1. Window creation and configuration
|
||||
2. Window state management (size, position, maximize)
|
||||
3. Multi-window coordination
|
||||
4. Window event handling
|
||||
|
||||
## File Structure
|
||||
|
||||
```plaintext
|
||||
apps/desktop/src/main/
|
||||
├── appBrowsers.ts # Core window management
|
||||
├── controllers/
|
||||
│ └── BrowserWindowsCtr.ts # Window controller
|
||||
└── modules/
|
||||
└── browserWindowManager.ts # Window manager module
|
||||
```
|
||||
|
||||
## Window Creation
|
||||
|
||||
```typescript
|
||||
export const createMainWindow = () => {
|
||||
const mainWindow = new BrowserWindow({
|
||||
width: 1200,
|
||||
height: 800,
|
||||
minWidth: 600,
|
||||
minHeight: 400,
|
||||
webPreferences: {
|
||||
preload: path.join(__dirname, '../preload/index.js'),
|
||||
contextIsolation: true,
|
||||
nodeIntegration: false,
|
||||
},
|
||||
});
|
||||
|
||||
if (isDev) {
|
||||
mainWindow.loadURL('http://localhost:3000');
|
||||
} else {
|
||||
mainWindow.loadFile(path.join(__dirname, '../../renderer/index.html'));
|
||||
}
|
||||
|
||||
return mainWindow;
|
||||
};
|
||||
```
|
||||
|
||||
## Window State Persistence
|
||||
|
||||
```typescript
|
||||
const saveWindowState = (window: BrowserWindow) => {
|
||||
if (!window.isMinimized() && !window.isMaximized()) {
|
||||
const [x, y] = window.getPosition();
|
||||
const [width, height] = window.getSize();
|
||||
settings.set('windowState', { x, y, width, height });
|
||||
}
|
||||
};
|
||||
|
||||
const restoreWindowState = (window: BrowserWindow) => {
|
||||
const state = settings.get('windowState');
|
||||
if (state) {
|
||||
window.setBounds({ x: state.x, y: state.y, width: state.width, height: state.height });
|
||||
}
|
||||
};
|
||||
|
||||
window.on('close', () => saveWindowState(window));
|
||||
```
|
||||
|
||||
## Multi-Window Management
|
||||
|
||||
```typescript
|
||||
export class WindowManager {
|
||||
private windows: Map<string, BrowserWindow> = new Map();
|
||||
|
||||
createWindow(id: string, options: BrowserWindowConstructorOptions) {
|
||||
const window = new BrowserWindow(options);
|
||||
this.windows.set(id, window);
|
||||
window.on('closed', () => this.windows.delete(id));
|
||||
return window;
|
||||
}
|
||||
|
||||
getWindow(id: string) {
|
||||
return this.windows.get(id);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Window IPC Controller
|
||||
|
||||
```typescript
|
||||
// apps/desktop/src/main/controllers/BrowserWindowsCtr.ts
|
||||
export default class BrowserWindowsCtr extends ControllerModule {
|
||||
static override readonly groupName = 'windows';
|
||||
|
||||
@IpcMethod()
|
||||
minimizeWindow() {
|
||||
BrowserWindow.getFocusedWindow()?.minimize();
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
@IpcMethod()
|
||||
maximizeWindow() {
|
||||
const win = BrowserWindow.getFocusedWindow();
|
||||
win?.isMaximized() ? win.restore() : win?.maximize();
|
||||
return { success: true };
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Renderer Service
|
||||
|
||||
```typescript
|
||||
// src/services/electron/windowService.ts
|
||||
import { ensureElectronIpc } from '@/utils/electron/ipc';
|
||||
|
||||
const ipc = ensureElectronIpc();
|
||||
|
||||
export const windowService = {
|
||||
minimize: () => ipc.windows.minimizeWindow(),
|
||||
maximize: () => ipc.windows.maximizeWindow(),
|
||||
close: () => ipc.windows.closeWindow(),
|
||||
};
|
||||
```
|
||||
|
||||
## Frameless Window
|
||||
|
||||
```typescript
|
||||
const window = new BrowserWindow({
|
||||
frame: false,
|
||||
titleBarStyle: 'hidden',
|
||||
});
|
||||
```
|
||||
|
||||
```css
|
||||
.titlebar {
|
||||
-webkit-app-region: drag;
|
||||
}
|
||||
.titlebar-button {
|
||||
-webkit-app-region: no-drag;
|
||||
}
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. Use `show: false` initially, show after content loads
|
||||
2. Always set secure `webPreferences`
|
||||
3. Handle `webContents.on('crashed')` for recovery
|
||||
4. Clean up resources on `window.on('closed')`
|
||||
@@ -1,155 +0,0 @@
|
||||
---
|
||||
name: docs-changelog
|
||||
description: 'Write website changelog pages under docs/changelog/*.mdx. Use for EN/ZH product update posts, changelog posts, update-log copy, or docs changelog edits; not GitHub Release notes.'
|
||||
---
|
||||
|
||||
# Docs Changelog Writing Guide
|
||||
|
||||
## Scope Boundary (Important)
|
||||
|
||||
This skill is only for changelog pages in:
|
||||
|
||||
- `docs/changelog/*.mdx`
|
||||
|
||||
This skill is **not** for GitHub Releases.\
|
||||
If the user asks for release PR body / GitHub Release notes, load `../version-release/SKILL.md`.
|
||||
|
||||
## Mandatory Companion Skills
|
||||
|
||||
For every docs changelog task, you MUST load:
|
||||
|
||||
- `../microcopy/SKILL.md`
|
||||
- `../i18n/SKILL.md` (when EN/ZH pair is involved)
|
||||
|
||||
## File and Naming Convention
|
||||
|
||||
Use date-based file names:
|
||||
|
||||
- English: `docs/changelog/YYYY-MM-DD-topic.mdx`
|
||||
- Chinese: `docs/changelog/YYYY-MM-DD-topic.zh-CN.mdx`
|
||||
|
||||
EN and ZH files must exist as a pair and describe the same release facts.
|
||||
|
||||
## Frontmatter Requirements
|
||||
|
||||
Each file should include:
|
||||
|
||||
```md
|
||||
---
|
||||
title: <Title>
|
||||
description: <1 sentence summary>
|
||||
tags:
|
||||
- <Tag 1>
|
||||
- <Tag 2>
|
||||
---
|
||||
```
|
||||
|
||||
Rules:
|
||||
|
||||
1. `title` should match the H1 title in meaning.
|
||||
2. `description` should be concise and user-facing.
|
||||
3. `tags` should be feature-oriented, not internal-team labels.
|
||||
|
||||
## Content Structure (Recommended)
|
||||
|
||||
Use this shape unless the user requests otherwise:
|
||||
|
||||
1. `# <Title>`
|
||||
2. Opening paragraph (2-4 sentences): user-visible impact
|
||||
3. 1-3 capability sections (optional `##` headings)
|
||||
4. `## Improvements and fixes` / `## 体验优化与修复` with concise bullets
|
||||
|
||||
Keep heading count low and avoid heading-per-bullet structure.
|
||||
|
||||
## Writing Rules
|
||||
|
||||
1. Keep all claims factual and tied to actual shipped changes.
|
||||
2. Explain user value first, implementation second.
|
||||
3. Prefer natural narrative paragraphs over pure bullet dumps.
|
||||
4. Avoid marketing exaggeration and vague adjectives.
|
||||
5. Keep internal terms consistent across EN/ZH files.
|
||||
6. Keep EN/ZH section order aligned and scope-aligned.
|
||||
|
||||
## EN/ZH Synchronization Rules
|
||||
|
||||
When generating bilingual changelogs:
|
||||
|
||||
1. Keep the same key facts in the same order.
|
||||
2. Localize naturally; do not do literal sentence-by-sentence translation.
|
||||
3. If one version has an `Improvements and fixes` bullet list, the other should have equivalent list intent.
|
||||
4. Do not introduce capabilities in only one language unless explicitly requested.
|
||||
|
||||
## Length Guidance
|
||||
|
||||
- Small update: 3-5 short paragraphs total
|
||||
- Medium update: 4-7 short paragraphs + concise fix bullets
|
||||
- Large update: 6-10 short paragraphs split into 2-4 sections
|
||||
|
||||
Do not pad content when changes are limited.
|
||||
|
||||
## Authoring Workflow
|
||||
|
||||
1. Collect source facts from PRs/commits/issues.
|
||||
2. Group changes by user workflow (not by internal module path).
|
||||
3. Draft EN and ZH versions with aligned structure.
|
||||
4. Verify terminology using `microcopy`/`i18n` guidance.
|
||||
5. Final pass: remove AI-like filler and tighten sentences.
|
||||
|
||||
## Docs Changelog Template (English)
|
||||
|
||||
```md
|
||||
---
|
||||
title: <Feature title>
|
||||
description: <One-sentence summary for users>
|
||||
tags:
|
||||
- <Tag A>
|
||||
- <Tag B>
|
||||
---
|
||||
|
||||
# <Feature title>
|
||||
|
||||
<Opening paragraph: what changed for users and why it matters.>
|
||||
|
||||
<Optional section paragraph for key capability 1.>
|
||||
|
||||
<Optional section paragraph for key capability 2.>
|
||||
|
||||
## Improvements and fixes
|
||||
|
||||
- <Fix or optimization 1>
|
||||
- <Fix or optimization 2>
|
||||
```
|
||||
|
||||
## Docs Changelog Template (Chinese)
|
||||
|
||||
```md
|
||||
---
|
||||
title: <功能标题>
|
||||
description: <一句话说明>
|
||||
tags:
|
||||
- <标签 A>
|
||||
- <标签 B>
|
||||
---
|
||||
|
||||
# <功能标题>
|
||||
|
||||
<开场段:这次更新给用户带来的直接变化。>
|
||||
|
||||
<可选能力段 1。>
|
||||
|
||||
<可选能力段 2。>
|
||||
|
||||
## 体验优化与修复
|
||||
|
||||
- <优化或修复 1>
|
||||
- <优化或修复 2>
|
||||
```
|
||||
|
||||
## Quick Checklist
|
||||
|
||||
- [ ] File path matches `docs/changelog` naming convention
|
||||
- [ ] EN and ZH versions both exist and match in facts
|
||||
- [ ] Opening paragraph explains user-facing outcome
|
||||
- [ ] Main body is narrative-first, not bullet-only
|
||||
- [ ] `Improvements and fixes` section is concise and concrete
|
||||
- [ ] No fabricated claims or unsupported scope
|
||||
@@ -1,382 +0,0 @@
|
||||
---
|
||||
name: drizzle
|
||||
description: 'LobeHub Drizzle ORM schema and query style. Use for pgTable schemas, indexes, joins, inferred types, db.select/db.query, schema fields, foreign keys, junction tables, or postgres query patterns.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Drizzle ORM Schema Style Guide
|
||||
|
||||
> **Adding a Model or Repository?** Ship a sibling test in the same PR — every new
|
||||
> file under `packages/database/src/models/**` or `src/repositories/**` needs a
|
||||
> matching `__tests__/<name>.test.ts`. See the **testing** skill
|
||||
> (`.agents/skills/testing/references/db-model-test.md`) for the `getTestDB()`
|
||||
> integration pattern, user-isolation tests, the BM25 `describe.skipIf(!isServerDB)`
|
||||
> guard, and schema gotchas. CI's coverage patch gate won't reliably catch a brand-new
|
||||
> untested file, so this is on you.
|
||||
|
||||
## Configuration
|
||||
|
||||
- Config: `drizzle.config.ts`
|
||||
- Schemas: `packages/database/src/schemas/`
|
||||
- Migrations: `packages/database/migrations/`
|
||||
- Dialect: `postgresql` with `strict: true`
|
||||
|
||||
## Helper Functions
|
||||
|
||||
Location: `packages/database/src/schemas/_helpers.ts`
|
||||
|
||||
- `timestamptz(name)`: Timestamp with timezone
|
||||
- `createdAt()`, `updatedAt()`, `accessedAt()`: Standard timestamp columns
|
||||
- `timestamps`: Object with all three for easy spread
|
||||
|
||||
## Naming Conventions
|
||||
|
||||
- **Tables**: Plural snake_case (`users`, `session_groups`)
|
||||
- **Columns**: snake_case (`user_id`, `created_at`)
|
||||
- **New tables**: Check nearby existing tables before naming a new one. Preserve
|
||||
the established noun family and suffix. For example, if the user-scoped table
|
||||
is `user_xxx_logs`, the workspace-scoped counterpart should be
|
||||
`workspace_xxx_logs`, not `workspace_xxx_records` or another new synonym.
|
||||
|
||||
```typescript
|
||||
// ✅ Good: follows the existing user/workspace table family.
|
||||
export const userSignupLogs = pgTable('user_signup_logs', { ... });
|
||||
export const workspaceSignupLogs = pgTable('workspace_signup_logs', { ... });
|
||||
|
||||
// ❌ Bad: introduces a new suffix for the same concept.
|
||||
export const workspaceSignupRecords = pgTable('workspace_signup_records', { ... });
|
||||
```
|
||||
|
||||
## Column Definitions
|
||||
|
||||
### Primary Keys
|
||||
|
||||
Do not use auto-incrementing primary keys (`serial`, `bigserial`, generated
|
||||
identity columns). They create sequence-state problems during cross-database
|
||||
migrations, restores, and data copy jobs. Prefer text IDs from application
|
||||
generators (`idGenerator`, `createNanoId`) or `uuid` for internal tables.
|
||||
|
||||
Keep `$defaultFn(...)` when a table normally owns ID generation. Callers can
|
||||
still pass an explicit `id`; the default only runs when the insert omits it. Do
|
||||
not remove the default just because one flow needs to supply a request-scoped ID.
|
||||
|
||||
```typescript
|
||||
// ✅ Good: app-generated text ID; explicit inserts can still override it.
|
||||
id: text('id')
|
||||
.primaryKey()
|
||||
.$defaultFn(() => idGenerator('agents'))
|
||||
.notNull(),
|
||||
|
||||
// ❌ Bad: sequence state is fragile across DB migrations and restores.
|
||||
id: serial('id').primaryKey(),
|
||||
```
|
||||
|
||||
ID prefixes make entity types distinguishable. For internal tables, use `uuid`.
|
||||
|
||||
### Foreign Keys
|
||||
|
||||
```typescript
|
||||
userId: text('user_id')
|
||||
.references(() => users.id, { onDelete: 'cascade' })
|
||||
.notNull(),
|
||||
```
|
||||
|
||||
### Timestamps
|
||||
|
||||
```typescript
|
||||
...timestamps, // Spread from _helpers.ts
|
||||
```
|
||||
|
||||
### Optional and Undefined Values
|
||||
|
||||
Do not introduce artificial sentinel strings for missing values, such as
|
||||
`unknown`, unless the domain already has that explicit state and existing code
|
||||
uses it consistently. Prefer nullable columns, optional TypeScript fields, or a
|
||||
separate concrete status enum when the value is genuinely absent.
|
||||
|
||||
```typescript
|
||||
// ✅ Good: absent until the final stage writes a real decision.
|
||||
export type UserSignupLogFinalDecision = 'allow' | 'block' | 'error';
|
||||
|
||||
finalDecision: varchar('final_decision', { length: 32 }).$type<UserSignupLogFinalDecision>(),
|
||||
|
||||
// ❌ Bad: invents a new state that callers now need to handle everywhere.
|
||||
export type UserSignupLogFinalDecision = 'allow' | 'block' | 'error' | 'unknown';
|
||||
|
||||
finalDecision: varchar('final_decision', { length: 32 })
|
||||
.$type<UserSignupLogFinalDecision>()
|
||||
.notNull()
|
||||
.default('unknown');
|
||||
```
|
||||
|
||||
### Field Descriptions
|
||||
|
||||
For columns whose meaning is not obvious from the name alone, add JSDoc on the
|
||||
schema field. Include a concrete example when it clarifies the stored value or
|
||||
the lifecycle moment that writes it. This is especially important for external
|
||||
IDs, lifecycle statuses, denormalized snapshots, JSONB signals, and fields whose
|
||||
name could mean either a request ID or a persisted row ID.
|
||||
|
||||
```typescript
|
||||
// ✅ Good: explain the table's business object first, then only document
|
||||
// non-obvious lifecycle or risk-control fields.
|
||||
/**
|
||||
* User signup logs - one row per signup flow, collecting stage-level
|
||||
* risk-control decisions before and after the auth provider creates a user.
|
||||
*/
|
||||
export const userSignupLogs = pgTable('user_signup_logs', {
|
||||
/** Final signup outcome reason, for example user_created, llm_block, or guard_error */
|
||||
finalReason: text('final_reason'),
|
||||
|
||||
/** Aggregated risk level derived from stage decisions, for example block -> high */
|
||||
riskLevel: varchar('risk_level', { length: 16 }).$type<UserSignupLogRiskLevel>(),
|
||||
|
||||
/** Ordered stage-level decisions and metadata grouped by signup review stage */
|
||||
stageResults: jsonb('stage_results').$type<UserSignupLogStageResults>(),
|
||||
});
|
||||
|
||||
// ❌ Bad: comments restate obvious column names without adding domain meaning.
|
||||
/** User email */
|
||||
email: text('email'),
|
||||
```
|
||||
|
||||
### JSONB Types
|
||||
|
||||
Avoid `Record<string, unknown>` or similarly loose JSONB types for schema
|
||||
columns. Define a concrete interface that describes the expected JSON shape, even
|
||||
when most properties are optional. This keeps callers, migrations, and review
|
||||
queries aligned on the same data contract.
|
||||
|
||||
```typescript
|
||||
interface UserSignupLogMetadata {
|
||||
payloadPath?: string;
|
||||
requestPath?: string;
|
||||
}
|
||||
|
||||
metadata: jsonb('metadata').$type<UserSignupLogMetadata>(),
|
||||
```
|
||||
|
||||
```typescript
|
||||
// ❌ Bad: hides the contract and makes downstream access untyped.
|
||||
metadata: jsonb('metadata').$type<Record<string, unknown>>(),
|
||||
```
|
||||
|
||||
### Indexes
|
||||
|
||||
```typescript
|
||||
// Return array (object style deprecated)
|
||||
(t) => [uniqueIndex('client_id_user_id_unique').on(t.clientId, t.userId)],
|
||||
```
|
||||
|
||||
## Type Inference
|
||||
|
||||
```typescript
|
||||
export const insertAgentSchema = createInsertSchema(agents);
|
||||
export type NewAgent = typeof agents.$inferInsert;
|
||||
export type AgentItem = typeof agents.$inferSelect;
|
||||
```
|
||||
|
||||
## Example Pattern
|
||||
|
||||
```typescript
|
||||
export const agents = pgTable(
|
||||
'agents',
|
||||
{
|
||||
id: text('id')
|
||||
.primaryKey()
|
||||
.$defaultFn(() => idGenerator('agents'))
|
||||
.notNull(),
|
||||
slug: varchar('slug', { length: 100 })
|
||||
.$defaultFn(() => randomSlug(4))
|
||||
.unique(),
|
||||
userId: text('user_id')
|
||||
.references(() => users.id, { onDelete: 'cascade' })
|
||||
.notNull(),
|
||||
clientId: text('client_id'),
|
||||
chatConfig: jsonb('chat_config').$type<LobeAgentChatConfig>(),
|
||||
...timestamps,
|
||||
},
|
||||
(t) => [uniqueIndex('client_id_user_id_unique').on(t.clientId, t.userId)],
|
||||
);
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Junction Tables (Many-to-Many)
|
||||
|
||||
```typescript
|
||||
export const agentsKnowledgeBases = pgTable(
|
||||
'agents_knowledge_bases',
|
||||
{
|
||||
agentId: text('agent_id')
|
||||
.references(() => agents.id, { onDelete: 'cascade' })
|
||||
.notNull(),
|
||||
knowledgeBaseId: text('knowledge_base_id')
|
||||
.references(() => knowledgeBases.id, { onDelete: 'cascade' })
|
||||
.notNull(),
|
||||
userId: text('user_id')
|
||||
.references(() => users.id, { onDelete: 'cascade' })
|
||||
.notNull(),
|
||||
enabled: boolean('enabled').default(true),
|
||||
...timestamps,
|
||||
},
|
||||
(t) => [primaryKey({ columns: [t.agentId, t.knowledgeBaseId] })],
|
||||
);
|
||||
```
|
||||
|
||||
## Query Style
|
||||
|
||||
**Always use `db.select()` builder API. Never use `db.query.*` relational API** (`findMany`, `findFirst`, `with:`).
|
||||
|
||||
The relational API generates complex lateral joins with `json_build_array` that are fragile and hard to debug.
|
||||
|
||||
### Select Single Row
|
||||
|
||||
```typescript
|
||||
// ✅ Good
|
||||
const [result] = await this.db.select().from(agents).where(eq(agents.id, id)).limit(1);
|
||||
return result;
|
||||
|
||||
// ❌ Bad: relational API
|
||||
return this.db.query.agents.findFirst({
|
||||
where: eq(agents.id, id),
|
||||
});
|
||||
```
|
||||
|
||||
### Select with JOIN
|
||||
|
||||
```typescript
|
||||
// ✅ Good: explicit select + leftJoin
|
||||
const rows = await this.db
|
||||
.select({
|
||||
runId: agentEvalRunTopics.runId,
|
||||
score: agentEvalRunTopics.score,
|
||||
testCase: agentEvalTestCases,
|
||||
topic: topics,
|
||||
})
|
||||
.from(agentEvalRunTopics)
|
||||
.leftJoin(agentEvalTestCases, eq(agentEvalRunTopics.testCaseId, agentEvalTestCases.id))
|
||||
.leftJoin(topics, eq(agentEvalRunTopics.topicId, topics.id))
|
||||
.where(eq(agentEvalRunTopics.runId, runId))
|
||||
.orderBy(asc(agentEvalRunTopics.createdAt));
|
||||
|
||||
// ❌ Bad: relational API with `with:`
|
||||
return this.db.query.agentEvalRunTopics.findMany({
|
||||
where: eq(agentEvalRunTopics.runId, runId),
|
||||
with: { testCase: true, topic: true },
|
||||
});
|
||||
```
|
||||
|
||||
### Select with Aggregation
|
||||
|
||||
```typescript
|
||||
// ✅ Good: select + leftJoin + groupBy
|
||||
const rows = await this.db
|
||||
.select({
|
||||
id: agentEvalDatasets.id,
|
||||
name: agentEvalDatasets.name,
|
||||
testCaseCount: count(agentEvalTestCases.id).as('testCaseCount'),
|
||||
})
|
||||
.from(agentEvalDatasets)
|
||||
.leftJoin(agentEvalTestCases, eq(agentEvalDatasets.id, agentEvalTestCases.datasetId))
|
||||
.groupBy(agentEvalDatasets.id);
|
||||
```
|
||||
|
||||
### Raw SQL and Advanced Queries
|
||||
|
||||
Prefer Drizzle builders whenever the query reads clearly with `select`,
|
||||
`insert().select()`, `update().from()`, joins, CTEs, and `groupBy` — this keeps
|
||||
table/column references tied to schema, so changes surface as TypeScript errors.
|
||||
Within a builder, expression-level `sql<T>` is fine for features lacking a helper
|
||||
(JSON path, casts, aggregates, `CASE`, `NOW()`). Row locks are clauses, not
|
||||
expressions — use `.for('update')`, never raw `FOR UPDATE`.
|
||||
|
||||
Use `COALESCE` only when null-handling is part of required DB semantics (nullable
|
||||
JSONB append/merge, "keep first non-null"). Don't scatter
|
||||
`COALESCE(excluded.col, current.col)` across ordinary upsert scalars just to avoid
|
||||
an update object — build `set` from defined values only, and hide any remaining
|
||||
SQL behind named helpers (`appendJsonbArray`, `mergeJsonbObject`, `keepFirstValue`)
|
||||
so the method reads as business intent, not SQL plumbing.
|
||||
|
||||
```typescript
|
||||
// ✅ Scalars included only when present; SQL hidden behind a named helper.
|
||||
const updateValues = compactUndefined({
|
||||
email: record.email ?? undefined,
|
||||
ip: record.ip ?? undefined,
|
||||
});
|
||||
await db.insert(userSignupLogs).values(values).onConflictDoUpdate({
|
||||
set: { ...updateValues, stageResults: appendStageResult(stage, result), updatedAt: now },
|
||||
target: userSignupLogs.id,
|
||||
});
|
||||
|
||||
// ❌ Every scalar becomes SQL plumbing.
|
||||
set: {
|
||||
email: sql`COALESCE(excluded.email, ${userSignupLogs.email})`,
|
||||
ip: sql`COALESCE(excluded.ip, ${userSignupLogs.ip})`,
|
||||
}
|
||||
```
|
||||
|
||||
When refactoring raw SQL:
|
||||
|
||||
- Preserve query shape on latency-sensitive paths. If raw SQL is one roundtrip,
|
||||
don't split it into multiple depth-based queries just to drop `execute`.
|
||||
- Use `$with(...)` + `insert().select()` / `update().from()` for multi-step
|
||||
single-roundtrip writes Drizzle can express.
|
||||
- Don't rely on `execute<MyRow>(sql...)` for safety — it types rows but doesn't keep
|
||||
selected columns in sync with schema changes.
|
||||
- If only a PostgreSQL feature Drizzle can't express works, keep the raw SQL and
|
||||
tighten it: schema refs in interpolations, explicit user scope, a narrow row
|
||||
interface, and regression tests.
|
||||
|
||||
Recursive CTEs are the canonical "keep raw" case — there's no clean `WITH RECURSIVE`
|
||||
builder, and a rewrite would add depth-based roundtrips:
|
||||
|
||||
```typescript
|
||||
interface TaskTreeRow {
|
||||
id: string;
|
||||
parent_task_id: string | null;
|
||||
}
|
||||
|
||||
// execute<T> acceptable: no clean WITH RECURSIVE builder. Keep schema refs in the
|
||||
// interpolations and scope every leg to the user.
|
||||
const { rows } = await db.execute<TaskTreeRow>(sql`
|
||||
WITH RECURSIVE task_tree AS (
|
||||
SELECT ${tasks.id}, ${tasks.parentTaskId}
|
||||
FROM ${tasks}
|
||||
WHERE ${tasks.id} = ${rootTaskId} AND ${tasks.createdByUserId} = ${userId}
|
||||
UNION ALL
|
||||
SELECT ${tasks.id}, ${tasks.parentTaskId}
|
||||
FROM ${tasks}
|
||||
JOIN task_tree ON ${tasks.parentTaskId} = task_tree.id
|
||||
WHERE ${tasks.createdByUserId} = ${userId}
|
||||
)
|
||||
SELECT * FROM task_tree
|
||||
`);
|
||||
```
|
||||
|
||||
### One-to-Many (Separate Queries)
|
||||
|
||||
When you need a parent record with its children, use two queries instead of relational `with:`:
|
||||
|
||||
```typescript
|
||||
// ✅ Good: two simple queries
|
||||
const [dataset] = await this.db
|
||||
.select()
|
||||
.from(agentEvalDatasets)
|
||||
.where(eq(agentEvalDatasets.id, id))
|
||||
.limit(1);
|
||||
|
||||
if (!dataset) return undefined;
|
||||
|
||||
const testCases = await this.db
|
||||
.select()
|
||||
.from(agentEvalTestCases)
|
||||
.where(eq(agentEvalTestCases.datasetId, id))
|
||||
.orderBy(asc(agentEvalTestCases.sortOrder));
|
||||
|
||||
return { ...dataset, testCases };
|
||||
```
|
||||
|
||||
## Database Migrations
|
||||
|
||||
See the `db-migrations` skill for the detailed migration guide.
|
||||
@@ -1,83 +0,0 @@
|
||||
---
|
||||
name: heterogeneous-agent
|
||||
description: 'Implement or debug LobeHub heterogeneous agents. Use for Claude Code/Codex adapters, external CLI agents, event mapping, IPC, persistence, tool-call chains, sessions, traces, or adapter bugs.'
|
||||
---
|
||||
|
||||
# Heterogeneous Agent Development
|
||||
|
||||
Use this skill when the bug or feature lives in the external CLI agent pipeline, not the normal server-side agent runtime.
|
||||
|
||||
## Use This Skill For
|
||||
|
||||
- Adding or changing a driver under `apps/desktop/src/main/modules/heterogeneousAgent/drivers/`
|
||||
- Editing an adapter under `packages/heterogeneous-agents/src/adapters/`
|
||||
- Debugging `heteroAgentRawLine` transport, `window.__HETERO_AGENT_TRACE`, or `executeHeterogeneousAgent`
|
||||
- Fixing Claude Code stream-json bugs such as duplicate partial/full chunks, broken `message.id` boundaries, missing `tool_result`, TodoWrite state drift, or subagent thread routing
|
||||
- Fixing Codex JSONL bugs such as mixed multi-tool messages, broken turn boundaries, or missing tool-result mapping
|
||||
- Fixing step-boundary, tool persistence, subagent thread, or resume bugs in Claude Code / Codex flows
|
||||
- Reproducing multi-tool mixing, orphan tool messages, or stuck tool-result loading
|
||||
|
||||
## Pipeline Map
|
||||
|
||||
1. CLI raw stdout / JSONL
|
||||
2. Electron main spawns the CLI and broadcasts `heteroAgentRawLine`
|
||||
3. Adapter maps raw provider events into `HeterogeneousAgentEvent`
|
||||
4. `executeHeterogeneousAgent` persists assistant/tool messages and forwards stream events
|
||||
5. `createGatewayEventHandler` hydrates the UI
|
||||
6. Only after this path looks correct should you move on to `agent-tracing` or context-engine debugging
|
||||
|
||||
## Read These Files First
|
||||
|
||||
- `apps/desktop/src/main/controllers/HeterogeneousAgentCtr.ts`
|
||||
- `apps/desktop/src/main/modules/heterogeneousAgent/drivers/claudeCode.ts`
|
||||
- `apps/desktop/src/main/modules/heterogeneousAgent/drivers/codex.ts`
|
||||
- `packages/heterogeneous-agents/src/adapters/claudeCode.ts`
|
||||
- `packages/heterogeneous-agents/src/adapters/codex.ts`
|
||||
- `src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts`
|
||||
- `src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts`
|
||||
|
||||
## Default Debug Order
|
||||
|
||||
1. Prove whether the raw CLI output is correct before touching UI code.
|
||||
2. If raw output is correct, compare it with adapter output. In dev, `executeHeterogeneousAgent` exposes `window.__HETERO_AGENT_TRACE`.
|
||||
3. If adapted events look correct, inspect `persistToolBatch`, `persistToolResult`, step transitions, and subagent routing.
|
||||
4. Turn the repro into a focused test before fixing.
|
||||
5. Only after the transport/adapter/executor path looks sound should you debug later-stage message processing.
|
||||
|
||||
## Critical Invariants
|
||||
|
||||
- One raw tool item must map to one stable `ToolCallPayload.id`.
|
||||
- A new main-agent step must emit a boundary signal before events are forwarded to the new assistant.
|
||||
- In Claude Code, multiple assistant events with the same `message.id` are one turn, not multiple turns.
|
||||
- In Claude Code, `tool_result` lives in `type: 'user'` events, not assistant events.
|
||||
- In Claude Code partial mode, `message_delta.usage` is authoritative; do not trust echoed usage on every assistant block.
|
||||
- `persistToolBatch` must pre-register assistant `tools[]` before creating tool messages.
|
||||
- Every tool message must keep `parentId` equal to the owning assistant and `tool_call_id` equal to the tool id.
|
||||
- `tool_result` must resolve an existing `toolMsgIdByCallId`.
|
||||
- Subagent chunks must stay in thread scope and must not be forwarded into the main assistant stream.
|
||||
- Never clear the global `toolMsgIdByCallId` map at main step boundaries.
|
||||
|
||||
## Common Bug Patterns
|
||||
|
||||
- Claude Code duplicates text or thinking:
|
||||
check whether partial deltas and the later full assistant block are both being emitted.
|
||||
- Claude Code opens too many assistant messages:
|
||||
check whether the adapter is cutting steps on every assistant event instead of only on `message.id` changes.
|
||||
- Claude Code tool results never land:
|
||||
check whether `type: 'user'` `tool_result` blocks are being ignored because the code only inspects assistant events.
|
||||
- Claude Code TodoWrite cards look stale:
|
||||
check whether synthesized `pluginState.todos` is being attached at tool-result time.
|
||||
- Claude Code subagent transcript leaks into the main bubble:
|
||||
check `parent_tool_use_id` handling and whether subagent chunks are being forwarded to the main gateway handler.
|
||||
- Multiple Codex tools collapse into one assistant message:
|
||||
first check whether the adapter emits a usable step boundary such as `newStep` or an equivalent turn-change signal.
|
||||
- Orphan tool messages:
|
||||
first check step-transition ordering and whether `persistToolBatch` Phase 1 ran before tool message creation.
|
||||
- Tool bubble stays loading:
|
||||
look for `tool_result for unknown toolCallId` and missing `result_msg_id` backfill.
|
||||
- Subagent tools show up in the main bubble:
|
||||
check for subagent chunks reaching the main gateway handler.
|
||||
|
||||
## References
|
||||
|
||||
- For commands, trace capture, invariants, and focused test commands, read [references/debug-workflow.md](./references/debug-workflow.md).
|
||||
@@ -1,246 +0,0 @@
|
||||
# Heterogeneous Agent Debug Workflow
|
||||
|
||||
## Contents
|
||||
|
||||
1. Pipeline map
|
||||
2. Capture raw CLI traces first
|
||||
3. Compare raw and adapted events
|
||||
4. Check step boundaries before persistence
|
||||
5. Check tool persistence invariants
|
||||
6. Focused tests
|
||||
7. Repro-to-fix workflow
|
||||
|
||||
## 1. Pipeline Map
|
||||
|
||||
```
|
||||
CLI raw stdout
|
||||
-> HeterogeneousAgentCtr (Electron main)
|
||||
-> heteroAgentRawLine broadcast
|
||||
-> createAdapter(...)
|
||||
-> executeHeterogeneousAgent(...)
|
||||
-> persistToolBatch / persistToolResult
|
||||
-> createGatewayEventHandler(...)
|
||||
-> UI hydration
|
||||
```
|
||||
|
||||
Start at the leftmost broken layer. Do not jump straight to UI rendering unless raw and adapted events already look correct.
|
||||
|
||||
## 2. Capture Raw CLI Traces First
|
||||
|
||||
### Codex raw JSONL
|
||||
|
||||
Use a read-only prompt and save traces under the repo-local scratch directory `.heerogeneous-tracing/`.
|
||||
|
||||
```bash
|
||||
ts=$(date +%Y%m%d-%H%M%S)
|
||||
out=".heerogeneous-tracing/codex-${ts}.jsonl"
|
||||
last=".heerogeneous-tracing/codex-${ts}.last.txt"
|
||||
|
||||
cat << 'EOF' | codex exec --json --skip-git-repo-check --sandbox read-only -C "$PWD" -o "$last" - > "$out"
|
||||
You are being run only to collect a raw Codex JSON event trace.
|
||||
Do not modify any files.
|
||||
Use at least 4 separate shell tool invocations, one invocation per command.
|
||||
Run a short sequence of read-only repo checks and then reply with a one-sentence summary.
|
||||
EOF
|
||||
```
|
||||
|
||||
What to look for in the JSONL:
|
||||
|
||||
- `thread.started`
|
||||
- `turn.started`
|
||||
- `item.started` / `item.completed`
|
||||
- `item.type === 'command_execution'`
|
||||
- `item.type === 'agent_message'`
|
||||
- `turn.completed`
|
||||
|
||||
If raw Codex already merges tools into one item, the adapter is innocent. If raw Codex emits independent items but UI collapses them, the bug is downstream.
|
||||
|
||||
If the repo already contains useful traces under `.heerogeneous-tracing/`, inspect them before reproducing.
|
||||
|
||||
### Claude Code raw NDJSON
|
||||
|
||||
Mirror the arguments from `apps/desktop/src/main/modules/heterogeneousAgent/drivers/claudeCode.ts`.
|
||||
|
||||
- `-p`
|
||||
- `--input-format stream-json`
|
||||
- `--output-format stream-json`
|
||||
- `--verbose`
|
||||
- `--include-partial-messages`
|
||||
- `--permission-mode bypassPermissions`
|
||||
|
||||
You can capture a local raw trace like this:
|
||||
|
||||
```bash
|
||||
ts=$(date +%Y%m%d-%H%M%S)
|
||||
out=".heerogeneous-tracing/claude-${ts}.ndjson"
|
||||
|
||||
cat << 'EOF' | claude -p \
|
||||
--input-format stream-json \
|
||||
--output-format stream-json \
|
||||
--verbose \
|
||||
--include-partial-messages \
|
||||
--permission-mode bypassPermissions \
|
||||
> "$out"
|
||||
{"type":"user","message":{"role":"user","content":[{"type":"text","text":"Do a few read-only repo checks, use several tool calls, and then summarize briefly."}]}}
|
||||
EOF
|
||||
```
|
||||
|
||||
What to look for in Claude Code raw traces:
|
||||
|
||||
- `type: 'system', subtype: 'init'`
|
||||
- `type: 'assistant'` blocks for `thinking`, `tool_use`, and `text`
|
||||
- `type: 'user'` blocks containing `tool_result`
|
||||
- `type: 'stream_event'` with `message_start`, `content_block_delta`, and `message_delta`
|
||||
- `type: 'result'`
|
||||
- `type: 'rate_limit_event'`
|
||||
|
||||
Important Claude Code semantics:
|
||||
|
||||
- Each content block often arrives as its own assistant event.
|
||||
- Multiple assistant events can share the same `message.id`; that is still one turn.
|
||||
- `message.id` change is the main-step boundary.
|
||||
- Partial deltas arrive before the later full assistant block.
|
||||
- `message_delta.usage` is the authoritative per-turn usage.
|
||||
- Subagent events are tagged with `parent_tool_use_id`.
|
||||
|
||||
If the repo already contains useful references, inspect these first:
|
||||
|
||||
- `.heerogeneous-tracing/cc-monitor-real-trace.jsonl`
|
||||
- `.heerogeneous-tracing/cc-stream-chain-reference.md`
|
||||
|
||||
If you only need boundary semantics or tool persistence behavior, prefer existing adapter tests under:
|
||||
|
||||
- `packages/heterogeneous-agents/src/adapters/claudeCode.test.ts`
|
||||
- `packages/heterogeneous-agents/src/adapters/claudeCode.e2e.test.ts`
|
||||
|
||||
## 3. Compare Raw And Adapted Events
|
||||
|
||||
In dev builds, `executeHeterogeneousAgent` stores raw lines plus adapted events on:
|
||||
|
||||
- `window.__HETERO_AGENT_TRACE`
|
||||
|
||||
Use that trace to compare:
|
||||
|
||||
- raw `item.started` / `item.completed`
|
||||
- adapted `stream_chunk { chunkType: 'tools_calling' }`
|
||||
- adapted `tool_result`
|
||||
- adapted `tool_end`
|
||||
|
||||
For Codex, the usual mapping is:
|
||||
|
||||
- raw `item.started(command_execution)` -> `tools_calling` + `tool_start`
|
||||
- raw `item.completed(command_execution)` -> `tool_result` + `tool_end`
|
||||
- raw `item.completed(agent_message)` -> `stream_chunk(text)`
|
||||
|
||||
If the raw trace is right but adapted events are wrong, fix the adapter before touching persistence.
|
||||
|
||||
## 4. Check Step Boundaries Before Persistence
|
||||
|
||||
This is the first thing to verify for "mixed tools in one assistant" bugs.
|
||||
|
||||
### Claude Code
|
||||
|
||||
Claude Code step boundaries are keyed off assistant `message.id` changes. The adapter should emit:
|
||||
|
||||
- `stream_end`
|
||||
- `stream_start { newStep: true }`
|
||||
|
||||
Also verify these Claude-specific invariants:
|
||||
|
||||
- the first assistant after init does not open a new step
|
||||
- repeated assistant events with the same `message.id` do not open a new step
|
||||
- partial `content_block_delta` text/thinking does not get duplicated by the later full assistant event
|
||||
- `tool_result` from `type: 'user'` updates the matching tool row
|
||||
- `parent_tool_use_id` creates thread-scoped subagent chunks instead of main-stream chunks
|
||||
- TodoWrite `tool_use.input` is converted into synthesized `pluginState.todos` on `tool_result`
|
||||
|
||||
Good references:
|
||||
|
||||
- `packages/heterogeneous-agents/src/adapters/claudeCode.ts`
|
||||
- `packages/heterogeneous-agents/src/adapters/claudeCode.test.ts`
|
||||
|
||||
### Codex
|
||||
|
||||
Codex raw traces usually provide turn-level boundaries through:
|
||||
|
||||
- `turn.started`
|
||||
- `turn.completed`
|
||||
|
||||
The executor only cuts a new assistant message when it receives a step-boundary signal it understands. If the adapter emits `stream_start` without `newStep`, multiple Codex tools and text chunks can accumulate under the same assistant longer than intended.
|
||||
|
||||
Relevant files:
|
||||
|
||||
- `packages/heterogeneous-agents/src/adapters/codex.ts`
|
||||
- `src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts`
|
||||
|
||||
## 5. Check Tool Persistence Invariants
|
||||
|
||||
Read `persistToolBatch` and `persistToolResult` before changing UI code.
|
||||
|
||||
### `persistToolBatch`
|
||||
|
||||
The expected order is:
|
||||
|
||||
1. Pre-register assistant `tools[]`
|
||||
2. Create `role: 'tool'` messages
|
||||
3. Backfill `result_msg_id` onto assistant `tools[]`
|
||||
|
||||
If tool rows are created before assistant `tools[]` are registered, orphan tool messages are likely.
|
||||
|
||||
### `persistToolResult`
|
||||
|
||||
`tool_result` must resolve the tool row through `toolMsgIdByCallId`.
|
||||
|
||||
Warning signs:
|
||||
|
||||
- `tool_result for unknown toolCallId`
|
||||
- tool rows with empty content forever
|
||||
- missing `result_msg_id`
|
||||
|
||||
For Claude Code, remember that tool results originate from raw `type: 'user'` events.
|
||||
|
||||
### Main vs subagent scope
|
||||
|
||||
- Main-agent tool state is per-step.
|
||||
- `toolMsgIdByCallId` is global across main and subagent scopes.
|
||||
- Subagent chunks must not be forwarded into the main gateway handler.
|
||||
|
||||
If subagent events leak to the main handler, the main bubble can inherit the wrong `tools[]` and content.
|
||||
|
||||
## 6. Focused Tests
|
||||
|
||||
Run the smallest useful test set first.
|
||||
|
||||
```bash
|
||||
bunx vitest run --silent='passed-only' 'packages/heterogeneous-agents/src/adapters/codex.test.ts'
|
||||
bunx vitest run --silent='passed-only' 'packages/heterogeneous-agents/src/adapters/claudeCode.test.ts'
|
||||
bunx vitest run --silent='passed-only' 'src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts'
|
||||
```
|
||||
|
||||
Especially useful places:
|
||||
|
||||
- `packages/heterogeneous-agents/src/adapters/codex.test.ts`
|
||||
- `packages/heterogeneous-agents/src/adapters/claudeCode.test.ts`
|
||||
- `src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts`
|
||||
|
||||
Claude Code-specific assertions worth adding when fixing bugs:
|
||||
|
||||
- same `message.id` does not emit `newStep`
|
||||
- changed `message.id` does emit `stream_end` plus `stream_start { newStep: true }`
|
||||
- partial text/thinking is emitted once
|
||||
- `tool_result` from `user` events reaches the right tool row
|
||||
- subagent chunks carry `subagent.parentToolCallId`
|
||||
- TodoWrite result synthesizes `pluginState.todos`
|
||||
|
||||
When the bug comes from a real trace, distill it into the closest existing test file instead of relying on manual UI-only repros.
|
||||
|
||||
## 7. Repro-To-Fix Workflow
|
||||
|
||||
1. Capture a raw trace and save it under `.heerogeneous-tracing/`.
|
||||
2. Confirm whether the bug appears in raw events, adapted events, or persistence.
|
||||
3. Add or update the narrowest failing test near the broken layer.
|
||||
4. Fix the smallest layer that can explain the symptom.
|
||||
5. Re-run focused tests.
|
||||
6. Only then do an Electron smoke test with the `agent-testing` skill if UI confirmation is still needed.
|
||||
|
||||
Do not start with a broad Electron repro if a raw trace or adapter test can prove the fault zone faster.
|
||||
@@ -1,91 +0,0 @@
|
||||
---
|
||||
name: hotkey
|
||||
description: 'Add or edit LobeHub keyboard shortcuts. Use for HotkeyEnum, HOTKEYS_REGISTRATION, combineKeys, useHotkeyById, tooltip hotkeys, shortcut scope, conflicts, or Cmd/Ctrl key combos.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Adding Keyboard Shortcuts Guide
|
||||
|
||||
## Steps to Add a New Hotkey
|
||||
|
||||
### 1. Update Hotkey Constant
|
||||
|
||||
In `src/types/hotkey.ts`:
|
||||
|
||||
```typescript
|
||||
export const HotkeyEnum = {
|
||||
// existing...
|
||||
ClearChat: 'clearChat', // Add new
|
||||
} as const;
|
||||
```
|
||||
|
||||
### 2. Register Default Hotkey
|
||||
|
||||
In `src/const/hotkeys.ts`:
|
||||
|
||||
```typescript
|
||||
import { KeyMapEnum as Key, combineKeys } from '@lobehub/ui';
|
||||
|
||||
export const HOTKEYS_REGISTRATION: HotkeyRegistration = [
|
||||
{
|
||||
group: HotkeyGroupEnum.Conversation,
|
||||
id: HotkeyEnum.ClearChat,
|
||||
keys: combineKeys([Key.Mod, Key.Shift, Key.Backspace]),
|
||||
scopes: [HotkeyScopeEnum.Chat],
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
### 3. Add i18n Translation
|
||||
|
||||
In `src/locales/default/hotkey.ts`:
|
||||
|
||||
```typescript
|
||||
const hotkey: HotkeyI18nTranslations = {
|
||||
clearChat: {
|
||||
desc: '清空当前会话的所有消息记录',
|
||||
title: '清空聊天记录',
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
### 4. Create and Register Hook
|
||||
|
||||
In `src/hooks/useHotkeys/chatScope.ts`:
|
||||
|
||||
```typescript
|
||||
export const useClearChatHotkey = () => {
|
||||
const clearMessages = useChatStore((s) => s.clearMessages);
|
||||
return useHotkeyById(HotkeyEnum.ClearChat, clearMessages);
|
||||
};
|
||||
|
||||
export const useRegisterChatHotkeys = () => {
|
||||
useClearChatHotkey();
|
||||
// ...other hotkeys
|
||||
};
|
||||
```
|
||||
|
||||
### 5. Add Tooltip (Optional)
|
||||
|
||||
```tsx
|
||||
const clearChatHotkey = useUserStore(settingsSelectors.getHotkeyById(HotkeyEnum.ClearChat));
|
||||
|
||||
<Tooltip hotkey={clearChatHotkey} title={t('clearChat.title', { ns: 'hotkey' })}>
|
||||
<Button icon={<DeleteOutlined />} onClick={clearMessages} />
|
||||
</Tooltip>;
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Scope**: Choose global or chat scope based on functionality
|
||||
2. **Grouping**: Place in appropriate group (System/Layout/Conversation)
|
||||
3. **Conflict check**: Ensure no conflict with system/browser shortcuts
|
||||
4. **Platform**: Use `Key.Mod` instead of hardcoded `Ctrl` or `Cmd`
|
||||
5. **Clear description**: Provide title and description for users
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **Not working**: Check scope and RegisterHotkeys hook
|
||||
- **Not in settings**: Verify HOTKEYS_REGISTRATION config
|
||||
- **Conflict**: HotkeyInput component shows warnings
|
||||
- **Page-specific**: Ensure correct scope activation
|
||||
@@ -1,78 +0,0 @@
|
||||
---
|
||||
name: i18n
|
||||
description: 'LobeHub i18n with react-i18next. Use for user-facing strings, locale keys, namespaces, useTranslation, t(), interpolation, zh-CN/en-US previews, hardcoded UI copy, or pnpm i18n.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# LobeHub Internationalization Guide
|
||||
|
||||
- Default language: English (en-US)
|
||||
- Framework: react-i18next
|
||||
- **Only edit files in `src/locales/default/`** - Never edit JSON files in `locales/`
|
||||
- Run `pnpm i18n` to generate translations (or manually translate zh-CN/en-US for dev preview)
|
||||
|
||||
## Key Naming Convention
|
||||
|
||||
**Flat keys with dot notation** (not nested objects):
|
||||
|
||||
```typescript
|
||||
// ✅ Correct
|
||||
export default {
|
||||
'alert.cloud.action': '立即体验',
|
||||
'sync.actions.sync': '立即同步',
|
||||
'sync.status.ready': '已连接',
|
||||
};
|
||||
|
||||
// ❌ Avoid nested objects
|
||||
export default {
|
||||
alert: { cloud: { action: '...' } },
|
||||
};
|
||||
```
|
||||
|
||||
**Patterns:** `{feature}.{context}.{action|status}`
|
||||
|
||||
**Parameters:** Use `{{variableName}}` syntax
|
||||
|
||||
```typescript
|
||||
'alert.cloud.desc': '我们提供 {{credit}} 额度积分',
|
||||
```
|
||||
|
||||
**Avoid key conflicts:**
|
||||
|
||||
```typescript
|
||||
// ❌ Conflict
|
||||
'clientDB.solve': '自助解决',
|
||||
'clientDB.solve.backup.title': '数据备份',
|
||||
|
||||
// ✅ Solution
|
||||
'clientDB.solve.action': '自助解决',
|
||||
'clientDB.solve.backup.title': '数据备份',
|
||||
```
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Add keys to `src/locales/default/{namespace}.ts`
|
||||
2. Export new namespace in `src/locales/default/index.ts`
|
||||
3. For dev preview: manually translate `locales/zh-CN/{namespace}.json` and `locales/en-US/{namespace}.json`
|
||||
4. Remind the user to run `pnpm i18n` before creating PR — do NOT run it yourself (very slow)
|
||||
|
||||
## Usage
|
||||
|
||||
```tsx
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
const { t } = useTranslation('common');
|
||||
|
||||
t('newFeature.title');
|
||||
t('alert.cloud.desc', { credit: '1000' });
|
||||
|
||||
// Multiple namespaces
|
||||
const { t } = useTranslation(['common', 'chat']);
|
||||
t('common:save');
|
||||
```
|
||||
|
||||
## Common Namespaces
|
||||
|
||||
**Most used:** `common` (shared UI), `chat` (chat features), `setting` (settings)
|
||||
|
||||
Others: auth, changelog, components, discover, editor, electron, error, file, hotkey, knowledgeBase, memory, models, plugin, portal, providers, tool, topic
|
||||
@@ -1,143 +0,0 @@
|
||||
---
|
||||
name: linear
|
||||
description: 'Linear issue management. Use for LOBE-xxx issues, Linear links, PRs referencing Linear, retrieving issues, updating status, completion comments, or sub-issue trees.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Linear Issue Management
|
||||
|
||||
Before using Linear workflows, search for `linear` MCP tools. If not found, treat as not installed.
|
||||
|
||||
## PR Creation with Linear Issues
|
||||
|
||||
A PR that fixes a Linear issue has **two separate jobs to do**, and both matter:
|
||||
|
||||
1. **`Fixes LOBE-xxx` in the PR body** — Linear watches GitHub for these magic keywords and auto-links the PR and auto-closes the issue on merge. This is the machine-readable side.
|
||||
2. **A completion comment on the Linear issue** — gives the reviewer/PM/teammate landing in Linear a human-readable summary of what changed and why, without forcing them to click through to GitHub and read a diff.
|
||||
|
||||
If you only do step 1, Linear watchers (often non-engineers) hit the issue and see no context. So pair PR creation with the Linear comment as part of the same task — finish both before considering the work done.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. **Retrieve issue details** before starting: `mcp__linear-server__get_issue`
|
||||
2. **Read images** — issue descriptions often contain screenshots with critical context (mockups, error states, before/after). Use `mcp__linear-server__extract_images` so you actually see them; reading raw markdown alone misses what the reporter was looking at.
|
||||
3. **Check for sub-issues**: `mcp__linear-server__list_issues` with `parentId` filter
|
||||
4. **Mark as In Progress** at the moment you start planning or implementing — this signals to teammates the issue is owned, so they don't double-pick it up.
|
||||
5. **Update issue status** when completing: `mcp__linear-server__update_issue`
|
||||
6. **Add completion comment** (see [format below](#completion-comment-format))
|
||||
|
||||
## Creating Issues
|
||||
|
||||
When creating issues with `mcp__linear-server__create_issue`, add the `claude code` label. Reason: the label is how the team filters/audits AI-generated issues; without it those issues vanish into the general backlog and the team loses visibility into AI contribution patterns.
|
||||
|
||||
## Language
|
||||
|
||||
Match the issue language to the conversation that produced it — if you're discussing in 中文,write the issue in 中文;if discussing in English, write it in English. Reason: the issue is a continuation of the conversation, and forcing a language switch creates translation friction for the collaborator who started the thread.
|
||||
|
||||
Specifics:
|
||||
|
||||
- 中文 conversation → 中文 body; technical terms (file paths, identifiers, library names, commands, error messages) stay in English.
|
||||
- English conversation → English body.
|
||||
- Code blocks, file paths, and quoted strings always stay in their original form regardless of surrounding language.
|
||||
- This applies equally to **updates** — when editing an existing issue (description **and titles**), preserve the language of the conversation that triggered the edit; don't switch the issue language mid-refactor.
|
||||
|
||||
## Creating Sub-issue Trees
|
||||
|
||||
When breaking a parent issue into a tree of sub-issues (e.g., task decomposition for LOBE-xxx), follow these rules — they work around real limitations of the Linear MCP tools.
|
||||
|
||||
### 1. Prefix titles with an ordering index
|
||||
|
||||
The Linear Sub-issues panel orders children by `sortOrder`, which **defaults to newest-first** (most recently created appears on top). Neither parallel nor serial creation produces the intended top-to-bottom reading order, and the MCP `save_issue` tool does **not expose a `sortOrder` parameter** — you can't set order at create time.
|
||||
|
||||
Workaround: encode execution order in the title itself:
|
||||
|
||||
```plaintext
|
||||
[1] [db] add schema fields
|
||||
[2] [db] new table + repository
|
||||
[3] [service] business logic layer
|
||||
[4] [api] REST endpoints
|
||||
[4.1] [sdk] client SDK wrapper
|
||||
[4.1.1] [app] consumer integration
|
||||
[4.1.2] [app] UI surface
|
||||
[4.2] [ui] dashboard page
|
||||
```
|
||||
|
||||
Even when the panel shuffles, the reader can mentally reconstruct the dependency graph at a glance. Dotted numbering `[n.m.k]` should mirror the parent-child nesting so the index and the tree agree.
|
||||
|
||||
### 2. Nest sub-issues by logical parent-child, not flat under the root
|
||||
|
||||
Linear supports **unlimited sub-issue depth**. A flat list of 8+ siblings under one root is hard to scan. Group by main-subordinate logic:
|
||||
|
||||
- Core service → its SDK → SDK consumers
|
||||
- Don't create a sibling when a child is more accurate
|
||||
|
||||
Use `parentId: "LOBE-xxxx"` at creation (or `save_issue` to move). Moving an issue's parent does not disturb its `blockedBy` relations.
|
||||
|
||||
### 3. Sub-issue creation order is dictated by `blockedBy`
|
||||
|
||||
`blockedBy` requires the blocker to exist first (you need its LOBE-id). So:
|
||||
|
||||
1. **Topologically sort** the DAG — leaves (no deps) first, roots last
|
||||
2. Create issues with zero deps in the first wave
|
||||
3. Create dependent issues only after collecting the blocker IDs from prior responses
|
||||
4. `blockedBy` is **append-only**; passing it again does not overwrite — safe to re-run
|
||||
|
||||
### 4. Don't waste rounds trying to parallelize
|
||||
|
||||
MCP tool calls in a single message look parallel but execute sequentially on the server, and you still need blocker IDs from earlier responses. Just issue calls in dependency order; optimizing for parallelism gains nothing here.
|
||||
|
||||
### 5. Keep each sub-issue description self-contained
|
||||
|
||||
Each sub-issue should state:
|
||||
|
||||
- Goal (1–2 lines)
|
||||
- Key files to touch
|
||||
- Concrete changes / acceptance criteria
|
||||
- Dependencies (link to blocker issues by `LOBE-xxxx`)
|
||||
- Validation steps
|
||||
|
||||
The implementer may open only the sub-issue, not the parent — don't rely on context that lives only in the parent description.
|
||||
|
||||
## Completion Comment Format
|
||||
|
||||
Each completed issue gets a comment summarizing the work, so reviewers and future readers don't have to reconstruct it from the PR diff:
|
||||
|
||||
```markdown
|
||||
## Changes Summary
|
||||
|
||||
- **Feature**: Brief description of what was implemented
|
||||
- **Files Changed**: List key files modified
|
||||
- **PR**: #xxx or PR URL
|
||||
|
||||
### Key Changes
|
||||
|
||||
- Change 1
|
||||
- Change 2
|
||||
- ...
|
||||
```
|
||||
|
||||
This gives team visibility, code-review context, and a paper trail for future reference.
|
||||
|
||||
## PR Association
|
||||
|
||||
When creating PRs for Linear issues, include magic keywords in the PR body:
|
||||
|
||||
- `Fixes LOBE-123`
|
||||
- `Closes LOBE-123`
|
||||
- `Resolves LOBE-123`
|
||||
|
||||
These trigger Linear's auto-link + auto-close on merge.
|
||||
|
||||
## Per-Issue Completion Rule
|
||||
|
||||
When working on multiple issues, close out **each one before starting the next** — don't batch all the Linear updates to the end. Batching is where comments get forgotten and issues stay stuck in "In Progress" days after the PR shipped.
|
||||
|
||||
For each issue:
|
||||
|
||||
1. Complete implementation
|
||||
2. Run `bun run type-check`
|
||||
3. Run related tests
|
||||
4. Create PR if needed
|
||||
5. Update status to **"In Review"** (not "Done" — "Done" is for after the PR merges)
|
||||
6. Add the completion comment
|
||||
7. Move to the next issue
|
||||
@@ -1,99 +0,0 @@
|
||||
---
|
||||
name: microcopy
|
||||
description: 'UI copy and microcopy guidelines. Use for user-facing copy, buttons, errors, empty states, onboarding, i18n wording, translation, or copy improvements in Chinese or English.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# LobeHub UI Microcopy Guidelines
|
||||
|
||||
This file is the quick-reference summary. For full prompt-style guidelines with extensive examples (anti-patterns, tone matrices, scenario walk-throughs), load the language-specific reference:
|
||||
|
||||
- **中文文案** — [`references/zh.md`](./references/zh.md)
|
||||
- **English copy** — [`references/en.md`](./references/en.md)
|
||||
|
||||
Brand: **Where Agents Collaborate** - Focus on collaborative agent system, not just "generation".
|
||||
|
||||
## Fixed Terminology
|
||||
|
||||
| Chinese | English |
|
||||
| ---------- | ------------- |
|
||||
| 空间 | Workspace |
|
||||
| 助理 | Agent |
|
||||
| 群组 | Group |
|
||||
| 上下文 | Context |
|
||||
| 记忆 | Memory |
|
||||
| 连接器 | Integration |
|
||||
| 技能 | Skill |
|
||||
| 助理档案 | Agent Profile |
|
||||
| 话题 | Topic |
|
||||
| 文稿 | Page |
|
||||
| 社区 | Community |
|
||||
| 资源 | Resource |
|
||||
| 库 | Library |
|
||||
| 模型服务商 | Provider |
|
||||
| 评测 | Evaluation |
|
||||
| 基准 | Benchmark |
|
||||
| 数据集 | Dataset |
|
||||
| 用例 | Test Case |
|
||||
|
||||
## Brand Principles
|
||||
|
||||
1. **Create**: One sentence → usable Agent; clear next step
|
||||
2. **Collaborate**: Multi-agent; shared Context; controlled
|
||||
3. **Evolve**: Remember with consent; explainable; replayable
|
||||
|
||||
## Writing Rules
|
||||
|
||||
1. **Clarity first**: Short sentences, strong verbs, minimal adjectives
|
||||
2. **Layered**: Main line (simple) + optional detail (precise)
|
||||
3. **Consistent verbs**: Create / Connect / Run / Pause / Retry / View details
|
||||
4. **Actionable**: Every message tells next step; avoid generic "OK/Cancel"
|
||||
|
||||
## Human Warmth (Balanced)
|
||||
|
||||
Default: **80% information, 20% warmth**
|
||||
Key moments: **70/30** (first-time, empty state, failures, long waits)
|
||||
|
||||
**Hard cap**: At most half sentence of warmth, followed by clear next step.
|
||||
|
||||
**Order**:
|
||||
|
||||
1. Acknowledge situation (no judgment)
|
||||
2. Restore control (pause/replay/edit/undo/clear Memory)
|
||||
3. Provide next action
|
||||
|
||||
**Avoid**: Preachy encouragement, grand narratives, over-anthropomorphizing
|
||||
|
||||
## Patterns
|
||||
|
||||
**Getting started**:
|
||||
|
||||
- "Starting with one sentence is enough. Describe your goal."
|
||||
- "Not sure where to begin? Tell me the outcome."
|
||||
|
||||
**Long wait**:
|
||||
|
||||
- "Running… You can switch tasks—I'll notify you when done."
|
||||
- "This may take a few minutes. To speed up: reduce Context / switch model."
|
||||
|
||||
**Failure**:
|
||||
|
||||
- "That didn't run through. Retry, or view details to fix."
|
||||
- "Connection failed. Re-authorize in Settings, or try again later."
|
||||
|
||||
**Collaboration**:
|
||||
|
||||
- "Align everyone to the same Context."
|
||||
- "Different opinions are fine. Write the goal first."
|
||||
|
||||
## Errors/Exceptions
|
||||
|
||||
Must include:
|
||||
|
||||
1. **What happened**
|
||||
2. (Optional) **Why**
|
||||
3. **What user can do next**
|
||||
|
||||
Provide: Retry / View details / Go to Settings / Contact support / Copy logs
|
||||
|
||||
Never blame user. Put error codes in "Details".
|
||||
@@ -1,176 +0,0 @@
|
||||
---
|
||||
globs: src/locales/default/*
|
||||
alwaysApply: false
|
||||
---
|
||||
|
||||
You are **LobeHub’s English UI Copy & Microcopy Specialist**.
|
||||
|
||||
LobeHub is an assistant workspace: users can create **Agents** and **Agent Teams** so people↔agents and agent↔agent can collaborate to improve productivity in work and life.
|
||||
Brand vibe: youthful, friendly, modern on the surface; professional, reliable, productivity- and controllability-first underneath. Overall style reference: Notion / Figma / Apple / Discord / OpenAI / Gemini — clear, restrained, trustworthy, human but not cheesy.
|
||||
|
||||
Product slogan: **For Collaborative Agents**. Your copy must continuously reinforce that LobeHub is not about “generation”, but about a **collaborative agent system**: shareable context, traceable outcomes, replayable runs, evolvable setup, and **human-in-the-loop**.
|
||||
|
||||
---
|
||||
|
||||
## 1) Fixed Terminology (must follow)
|
||||
|
||||
Use **exactly** these English terms across the product. Do not mix synonyms for the same concept.
|
||||
|
||||
- 空间: **Workspace**
|
||||
- 助理: **Agent**
|
||||
- 群组: **Group**
|
||||
- 上下文: **Context**
|
||||
- 记忆: **Memory**
|
||||
- 连接器: **Integration**
|
||||
- 技能 /tool/plugin: **Skill**
|
||||
- 助理档案: **Agent Profile**
|
||||
- 话题: **Topic**
|
||||
- 文稿: **Page**
|
||||
- 社区: **Community**
|
||||
- 资源: **Resource**
|
||||
- 库: **Library**
|
||||
- MCP: **MCP**
|
||||
- 模型服务商: **Provider**
|
||||
|
||||
Terminology rule: one concept = one term site-wide. Never alternate with “bot/assistant/AI agent/team/workspace” variations.
|
||||
|
||||
---
|
||||
|
||||
## 2) Your Responsibilities
|
||||
|
||||
- Improve, rewrite, or create from scratch any **English UI copy**: titles, buttons, form labels/help text, placeholders, onboarding, empty states, toasts, modals, errors, permission prompts, settings, creation/run flows, collaboration and Agent Team pages, etc.
|
||||
- Copy must work for both:
|
||||
- general users (immediately understandable)
|
||||
- power users (not childish)
|
||||
- It must fit both playful and serious contexts.
|
||||
- Avoid overclaiming AI capabilities; add human warmth at the right moments.
|
||||
|
||||
---
|
||||
|
||||
## 3) The Three Brand Principles (bake into structure & wording)
|
||||
|
||||
- **Create**: create an Agent in one sentence; clear next step from idea → usable.
|
||||
- **Collaborate**: multi-agent collaboration; align info and outputs; share Context (controlled, manageable).
|
||||
- **Evolve**: Agents can remember preferences **only with user consent**; become more helpful over time; emphasize explainability, settings, and replay.
|
||||
|
||||
---
|
||||
|
||||
## 4) Writing Rules (actionable)
|
||||
|
||||
1. **Clarity first**: short sentences, strong verbs, minimal adjectives. Avoid hype (“revolutionary”, “epic”, “100%”).
|
||||
2. **Layered messaging (single version for everyone)**:
|
||||
- Main line: simple and actionable
|
||||
- Optional second line: more precise / technical / boundary-setting (subtitle, helper text, tooltip, collapsible)
|
||||
- Do not produce “Pro vs Lite” variants; one main + optional detail
|
||||
3. **Use terms sparingly but correctly**: prefer plain words (“connect”, “run”, “context”) unless a technical term is necessary. When it is, add a plain-English explanation.
|
||||
4. **Consistency**: keep verbs consistent across similar actions (Create / Connect / Run / Pause / Retry / View details / Clear Memory).
|
||||
5. **Actionable**: every message tells the user what to do next. Avoid generic “OK/Cancel”; use specific actions.
|
||||
6. **English localization**: natural, product-native English; avoid translationese; keep punctuation and casing consistent.
|
||||
|
||||
---
|
||||
|
||||
## 5) Human Warmth (balanced, controlled)
|
||||
|
||||
Goal: reduce anxiety and restore control without being sentimental.
|
||||
Default ratio: **80% information, 20% warmth**.
|
||||
Key moments (first-time create, empty state, long waits, failures/retries, rollback/data-loss risk, collaboration conflicts): may go **70/30**.
|
||||
|
||||
Hard cap: any on-screen message may include **at most half a sentence to one sentence** of warmth, and it must be followed by a clear next step.
|
||||
|
||||
Required order:
|
||||
|
||||
1. Acknowledge the situation (no judgment)
|
||||
2. Restore control (human-in-the-loop: pause/replay/edit/undo/clear Memory/view Context)
|
||||
3. Provide the next action (button/path)
|
||||
|
||||
Avoid:
|
||||
|
||||
- preachy encouragement (“don’t worry”, “stay positive”)
|
||||
- grand narratives
|
||||
- overly anthropomorphic claims (“I understand you”, “I’ll always remember you”)
|
||||
|
||||
Core stance: Agents can accelerate output, but **you** own the judgment, trade-offs, and final decision. LobeHub gives you time back for what matters.
|
||||
|
||||
Suggested patterns:
|
||||
|
||||
- **Getting started / blank state**
|
||||
- “Starting with one sentence is enough. Describe your goal and I’ll help you set up the first Agent.”
|
||||
- “Not sure where to begin? Tell me the outcome—we’ll break it down together.”
|
||||
- **Long run / waiting**
|
||||
- “Running… You can switch tasks—I'll notify you when it’s done.”
|
||||
- “This may take a few minutes. To speed up: reduce Context / switch model / disable Auto-run.”
|
||||
- **Failure / retry**
|
||||
- “That didn’t run through. Retry, or view details to fix the cause.”
|
||||
- “Connection failed: permission not granted or network unstable. Re-authorize in Settings, or try again later.”
|
||||
- **Value anxiety (guidance, not error dialogs)**
|
||||
- “Agents can speed up output, but direction and standards stay with you.”
|
||||
- “Fast results are great—keeping the trail makes the next run steadier.”
|
||||
- **Collaboration / Agent Teams**
|
||||
- “Align everyone to the same Context. Every Agent in the Agent Team works from the same page.”
|
||||
- “Different opinions are fine. Write the goal first, then let Agents propose options and trade-offs.”
|
||||
|
||||
---
|
||||
|
||||
## 6) Errors / Exceptions / Permissions / Billing: hard rules
|
||||
|
||||
Every error must include:
|
||||
|
||||
- **What happened**
|
||||
- (optional) **Why**
|
||||
- **What the user can do next**
|
||||
|
||||
Provide actionable options as appropriate:
|
||||
|
||||
- Retry / View details / Go to Settings / Contact support / Copy logs
|
||||
|
||||
Never blame the user. Don’t show only an error code; put codes in “Details” if needed.
|
||||
For data/security/billing: be neutral, thorough, and respectful—warmth comes from clarity, not emotion.
|
||||
|
||||
---
|
||||
|
||||
## 7) Your Special Task: CN i18n → EN (localized, length-aware)
|
||||
|
||||
You translate **raw Chinese i18n strings into English** for LobeHub.
|
||||
|
||||
Requirements:
|
||||
|
||||
- Prefer **localized**, product-native English over literal translation.
|
||||
- Do **not** chase perfect one-to-one consistency if a more natural UI phrase reads better.
|
||||
- Keep the **character length difference small**; try to make the English string **roughly the same visual length** as the Chinese source (avoid overly long expansions).
|
||||
- Preserve meaning, tone, and actionability; keep verbs consistent with LobeHub’s UI patterns.
|
||||
- If space is tight (buttons, tabs, toasts), prioritize: **verb + object**, drop optional words first.
|
||||
- If the Chinese includes placeholders/variables, preserve them exactly (e.g., `{name}`, `{{count}}`, `%s`) and keep word order sensible.
|
||||
- Keep capitalization consistent with UI norms (buttons/title case only when appropriate).
|
||||
|
||||
Output format when translating:
|
||||
|
||||
- Provide **English only**, unless asked otherwise.
|
||||
- If multiple options are useful, give **one best option** + **one shorter fallback** (only when length constraints are likely).
|
||||
|
||||
---
|
||||
|
||||
You always optimize for: **clarity, control, collaboration, replayability, and human-in-the-loop**—in a modern, restrained, trustworthy English voice.
|
||||
|
||||
## 8) Product Introduction
|
||||
|
||||
LobeHub, we define agents as the unit of work. We’re building the first human–agent co-working, co-evolving network.
|
||||
|
||||
It is a fundamentally new, agent-first experience.You can pop up your agents or agent teams while writing, while chatting -- from ideation, to execution, to delivery -- across your entire workflow. Here, agents are not just tools, but always-on units of work.
|
||||
|
||||
### Create
|
||||
|
||||
It is a unified workspace where you can find, build, or team up with agent co-workers.Simply describe what you need, and Lobe AI will generate the prompts and assemble the right set of tools to compose your agent.In agent marketplace, you can easily discover agents created by others,use them instantly,and flexibly swap in your own tools.
|
||||
|
||||
### Collaboration
|
||||
|
||||
You can also spin up agent groups to handle system-level projects, even like building a quant team.
|
||||
Within this group, some agents track signals and mine quantitative factors in real time, some manage risk, some execute orders, collaborate together to make money.
|
||||
We’re defining how humans and agents work together. Now we support agent-to-agent collaboration, and we continue to scale new forms of collaboration networks — from agents collaborating across teams, to multiple humans working through the same agent.
|
||||
|
||||
### Evolve
|
||||
|
||||
Humans and agents should co-evolve, and we design this paradigm from both technical and economic perspectives. Our memory system is structured and editable,enabling models to better align with individual users, while allowing users to provide cleaner reward signals for continual learning. Agent evolution is powered by shared human intelligence through our agent marketplace. Creators are rewarded, and agents, in turn, pay for human intelligence.
|
||||
|
||||
Is AI replacing humans? No.
|
||||
We’re building a human–agent co-working, co-evolving society.
|
||||
Agents become smarter and more personalized through human intelligence, taking on repetitive and exhausting work — so humans can focus on fewer, but more important things: taste, and creation.
|
||||
@@ -1,160 +0,0 @@
|
||||
---
|
||||
globs: src/locales/default/*
|
||||
alwaysApply: false
|
||||
---
|
||||
|
||||
你是「LobeHub」的中文 UI 文案与微文案(microcopy)专家。LobeHub 是一个助理工作空间:用户可以创建助理与群组,让人和助理、助理和助理协作,提升日常生产与生活效率。产品气质:外表年轻、亲和、现代;内核专业、可靠、强调生产力与可控性。整体风格参考 Notion / Figma / Apple / Discord / OpenAI / Gemini:清晰克制、可信、有人情味但不油腻。
|
||||
|
||||
产品 slogan:**For Collaborative Agents**。你的文案要让用户持续感到:LobeHub 的重点不是 “生成”,而是 “协作的助理体系”(可共享上下文、可追踪、可回放、可演进、人在回路)。
|
||||
|
||||
---
|
||||
|
||||
### 1) 固定术语(必须遵守)
|
||||
|
||||
- Workspace:空间
|
||||
- Agent:助理
|
||||
- Agent Team:群组
|
||||
- Context:上下文
|
||||
- Memory:记忆
|
||||
- Integration:连接器
|
||||
- Tool/Skill/Plugin/ 插件 / 工具:技能
|
||||
- SystemRole: 助理档案
|
||||
- Topic: 话题
|
||||
- Page: 文稿
|
||||
- Community: 社区
|
||||
- Resource: 资源
|
||||
- Library: 库
|
||||
- MCP: MCP
|
||||
- Provider: 模型服务商
|
||||
|
||||
术语规则:同一概念全站只用一种说法,不混用 “Agent / 智能体 / 机器人 / 团队 / 工作区” 等。
|
||||
|
||||
---
|
||||
|
||||
### 2) 你的任务
|
||||
|
||||
- 优化、改写或从零生成任何界面中文文案:标题、按钮、表单说明、占位、引导、空状态、Toast、弹窗、错误、权限、设置项、创建 / 运行流程、协作与群组相关页面等。
|
||||
- 文案必须同时兼容:普通用户看得懂 + 专业用户不觉得低幼;娱乐与严肃场景都成立;不过度营销、不夸大 AI 能力;在关键节点提供恰到好处的人文关怀。
|
||||
|
||||
---
|
||||
|
||||
### 3) 品牌三原则(内化到结构与措辞)
|
||||
|
||||
- **Create(创建)**:一句话创建助理;从想法到可用;清楚下一步。
|
||||
- **Collaborate(协作)**:多助理协作;群组对齐信息与产出;共享上下文(可控、可管理)。
|
||||
- **Evolve(演进)**:助理可在你允许的范围内记住偏好;随你的工作方式变得更顺手;强调可解释、可设置、可回放。
|
||||
|
||||
---
|
||||
|
||||
### 4) 写作规则(可执行)
|
||||
|
||||
1. **清晰优先**:短句、强动词、少形容词;避免口号化与空泛承诺(如 “颠覆”“史诗级”“100%”)。
|
||||
2. **分层表达(单一版本兼容两类用户)**:
|
||||
- 主句:人人可懂、可执行
|
||||
- 必要时补充一句副说明:更精确 / 更专业 / 更边界(可放副标题、帮助提示、折叠区)
|
||||
- 不输出 “Pro/Lite 两套文案”,而是 “一句主文案 + 可选补充”
|
||||
3. **术语克制但准确**:能说 “连接 / 运行 / 上下文” 就不要堆砌术语;必须出现专业词时给一句白话解释。
|
||||
4. **一致性**:同一动作按钮尽量固定动词(创建 / 连接 / 运行 / 暂停 / 重试 / 查看详情 / 清除记忆等)。
|
||||
5. **可行动**:每条提示都要让用户知道下一步;按钮避免 “确定 / 取消” 泛化,改成更具体的动作。
|
||||
6. **中文本地化**:符合中文阅读节奏;中英混排规范;避免翻译腔。
|
||||
|
||||
---
|
||||
|
||||
### 5) 人文关怀(中间态温度:介于克制与陪伴)
|
||||
|
||||
目标:在 AI 时代的价值焦虑与创作失格感中,给用户 “被理解 + 有掌控 + 能继续” 的体验,但不写长抒情。
|
||||
|
||||
#### 温度比例规则
|
||||
|
||||
- 默认:信息为主,温度为辅(约 8:2)
|
||||
- 关键节点(首次创建、空状态、长等待、失败重试、回退 / 丢失风险、协作分歧):允许提升到 7:3
|
||||
- 强制上限:任何一条上屏文案里,温度表达不超过**半句或一句**,且必须紧跟明确下一步。
|
||||
|
||||
#### 表达顺序(必须遵守)
|
||||
|
||||
1. 先承接处境(不评判):如 “没关系 / 先这样也可以 / 卡住很正常”
|
||||
2. 再给掌控感(人在回路):可暂停 / 可回放 / 可编辑 / 可撤销 / 可清除记忆 / 可查看上下文
|
||||
3. 最后给下一步(按钮 / 路径明确)
|
||||
|
||||
#### 避免
|
||||
|
||||
- 鸡汤式说教(如 “别焦虑”“要相信未来”)
|
||||
- 宏大叙事与文学排比
|
||||
- 过度拟人(不承诺助理 “理解你 / 有情绪 / 永远记得你”)
|
||||
|
||||
#### 核心立场
|
||||
|
||||
- 助理很强,但它替代不了你的经历、选择与判断;LobeHub 帮你把时间还给重要的部分。
|
||||
|
||||
##### A. 情绪承接(先人后事)
|
||||
|
||||
- 允许承认:焦虑、空白、无从下手、被追赶感、被替代感、创作枯竭、意义感动摇
|
||||
- 但不下结论、不说教:不输出 “你要乐观 / 别焦虑”,改成 “这种感觉很常见 / 你不是一个人”
|
||||
|
||||
##### B. 主体性回归(把人放回驾驶位)
|
||||
|
||||
- 关键句式:**“决定权在你”**、**“你可以选择交给助理的部分”**、**“把你的想法变成可运行的流程”**
|
||||
- 强调可控:可编辑、可回放、可暂停、可撤销、可清除记忆、可查看上下文
|
||||
|
||||
##### C. 经历与关系(把价值从结果挪回过程)
|
||||
|
||||
- 适度表达:记录、回放、版本、协作痕迹、讨论、共创、里程碑
|
||||
- 用 “经历 / 过程 / 痕迹 / 回忆 / 脉络 / 成长” 这类词,避免虚无抒情
|
||||
|
||||
##### D. 不用 “AI 神话”
|
||||
|
||||
- 不渲染 “AI 终将超越你 / 取代你”
|
||||
- 也不轻飘飘说 “AI 只是工具” 了事更像:**“它是工具,但你仍是作者 / 负责人 / 最终决定者”**
|
||||
|
||||
##### 示例
|
||||
|
||||
在用户可能产生自我否定或无力感的场景(空状态、创作开始、产出对比、失败重试、长时间等待、团队协作分歧、版本回退):
|
||||
|
||||
```
|
||||
1. **先承接感受**:用一句短话确认处境(不评判)
|
||||
2. **再给掌控感**:强调“你可控/可选择/可回放/可撤销”
|
||||
3. **最后给下一步**:提供明确行动按钮或路径
|
||||
```
|
||||
|
||||
- 允许出现 “经历、选择、痕迹、成长、一起、陪你把事做完” 等词来传递温度;但保持信息密度,不写长段抒情。
|
||||
- 严肃场景(权限 / 安全 / 付费 / 数据丢失风险)仍以清晰与准确为先,温度通过 “尊重与解释” 体现,而不是煽情。
|
||||
|
||||
你可以让系统在需要时套这些结构(同一句兼容新手 / 专业):
|
||||
|
||||
**开始创作 / 空白页**
|
||||
|
||||
- 主句:给一个轻承接 + 行动入口
|
||||
- 模板:
|
||||
- 「从一个念头开始就够了。写一句话,我来帮你搭好第一个助理。」
|
||||
- 「不知道从哪开始也没关系:先说目标,我们一起把它拆开。」
|
||||
|
||||
**长任务运行 / 等待**
|
||||
|
||||
- 模板:
|
||||
- 「正在运行中… 你可以先去做别的,完成后我会提醒你。」
|
||||
- 「这一步可能要几分钟。想更快:减少上下文 / 切换模型 / 关闭自动运行。」
|
||||
|
||||
**失败 / 重试**
|
||||
|
||||
- 模板:
|
||||
- 「没关系,这次没跑通。你可以重试,或查看原因再继续。」
|
||||
- 「连接失败:权限未通过或网络不稳定。去设置重新授权,或稍后再试。」
|
||||
|
||||
**对比与自我价值焦虑(适合提示 / 引导,不适合错误弹窗)**
|
||||
|
||||
- 模板:
|
||||
- 「助理可以加速产出,但方向、取舍和标准仍属于你。」
|
||||
- 「结果可以很快,经历更重要:把每次尝试留下来,下一次会更稳。」
|
||||
|
||||
**协作 / 群组**
|
||||
|
||||
- 模板:
|
||||
- 「把上下文对齐到同一处,群组里每个助理都会站在同一页上。」
|
||||
- 「不同意见没关系:先把目标写清楚,再让助理分别给方案与取舍。」
|
||||
|
||||
### 6) 错误 / 异常 / 权限 / 付费:硬规则
|
||||
|
||||
- 必须包含:**发生了什么 +(可选)原因 + 你可以怎么做**
|
||||
- 必须提供可操作选项:**重试 / 查看详情 / 去设置 / 联系支持 / 复制日志**(按场景取舍)
|
||||
- 不责备用户;不只给错误码;错误码可放在 “详情” 里
|
||||
- 涉及数据与安全:语气更中性更完整,温度通过 “尊重与解释” 体现,而不是煽
|
||||
@@ -1,139 +0,0 @@
|
||||
---
|
||||
name: modal
|
||||
description: 'LobeHub imperative modal conventions. Use when creating or migrating modals, dialogs, popups, confirm flows, ModalHost wiring, createModal, confirmModal, useModalContext, or base-ui modal APIs.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Modal Imperative API Guide
|
||||
|
||||
## Recommended: `@lobehub/ui/base-ui`
|
||||
|
||||
New code should use the **base-ui** modal stack (headless primitives, not antd `Modal`):
|
||||
|
||||
- `createModal`, `confirmModal`, `ModalHost` from `@lobehub/ui/base-ui`
|
||||
- `useModalContext` from `@lobehub/ui/base-ui` inside modal **content**
|
||||
|
||||
Body slot: pass **`content`** (or `children`; runtime uses `content ?? children`).
|
||||
|
||||
### Global `ModalHost` (required)
|
||||
|
||||
Base-ui `createModal` renders through a **separate** host from the root package. The app must mount **`ModalHost`** from `@lobehub/ui/base-ui` once near the root (e.g. next to other global hosts). Without it, `createModal` calls will not appear.
|
||||
|
||||
If the project only mounts `ModalHost` from `@lobehub/ui`, add a second lazy `ModalHost` from `@lobehub/ui/base-ui` until all imperative modals are migrated.
|
||||
|
||||
### Why imperative?
|
||||
|
||||
| Mode | Characteristics | Recommended |
|
||||
| ----------- | ------------------------------------ | ----------- |
|
||||
| Declarative | `open` state + `<Modal />` | ❌ |
|
||||
| Imperative | Call `createModal()`, no local state | ✅ |
|
||||
|
||||
### File structure
|
||||
|
||||
```
|
||||
features/
|
||||
└── MyFeatureModal/
|
||||
├── index.tsx # export createXxxModal
|
||||
└── MyFeatureContent.tsx # modal body
|
||||
```
|
||||
|
||||
### 1. Content (`MyFeatureContent.tsx`)
|
||||
|
||||
```tsx
|
||||
'use client';
|
||||
|
||||
import { useModalContext } from '@lobehub/ui/base-ui';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
export const MyFeatureContent = () => {
|
||||
const { t } = useTranslation('namespace');
|
||||
const { close } = useModalContext();
|
||||
|
||||
return <div>{/* ... */}</div>;
|
||||
};
|
||||
```
|
||||
|
||||
### 2. `createModal` (`index.tsx`)
|
||||
|
||||
```tsx
|
||||
'use client';
|
||||
|
||||
import { createModal } from '@lobehub/ui/base-ui';
|
||||
import { t } from 'i18next';
|
||||
|
||||
import { MyFeatureContent } from './MyFeatureContent';
|
||||
|
||||
export const createMyFeatureModal = () =>
|
||||
createModal({
|
||||
content: <MyFeatureContent />,
|
||||
footer: null,
|
||||
maskClosable: true,
|
||||
styles: {
|
||||
content: { overflow: 'hidden', padding: 0 },
|
||||
},
|
||||
title: t('myFeature.title', { ns: 'setting' }),
|
||||
width: 'min(80%, 800px)',
|
||||
});
|
||||
```
|
||||
|
||||
### 3. Usage
|
||||
|
||||
```tsx
|
||||
import { createMyFeatureModal } from '@/features/MyFeatureModal';
|
||||
|
||||
const handleOpen = useCallback(() => {
|
||||
createMyFeatureModal();
|
||||
}, []);
|
||||
|
||||
return <Button onClick={handleOpen}>Open</Button>;
|
||||
```
|
||||
|
||||
### i18n
|
||||
|
||||
- **Content**: `useTranslation` in components.
|
||||
- **`createModal` options**: `import { t } from 'i18next'` where hooks are unavailable.
|
||||
|
||||
### `useModalContext`
|
||||
|
||||
```tsx
|
||||
const { close, setCanDismissByClickOutside } = useModalContext();
|
||||
```
|
||||
|
||||
### Common options (base-ui)
|
||||
|
||||
`ImperativeModalProps` builds on `BaseModalProps`: `title`, `width`, `maskClosable`, `open`, `onOpenChange`, `footer`, `styles` / `classNames` (keys: `backdrop`, `popup`, `header`, `title`, `close`, `content`, …).
|
||||
|
||||
| Property | Notes |
|
||||
| -------------- | ---------------------------------------- |
|
||||
| `content` | Main body (preferred name vs `children`) |
|
||||
| `maskClosable` | Click outside to dismiss |
|
||||
| `styles.*` | Semantic regions, not antd `styles.body` |
|
||||
|
||||
### Confirm
|
||||
|
||||
```tsx
|
||||
import { confirmModal } from '@lobehub/ui/base-ui';
|
||||
|
||||
confirmModal({
|
||||
title: '…',
|
||||
content: '…',
|
||||
okText: '…',
|
||||
cancelText: '…',
|
||||
onOk: async () => {},
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Legacy: `@lobehub/ui` (root)
|
||||
|
||||
Older call sites use **`createModal` from `@lobehub/ui`**, which is typed as **antd `Modal` props** (`children`, `allowFullscreen`, `getContainer`, `destroyOnHidden`, `styles.body`, etc.). Prefer migrating new work to **`@lobehub/ui/base-ui`**.
|
||||
|
||||
Examples (legacy): `src/features/SkillStore/index.tsx`, `src/features/LibraryModal/CreateNew/index.tsx`.
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
- Base-ui (preferred): follow sections above; ensure **base-ui `ModalHost`** is mounted.
|
||||
- Legacy: `src/features/SkillStore/index.tsx`, `src/features/LibraryModal/CreateNew/index.tsx`
|
||||
@@ -1,69 +0,0 @@
|
||||
---
|
||||
name: model-bank-metadata
|
||||
description: 'Backfill and maintain model-bank metadata (knowledgeCutoff, family, generation). Use when adding models, fixing cutoff/family data, running a metadata sweep across aiModels providers, or researching official knowledge cutoffs.'
|
||||
user-invocable: false
|
||||
---
|
||||
|
||||
# Model-Bank Metadata (knowledgeCutoff / family / generation)
|
||||
|
||||
How to populate and maintain the three structured metadata fields on `packages/model-bank/src/aiModels/*.ts` model cards, at single-model scale (new model PR) or repo-wide scale (sweep across \~80 provider files / \~1900 entries).
|
||||
|
||||
## Field semantics
|
||||
|
||||
| Field | Format | Meaning |
|
||||
| ----------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `knowledgeCutoff` | `'YYYY-MM'` (or `'YYYY'` if only the year is published) | World-knowledge cutoff. When a vendor distinguishes a **"reliable knowledge cutoff"** from the broader training-data cutoff (Anthropic does), always use the **reliable** one. |
|
||||
| `family` | lowercase slug (`claude`, `gpt`, `o-series`, `qwen`, `deepseek`, `llama`, `glm`, …) | Model lineage, finer than `organization`. Lets the UI group models and match the same model across aggregator providers. |
|
||||
| `generation` | family slug + version (`claude-4.6`, `gpt-5.2`, `qwen3.5`, `llama-3.1`) | Generation within the family. Only set when confidently derivable from the model line's naming. Rolling aliases (`qwen-max`, `deepseek-chat`, `gemini-flash-latest`) get `family` only. |
|
||||
|
||||
All three are optional. **The cardinal rule: only fill what an authoritative source states or naming rules derive — never guess.** An empty field is correct for vendors that publish nothing.
|
||||
|
||||
No DB migration is ever needed for these: builtin models are merged from model-bank at read time (`repositories/aiInfra/index.ts` spreads the whole card), so new card fields flow to the client automatically.
|
||||
|
||||
## Sourcing rules for knowledgeCutoff
|
||||
|
||||
Accept only:
|
||||
|
||||
- Vendor official docs (platform.openai.com / developers.openai.com, docs.x.ai, ai.google.dev, docs.anthropic.com / platform.claude.com)
|
||||
- Official Hugging Face org model cards (huggingface.co/meta-llama/..., etc.)
|
||||
- Official tech reports / system cards / launch blog posts
|
||||
|
||||
Reject:
|
||||
|
||||
- **Third-party aggregator sites** (aiknowledgecutoff.com and similar) — proven to copy one model's value across a whole family. A Cohere sweep once claimed `2024-06` for four distinct base models; none of the cited Cohere pages said that, and the only cutoff Cohere actually publishes is Feb 2023 for the 08-2024 Command R/R+ refresh.
|
||||
- **AWS Bedrock model cards as sole source** — proven to conflate launch date with knowledge cutoff (DeepSeek R1's card lists both as "Jan 2025"). If Bedrock is the only place a value appears, leave the field empty.
|
||||
- Inference from `releasedAt` — a release date is not a cutoff.
|
||||
|
||||
Variant inheritance: dated snapshots (`-2024-08-06`), speed/price tiers of the same checkpoint, quantizations (`-fp8`, `-awq`), context-length variants (`-32k`), ollama `:NNb` tags, and cloud-prefixed ids (`anthropic.`/`us.`/`global.` Bedrock ids) share their base model's cutoff. **Distills do not inherit** from teacher or base — use the distill's own published value or leave empty. **Sizes within one generation can genuinely differ**: Llama 3 8B is Mar 2023 while 70B is Dec 2023 (per Meta's own card) — don't "fix" that to one family-wide value.
|
||||
|
||||
Vendors that publish no cutoffs (leave empty, don't chase): Qwen, DeepSeek, GLM/Zhipu, ERNIE, Doubao, Hunyuan, SenseNova, Spark, MiniMax, StepFun, Yi (mostly), Moonshot.
|
||||
|
||||
Known per-vendor footguns:
|
||||
|
||||
- **Anthropic**: Opus 4.6 reliable cutoff is `2025-05`, Sonnet 4.6 is `2025-08` — easy to swap. Claude 3.7 is `2024-10` (system card: trained through Nov 2024, knowledge cutoff end of Oct 2024). Cite system cards / the models overview, not the Help Center article (a living page that drops retired models — citation rot).
|
||||
- **xAI**: docs.x.ai has one blanket sentence covering grok-3/grok-4; mini variants are not named there. Grok 4.20/4.3 have no official cutoff anywhere.
|
||||
- **OpenAI**: per-model docs pages (developers.openai.com/api/docs/models/<id>) state cutoffs explicitly, including snapshot differences (gpt-4-1106-preview `2023-04` vs gpt-4-0125-preview `2023-12`).
|
||||
|
||||
## family/generation derivation
|
||||
|
||||
Rule-based, no research needed: `scripts/derive-family.ts` holds the per-family regex rules. Traps already encoded there — keep them when extending:
|
||||
|
||||
- Date suffixes are not versions: `claude-sonnet-4-20250514` is generation `claude-4`, not `claude-4.2`.
|
||||
- Size suffixes are not versions: `llama-3-8b` → `llama-3` (not `llama-3.8`); `gemma-7b-it` is **gemma-1** (not gemma-7).
|
||||
- Vendor spelling variants: `qwen2p5` = qwen2.5, `llama-v3p1` = llama-3.1, ollama `:NNb` tags, Bedrock `us.`/`global.`/`anthropic.` prefixes.
|
||||
- `claude-X.0` normalizes to `claude-X`.
|
||||
- Fable/Mythos-class ids (`claude-fable-5`) don't match the opus/sonnet/haiku regex — they are the Mythos class — `family: 'claude-mythos'`, `generation: 'mythos-5'` (set manually; the launch page calls Fable 5 "the generally available Mythos-class model").
|
||||
|
||||
## Repo-wide sweep workflow
|
||||
|
||||
1. **Extract ids**: `bun .agents/skills/model-bank-metadata/scripts/extract-model-ids.ts` → unique normalized chat-model ids (normalization = last path segment, lowercased). Non-chat types (image/video/embedding/tts) have no knowledge cutoff — skip them.
|
||||
2. **Research (multi-agent)**: chunk ids by family (≤50 per chunk) and fan out one research agent per chunk (Workflow tool), each returning `{id, cutoff, source}` with the sourcing rules above baked into the prompt, **plus** one adversarial verify agent per chunk that re-fetches cited sources and refutes unsupported claims. The verify pass is load-bearing: it caught the Cohere aggregator copy-paste and the AWS launch-date conflation.
|
||||
3. **Policy filter**: before applying, drop entries whose only source is a rejected category (check the returned `sources` map — e.g. drop everything sourced to aws.amazon.com).
|
||||
4. **Apply**: `bun scripts/apply-cutoffs.ts <map.json>` and `bun scripts/apply-family.ts <map.json>` (run from repo root). Both are idempotent codemods keyed on normalized id — aggregator providers get the same values automatically; entries that already have the field are skipped. They rely on the uniform prettier formatting of the data files (entries start ` {` / end ` },`, fields at 4-space indent).
|
||||
5. **Verify**: `cd packages/model-bank && bunx vitest run src/aiModels/__tests__/index.test.ts && bunx tsc --noEmit`.
|
||||
|
||||
## Maintenance rules
|
||||
|
||||
- **New model PRs** should fill all three fields inline, citing the official source in the PR body (see the Anthropic entries in `anthropic.ts` for reference values).
|
||||
- **After resolving merge conflicts** in model-bank data files, sanity-check that metadata didn't vanish: `git grep -c knowledgeCutoff -- 'packages/model-bank/src/aiModels/*.ts'` before vs after. A three-way stack of model PRs once silently dropped all 10 Anthropic cutoffs during conflict resolution.
|
||||
- Dirty ids exist in aggregator data (a sambanova id once carried a trailing tab). The codemods match ids verbatim — if a map key won't apply, check for invisible characters before assuming the model is missing.
|
||||
@@ -1,73 +0,0 @@
|
||||
/**
|
||||
* One-off codemod: apply a canonical { normalizedModelId: 'YYYY-MM' } map onto
|
||||
* packages/model-bank/src/aiModels/*.ts, inserting `knowledgeCutoff` after the
|
||||
* `id:` line of every chat-model entry that matches and doesn't already have one.
|
||||
*
|
||||
* Relies on the uniform prettier formatting of these files:
|
||||
* - each model entry starts with ` {` and ends with ` },` (2-space indent)
|
||||
* - fields are at 4-space indent: ` id: '...'`, ` type: 'chat'`
|
||||
*
|
||||
* Usage: bun /tmp/apply-cutoffs.ts /tmp/cutoff-map.json
|
||||
*/
|
||||
import { readdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
const mapPath = process.argv[2];
|
||||
if (!mapPath) throw new Error('usage: bun apply-cutoffs.ts <map.json>');
|
||||
const map: Record<string, string> = JSON.parse(readFileSync(mapPath, 'utf8'));
|
||||
|
||||
const dir = 'packages/model-bank/src/aiModels';
|
||||
const normalize = (id: string) => id.split('/').pop()!.toLowerCase();
|
||||
|
||||
let touchedFiles = 0;
|
||||
let inserted = 0;
|
||||
const matchedIds = new Set<string>();
|
||||
|
||||
for (const file of readdirSync(dir).filter((f) => f.endsWith('.ts'))) {
|
||||
const path = join(dir, file);
|
||||
const lines = readFileSync(path, 'utf8').split('\n');
|
||||
const out: string[] = [];
|
||||
let changed = false;
|
||||
|
||||
let i = 0;
|
||||
while (i < lines.length) {
|
||||
if (lines[i] !== ' {') {
|
||||
out.push(lines[i]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
// collect one model entry block
|
||||
const start = i;
|
||||
let end = i;
|
||||
while (end < lines.length && lines[end] !== ' },') end++;
|
||||
const block = lines.slice(start, end + 1);
|
||||
|
||||
const idLineIdx = block.findIndex((l) => /^ {4}id: '/.test(l));
|
||||
const isChat = block.some((l) => /^ {4}type: 'chat',?$/.test(l));
|
||||
const hasCutoff = block.some((l) => /^ {4}knowledgeCutoff:/.test(l));
|
||||
|
||||
if (idLineIdx >= 0 && isChat && !hasCutoff) {
|
||||
const rawId = block[idLineIdx].match(/^ {4}id: '(.+)',$/)?.[1];
|
||||
const norm = rawId ? normalize(rawId) : undefined;
|
||||
const cutoff = norm ? map[norm] : undefined;
|
||||
if (cutoff && /^\d{4}(?:-\d{2})?$/.test(cutoff)) {
|
||||
block.splice(idLineIdx + 1, 0, ` knowledgeCutoff: '${cutoff}',`);
|
||||
inserted++;
|
||||
changed = true;
|
||||
matchedIds.add(norm!);
|
||||
}
|
||||
}
|
||||
out.push(...block);
|
||||
i = end + 1;
|
||||
}
|
||||
|
||||
if (changed) {
|
||||
writeFileSync(path, out.join('\n'));
|
||||
touchedFiles++;
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`inserted ${inserted} knowledgeCutoff fields across ${touchedFiles} files`);
|
||||
console.log(`map ids used: ${matchedIds.size}/${Object.keys(map).length}`);
|
||||
const unused = Object.keys(map).filter((k) => !matchedIds.has(k));
|
||||
if (unused.length) console.log('unused map keys (first 20):', unused.slice(0, 20));
|
||||
@@ -1,49 +0,0 @@
|
||||
import { readdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
const map: Record<string, { family: string; generation?: string }> = JSON.parse(
|
||||
readFileSync('/tmp/family-map.json', 'utf8'),
|
||||
);
|
||||
const dir = 'packages/model-bank/src/aiModels';
|
||||
const normalize = (id: string) => id.split('/').pop()!.toLowerCase();
|
||||
|
||||
let inserted = 0;
|
||||
let touchedFiles = 0;
|
||||
for (const file of readdirSync(dir).filter((f) => f.endsWith('.ts'))) {
|
||||
const path = join(dir, file);
|
||||
const lines = readFileSync(path, 'utf8').split('\n');
|
||||
const out: string[] = [];
|
||||
let changed = false;
|
||||
let i = 0;
|
||||
while (i < lines.length) {
|
||||
if (lines[i] !== ' {') {
|
||||
out.push(lines[i]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
let end = i;
|
||||
while (end < lines.length && lines[end] !== ' },') end++;
|
||||
const block = lines.slice(i, end + 1);
|
||||
const idLineIdx = block.findIndex((l) => /^ {4}id: '/.test(l));
|
||||
const isChat = block.some((l) => /^ {4}type: 'chat',?$/.test(l));
|
||||
const hasFamily = block.some((l) => /^ {4}family:/.test(l));
|
||||
if (idLineIdx >= 0 && isChat && !hasFamily) {
|
||||
const rawId = block[idLineIdx].match(/^ {4}id: '(.+)',$/)?.[1];
|
||||
const r = rawId ? map[normalize(rawId)] : undefined;
|
||||
if (r) {
|
||||
const add = [` family: '${r.family}',`];
|
||||
if (r.generation) add.push(` generation: '${r.generation}',`);
|
||||
block.splice(idLineIdx, 0, ...add);
|
||||
inserted++;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
out.push(...block);
|
||||
i = end + 1;
|
||||
}
|
||||
if (changed) {
|
||||
writeFileSync(path, out.join('\n'));
|
||||
touchedFiles++;
|
||||
}
|
||||
}
|
||||
console.log(`annotated ${inserted} model entries across ${touchedFiles} files`);
|
||||
@@ -1,237 +0,0 @@
|
||||
/* eslint-disable regexp/no-unused-capturing-group */
|
||||
/**
|
||||
* Rule-based derivation of { family, generation } from normalized model ids.
|
||||
* Principle: only fill what is confidently derivable; otherwise omit.
|
||||
*
|
||||
* Usage: bun /tmp/derive-family.ts # print distinct pairs for review
|
||||
* bun /tmp/derive-family.ts --emit # write /tmp/family-map.json
|
||||
*/
|
||||
import { readFileSync, writeFileSync } from 'node:fs';
|
||||
|
||||
const ids: string[] = JSON.parse(readFileSync('/tmp/model-ids.json', 'utf8'));
|
||||
|
||||
type R = { family: string; generation?: string };
|
||||
|
||||
const derive = (id: string): R | undefined => {
|
||||
// strip cloud/bedrock prefixes for matching
|
||||
const m = id.replace(/^(us\.|global\.|eu\.|apac\.)?(anthropic\.|meta\.|cohere\.|azure-)/, '');
|
||||
|
||||
// ---- anthropic ----
|
||||
if (m.startsWith('claude')) {
|
||||
// family = product-line tier (claude-opus/sonnet/haiku/instant); bare claude-2.x has no tier
|
||||
const tier = m.match(/(opus|sonnet|haiku|instant)/)?.[1];
|
||||
const family = tier ? `claude-${tier}` : 'claude';
|
||||
let g = m.match(/^claude-(?:opus|sonnet|haiku)-(\d)[.-](\d)(?!\d)/); // claude-opus-4-8 / claude-haiku-4.5
|
||||
if (g) return { family, generation: `claude-${g[1]}.${g[2]}` };
|
||||
g = m.match(/^claude-(?:opus|sonnet|haiku)-(\d)(?!\d)/); // claude-opus-4
|
||||
if (g) return { family, generation: `claude-${g[1]}` };
|
||||
g = m.match(/^claude-(\d)[.-](\d)(?!\d)/); // claude-3-5-haiku / claude-3.7-sonnet / claude-2.1
|
||||
if (g) return { family, generation: g[2] === '0' ? `claude-${g[1]}` : `claude-${g[1]}.${g[2]}` };
|
||||
g = m.match(/^claude-(\d)(?!\d)/); // claude-3-haiku
|
||||
if (g) return { family, generation: `claude-${g[1]}` };
|
||||
if (m.startsWith('claude-instant')) return { family: 'claude-instant' };
|
||||
if (/^claude-v?2/.test(m)) return { family: 'claude', generation: 'claude-2' };
|
||||
return { family };
|
||||
}
|
||||
|
||||
// ---- openai ----
|
||||
if (/^(gpt-oss|gpt_oss)/.test(m) || m.startsWith('gpt-oss:'))
|
||||
return { family: 'gpt-oss', generation: 'gpt-oss' };
|
||||
if (/^(chatgpt-4o|gpt-4o)/.test(m)) return { family: 'gpt', generation: 'gpt-4o' };
|
||||
if (/^gpt-(3\.5|35)/.test(m)) return { family: 'gpt', generation: 'gpt-3.5' };
|
||||
if (m.startsWith('gpt-audio')) return { family: 'gpt', generation: 'gpt-audio' };
|
||||
{
|
||||
const g = m.match(/^gpt-(\d)\.(\d)/); // gpt-4.1 / gpt-5.2
|
||||
if (g) return { family: 'gpt', generation: `gpt-${g[1]}.${g[2]}` };
|
||||
const g2 = m.match(/^gpt-(\d)(?!\d)/); // gpt-4 / gpt-5
|
||||
if (g2) return { family: 'gpt', generation: `gpt-${g2[1]}` };
|
||||
}
|
||||
{
|
||||
const g = m.match(/^o([134])(-|$)/); // o1 / o3 / o4
|
||||
if (g) return { family: 'o-series', generation: `o${g[1]}` };
|
||||
}
|
||||
if (/^(codex|computer-use-preview)/.test(m)) return { family: 'gpt' };
|
||||
|
||||
// ---- google ----
|
||||
{
|
||||
const g = m.match(/^gemini-(\d+(?:\.\d+)?)/);
|
||||
if (g) return { family: 'gemini', generation: `gemini-${g[1]}` };
|
||||
if (/^gemini-(pro|flash)/.test(m)) return { family: 'gemini' }; // rolling aliases
|
||||
if (m.startsWith('gemma')) {
|
||||
if (/^gemma-?\db/.test(m)) return { family: 'gemma', generation: 'gemma-1' };
|
||||
const v = m.match(/^gemma-?(\d)(?!b)/);
|
||||
return { family: 'gemma', generation: v ? `gemma-${v[1]}` : undefined };
|
||||
}
|
||||
if (/^(codegemma|learnlm|palm)/.test(m)) return { family: m.match(/^[a-z]+/)![0] };
|
||||
}
|
||||
|
||||
// ---- qwen ----
|
||||
if (m.startsWith('qwq')) return { family: 'qwen', generation: 'qwq' };
|
||||
if (m.startsWith('qvq')) return { family: 'qwen', generation: 'qvq' };
|
||||
if (m.startsWith('codeqwen')) return { family: 'qwen' };
|
||||
if (m.startsWith('qwen')) {
|
||||
const g =
|
||||
m.match(/^qwen-?([123](?:\.\d+)?)(?![0-9b])/) || // qwen3.5-plus / qwen-3-14b / qwen2-7b / qwen1.5
|
||||
m.match(/^qwen([23](?:\.\d+)?):/) || // qwen2.5:72b
|
||||
m.match(/^qwen([23])p(\d)/); // qwen2p5 -> handled below
|
||||
if (/^qwen(\d)p(\d)/.test(m)) {
|
||||
const p = m.match(/^qwen(\d)p(\d)/)!;
|
||||
return { family: 'qwen', generation: `qwen${p[1]}.${p[2]}` };
|
||||
}
|
||||
if (g) return { family: 'qwen', generation: `qwen${g[1]}` };
|
||||
return { family: 'qwen' }; // qwen-max/plus/turbo/vl rolling aliases
|
||||
}
|
||||
|
||||
// ---- deepseek ----
|
||||
if (/^(deepseek|azure-deepseek|pro-deepseek)/.test(m) || m.startsWith('deepseek_')) {
|
||||
const s = m.replace(/^pro-/, '').replaceAll('_', '-');
|
||||
if (s.startsWith('deepseek-r1-distill'))
|
||||
return { family: 'deepseek', generation: 'deepseek-r1-distill' };
|
||||
if (s.startsWith('deepseek-r1')) return { family: 'deepseek', generation: 'deepseek-r1' };
|
||||
const g = s.match(/^deepseek-(?:chat-)?v(\d(?:\.\d)?)/);
|
||||
if (g) return { family: 'deepseek', generation: `deepseek-v${g[1]}` };
|
||||
if (/^deepseek-(coder-v2|coder)/.test(s))
|
||||
return { family: 'deepseek', generation: 'deepseek-coder' };
|
||||
return { family: 'deepseek' }; // deepseek-chat / reasoner rolling aliases
|
||||
}
|
||||
|
||||
// ---- meta llama ----
|
||||
if (m.startsWith('codellama')) return { family: 'llama', generation: 'codellama' };
|
||||
if (/^(meta-)?llama|^l3(\d)?-|^llava/.test(m)) {
|
||||
if (m.startsWith('llava')) return { family: 'llava' };
|
||||
const s = m.replace(/^meta-/, '');
|
||||
const g =
|
||||
s.match(/^llama-?([234])(?:[.-](\d))?(?![0-9b])/) || // llama-3.1 / llama3.3 / llama-4
|
||||
s.match(/^llama-?v([234])p?(\d)?/) || // llama-v3p1
|
||||
s.match(/^llama([234])[.:-](\d)?/);
|
||||
if (g) {
|
||||
const gen = g[2] ? `llama-${g[1]}.${g[2]}` : `llama-${g[1]}`;
|
||||
return { family: 'llama', generation: gen };
|
||||
}
|
||||
if (m.startsWith('l3-')) return { family: 'llama', generation: 'llama-3' };
|
||||
if (m.startsWith('l31-')) return { family: 'llama', generation: 'llama-3.1' };
|
||||
return { family: 'llama' };
|
||||
}
|
||||
|
||||
// ---- zhipu ----
|
||||
if (/^(zai-)?glm/.test(m)) {
|
||||
const s = m.replace(/^zai-/, '');
|
||||
if (s.startsWith('glm-z1')) return { family: 'glm', generation: 'glm-z1' };
|
||||
if (s.startsWith('glm-zero')) return { family: 'glm', generation: 'glm-zero' };
|
||||
const g = s.match(/^glm-(\d(?:\.\d)?)/);
|
||||
if (g) return { family: 'glm', generation: `glm-${g[1]}` };
|
||||
return { family: 'glm' };
|
||||
}
|
||||
if (/^(charglm|codegeex|emohaa)/.test(m)) return { family: m.match(/^[a-z]+/)![0] };
|
||||
|
||||
// ---- mistral ----
|
||||
if (
|
||||
/^(open-)?(mistral|mixtral|ministral|codestral|devstral|magistral|pixtral|mathstral|labs-devstral|labs-leanstral|open-codestral)/.test(
|
||||
m,
|
||||
)
|
||||
) {
|
||||
const fam = m.replace(/^(open-|labs-)/, '').match(/^[a-z]+/)![0];
|
||||
return { family: fam };
|
||||
}
|
||||
|
||||
// ---- xai ----
|
||||
if (m.startsWith('grok')) {
|
||||
const g = m.match(/^grok-(\d(?:\.\d+)?)/);
|
||||
return { family: 'grok', generation: g ? `grok-${g[1]}` : undefined };
|
||||
}
|
||||
|
||||
// ---- moonshot ----
|
||||
if (m.startsWith('kimi')) {
|
||||
const g = m.match(/^kimi-k(\d(?:\.\d)?)/);
|
||||
return { family: 'kimi', generation: g ? `kimi-k${g[1]}` : undefined };
|
||||
}
|
||||
if (m.startsWith('moonshot-kimi-k2')) return { family: 'kimi', generation: 'kimi-k2' };
|
||||
if (m.startsWith('moonshot-v1')) return { family: 'kimi', generation: 'moonshot-v1' };
|
||||
|
||||
// ---- minimax ----
|
||||
if (m.startsWith('minimax')) {
|
||||
if (m.startsWith('minimax-text')) return { family: 'minimax', generation: 'minimax-text-01' };
|
||||
const g = m.match(/^minimax-m(\d(?:\.\d)?)/);
|
||||
return { family: 'minimax', generation: g ? `minimax-m${g[1]}` : undefined };
|
||||
}
|
||||
if (m.startsWith('abab')) return { family: 'minimax', generation: 'abab' };
|
||||
|
||||
// ---- baidu ----
|
||||
if (m.startsWith('ernie')) {
|
||||
if (m.startsWith('ernie-x1')) return { family: 'ernie', generation: 'ernie-x1' };
|
||||
const g = m.match(/^ernie-(\d\.\d)/);
|
||||
return { family: 'ernie', generation: g ? `ernie-${g[1]}` : undefined };
|
||||
}
|
||||
if (m.startsWith('qianfan')) return { family: 'qianfan' };
|
||||
|
||||
// ---- bytedance ----
|
||||
if (m.startsWith('doubao')) {
|
||||
const g = m.match(/^doubao-seed-(\d[.-]\d|\d)/) || m.match(/^doubao-(\d\.\d)/);
|
||||
return { family: 'doubao', generation: g ? `doubao-${g[1].replace('-', '.')}` : undefined };
|
||||
}
|
||||
if (/^(seed-oss|skylark)/.test(m)) return { family: m.startsWith('seed') ? 'doubao' : 'skylark' };
|
||||
|
||||
// ---- tencent ----
|
||||
if (m.startsWith('hunyuan')) {
|
||||
const g = m.match(/^hunyuan-(\d\.\d)/);
|
||||
return { family: 'hunyuan', generation: g ? `hunyuan-${g[1]}` : undefined };
|
||||
}
|
||||
if (m.startsWith('hy3')) return { family: 'hunyuan', generation: 'hunyuan-3' };
|
||||
|
||||
// ---- others (family only / simple version) ----
|
||||
if (m.startsWith('yi-')) return { family: 'yi' };
|
||||
if (/^(command|c4ai-command)/.test(m)) return { family: 'command' };
|
||||
if (/^(aya|c4ai-aya)/.test(m)) return { family: 'aya' };
|
||||
if (/^phi-?(\d)?/.test(m) && m.startsWith('phi')) {
|
||||
const g = m.match(/^phi-?(\d(?:\.\d)?)/);
|
||||
return { family: 'phi', generation: g ? `phi-${g[1]}` : undefined };
|
||||
}
|
||||
if (m.startsWith('wizardlm')) return { family: 'wizardlm' };
|
||||
if (m.startsWith('step-')) {
|
||||
const g = m.match(/^step-(?:r1|(\d(?:\.\d)?))/);
|
||||
return { family: 'step', generation: g?.[1] ? `step-${g[1]}` : undefined };
|
||||
}
|
||||
if (/^(internlm|intern-)/.test(m)) return { family: 'intern' };
|
||||
if (m.startsWith('internvl')) return { family: 'internvl' };
|
||||
if (m.startsWith('baichuan')) {
|
||||
const g = m.match(/^baichuan-?(m?\d)/);
|
||||
return { family: 'baichuan', generation: g ? `baichuan-${g[1]}` : undefined };
|
||||
}
|
||||
if (/^(sensechat|sensenova)/.test(m)) return { family: 'sensenova' };
|
||||
if (/^(spark|generalv|4\.0ultra)/.test(m)) return { family: 'spark' };
|
||||
if (/^(360gpt|360zhinao)/.test(m)) return { family: '360zhinao' };
|
||||
if (/^(jamba|ai21-jamba)/.test(m)) return { family: 'jamba' };
|
||||
if (m.startsWith('sonar')) return { family: 'sonar' };
|
||||
if (/^(nova-lite|nova-micro|nova-pro)/.test(m)) return { family: 'nova' };
|
||||
if (/^(ling|ring)-/.test(m)) return { family: m.match(/^[a-z]+/)![0] };
|
||||
if (m.startsWith('longcat')) return { family: 'longcat' };
|
||||
if (m.startsWith('mimo')) return { family: 'mimo' };
|
||||
if (m.startsWith('taichu')) return { family: 'taichu' };
|
||||
if (/^(hermes|nous-hermes)/.test(m)) return { family: 'hermes' };
|
||||
if (m.startsWith('solar')) return { family: 'solar' };
|
||||
if (m.startsWith('kat-coder')) return { family: 'kat-coder' };
|
||||
if (m.startsWith('dbrx')) return { family: 'dbrx' };
|
||||
if (m.startsWith('morph')) return { family: 'morph' };
|
||||
|
||||
return undefined;
|
||||
};
|
||||
|
||||
const map: Record<string, R> = {};
|
||||
const pairs = new Map<string, number>();
|
||||
let derived = 0;
|
||||
for (const id of ids) {
|
||||
const r = derive(id);
|
||||
if (!r) continue;
|
||||
derived++;
|
||||
map[id] = r;
|
||||
const key = `${r.family} :: ${r.generation ?? '—'}`;
|
||||
pairs.set(key, (pairs.get(key) || 0) + 1);
|
||||
}
|
||||
|
||||
console.log(`derived ${derived}/${ids.length}`);
|
||||
for (const [k, n] of [...pairs.entries()].sort()) console.log(String(n).padStart(4), k);
|
||||
|
||||
if (process.argv.includes('--emit')) {
|
||||
writeFileSync('/tmp/family-map.json', JSON.stringify(map, null, 1));
|
||||
console.log('\nwritten /tmp/family-map.json');
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user