diff --git a/locales/en-US/chat.json b/locales/en-US/chat.json index 7621e6c9cf..b4596ca86e 100644 --- a/locales/en-US/chat.json +++ b/locales/en-US/chat.json @@ -771,6 +771,12 @@ "thread.closeSubagentThread": "Hide Detail", "thread.divider": "Subtopic", "thread.openSubagentThread": "View Detail", + "thread.subagentMetrics.modelLabel": "Model", + "thread.subagentMetrics.tokens": "{{count}} tokens", + "thread.subagentMetrics.toolCalls_one": "{{count}} tool call", + "thread.subagentMetrics.toolCalls_other": "{{count}} tool calls", + "thread.subagentMetrics.toolsShort_one": "{{count}} tool", + "thread.subagentMetrics.toolsShort_other": "{{count}} tools", "thread.subagentReadOnlyHint": "SubAgent conversations are read-only — execution is driven by the parent agent.", "thread.threadMessageCount": "{{messageCount}} messages", "thread.title": "Subtopic", diff --git a/locales/zh-CN/chat.json b/locales/zh-CN/chat.json index b7837746ec..79585af88a 100644 --- a/locales/zh-CN/chat.json +++ b/locales/zh-CN/chat.json @@ -771,6 +771,12 @@ "thread.closeSubagentThread": "隐藏详情", "thread.divider": "子话题", "thread.openSubagentThread": "查看详情", + "thread.subagentMetrics.modelLabel": "模型", + "thread.subagentMetrics.tokens": "{{count}} tokens", + "thread.subagentMetrics.toolCalls_one": "{{count}} 次工具调用", + "thread.subagentMetrics.toolCalls_other": "{{count}} 次工具调用", + "thread.subagentMetrics.toolsShort_one": "{{count}} tool", + "thread.subagentMetrics.toolsShort_other": "{{count}} tools", "thread.subagentReadOnlyHint": "SubAgent 对话仅可查看,由父智能体驱动执行", "thread.threadMessageCount": "{{messageCount}} 条消息", "thread.title": "子话题", diff --git a/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx b/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx index dea8282518..2e37425e77 100644 --- a/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx +++ b/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx @@ -2,11 +2,16 @@ import { inspectorTextStyles, shinyTextStyles } from '@lobechat/shared-tool-ui/styles'; import type { BuiltinInspectorProps } from '@lobechat/types'; +import { Tooltip } from '@lobehub/ui'; import { GroupBotIcon } from '@lobehub/ui/icons'; import { createStaticStyles, cx } from 'antd-style'; import { memo } from 'react'; import { useTranslation } from 'react-i18next'; +import { useChatStore } from '@/store/chat'; +import { threadSelectors } from '@/store/chat/selectors'; +import { aggregateSubagentMetrics } from '@/utils/subagentMetrics'; + import { type AgentArgs, ClaudeCodeApiName } from '../../types'; import { resolveCCSubagentType } from '../subagentTypes'; @@ -44,18 +49,89 @@ const styles = createStaticStyles(({ css, cssVar }) => ({ flex-shrink: 0; color: ${cssVar.colorText}; `, + metrics: css` + display: inline-flex; + flex-shrink: 0; + gap: 6px; + align-items: center; + + margin-inline-start: 8px; + + font-size: 12px; + color: ${cssVar.colorTextDescription}; + `, + metricsDot: css` + color: ${cssVar.colorTextQuaternary}; + `, })); +const formatTokens = (n: number): string => { + if (n < 1000) return String(n); + if (n < 10_000) return `${(n / 1000).toFixed(1)}k`; + if (n < 1_000_000) return `${Math.round(n / 1000)}k`; + return `${(n / 1_000_000).toFixed(1)}M`; +}; + +interface SubagentMetrics { + hasAny: boolean; + model?: string; + toolCalls: number; + totalTokens: number; +} + +/** + * Subagent metrics for the chip, derived from the child messages via the shared + * `aggregateSubagentMetrics` (tool count, SUM of every turn's + * `usage.totalTokens`, pinned model). + * + * Two data sources, ONE rule: + * - **Live** (streaming / still hydrated): aggregate the in-memory child + * messages from `dbMessagesMap`. + * - **Cold-load**: the child messages aren't hydrated, so fall back to the + * `thread.metadata.*` totals the server derives in `threadModel.queryByTopicId` + * — which runs the SAME aggregation in SQL over the SAME rows, so the two + * paths can't diverge. + */ +const useSubagentMetrics = (toolCallId: string | undefined): SubagentMetrics | null => + useChatStore((s) => { + if (!toolCallId) return null; + const thread = (threadSelectors.currentTopicThreads(s) ?? []).find( + (t) => t.metadata?.sourceToolCallId === toolCallId, + ); + if (!thread) return null; + + const live = aggregateSubagentMetrics(threadSelectors.getThreadDbMessages(thread.id)(s)); + + // Live values win when the child messages are present; otherwise read the + // server-derived rollup off `thread.metadata`. + const toolCalls = live.toolCalls || thread.metadata?.totalToolCalls || 0; + const totalTokens = live.totalTokens || thread.metadata?.totalTokens || 0; + const model = live.model || thread.metadata?.model; + + return { + hasAny: toolCalls > 0 || totalTokens > 0 || !!model, + model, + toolCalls, + totalTokens, + }; + }); + /** * CC's subagent-spawn tool. `subagent_type` ("Explore", "general-purpose", ...) * is the variant; we prefix it with "Agent:" so the row visibly reads as a * subagent dispatch rather than a regular tool — the icon alone isn't enough * signal. `description` is the 3-5 word title the model writes and goes in the * chip; the full `prompt` is too long for a collapsed header. + * + * The trailing metrics segment (tool count · tokens) is sourced from the + * subagent's child thread when one exists. It updates live during streaming so + * users get a progress sense, and is hydrated from `thread.metadata` after + * finalize. Model name lives in the tooltip to keep the inline row compact. */ export const AgentInspector = memo>( - ({ args, partialArgs, isArgumentsStreaming, isLoading }) => { + ({ args, partialArgs, isArgumentsStreaming, isLoading, toolCallId }) => { const { t } = useTranslation('plugin'); + const { t: tChat } = useTranslation('chat'); const fallbackLabel = t(ClaudeCodeApiName.Agent as any); const source = args ?? partialArgs; @@ -68,6 +144,42 @@ export const AgentInspector = memo>( const Icon = resolved?.icon ?? GroupBotIcon; const labelText = resolved?.label ?? fallbackLabel; + const metrics = useSubagentMetrics(toolCallId); + + const tooltipLines: string[] = []; + if (metrics?.model) { + tooltipLines.push(`${tChat('thread.subagentMetrics.modelLabel')}: ${metrics.model}`); + } + if (metrics && metrics.toolCalls > 0) { + tooltipLines.push( + tChat('thread.subagentMetrics.toolCalls', { count: metrics.toolCalls }) as string, + ); + } + if (metrics && metrics.totalTokens > 0) { + tooltipLines.push( + tChat('thread.subagentMetrics.tokens', { + count: metrics.totalTokens.toLocaleString('en-US'), + }) as string, + ); + } + + const metricsNode = + metrics?.hasAny && (metrics.toolCalls > 0 || metrics.totalTokens > 0) ? ( + 0 ? tooltipLines.join(' · ') : undefined}> + + {metrics.toolCalls > 0 && ( + + {tChat('thread.subagentMetrics.toolsShort', { count: metrics.toolCalls })} + + )} + {metrics.toolCalls > 0 && metrics.totalTokens > 0 && ( + · + )} + {metrics.totalTokens > 0 && {formatTokens(metrics.totalTokens)}} + + + ) : null; + return (
Agent: @@ -78,6 +190,7 @@ export const AgentInspector = memo>( {description} )} + {metricsNode}
); }, diff --git a/packages/database/src/models/__tests__/thread.test.ts b/packages/database/src/models/__tests__/thread.test.ts index 8f6834311d..3fcaa1d62f 100644 --- a/packages/database/src/models/__tests__/thread.test.ts +++ b/packages/database/src/models/__tests__/thread.test.ts @@ -3,7 +3,7 @@ import { eq } from 'drizzle-orm'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { getTestDB } from '../../core/getTestDB'; -import { sessions, threads, topics, users } from '../../schemas'; +import { messages, sessions, threads, topics, users } from '../../schemas'; import type { LobeChatDatabase } from '../../type'; import { ThreadModel } from '../thread'; @@ -182,6 +182,68 @@ describe('ThreadModel', () => { expect(result).toHaveLength(0); }); + + it('derives subagent metrics (SUM tokens, COUNT tools, model) from child messages', async () => { + await serverDB.transaction(async (tx) => { + await tx.insert(threads).values({ + id: 'sub-thread', + metadata: { sourceToolCallId: 'tc-1' }, + status: ThreadStatus.Active, + topicId, + type: ThreadType.Standalone, + userId, + }); + await tx.insert(messages).values([ + // two assistant turns → tokens SUM to 1000 + 1800 = 2800 + { + id: 'm-a1', + model: 'claude-opus-4-8', + role: 'assistant', + threadId: 'sub-thread', + topicId, + usage: { totalTokens: 1000 }, + userId, + }, + { id: 'm-t1', role: 'tool', threadId: 'sub-thread', topicId, userId }, + // legacy row: usage only under metadata.usage (no promoted column) + { + id: 'm-a2', + metadata: { usage: { totalTokens: 1800 } }, + role: 'assistant', + threadId: 'sub-thread', + topicId, + userId, + }, + { id: 'm-t2', role: 'tool', threadId: 'sub-thread', topicId, userId }, + ]); + }); + + const [thread] = await threadModel.queryByTopicId(topicId); + + expect(thread.id).toBe('sub-thread'); + expect(thread.metadata?.totalTokens).toBe(2800); + expect(thread.metadata?.totalToolCalls).toBe(2); + expect(thread.metadata?.model).toBe('claude-opus-4-8'); + // create-time metadata preserved + expect(thread.metadata?.sourceToolCallId).toBe('tc-1'); + }); + + it('omits derived metrics for a thread with no child messages', async () => { + await serverDB.insert(threads).values({ + id: 'empty-thread', + status: ThreadStatus.Active, + topicId, + type: ThreadType.Standalone, + userId, + }); + + const [thread] = await threadModel.queryByTopicId(topicId); + + expect(thread.id).toBe('empty-thread'); + expect(thread.metadata?.totalTokens).toBeUndefined(); + expect(thread.metadata?.totalToolCalls).toBeUndefined(); + expect(thread.metadata?.model).toBeUndefined(); + }); }); describe('findById', () => { diff --git a/packages/database/src/models/thread.ts b/packages/database/src/models/thread.ts index 8ea923c0a8..64e7dfa1be 100644 --- a/packages/database/src/models/thread.ts +++ b/packages/database/src/models/thread.ts @@ -1,11 +1,54 @@ import type { CreateThreadParams } from '@lobechat/types'; import { ThreadStatus } from '@lobechat/types'; -import { and, desc, eq } from 'drizzle-orm'; +import { and, desc, eq, sql } from 'drizzle-orm'; import type { ThreadItem } from '../schemas'; -import { threads } from '../schemas'; +import { messages, threads } from '../schemas'; import type { LobeChatDatabase } from '../type'; +/** + * Per-thread subagent metrics, derived from the child messages at read time + * (single source of truth = the messages, not a denormalized write). Mirrors + * `aggregateSubagentMetrics` in the app: SUM of assistant `usage.totalTokens` + * (prefer the promoted `usage` column, fall back to legacy `metadata.usage`), + * COUNT of `role='tool'`, and a pinned model. Folded onto `metadata.*` so the + * subagent inspector chip can read it without hydrating the child messages. + */ +const subagentMetricColumns = { + _model: sql< + string | null + >`MAX(CASE WHEN ${messages.role} = 'assistant' THEN ${messages.model} END)`.as('_sa_model'), + _totalToolCalls: sql`COUNT(CASE WHEN ${messages.role} = 'tool' THEN 1 END)`.as( + '_sa_tool_calls', + ), + _totalTokens: + sql`COALESCE(SUM(CASE WHEN ${messages.role} = 'assistant' THEN (COALESCE(${messages.usage}, ${messages.metadata} -> 'usage') ->> 'totalTokens')::numeric END), 0)`.as( + '_sa_total_tokens', + ), +}; + +type ThreadMetricRow = ThreadItem & { + _model: string | null; + _totalToolCalls: number | string; + _totalTokens: number | string; +}; + +/** Fold the SQL-derived metric columns onto `metadata` and drop the temp keys. */ +const foldSubagentMetrics = (rows: ThreadMetricRow[]): ThreadItem[] => + rows.map(({ _model, _totalToolCalls, _totalTokens, ...thread }) => { + const totalToolCalls = Number(_totalToolCalls); + const totalTokens = Number(_totalTokens); + return { + ...thread, + metadata: { + ...thread.metadata, + ...(totalToolCalls > 0 && { totalToolCalls }), + ...(totalTokens > 0 && { totalTokens }), + ...(_model && { model: _model }), + }, + }; + }); + const queryColumns = { agentId: threads.agentId, createdAt: threads.createdAt, @@ -60,13 +103,18 @@ export class ThreadModel { }; queryByTopicId = async (topicId: string) => { + // LEFT JOIN + GROUP BY threads.id (PK ⇒ Postgres lets us select the plain + // thread columns alongside the per-thread aggregates). `threadId` join + // naturally scopes to in-thread rows, excluding the spawning parent. const data = await this.db - .select(queryColumns) + .select({ ...queryColumns, ...subagentMetricColumns }) .from(threads) + .leftJoin(messages, eq(messages.threadId, threads.id)) .where(and(eq(threads.topicId, topicId), eq(threads.userId, this.userId))) + .groupBy(threads.id) .orderBy(desc(threads.updatedAt)); - return data as ThreadItem[]; + return foldSubagentMetrics(data as ThreadMetricRow[]); }; findById = async (id: string) => { diff --git a/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts b/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts index 630d26cb4b..811b487917 100644 --- a/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts +++ b/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts @@ -2014,7 +2014,7 @@ describe('ClaudeCodeAdapter', () => { expect(starts.some((e) => e.data?.newStep)).toBe(false); }); - it('does NOT emit turn_metadata step_complete for subagent events', () => { + it('emits subagent-tagged turn_metadata step_complete carrying message.usage', () => { const adapter = new ClaudeCodeAdapter(); adapter.adapt(init); adapter.adapt( @@ -2037,6 +2037,41 @@ describe('ClaudeCodeAdapter', () => { type: 'assistant', }); + const meta = events.find( + (e) => e.type === 'step_complete' && e.data?.phase === 'turn_metadata', + ); + expect(meta).toBeDefined(); + // Subagent ctx tag is what stops the executor from writing this usage + // onto the main agent (which would double-count vs the result event). + expect(meta?.data?.subagent?.parentToolCallId).toBe('toolu_parent'); + expect(meta?.data?.subagent?.subagentMessageId).toBe('msg_sub'); + expect(meta?.data?.model).toBe('claude-sonnet-4-6'); + expect(meta?.data?.usage?.totalInputTokens).toBe(5); + expect(meta?.data?.usage?.totalOutputTokens).toBe(10); + }); + + it('does NOT emit turn_metadata for subagent events without message.usage', () => { + const adapter = new ClaudeCodeAdapter(); + adapter.adapt(init); + adapter.adapt( + mainAssistant('msg_main', { + id: 'toolu_parent', + input: {}, + name: 'Agent', + type: 'tool_use', + }), + ); + + const events = adapter.adapt({ + message: { + content: [{ id: 'toolu_child', input: {}, name: 'Bash', type: 'tool_use' }], + id: 'msg_sub', + model: 'claude-sonnet-4-6', + }, + parent_tool_use_id: 'toolu_parent', + type: 'assistant', + }); + const meta = events.find( (e) => e.type === 'step_complete' && e.data?.phase === 'turn_metadata', ); diff --git a/packages/heterogeneous-agents/src/adapters/claudeCode.ts b/packages/heterogeneous-agents/src/adapters/claudeCode.ts index 756ca9f797..3435b2b913 100644 --- a/packages/heterogeneous-agents/src/adapters/claudeCode.ts +++ b/packages/heterogeneous-agents/src/adapters/claudeCode.ts @@ -829,13 +829,9 @@ export class ClaudeCodeAdapter implements AgentEventAdapter { * Handle a subagent assistant event (tagged with `parent_tool_use_id`). * * Subagent events are a side-channel of the main agent's stream and have - * two hard constraints: - * - no main-agent step boundary (each subagent turn introduces a new - * `message.id`; flushing that as a newStep would orphan main-agent - * bubbles) - * - no model / usage tracking on the main agent (CC's `result` event - * carries the authoritative grand total; re-summing per-turn deltas - * here would double-count against the main agent) + * one hard constraint: no main-agent step boundary (each subagent turn + * introduces a new `message.id`; flushing that as a newStep would orphan + * main-agent bubbles). * * Text / reasoning from subagent events ARE emitted — as `stream_chunk` * events tagged with the `subagent` peer field — so the executor can @@ -844,6 +840,17 @@ export class ClaudeCodeAdapter implements AgentEventAdapter { * → tools → assistant text → ...). Without this the thread only ever * shows tool calls with no closing reasoning / summary. * + * Usage on `raw.message.usage` is also emitted, as a + * `step_complete{phase:turn_metadata, subagent}` event so the executor + * can route the per-turn delta onto the subagent's in-thread assistant + * (and bump the subagent run's running totalTokens for the inspector + * chip). Note this is the FULL message.usage (subagent assistant events + * are not partial-streamed, unlike main-agent assistant events which + * carry stale `message_start` snapshots), so no de-stale logic is + * needed here. The subagent ctx tag prevents the executor from writing + * the same usage to the main agent's assistant — CC's `result` event + * remains the grand total across main + subagents. + * * Subagent lineage lives as event-level **peer fields** on each chunk * (`subagent.parentToolCallId` + `subagent.subagentMessageId`), not on * individual `ToolCallPayload` items — tool payloads stay minimal and @@ -920,6 +927,20 @@ export class ClaudeCodeAdapter implements AgentEventAdapter { ); } events.push(...this.emitToolChunk(newToolCalls, messageId, subagentCtx)); + + const usage = toUsageData(raw.message?.usage); + if (usage) { + events.push( + this.makeEvent('step_complete', { + model: raw.message?.model, + phase: 'turn_metadata', + provider: 'claude-code', + subagent: subagentCtx, + usage, + }), + ); + } + return events; } diff --git a/packages/types/src/tool/builtin.ts b/packages/types/src/tool/builtin.ts index 4a906449fe..ebe65b28bb 100644 --- a/packages/types/src/tool/builtin.ts +++ b/packages/types/src/tool/builtin.ts @@ -303,6 +303,12 @@ export interface BuiltinInspectorProps { partialArgs?: Arguments; pluginState?: State; result?: { content: string | null; error?: any; state?: any }; + /** + * Stable id of this tool call. Required for inspectors that need to correlate + * with side data — e.g. CC's `Agent` inspector joining to the subagent Thread + * via `metadata.sourceToolCallId`. + */ + toolCallId?: string; } export type BuiltinInspector = (props: BuiltinInspectorProps) => ReactNode; diff --git a/packages/types/src/topic/thread.ts b/packages/types/src/topic/thread.ts index 9a68712699..5db2611d4b 100644 --- a/packages/types/src/topic/thread.ts +++ b/packages/types/src/topic/thread.ts @@ -38,6 +38,13 @@ export interface ThreadMetadata { duration?: number; /** Error details when task failed */ error?: any; + /** + * Model the subagent ran on (e.g. CC's per-turn `message.model`). Pinned + * once for the run and rolled up here on finalize so historical / cold-load + * viewers can surface it (e.g. the subagent inspector chip tooltip) without + * the child messages being loaded. + */ + model?: string; /** Operation ID for tracking */ operationId?: string; /** @@ -111,6 +118,7 @@ export const threadMetadataSchema = z.object({ completedAt: z.string().optional(), duration: z.number().optional(), error: z.any().optional(), + model: z.string().optional(), operationId: z.string().optional(), sourceToolCallId: z.string().optional(), startedAt: z.string().optional(), diff --git a/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx b/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx index ab25873645..b97396ed92 100644 --- a/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx +++ b/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx @@ -95,6 +95,7 @@ const Inspectors = memo( partialArgs={partialJson} pluginState={result?.state} result={result} + toolCallId={toolCallId} /> {
- +
diff --git a/src/features/DevPanel/RenderGallery/toolSurfaces.tsx b/src/features/DevPanel/RenderGallery/toolSurfaces.tsx index b6871901c4..89e70f77d6 100644 --- a/src/features/DevPanel/RenderGallery/toolSurfaces.tsx +++ b/src/features/DevPanel/RenderGallery/toolSurfaces.tsx @@ -73,33 +73,37 @@ const coerceInspectorContent = (value: unknown): string | null => { interface ToolInspectorSlotProps { api: ApiEntry; derived: DerivedFixtureProps; + toolCallId?: string; variant: ToolRenderFixtureVariant; } /** Renders the API's Inspector with the lifecycle-derived props, or a Missing hint. */ -export const ToolInspectorSlot = memo(({ api, derived, variant }) => { - const Inspector = api.inspector; - if (!Inspector) return ; +export const ToolInspectorSlot = memo( + ({ api, derived, toolCallId, variant }) => { + const Inspector = api.inspector; + if (!Inspector) return ; - return ( - - - - ); -}); + return ( + + + + ); + }, +); ToolInspectorSlot.displayName = 'ToolInspectorSlot'; diff --git a/src/locales/default/chat.ts b/src/locales/default/chat.ts index c70d6a9b67..cb1b145b8c 100644 --- a/src/locales/default/chat.ts +++ b/src/locales/default/chat.ts @@ -846,6 +846,12 @@ export default { 'thread.closeSubagentThread': 'Hide Detail', 'thread.divider': 'Subtopic', 'thread.openSubagentThread': 'View Detail', + 'thread.subagentMetrics.modelLabel': 'Model', + 'thread.subagentMetrics.toolCalls_one': '{{count}} tool call', + 'thread.subagentMetrics.toolCalls_other': '{{count}} tool calls', + 'thread.subagentMetrics.tokens': '{{count}} tokens', + 'thread.subagentMetrics.toolsShort_one': '{{count}} tool', + 'thread.subagentMetrics.toolsShort_other': '{{count}} tools', 'thread.subagentReadOnlyHint': 'SubAgent conversations are read-only — execution is driven by the parent agent.', 'thread.threadMessageCount': '{{messageCount}} messages', diff --git a/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts b/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts index a5883e7c87..ddf5910043 100644 --- a/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts +++ b/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts @@ -703,6 +703,27 @@ export class HeterogeneousPersistenceHandler { private async handleTurnMetadata(state: OperationState, event: AgentStreamEvent) { const { model, provider, usage } = event.data ?? {}; + const subagentCtx = (event.data as any)?.subagent as SubagentEventContext | undefined; + + if (subagentCtx) { + // Subagent-tagged usage: write it (plus the subagent's own model/provider) + // onto the subagent's in-thread assistant. The chip's totals are derived + // from these per-message `usage` snapshots on read (live aggregation + + // SQL rollup in `threadModel.queryByTopicId`), so nothing is tracked on + // the run. Do NOT touch `state.lastModel` / `state.lastProvider` — those + // carry main-agent step boundary state and would contaminate the next + // main-agent assistant create. + if (!usage) return; + const run = state.subagentRuns.get(subagentCtx.parentToolCallId); + if (!run) return; + await this.deps.messageModel.update(run.currentAssistantMsgId, { + metadata: { usage }, + ...(model && { model }), + ...(provider && { provider }), + }); + return; + } + if (model) state.lastModel = model; if (provider) state.lastProvider = provider; @@ -1236,8 +1257,11 @@ export class HeterogeneousPersistenceHandler { run.lastChainParentId = terminal.id; } - // Mark the thread completed. Idempotent — re-running on a retry just - // re-writes the same status; downstream UI badges are derived state. + // Mark the thread complete (created as `Processing`). The chip's + // tool-count / token / model metrics are NOT denormalized here — they're + // derived on read from the child messages (`threadModel.queryByTopicId` + // aggregates them in SQL, mirroring the live `aggregateSubagentMetrics`), + // so finalize owns only the status transition. Idempotent. await this.deps.threadModel.update(run.threadId, { status: ThreadStatus.Active }); state.subagentRuns.delete(parentToolCallId); diff --git a/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts b/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts index 1bbc6cc4e2..71c8c38f99 100644 --- a/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts +++ b/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts @@ -1,5 +1,6 @@ // @vitest-environment node import type { AgentStreamEvent } from '@lobechat/agent-gateway-client'; +import { ThreadStatus } from '@lobechat/types'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { @@ -133,6 +134,7 @@ const createHarness = (params: { threads.set(thread.id, thread); return thread; }), + findById: vi.fn(async (id: string) => threads.get(id) ?? null), update: vi.fn(async (id: string, patch: Partial) => { const existing = threads.get(id); if (!existing) return; @@ -927,6 +929,72 @@ describe('HeterogeneousPersistenceHandler', () => { const thread = h.threads.get(threadId)!; expect(thread.status).toBeDefined(); }); + + it('writes subagent usage + model onto the in-thread assistant, and finalize only flips status', async () => { + const h = createHarness({ + assistantMessageId: 'asst-1', + operationId: 'op-1', + topicId: 'topic-1', + }); + + const subagentCtx = { + parentToolCallId: 'tc-spawn-1', + spawnMetadata: { prompt: 'p', subagentType: 'Explore' }, + subagentMessageId: 'sub-1', + }; + + await h.handler.ingest({ + events: [ + buildEvent('stream_chunk', 0, { + chunkType: 'text', + content: 'working', + subagent: subagentCtx, + }), + buildEvent('stream_chunk', 1, { + chunkType: 'tools_calling', + subagent: subagentCtx, + toolsCalling: [ + { + apiName: 'Bash', + arguments: '{}', + id: 'tc-child', + identifier: 'bash', + type: 'default', + }, + ], + }), + // Subagent turn_metadata carries the authoritative per-turn usage + model. + buildEvent('step_complete', 2, { + model: 'claude-opus-4-8', + phase: 'turn_metadata', + provider: 'claude-code', + subagent: subagentCtx, + usage: { totalInputTokens: 10, totalOutputTokens: 5, totalTokens: 15 }, + }), + buildEvent('tool_result', 3, { content: 'final', toolCallId: 'tc-spawn-1' }), + ], + operationId: 'op-1', + topicId: 'topic-1', + }); + + const threadId = [...h.threads.keys()][0]; + const thread = h.threads.get(threadId)!; + // Metrics are NOT denormalized onto metadata — derived on read instead. + expect(thread.metadata?.totalTokens).toBeUndefined(); + expect(thread.metadata?.totalToolCalls).toBeUndefined(); + // Create-time peer fields untouched; finalize only flips status. + expect(thread.metadata?.sourceToolCallId).toBe('tc-spawn-1'); + expect(thread.metadata?.subagentType).toBe('Explore'); + expect(thread.status).toBe(ThreadStatus.Active); + + // The in-thread assistant got usage + model written — the rows the + // read-time aggregation later sums over. + const threadAssts = [...h.messages.values()].filter( + (m) => m.threadId === threadId && m.role === 'assistant', + ); + const withUsage = threadAssts.find((m) => m.metadata?.usage?.totalTokens === 15); + expect(withUsage?.model).toBe('claude-opus-4-8'); + }); }); describe('terminal events and finish()', () => { diff --git a/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts b/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts index 5c12369e9a..f5c667bf79 100644 --- a/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts +++ b/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts @@ -14,6 +14,7 @@ import path from 'node:path'; import { HeterogeneousAgentSessionErrorCode } from '@lobechat/electron-client-ipc'; import type { AgentEventAdapter } from '@lobechat/heterogeneous-agents'; import { createAdapter } from '@lobechat/heterogeneous-agents'; +import { ThreadStatus } from '@lobechat/types'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { createGatewayEventHandler } from '../gatewayEventHandler'; @@ -41,9 +42,11 @@ vi.mock('@/services/message', () => ({ // threadService — subagent Thread creation (CC `Task` tool_use) const mockCreateThread = vi.fn(); +const mockUpdateThread = vi.fn(); vi.mock('@/services/thread', () => ({ threadService: { - createThread: (...args: any[]) => mockCreateThread(...args), + createThread: (...args: unknown[]) => mockCreateThread(...args), + updateThread: (...args: unknown[]) => mockUpdateThread(...args), }, })); @@ -2426,6 +2429,74 @@ describe('heterogeneousAgentExecutor DB persistence', () => { expect(finalizeWrite).toBeDefined(); }); + it('marks the subagent thread Active on finalize without denormalizing metrics', async () => { + // Under read-time derivation the chip metrics (tool count / tokens / + // model) are NOT written onto `thread.metadata` at finalize — they're + // aggregated from the child messages on read. Finalize only flips the + // thread status Processing → Active. + await runWithEvents([ + ccInit(), + ccToolUse('msg_main', 'toolu_task', 'Task', { + description: 'x', + prompt: 'go', + subagent_type: 'Explore', + }), + ccSubagentToolUse('msg_sub_1', 'toolu_task', 'toolu_child', 'Bash', { command: 'ls' }), + ccSubagentToolResult('toolu_child', 'toolu_task', 'file list'), + { + message: { + content: [{ text: 'summary', type: 'text' }], + id: 'msg_sub_2', + model: 'claude-opus-4-8', + role: 'assistant', + usage: { input_tokens: 1000, output_tokens: 200 }, + }, + parent_tool_use_id: 'toolu_task', + type: 'assistant', + }, + ccSubagentSpawnResult('toolu_task', 'final answer'), + ccResult(), + ]); + + const threadId = mockCreateThread.mock.calls[0][0].id; + const finalize = mockUpdateThread.mock.calls.find(([id]: any) => id === threadId); + expect(finalize).toBeDefined(); + // Status-only — no metrics denormalized onto metadata. + expect(finalize![1]).toEqual({ status: ThreadStatus.Active }); + }); + + it('writes the subagent model onto the in-thread assistant for the live tooltip', async () => { + // The chip tooltip derives the model from the child assistant's `model` + // field live (before finalize). The turn_metadata branch must persist it. + await runWithEvents([ + ccInit(), + ccToolUse('msg_main', 'toolu_task', 'Task', { + description: 'x', + prompt: 'go', + subagent_type: 'Explore', + }), + { + message: { + content: [{ text: 'summary', type: 'text' }], + id: 'msg_sub', + model: 'claude-opus-4-8', + role: 'assistant', + usage: { input_tokens: 1000, output_tokens: 200 }, + }, + parent_tool_use_id: 'toolu_task', + type: 'assistant', + }, + ccSubagentSpawnResult('toolu_task', 'final answer'), + ccResult(), + ]); + + const modelWrite = mockUpdateMessage.mock.calls.find( + ([, val]: any) => val.model === 'claude-opus-4-8' && val.metadata?.usage, + ); + expect(modelWrite).toBeDefined(); + expect(modelWrite![1].provider).toBe('claude-code'); + }); + it('retains subagent buffers + pinned target when the finalize flush fails', async () => { // Transient DB failures on the finalize-time flush used to silently // wipe the accumulators (buffer clear was outside the try/catch), so diff --git a/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts b/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts index 68c4ac25e5..5a4873872c 100644 --- a/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts +++ b/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts @@ -575,6 +575,7 @@ const ensureSubagentRun = async ( if (!run) { const { spawnMetadata } = subagentCtx; const threadId = generateThreadId(); + const startedAt = new Date().toISOString(); const title = spawnMetadata?.description?.slice(0, 80) || spawnMetadata?.subagentType || 'Subagent'; @@ -583,7 +584,7 @@ const ensureSubagentRun = async ( id: threadId, metadata: { sourceToolCallId: subagentCtx.parentToolCallId, - startedAt: new Date().toISOString(), + startedAt, subagentType: spawnMetadata?.subagentType, }, sourceMessageId: mainAssistantMessageId, @@ -974,6 +975,18 @@ const finalizeSubagentRun = async ({ } } + // Mark the subagent Thread complete (created as `Processing`). The chip's + // tool-count / token / model metrics are NOT written here — they're derived + // on read from the child messages (live: `aggregateSubagentMetrics` over + // `dbMessagesMap`; cold-load: the same aggregation in SQL via + // `threadModel.queryByTopicId`), so finalize owns only the status transition. + // Best-effort — a failure here must not break finalize. + try { + await threadService.updateThread(run.threadId, { status: ThreadStatus.Active }); + } catch (err) { + console.error('[HeterogeneousAgent] Failed to mark subagent thread complete:', err); + } + completeSubOp(run.subOperationId); }; @@ -1515,9 +1528,48 @@ export const executeHeterogeneousAgent = async ( // of all prior steps. Sum of turn_metadata equals result_usage for // a healthy run. if (event.type === 'step_complete' && event.data?.phase === 'turn_metadata') { + const subagentCtx = event.data.subagent as SubagentEventContext | undefined; + const turnUsage = event.data.usage; + + if (subagentCtx) { + // Subagent-tagged usage: write it (plus the subagent's own + // model/provider) onto the subagent's in-thread assistant — NOT the + // main agent's. The chip derives its totals from these per-message + // `usage` snapshots (live + cold-load both aggregate the messages), + // so nothing is tracked on the run. Don't touch the MAIN agent's + // `lastModel` / `lastProvider` — those are main-agent step state and + // would contaminate the next main turn's create. + const turnModel = event.data.model as string | undefined; + const turnProvider = event.data.provider as string | undefined; + if (turnUsage) { + persistQueue = persistQueue.then(async () => { + const run = subagentRuns.get(subagentCtx.parentToolCallId); + if (!run) return; + + const update = { + metadata: { usage: turnUsage }, + ...(turnModel && { model: turnModel }), + ...(turnProvider && { provider: turnProvider }), + }; + // Mirror the DB write into the thread's local message bucket + // so the inspector chip's live aggregation sees the usage as + // it lands. Without this `run.stream.update`, dbMessagesMap + // only learns the new metadata.usage after the next thread + // refresh — i.e. the chip stays at 0 tokens during streaming. + run.stream.update(run.currentAssistantMsgId, update as Partial); + await messageService + .updateMessage(run.currentAssistantMsgId, update, { + agentId: context.agentId, + topicId: context.topicId, + }) + .catch(console.error); + }); + } + return; + } + if (event.data.model) lastModel = event.data.model; if (event.data.provider) lastProvider = event.data.provider; - const turnUsage = event.data.usage; if (turnUsage) { persistQueue = persistQueue.then(async () => { await messageService diff --git a/src/store/chat/slices/thread/selectors/index.ts b/src/store/chat/slices/thread/selectors/index.ts index 6991544bef..4f9f7c565d 100644 --- a/src/store/chat/slices/thread/selectors/index.ts +++ b/src/store/chat/slices/thread/selectors/index.ts @@ -102,6 +102,31 @@ const getThreadChildMessages = return data.filter((m) => !!id && m.threadId === id); }; +/** + * Raw DB-level child messages for a thread, keyed by `messageMapKey` thread scope. + * + * Use this for *counting* / *aggregating* over individual messages (e.g. the + * subagent inspector chip's tool count + token total). Do NOT use it for + * rendering — the display layer reads from `messagesMap` (which groups tools + * into a virtual `assistantGroup`), so the shapes intentionally differ. + * + * Why `dbMessagesMap` not `messagesMap`: `messagesMap[thread_*]` only holds + * the rendered shape ([user, assistantGroup]); individual `role==='tool'` / + * `role==='assistant'` rows live in `dbMessagesMap[thread_*]`. + */ +const getThreadDbMessages = + (id?: string) => + (s: ChatStoreState): UIChatMessage[] => { + if (!id || !s.activeAgentId) return []; + const key = messageMapKey({ + agentId: s.activeAgentId, + groupId: s.activeGroupId, + threadId: id, + topicId: s.activeTopicId, + }); + return (s.dbMessagesMap?.[key] || []) as UIChatMessage[]; + }; + /** * Portal AI chats - used for AI title summarization */ @@ -140,6 +165,8 @@ export const threadSelectors = { currentActiveThread, currentPortalThread, currentTopicThreads, + getThreadChildMessages, + getThreadDbMessages, getThreadsBySourceMsgId, getThreadsByTopic, hasThreadBySourceMsgId, diff --git a/src/utils/subagentMetrics.test.ts b/src/utils/subagentMetrics.test.ts new file mode 100644 index 0000000000..dc04fd25e7 --- /dev/null +++ b/src/utils/subagentMetrics.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from 'vitest'; + +import { aggregateSubagentMetrics } from './subagentMetrics'; + +describe('aggregateSubagentMetrics', () => { + it('counts role=tool messages and sums every assistant turn usage', () => { + const result = aggregateSubagentMetrics([ + { role: 'user' }, + { metadata: { usage: { totalTokens: 1000 } }, model: 'claude-opus-4-8', role: 'assistant' }, + { role: 'tool' }, + { metadata: { usage: { totalTokens: 1800 } }, model: 'claude-opus-4-8', role: 'assistant' }, + { role: 'tool' }, + { metadata: { usage: { totalTokens: 2600 } }, role: 'assistant' }, + ]); + + // SUM, not last-turn: 1000 + 1800 + 2600 + expect(result.totalTokens).toBe(5400); + expect(result.toolCalls).toBe(2); + expect(result.model).toBe('claude-opus-4-8'); + }); + + it('reads usage from the promoted top-level field too', () => { + const result = aggregateSubagentMetrics([ + { role: 'assistant', usage: { totalTokens: 300 } }, + { metadata: { usage: { totalTokens: 700 } }, role: 'assistant' }, + ]); + + expect(result.totalTokens).toBe(1000); + }); + + it('returns zeros / undefined model for an empty or usage-less set', () => { + expect(aggregateSubagentMetrics([])).toEqual({ + model: undefined, + toolCalls: 0, + totalTokens: 0, + }); + expect(aggregateSubagentMetrics([{ role: 'assistant' }, { role: 'user' }])).toEqual({ + model: undefined, + toolCalls: 0, + totalTokens: 0, + }); + }); + + it('pins the first assistant model and ignores user/tool rows for tokens', () => { + const result = aggregateSubagentMetrics([ + { model: 'model-a', role: 'assistant', usage: { totalTokens: 10 } }, + { model: 'model-b', role: 'assistant', usage: { totalTokens: 20 } }, + // user/tool rows never contribute tokens even if they carried a usage blob + { role: 'user', usage: { totalTokens: 999 } }, + ]); + + expect(result.model).toBe('model-a'); + expect(result.totalTokens).toBe(30); + }); +}); diff --git a/src/utils/subagentMetrics.ts b/src/utils/subagentMetrics.ts new file mode 100644 index 0000000000..e4ec45fe8d --- /dev/null +++ b/src/utils/subagentMetrics.ts @@ -0,0 +1,52 @@ +/** + * Single source of truth for the CC subagent inspector-chip metrics. + * + * Both the live path (chip selector aggregating the in-memory streamed child + * messages) and the cold-load path (server `threadModel.queryByTopicId` + * aggregating the persisted rows in SQL) compute the SAME projection over the + * SAME messages — so the two can't diverge by construction. This TS helper is + * the live encoding; the SQL encoding mirrors it (SUM of assistant + * `usage.totalTokens`, COUNT of `role='tool'`, a pinned model). + * + * `totalTokens` is a plain SUM of each turn's `usage.totalTokens` — the same + * convention as the project's token-usage heatmap (`MessageModel`), i.e. "total + * tokens processed", not "final context size". CC re-feeds the growing context + * each turn so the sum is dominated by (mostly cached) context re-reads, which + * is exactly what the heatmap counts too. + */ + +interface MetricMessage { + metadata?: { usage?: { totalTokens?: number | null } | null } | null; + model?: string | null; + role?: string | null; + usage?: { totalTokens?: number | null } | null; +} + +export interface SubagentMetrics { + /** Model the subagent ran on (first assistant turn that carries one). */ + model?: string; + /** Number of `role='tool'` child messages. */ + toolCalls: number; + /** Sum of every assistant turn's `usage.totalTokens`. */ + totalTokens: number; +} + +export const aggregateSubagentMetrics = (messages: MetricMessage[]): SubagentMetrics => { + let toolCalls = 0; + let totalTokens = 0; + let model: string | undefined; + + for (const m of messages) { + if (m.role === 'tool') { + toolCalls += 1; + } else if (m.role === 'assistant') { + // dbMessagesMap holds the raw DB shape (`metadata.usage`); the + // display-bound UIChatMessage promotes it to a top-level `usage` — accept + // either so the same helper serves both call sites. + totalTokens += m.metadata?.usage?.totalTokens ?? m.usage?.totalTokens ?? 0; + if (!model && m.model) model = m.model; + } + } + + return { model, toolCalls, totalTokens }; +};