diff --git a/locales/en-US/chat.json b/locales/en-US/chat.json
index 7621e6c9cf..b4596ca86e 100644
--- a/locales/en-US/chat.json
+++ b/locales/en-US/chat.json
@@ -771,6 +771,12 @@
   "thread.closeSubagentThread": "Hide Detail",
   "thread.divider": "Subtopic",
   "thread.openSubagentThread": "View Detail",
+  "thread.subagentMetrics.modelLabel": "Model",
+  "thread.subagentMetrics.tokens": "{{count}} tokens",
+  "thread.subagentMetrics.toolCalls_one": "{{count}} tool call",
+  "thread.subagentMetrics.toolCalls_other": "{{count}} tool calls",
+  "thread.subagentMetrics.toolsShort_one": "{{count}} tool",
+  "thread.subagentMetrics.toolsShort_other": "{{count}} tools",
   "thread.subagentReadOnlyHint": "SubAgent conversations are read-only — execution is driven by the parent agent.",
   "thread.threadMessageCount": "{{messageCount}} messages",
   "thread.title": "Subtopic",
diff --git a/locales/zh-CN/chat.json b/locales/zh-CN/chat.json
index b7837746ec..79585af88a 100644
--- a/locales/zh-CN/chat.json
+++ b/locales/zh-CN/chat.json
@@ -771,6 +771,12 @@
   "thread.closeSubagentThread": "隐藏详情",
   "thread.divider": "子话题",
   "thread.openSubagentThread": "查看详情",
+  "thread.subagentMetrics.modelLabel": "模型",
+  "thread.subagentMetrics.tokens": "{{count}} tokens",
+  "thread.subagentMetrics.toolCalls_one": "{{count}} 次工具调用",
+  "thread.subagentMetrics.toolCalls_other": "{{count}} 次工具调用",
+  "thread.subagentMetrics.toolsShort_one": "{{count}} tool",
+  "thread.subagentMetrics.toolsShort_other": "{{count}} tools",
   "thread.subagentReadOnlyHint": "SubAgent 对话仅可查看,由父智能体驱动执行",
   "thread.threadMessageCount": "{{messageCount}} 条消息",
   "thread.title": "子话题",
diff --git a/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx b/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx
index dea8282518..2e37425e77 100644
--- a/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx
+++ b/packages/builtin-tool-claude-code/src/client/Inspector/Agent.tsx
@@ -2,11 +2,16 @@
 
 import { inspectorTextStyles, shinyTextStyles } from '@lobechat/shared-tool-ui/styles';
 import type { BuiltinInspectorProps } from '@lobechat/types';
+import { Tooltip } from '@lobehub/ui';
 import { GroupBotIcon } from '@lobehub/ui/icons';
 import { createStaticStyles, cx } from 'antd-style';
 import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 
+import { useChatStore } from '@/store/chat';
+import { threadSelectors } from '@/store/chat/selectors';
+import { aggregateSubagentMetrics } from '@/utils/subagentMetrics';
+
 import { type AgentArgs, ClaudeCodeApiName } from '../../types';
 import { resolveCCSubagentType } from '../subagentTypes';
 
@@ -44,18 +49,89 @@ const styles = createStaticStyles(({ css, cssVar }) => ({
     flex-shrink: 0;
     color: ${cssVar.colorText};
   `,
+  metrics: css`
+    display: inline-flex;
+    flex-shrink: 0;
+    gap: 6px;
+    align-items: center;
+
+    margin-inline-start: 8px;
+
+    font-size: 12px;
+    color: ${cssVar.colorTextDescription};
+  `,
+  metricsDot: css`
+    color: ${cssVar.colorTextQuaternary};
+  `,
 }));
 
+const formatTokens = (n: number): string => {
+  if (n < 1000) return String(n);
+  if (n < 10_000) return `${(n / 1000).toFixed(1)}k`;
+  if (n < 1_000_000) return `${Math.round(n / 1000)}k`;
+  return `${(n / 1_000_000).toFixed(1)}M`;
+};
+
+interface SubagentMetrics {
+  hasAny: boolean;
+  model?: string;
+  toolCalls: number;
+  totalTokens: number;
+}
+
+/**
+ * Subagent metrics for the chip, derived from the child messages via the shared
+ * `aggregateSubagentMetrics` (tool count, SUM of every turn's
+ * `usage.totalTokens`, pinned model).
+ *
+ * Two data sources, ONE rule:
+ *  - **Live** (streaming / still hydrated): aggregate the in-memory child
+ *    messages from `dbMessagesMap`.
+ *  - **Cold-load**: the child messages aren't hydrated, so fall back to the
+ *    `thread.metadata.*` totals the server derives in `threadModel.queryByTopicId`
+ *    — which runs the SAME aggregation in SQL over the SAME rows, so the two
+ *    paths can't diverge.
+ */
+const useSubagentMetrics = (toolCallId: string | undefined): SubagentMetrics | null =>
+  useChatStore((s) => {
+    if (!toolCallId) return null;
+    const thread = (threadSelectors.currentTopicThreads(s) ?? []).find(
+      (t) => t.metadata?.sourceToolCallId === toolCallId,
+    );
+    if (!thread) return null;
+
+    const live = aggregateSubagentMetrics(threadSelectors.getThreadDbMessages(thread.id)(s));
+
+    // Live values win when the child messages are present; otherwise read the
+    // server-derived rollup off `thread.metadata`.
+    const toolCalls = live.toolCalls || thread.metadata?.totalToolCalls || 0;
+    const totalTokens = live.totalTokens || thread.metadata?.totalTokens || 0;
+    const model = live.model || thread.metadata?.model;
+
+    return {
+      hasAny: toolCalls > 0 || totalTokens > 0 || !!model,
+      model,
+      toolCalls,
+      totalTokens,
+    };
+  });
+
 /**
  * CC's subagent-spawn tool. `subagent_type` ("Explore", "general-purpose", ...)
  * is the variant; we prefix it with "Agent:" so the row visibly reads as a
  * subagent dispatch rather than a regular tool — the icon alone isn't enough
  * signal. `description` is the 3-5 word title the model writes and goes in the
  * chip; the full `prompt` is too long for a collapsed header.
+ *
+ * The trailing metrics segment (tool count · tokens) is sourced from the
+ * subagent's child thread when one exists. It updates live during streaming so
+ * users get a progress sense, and is hydrated from `thread.metadata` after
+ * finalize. Model name lives in the tooltip to keep the inline row compact.
  */
 export const AgentInspector = memo<BuiltinInspectorProps<AgentArgs>>(
-  ({ args, partialArgs, isArgumentsStreaming, isLoading }) => {
+  ({ args, partialArgs, isArgumentsStreaming, isLoading, toolCallId }) => {
     const { t } = useTranslation('plugin');
+    const { t: tChat } = useTranslation('chat');
     const fallbackLabel = t(ClaudeCodeApiName.Agent as any);
 
     const source = args ?? partialArgs;
@@ -68,6 +144,42 @@ export const AgentInspector = memo<BuiltinInspectorProps<AgentArgs>>(
     const Icon = resolved?.icon ?? GroupBotIcon;
     const labelText = resolved?.label ?? fallbackLabel;
 
+    const metrics = useSubagentMetrics(toolCallId);
+
+    const tooltipLines: string[] = [];
+    if (metrics?.model) {
+      tooltipLines.push(`${tChat('thread.subagentMetrics.modelLabel')}: ${metrics.model}`);
+    }
+    if (metrics && metrics.toolCalls > 0) {
+      tooltipLines.push(
+        tChat('thread.subagentMetrics.toolCalls', { count: metrics.toolCalls }) as string,
+      );
+    }
+    if (metrics && metrics.totalTokens > 0) {
+      tooltipLines.push(
+        tChat('thread.subagentMetrics.tokens', {
+          count: metrics.totalTokens.toLocaleString('en-US'),
+        }) as string,
+      );
+    }
+
+    const metricsNode =
+      metrics?.hasAny && (metrics.toolCalls > 0 || metrics.totalTokens > 0) ? (
+        <Tooltip title={tooltipLines.length > 0 ? tooltipLines.join(' · ') : undefined}>
+          <span className={styles.metrics}>
+            {metrics.toolCalls > 0 && (
+              <span>
+                {tChat('thread.subagentMetrics.toolsShort', { count: metrics.toolCalls })}
+              </span>
+            )}
+            {metrics.toolCalls > 0 && metrics.totalTokens > 0 && (
+              <span className={styles.metricsDot}>·</span>
+            )}
+            {metrics.totalTokens > 0 && <span>{formatTokens(metrics.totalTokens)}</span>}
+          </span>
+        </Tooltip>
+      ) : null;
+
     return (
       <div className={cx(inspectorTextStyles.root, isShiny && shinyTextStyles.shinyText)}>
         <span className={styles.label}>Agent:</span>
@@ -78,6 +190,7 @@ export const AgentInspector = memo<BuiltinInspectorProps<AgentArgs>>(
             <span className={styles.chipText}>{description}</span>
           </span>
         )}
+        {metricsNode}
       </div>
     );
   },
diff --git a/packages/database/src/models/__tests__/thread.test.ts b/packages/database/src/models/__tests__/thread.test.ts
index 8f6834311d..3fcaa1d62f 100644
--- a/packages/database/src/models/__tests__/thread.test.ts
+++ b/packages/database/src/models/__tests__/thread.test.ts
@@ -3,7 +3,7 @@ import { eq } from 'drizzle-orm';
 import { afterEach, beforeEach, describe, expect, it } from 'vitest';
 
 import { getTestDB } from '../../core/getTestDB';
-import { sessions, threads, topics, users } from '../../schemas';
+import { messages, sessions, threads, topics, users } from '../../schemas';
 import type { LobeChatDatabase } from '../../type';
 import { ThreadModel } from '../thread';
 
@@ -182,6 +182,68 @@ describe('ThreadModel', () => {
 
       expect(result).toHaveLength(0);
     });
+
+    it('derives subagent metrics (SUM tokens, COUNT tools, model) from child messages', async () => {
+      await serverDB.transaction(async (tx) => {
+        await tx.insert(threads).values({
+          id: 'sub-thread',
+          metadata: { sourceToolCallId: 'tc-1' },
+          status: ThreadStatus.Active,
+          topicId,
+          type: ThreadType.Standalone,
+          userId,
+        });
+        await tx.insert(messages).values([
+          // two assistant turns → tokens SUM to 1000 + 1800 = 2800
+          {
+            id: 'm-a1',
+            model: 'claude-opus-4-8',
+            role: 'assistant',
+            threadId: 'sub-thread',
+            topicId,
+            usage: { totalTokens: 1000 },
+            userId,
+          },
+          { id: 'm-t1', role: 'tool', threadId: 'sub-thread', topicId, userId },
+          // legacy row: usage only under metadata.usage (no promoted column)
+          {
+            id: 'm-a2',
+            metadata: { usage: { totalTokens: 1800 } },
+            role: 'assistant',
+            threadId: 'sub-thread',
+            topicId,
+            userId,
+          },
+          { id: 'm-t2', role: 'tool', threadId: 'sub-thread', topicId, userId },
+        ]);
+      });
+
+      const [thread] = await threadModel.queryByTopicId(topicId);
+
+      expect(thread.id).toBe('sub-thread');
+      expect(thread.metadata?.totalTokens).toBe(2800);
+      expect(thread.metadata?.totalToolCalls).toBe(2);
+      expect(thread.metadata?.model).toBe('claude-opus-4-8');
+      // create-time metadata preserved
+      expect(thread.metadata?.sourceToolCallId).toBe('tc-1');
+    });
+
+    it('omits derived metrics for a thread with no child messages', async () => {
+      await serverDB.insert(threads).values({
+        id: 'empty-thread',
+        status: ThreadStatus.Active,
+        topicId,
+        type: ThreadType.Standalone,
+        userId,
+      });
+
+      const [thread] = await threadModel.queryByTopicId(topicId);
+
+      expect(thread.id).toBe('empty-thread');
+      expect(thread.metadata?.totalTokens).toBeUndefined();
+      expect(thread.metadata?.totalToolCalls).toBeUndefined();
+      expect(thread.metadata?.model).toBeUndefined();
+    });
   });
 
   describe('findById', () => {
diff --git a/packages/database/src/models/thread.ts b/packages/database/src/models/thread.ts
index 8ea923c0a8..64e7dfa1be 100644
--- a/packages/database/src/models/thread.ts
+++ b/packages/database/src/models/thread.ts
@@ -1,11 +1,54 @@
 import type { CreateThreadParams } from '@lobechat/types';
 import { ThreadStatus } from '@lobechat/types';
-import { and, desc, eq } from 'drizzle-orm';
+import { and, desc, eq, sql } from 'drizzle-orm';
 
 import type { ThreadItem } from '../schemas';
-import { threads } from '../schemas';
+import { messages, threads } from '../schemas';
 import type { LobeChatDatabase } from '../type';
 
+/**
+ * Per-thread subagent metrics, derived from the child messages at read time
+ * (single source of truth = the messages, not a denormalized write). Mirrors
+ * `aggregateSubagentMetrics` in the app: SUM of assistant `usage.totalTokens`
+ * (prefer the promoted `usage` column, fall back to legacy `metadata.usage`),
+ * COUNT of `role='tool'`, and a pinned model. Folded onto `metadata.*` so the
+ * subagent inspector chip can read it without hydrating the child messages.
+ */
+const subagentMetricColumns = {
+  _model: sql<
+    string | null
+  >`MAX(CASE WHEN ${messages.role} = 'assistant' THEN ${messages.model} END)`.as('_sa_model'),
+  _totalToolCalls: sql<number>`COUNT(CASE WHEN ${messages.role} = 'tool' THEN 1 END)`.as(
+    '_sa_tool_calls',
+  ),
+  _totalTokens:
+    sql<number>`COALESCE(SUM(CASE WHEN ${messages.role} = 'assistant' THEN (COALESCE(${messages.usage}, ${messages.metadata} -> 'usage') ->> 'totalTokens')::numeric END), 0)`.as(
+      '_sa_total_tokens',
+    ),
+};
+
+type ThreadMetricRow = ThreadItem & {
+  _model: string | null;
+  _totalToolCalls: number | string;
+  _totalTokens: number | string;
+};
+
+/** Fold the SQL-derived metric columns onto `metadata` and drop the temp keys. */
+const foldSubagentMetrics = (rows: ThreadMetricRow[]): ThreadItem[] =>
+  rows.map(({ _model, _totalToolCalls, _totalTokens, ...thread }) => {
+    const totalToolCalls = Number(_totalToolCalls);
+    const totalTokens = Number(_totalTokens);
+    return {
+      ...thread,
+      metadata: {
+        ...thread.metadata,
+        ...(totalToolCalls > 0 && { totalToolCalls }),
+        ...(totalTokens > 0 && { totalTokens }),
+        ...(_model && { model: _model }),
+      },
+    };
+  });
+
 const queryColumns = {
   agentId: threads.agentId,
   createdAt: threads.createdAt,
@@ -60,13 +103,18 @@ export class ThreadModel {
   };
 
   queryByTopicId = async (topicId: string) => {
+    // LEFT JOIN + GROUP BY threads.id (PK ⇒ Postgres lets us select the plain
+    // thread columns alongside the per-thread aggregates). `threadId` join
+    // naturally scopes to in-thread rows, excluding the spawning parent.
     const data = await this.db
-      .select(queryColumns)
+      .select({ ...queryColumns, ...subagentMetricColumns })
       .from(threads)
+      .leftJoin(messages, eq(messages.threadId, threads.id))
       .where(and(eq(threads.topicId, topicId), eq(threads.userId, this.userId)))
+      .groupBy(threads.id)
       .orderBy(desc(threads.updatedAt));
 
-    return data as ThreadItem[];
+    return foldSubagentMetrics(data as ThreadMetricRow[]);
   };
 
   findById = async (id: string) => {
diff --git a/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts b/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts
index 630d26cb4b..811b487917 100644
--- a/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts
+++ b/packages/heterogeneous-agents/src/adapters/claudeCode.test.ts
@@ -2014,7 +2014,7 @@ describe('ClaudeCodeAdapter', () => {
       expect(starts.some((e) => e.data?.newStep)).toBe(false);
     });
 
-    it('does NOT emit turn_metadata step_complete for subagent events', () => {
+    it('emits subagent-tagged turn_metadata step_complete carrying message.usage', () => {
       const adapter = new ClaudeCodeAdapter();
       adapter.adapt(init);
       adapter.adapt(
@@ -2037,6 +2037,41 @@ describe('ClaudeCodeAdapter', () => {
         type: 'assistant',
       });
 
+      const meta = events.find(
+        (e) => e.type === 'step_complete' && e.data?.phase === 'turn_metadata',
+      );
+      expect(meta).toBeDefined();
+      // Subagent ctx tag is what stops the executor from writing this usage
+      // onto the main agent (which would double-count vs the result event).
+      expect(meta?.data?.subagent?.parentToolCallId).toBe('toolu_parent');
+      expect(meta?.data?.subagent?.subagentMessageId).toBe('msg_sub');
+      expect(meta?.data?.model).toBe('claude-sonnet-4-6');
+      expect(meta?.data?.usage?.totalInputTokens).toBe(5);
+      expect(meta?.data?.usage?.totalOutputTokens).toBe(10);
+    });
+
+    it('does NOT emit turn_metadata for subagent events without message.usage', () => {
+      const adapter = new ClaudeCodeAdapter();
+      adapter.adapt(init);
+      adapter.adapt(
+        mainAssistant('msg_main', {
+          id: 'toolu_parent',
+          input: {},
+          name: 'Agent',
+          type: 'tool_use',
+        }),
+      );
+
+      const events = adapter.adapt({
+        message: {
+          content: [{ id: 'toolu_child', input: {}, name: 'Bash', type: 'tool_use' }],
+          id: 'msg_sub',
+          model: 'claude-sonnet-4-6',
+        },
+        parent_tool_use_id: 'toolu_parent',
+        type: 'assistant',
+      });
+
       const meta = events.find(
         (e) => e.type === 'step_complete' && e.data?.phase === 'turn_metadata',
       );
diff --git a/packages/heterogeneous-agents/src/adapters/claudeCode.ts b/packages/heterogeneous-agents/src/adapters/claudeCode.ts
index 756ca9f797..3435b2b913 100644
--- a/packages/heterogeneous-agents/src/adapters/claudeCode.ts
+++ b/packages/heterogeneous-agents/src/adapters/claudeCode.ts
@@ -829,13 +829,9 @@ export class ClaudeCodeAdapter implements AgentEventAdapter {
    * Handle a subagent assistant event (tagged with `parent_tool_use_id`).
    *
    * Subagent events are a side-channel of the main agent's stream and have
-   * two hard constraints:
-   *  - no main-agent step boundary (each subagent turn introduces a new
-   *    `message.id`; flushing that as a newStep would orphan main-agent
-   *    bubbles)
-   *  - no model / usage tracking on the main agent (CC's `result` event
-   *    carries the authoritative grand total; re-summing per-turn deltas
-   *    here would double-count against the main agent)
+   * one hard constraint: no main-agent step boundary (each subagent turn
+   * introduces a new `message.id`; flushing that as a newStep would orphan
+   * main-agent bubbles).
    *
    * Text / reasoning from subagent events ARE emitted — as `stream_chunk`
    * events tagged with the `subagent` peer field — so the executor can
@@ -844,6 +840,17 @@ export class ClaudeCodeAdapter implements AgentEventAdapter {
    * → tools → assistant text → ...). Without this the thread only ever
    * shows tool calls with no closing reasoning / summary.
    *
+   * Usage on `raw.message.usage` is also emitted, as a
+   * `step_complete{phase:turn_metadata, subagent}` event so the executor
+   * can route the per-turn delta onto the subagent's in-thread assistant
+   * (and bump the subagent run's running totalTokens for the inspector
+   * chip). Note this is the FULL message.usage (subagent assistant events
+   * are not partial-streamed, unlike main-agent assistant events which
+   * carry stale `message_start` snapshots), so no de-stale logic is
+   * needed here. The subagent ctx tag prevents the executor from writing
+   * the same usage to the main agent's assistant — CC's `result` event
+   * remains the grand total across main + subagents.
+   *
    * Subagent lineage lives as event-level **peer fields** on each chunk
    * (`subagent.parentToolCallId` + `subagent.subagentMessageId`), not on
    * individual `ToolCallPayload` items — tool payloads stay minimal and
@@ -920,6 +927,20 @@ export class ClaudeCodeAdapter implements AgentEventAdapter {
       );
     }
     events.push(...this.emitToolChunk(newToolCalls, messageId, subagentCtx));
+
+    const usage = toUsageData(raw.message?.usage);
+    if (usage) {
+      events.push(
+        this.makeEvent('step_complete', {
+          model: raw.message?.model,
+          phase: 'turn_metadata',
+          provider: 'claude-code',
+          subagent: subagentCtx,
+          usage,
+        }),
+      );
+    }
+
     return events;
   }
 
diff --git a/packages/types/src/tool/builtin.ts b/packages/types/src/tool/builtin.ts
index 4a906449fe..ebe65b28bb 100644
--- a/packages/types/src/tool/builtin.ts
+++ b/packages/types/src/tool/builtin.ts
@@ -303,6 +303,12 @@ export interface BuiltinInspectorProps<Arguments = any, State = any> {
   partialArgs?: Arguments;
   pluginState?: State;
   result?: { content: string | null; error?: any; state?: any };
+  /**
+   * Stable id of this tool call. Required for inspectors that need to correlate
+   * with side data — e.g. CC's `Agent` inspector joining to the subagent Thread
+   * via `metadata.sourceToolCallId`.
+   */
+  toolCallId?: string;
 }
 
 export type BuiltinInspector = <A = any, S = any>(props: BuiltinInspectorProps<A, S>) => ReactNode;
diff --git a/packages/types/src/topic/thread.ts b/packages/types/src/topic/thread.ts
index 9a68712699..5db2611d4b 100644
--- a/packages/types/src/topic/thread.ts
+++ b/packages/types/src/topic/thread.ts
@@ -38,6 +38,13 @@ export interface ThreadMetadata {
   duration?: number;
   /** Error details when task failed */
   error?: any;
+  /**
+   * Model the subagent ran on (e.g. CC's per-turn `message.model`). Pinned
+   * once for the run and rolled up here on finalize so historical / cold-load
+   * viewers can surface it (e.g. the subagent inspector chip tooltip) without
+   * the child messages being loaded.
+   */
+  model?: string;
   /** Operation ID for tracking */
   operationId?: string;
   /**
@@ -111,6 +118,7 @@ export const threadMetadataSchema = z.object({
   completedAt: z.string().optional(),
   duration: z.number().optional(),
   error: z.any().optional(),
+  model: z.string().optional(),
   operationId: z.string().optional(),
   sourceToolCallId: z.string().optional(),
   startedAt: z.string().optional(),
diff --git a/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx b/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx
index ab25873645..b97396ed92 100644
--- a/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx
+++ b/src/features/Conversation/Messages/AssistantGroup/Tool/Inspector/index.tsx
@@ -95,6 +95,7 @@ const Inspectors = memo<InspectorProps>(
               partialArgs={partialJson}
               pluginState={result?.state}
               result={result}
+              toolCallId={toolCallId}
             />
           </SafeBoundary>
           <ExecutionTime
diff --git a/src/features/DevPanel/RenderGallery/ToolPreview.tsx b/src/features/DevPanel/RenderGallery/ToolPreview.tsx
index f0743aebf8..ea488b3127 100644
--- a/src/features/DevPanel/RenderGallery/ToolPreview.tsx
+++ b/src/features/DevPanel/RenderGallery/ToolPreview.tsx
@@ -123,7 +123,12 @@ const ToolPreview = ({ api, mode }: ToolPreviewProps) => {
             </Text>
           </Flexbox>
           <div className={styles.previewShell}>
-            <ToolInspectorSlot api={api} derived={derived} variant={activeVariant} />
+            <ToolInspectorSlot
+              api={api}
+              derived={derived}
+              toolCallId={toolCallId}
+              variant={activeVariant}
+            />
           </div>
         </Flexbox>
 
diff --git a/src/features/DevPanel/RenderGallery/toolSurfaces.tsx b/src/features/DevPanel/RenderGallery/toolSurfaces.tsx
index b6871901c4..89e70f77d6 100644
--- a/src/features/DevPanel/RenderGallery/toolSurfaces.tsx
+++ b/src/features/DevPanel/RenderGallery/toolSurfaces.tsx
@@ -73,33 +73,37 @@ const coerceInspectorContent = (value: unknown): string | null => {
 interface ToolInspectorSlotProps {
   api: ApiEntry;
   derived: DerivedFixtureProps;
+  toolCallId?: string;
   variant: ToolRenderFixtureVariant;
 }
 
 /** Renders the API's Inspector with the lifecycle-derived props, or a Missing hint. */
-export const ToolInspectorSlot = memo<ToolInspectorSlotProps>(({ api, derived, variant }) => {
-  const Inspector = api.inspector;
-  if (!Inspector) return <Missing kind={'inspector'} />;
+export const ToolInspectorSlot = memo<ToolInspectorSlotProps>(
+  ({ api, derived, toolCallId, variant }) => {
+    const Inspector = api.inspector;
+    if (!Inspector) return <Missing kind={'inspector'} />;
 
-  return (
-    <RenderBoundary label={'Inspector'}>
-      <Inspector
-        apiName={api.apiName}
-        args={derived.args}
-        identifier={api.identifier}
-        isArgumentsStreaming={derived.isArgumentsStreaming}
-        isLoading={derived.isLoading}
-        partialArgs={derived.partialArgs}
-        pluginState={derived.pluginState}
-        result={{
-          content: coerceInspectorContent(variant.content),
-          error: derived.pluginError,
-          state: derived.pluginState,
-        }}
-      />
-    </RenderBoundary>
-  );
-});
+    return (
+      <RenderBoundary label={'Inspector'}>
+        <Inspector
+          apiName={api.apiName}
+          args={derived.args}
+          identifier={api.identifier}
+          isArgumentsStreaming={derived.isArgumentsStreaming}
+          isLoading={derived.isLoading}
+          partialArgs={derived.partialArgs}
+          pluginState={derived.pluginState}
+          toolCallId={toolCallId}
+          result={{
+            content: coerceInspectorContent(variant.content),
+            error: derived.pluginError,
+            state: derived.pluginState,
+          }}
+        />
+      </RenderBoundary>
+    );
+  },
+);
 
 ToolInspectorSlot.displayName = 'ToolInspectorSlot';
 
diff --git a/src/locales/default/chat.ts b/src/locales/default/chat.ts
index c70d6a9b67..cb1b145b8c 100644
--- a/src/locales/default/chat.ts
+++ b/src/locales/default/chat.ts
@@ -846,6 +846,12 @@ export default {
   'thread.closeSubagentThread': 'Hide Detail',
   'thread.divider': 'Subtopic',
   'thread.openSubagentThread': 'View Detail',
+  'thread.subagentMetrics.modelLabel': 'Model',
+  'thread.subagentMetrics.toolCalls_one': '{{count}} tool call',
+  'thread.subagentMetrics.toolCalls_other': '{{count}} tool calls',
+  'thread.subagentMetrics.tokens': '{{count}} tokens',
+  'thread.subagentMetrics.toolsShort_one': '{{count}} tool',
+  'thread.subagentMetrics.toolsShort_other': '{{count}} tools',
   'thread.subagentReadOnlyHint':
     'SubAgent conversations are read-only — execution is driven by the parent agent.',
   'thread.threadMessageCount': '{{messageCount}} messages',
diff --git a/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts b/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts
index a5883e7c87..ddf5910043 100644
--- a/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts
+++ b/src/server/services/heterogeneousAgent/HeterogeneousPersistenceHandler.ts
@@ -703,6 +703,27 @@ export class HeterogeneousPersistenceHandler {
 
   private async handleTurnMetadata(state: OperationState, event: AgentStreamEvent) {
     const { model, provider, usage } = event.data ?? {};
+    const subagentCtx = (event.data as any)?.subagent as SubagentEventContext | undefined;
+
+    if (subagentCtx) {
+      // Subagent-tagged usage: write it (plus the subagent's own model/provider)
+      // onto the subagent's in-thread assistant. The chip's totals are derived
+      // from these per-message `usage` snapshots on read (live aggregation +
+      // SQL rollup in `threadModel.queryByTopicId`), so nothing is tracked on
+      // the run. Do NOT touch `state.lastModel` / `state.lastProvider` — those
+      // carry main-agent step boundary state and would contaminate the next
+      // main-agent assistant create.
+      if (!usage) return;
+      const run = state.subagentRuns.get(subagentCtx.parentToolCallId);
+      if (!run) return;
+      await this.deps.messageModel.update(run.currentAssistantMsgId, {
+        metadata: { usage },
+        ...(model && { model }),
+        ...(provider && { provider }),
+      });
+      return;
+    }
+
     if (model) state.lastModel = model;
     if (provider) state.lastProvider = provider;
 
@@ -1236,8 +1257,11 @@ export class HeterogeneousPersistenceHandler {
       run.lastChainParentId = terminal.id;
     }
 
-    // Mark the thread completed. Idempotent — re-running on a retry just
-    // re-writes the same status; downstream UI badges are derived state.
+    // Mark the thread complete (created as `Processing`). The chip's
+    // tool-count / token / model metrics are NOT denormalized here — they're
+    // derived on read from the child messages (`threadModel.queryByTopicId`
+    // aggregates them in SQL, mirroring the live `aggregateSubagentMetrics`),
+    // so finalize owns only the status transition. Idempotent.
     await this.deps.threadModel.update(run.threadId, { status: ThreadStatus.Active });
 
     state.subagentRuns.delete(parentToolCallId);
diff --git a/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts b/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts
index 1bbc6cc4e2..71c8c38f99 100644
--- a/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts
+++ b/src/server/services/heterogeneousAgent/__tests__/HeterogeneousPersistenceHandler.test.ts
@@ -1,5 +1,6 @@
 // @vitest-environment node
 import type { AgentStreamEvent } from '@lobechat/agent-gateway-client';
+import { ThreadStatus } from '@lobechat/types';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
 import {
@@ -133,6 +134,7 @@ const createHarness = (params: {
       threads.set(thread.id, thread);
       return thread;
     }),
+    findById: vi.fn(async (id: string) => threads.get(id) ?? null),
     update: vi.fn(async (id: string, patch: Partial<FakeThread>) => {
       const existing = threads.get(id);
       if (!existing) return;
@@ -927,6 +929,72 @@ describe('HeterogeneousPersistenceHandler', () => {
       const thread = h.threads.get(threadId)!;
       expect(thread.status).toBeDefined();
     });
+
+    it('writes subagent usage + model onto the in-thread assistant, and finalize only flips status', async () => {
+      const h = createHarness({
+        assistantMessageId: 'asst-1',
+        operationId: 'op-1',
+        topicId: 'topic-1',
+      });
+
+      const subagentCtx = {
+        parentToolCallId: 'tc-spawn-1',
+        spawnMetadata: { prompt: 'p', subagentType: 'Explore' },
+        subagentMessageId: 'sub-1',
+      };
+
+      await h.handler.ingest({
+        events: [
+          buildEvent('stream_chunk', 0, {
+            chunkType: 'text',
+            content: 'working',
+            subagent: subagentCtx,
+          }),
+          buildEvent('stream_chunk', 1, {
+            chunkType: 'tools_calling',
+            subagent: subagentCtx,
+            toolsCalling: [
+              {
+                apiName: 'Bash',
+                arguments: '{}',
+                id: 'tc-child',
+                identifier: 'bash',
+                type: 'default',
+              },
+            ],
+          }),
+          // Subagent turn_metadata carries the authoritative per-turn usage + model.
+          buildEvent('step_complete', 2, {
+            model: 'claude-opus-4-8',
+            phase: 'turn_metadata',
+            provider: 'claude-code',
+            subagent: subagentCtx,
+            usage: { totalInputTokens: 10, totalOutputTokens: 5, totalTokens: 15 },
+          }),
+          buildEvent('tool_result', 3, { content: 'final', toolCallId: 'tc-spawn-1' }),
+        ],
+        operationId: 'op-1',
+        topicId: 'topic-1',
+      });
+
+      const threadId = [...h.threads.keys()][0];
+      const thread = h.threads.get(threadId)!;
+      // Metrics are NOT denormalized onto metadata — derived on read instead.
+      expect(thread.metadata?.totalTokens).toBeUndefined();
+      expect(thread.metadata?.totalToolCalls).toBeUndefined();
+      // Create-time peer fields untouched; finalize only flips status.
+      expect(thread.metadata?.sourceToolCallId).toBe('tc-spawn-1');
+      expect(thread.metadata?.subagentType).toBe('Explore');
+      expect(thread.status).toBe(ThreadStatus.Active);
+
+      // The in-thread assistant got usage + model written — the rows the
+      // read-time aggregation later sums over.
+      const threadAssts = [...h.messages.values()].filter(
+        (m) => m.threadId === threadId && m.role === 'assistant',
+      );
+      const withUsage = threadAssts.find((m) => m.metadata?.usage?.totalTokens === 15);
+      expect(withUsage?.model).toBe('claude-opus-4-8');
+    });
   });
 
   describe('terminal events and finish()', () => {
diff --git a/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts b/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts
index 5c12369e9a..f5c667bf79 100644
--- a/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts
+++ b/src/store/chat/slices/aiChat/actions/__tests__/heterogeneousAgentExecutor.test.ts
@@ -14,6 +14,7 @@ import path from 'node:path';
 import { HeterogeneousAgentSessionErrorCode } from '@lobechat/electron-client-ipc';
 import type { AgentEventAdapter } from '@lobechat/heterogeneous-agents';
 import { createAdapter } from '@lobechat/heterogeneous-agents';
+import { ThreadStatus } from '@lobechat/types';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { createGatewayEventHandler } from '../gatewayEventHandler';
@@ -41,9 +42,11 @@ vi.mock('@/services/message', () => ({
 
 // threadService — subagent Thread creation (CC `Task` tool_use)
 const mockCreateThread = vi.fn();
+const mockUpdateThread = vi.fn();
 vi.mock('@/services/thread', () => ({
   threadService: {
-    createThread: (...args: any[]) => mockCreateThread(...args),
+    createThread: (...args: unknown[]) => mockCreateThread(...args),
+    updateThread: (...args: unknown[]) => mockUpdateThread(...args),
   },
 }));
 
@@ -2426,6 +2429,74 @@ describe('heterogeneousAgentExecutor DB persistence', () => {
       expect(finalizeWrite).toBeDefined();
     });
 
+    it('marks the subagent thread Active on finalize without denormalizing metrics', async () => {
+      // Under read-time derivation the chip metrics (tool count / tokens /
+      // model) are NOT written onto `thread.metadata` at finalize — they're
+      // aggregated from the child messages on read. Finalize only flips the
+      // thread status Processing → Active.
+      await runWithEvents([
+        ccInit(),
+        ccToolUse('msg_main', 'toolu_task', 'Task', {
+          description: 'x',
+          prompt: 'go',
+          subagent_type: 'Explore',
+        }),
+        ccSubagentToolUse('msg_sub_1', 'toolu_task', 'toolu_child', 'Bash', { command: 'ls' }),
+        ccSubagentToolResult('toolu_child', 'toolu_task', 'file list'),
+        {
+          message: {
+            content: [{ text: 'summary', type: 'text' }],
+            id: 'msg_sub_2',
+            model: 'claude-opus-4-8',
+            role: 'assistant',
+            usage: { input_tokens: 1000, output_tokens: 200 },
+          },
+          parent_tool_use_id: 'toolu_task',
+          type: 'assistant',
+        },
+        ccSubagentSpawnResult('toolu_task', 'final answer'),
+        ccResult(),
+      ]);
+
+      const threadId = mockCreateThread.mock.calls[0][0].id;
+      const finalize = mockUpdateThread.mock.calls.find(([id]: any) => id === threadId);
+      expect(finalize).toBeDefined();
+      // Status-only — no metrics denormalized onto metadata.
+      expect(finalize![1]).toEqual({ status: ThreadStatus.Active });
+    });
+
+    it('writes the subagent model onto the in-thread assistant for the live tooltip', async () => {
+      // The chip tooltip derives the model from the child assistant's `model`
+      // field live (before finalize). The turn_metadata branch must persist it.
+      await runWithEvents([
+        ccInit(),
+        ccToolUse('msg_main', 'toolu_task', 'Task', {
+          description: 'x',
+          prompt: 'go',
+          subagent_type: 'Explore',
+        }),
+        {
+          message: {
+            content: [{ text: 'summary', type: 'text' }],
+            id: 'msg_sub',
+            model: 'claude-opus-4-8',
+            role: 'assistant',
+            usage: { input_tokens: 1000, output_tokens: 200 },
+          },
+          parent_tool_use_id: 'toolu_task',
+          type: 'assistant',
+        },
+        ccSubagentSpawnResult('toolu_task', 'final answer'),
+        ccResult(),
+      ]);
+
+      const modelWrite = mockUpdateMessage.mock.calls.find(
+        ([, val]: any) => val.model === 'claude-opus-4-8' && val.metadata?.usage,
+      );
+      expect(modelWrite).toBeDefined();
+      expect(modelWrite![1].provider).toBe('claude-code');
+    });
+
     it('retains subagent buffers + pinned target when the finalize flush fails', async () => {
       // Transient DB failures on the finalize-time flush used to silently
       // wipe the accumulators (buffer clear was outside the try/catch), so
diff --git a/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts b/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts
index 68c4ac25e5..5a4873872c 100644
--- a/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts
+++ b/src/store/chat/slices/aiChat/actions/heterogeneousAgentExecutor.ts
@@ -575,6 +575,7 @@ const ensureSubagentRun = async (
   if (!run) {
     const { spawnMetadata } = subagentCtx;
     const threadId = generateThreadId();
+    const startedAt = new Date().toISOString();
     const title =
       spawnMetadata?.description?.slice(0, 80) || spawnMetadata?.subagentType || 'Subagent';
 
@@ -583,7 +584,7 @@ const ensureSubagentRun = async (
         id: threadId,
         metadata: {
           sourceToolCallId: subagentCtx.parentToolCallId,
-          startedAt: new Date().toISOString(),
+          startedAt,
           subagentType: spawnMetadata?.subagentType,
         },
         sourceMessageId: mainAssistantMessageId,
@@ -974,6 +975,18 @@ const finalizeSubagentRun = async ({
     }
   }
 
+  // Mark the subagent Thread complete (created as `Processing`). The chip's
+  // tool-count / token / model metrics are NOT written here — they're derived
+  // on read from the child messages (live: `aggregateSubagentMetrics` over
+  // `dbMessagesMap`; cold-load: the same aggregation in SQL via
+  // `threadModel.queryByTopicId`), so finalize owns only the status transition.
+  // Best-effort — a failure here must not break finalize.
+  try {
+    await threadService.updateThread(run.threadId, { status: ThreadStatus.Active });
+  } catch (err) {
+    console.error('[HeterogeneousAgent] Failed to mark subagent thread complete:', err);
+  }
+
   completeSubOp(run.subOperationId);
 };
 
@@ -1515,9 +1528,48 @@ export const executeHeterogeneousAgent = async (
       // of all prior steps. Sum of turn_metadata equals result_usage for
       // a healthy run.
       if (event.type === 'step_complete' && event.data?.phase === 'turn_metadata') {
+        const subagentCtx = event.data.subagent as SubagentEventContext | undefined;
+        const turnUsage = event.data.usage;
+
+        if (subagentCtx) {
+          // Subagent-tagged usage: write it (plus the subagent's own
+          // model/provider) onto the subagent's in-thread assistant — NOT the
+          // main agent's. The chip derives its totals from these per-message
+          // `usage` snapshots (live + cold-load both aggregate the messages),
+          // so nothing is tracked on the run. Don't touch the MAIN agent's
+          // `lastModel` / `lastProvider` — those are main-agent step state and
+          // would contaminate the next main turn's create.
+          const turnModel = event.data.model as string | undefined;
+          const turnProvider = event.data.provider as string | undefined;
+          if (turnUsage) {
+            persistQueue = persistQueue.then(async () => {
+              const run = subagentRuns.get(subagentCtx.parentToolCallId);
+              if (!run) return;
+
+              const update = {
+                metadata: { usage: turnUsage },
+                ...(turnModel && { model: turnModel }),
+                ...(turnProvider && { provider: turnProvider }),
+              };
+              // Mirror the DB write into the thread's local message bucket
+              // so the inspector chip's live aggregation sees the usage as
+              // it lands. Without this `run.stream.update`, dbMessagesMap
+              // only learns the new metadata.usage after the next thread
+              // refresh — i.e. the chip stays at 0 tokens during streaming.
+              run.stream.update(run.currentAssistantMsgId, update as Partial<UIChatMessage>);
+              await messageService
+                .updateMessage(run.currentAssistantMsgId, update, {
+                  agentId: context.agentId,
+                  topicId: context.topicId,
+                })
+                .catch(console.error);
+            });
+          }
+          return;
+        }
+
         if (event.data.model) lastModel = event.data.model;
         if (event.data.provider) lastProvider = event.data.provider;
-        const turnUsage = event.data.usage;
         if (turnUsage) {
           persistQueue = persistQueue.then(async () => {
             await messageService
diff --git a/src/store/chat/slices/thread/selectors/index.ts b/src/store/chat/slices/thread/selectors/index.ts
index 6991544bef..4f9f7c565d 100644
--- a/src/store/chat/slices/thread/selectors/index.ts
+++ b/src/store/chat/slices/thread/selectors/index.ts
@@ -102,6 +102,31 @@ const getThreadChildMessages =
     return data.filter((m) => !!id && m.threadId === id);
   };
 
+/**
+ * Raw DB-level child messages for a thread, keyed by `messageMapKey` thread scope.
+ *
+ * Use this for *counting* / *aggregating* over individual messages (e.g. the
+ * subagent inspector chip's tool count + token total). Do NOT use it for
+ * rendering — the display layer reads from `messagesMap` (which groups tools
+ * into a virtual `assistantGroup`), so the shapes intentionally differ.
+ *
+ * Why `dbMessagesMap` not `messagesMap`: `messagesMap[thread_*]` only holds
+ * the rendered shape ([user, assistantGroup]); individual `role==='tool'` /
+ * `role==='assistant'` rows live in `dbMessagesMap[thread_*]`.
+ */
+const getThreadDbMessages =
+  (id?: string) =>
+  (s: ChatStoreState): UIChatMessage[] => {
+    if (!id || !s.activeAgentId) return [];
+    const key = messageMapKey({
+      agentId: s.activeAgentId,
+      groupId: s.activeGroupId,
+      threadId: id,
+      topicId: s.activeTopicId,
+    });
+    return (s.dbMessagesMap?.[key] || []) as UIChatMessage[];
+  };
+
 /**
  * Portal AI chats - used for AI title summarization
  */
@@ -140,6 +165,8 @@ export const threadSelectors = {
   currentActiveThread,
   currentPortalThread,
   currentTopicThreads,
+  getThreadChildMessages,
+  getThreadDbMessages,
   getThreadsBySourceMsgId,
   getThreadsByTopic,
   hasThreadBySourceMsgId,
diff --git a/src/utils/subagentMetrics.test.ts b/src/utils/subagentMetrics.test.ts
new file mode 100644
index 0000000000..dc04fd25e7
--- /dev/null
+++ b/src/utils/subagentMetrics.test.ts
@@ -0,0 +1,55 @@
+import { describe, expect, it } from 'vitest';
+
+import { aggregateSubagentMetrics } from './subagentMetrics';
+
+describe('aggregateSubagentMetrics', () => {
+  it('counts role=tool messages and sums every assistant turn usage', () => {
+    const result = aggregateSubagentMetrics([
+      { role: 'user' },
+      { metadata: { usage: { totalTokens: 1000 } }, model: 'claude-opus-4-8', role: 'assistant' },
+      { role: 'tool' },
+      { metadata: { usage: { totalTokens: 1800 } }, model: 'claude-opus-4-8', role: 'assistant' },
+      { role: 'tool' },
+      { metadata: { usage: { totalTokens: 2600 } }, role: 'assistant' },
+    ]);
+
+    // SUM, not last-turn: 1000 + 1800 + 2600
+    expect(result.totalTokens).toBe(5400);
+    expect(result.toolCalls).toBe(2);
+    expect(result.model).toBe('claude-opus-4-8');
+  });
+
+  it('reads usage from the promoted top-level field too', () => {
+    const result = aggregateSubagentMetrics([
+      { role: 'assistant', usage: { totalTokens: 300 } },
+      { metadata: { usage: { totalTokens: 700 } }, role: 'assistant' },
+    ]);
+
+    expect(result.totalTokens).toBe(1000);
+  });
+
+  it('returns zeros / undefined model for an empty or usage-less set', () => {
+    expect(aggregateSubagentMetrics([])).toEqual({
+      model: undefined,
+      toolCalls: 0,
+      totalTokens: 0,
+    });
+    expect(aggregateSubagentMetrics([{ role: 'assistant' }, { role: 'user' }])).toEqual({
+      model: undefined,
+      toolCalls: 0,
+      totalTokens: 0,
+    });
+  });
+
+  it('pins the first assistant model and ignores user/tool rows for tokens', () => {
+    const result = aggregateSubagentMetrics([
+      { model: 'model-a', role: 'assistant', usage: { totalTokens: 10 } },
+      { model: 'model-b', role: 'assistant', usage: { totalTokens: 20 } },
+      // user/tool rows never contribute tokens even if they carried a usage blob
+      { role: 'user', usage: { totalTokens: 999 } },
+    ]);
+
+    expect(result.model).toBe('model-a');
+    expect(result.totalTokens).toBe(30);
+  });
+});
diff --git a/src/utils/subagentMetrics.ts b/src/utils/subagentMetrics.ts
new file mode 100644
index 0000000000..e4ec45fe8d
--- /dev/null
+++ b/src/utils/subagentMetrics.ts
@@ -0,0 +1,52 @@
+/**
+ * Single source of truth for the CC subagent inspector-chip metrics.
+ *
+ * Both the live path (chip selector aggregating the in-memory streamed child
+ * messages) and the cold-load path (server `threadModel.queryByTopicId`
+ * aggregating the persisted rows in SQL) compute the SAME projection over the
+ * SAME messages — so the two can't diverge by construction. This TS helper is
+ * the live encoding; the SQL encoding mirrors it (SUM of assistant
+ * `usage.totalTokens`, COUNT of `role='tool'`, a pinned model).
+ *
+ * `totalTokens` is a plain SUM of each turn's `usage.totalTokens` — the same
+ * convention as the project's token-usage heatmap (`MessageModel`), i.e. "total
+ * tokens processed", not "final context size". CC re-feeds the growing context
+ * each turn so the sum is dominated by (mostly cached) context re-reads, which
+ * is exactly what the heatmap counts too.
+ */
+
+interface MetricMessage {
+  metadata?: { usage?: { totalTokens?: number | null } | null } | null;
+  model?: string | null;
+  role?: string | null;
+  usage?: { totalTokens?: number | null } | null;
+}
+
+export interface SubagentMetrics {
+  /** Model the subagent ran on (first assistant turn that carries one). */
+  model?: string;
+  /** Number of `role='tool'` child messages. */
+  toolCalls: number;
+  /** Sum of every assistant turn's `usage.totalTokens`. */
+  totalTokens: number;
+}
+
+export const aggregateSubagentMetrics = (messages: MetricMessage[]): SubagentMetrics => {
+  let toolCalls = 0;
+  let totalTokens = 0;
+  let model: string | undefined;
+
+  for (const m of messages) {
+    if (m.role === 'tool') {
+      toolCalls += 1;
+    } else if (m.role === 'assistant') {
+      // dbMessagesMap holds the raw DB shape (`metadata.usage`); the
+      // display-bound UIChatMessage promotes it to a top-level `usage` — accept
+      // either so the same helper serves both call sites.
+      totalTokens += m.metadata?.usage?.totalTokens ?? m.usage?.totalTokens ?? 0;
+      if (!model && m.model) model = m.model;
+    }
+  }
+
+  return { model, toolCalls, totalTokens };
+};