🐛 fix(context-engine): account for tool_calls + reasoning + tool defs in compression budget (#14813)

🐛 fix(context-engine): account for tool_calls + reasoning + tool defs in compression budget

The pre-compression token check (`shouldCompress`) only counted `msg.content`, which under-counted typical agent conversations by ~58% — tool_calls (~33% of payload), reasoning traces (~17%), and top-level tool definitions (~2%) were all silently ignored. As a result, conversations that the provider tokenizer measured at ~656K passed the harness's 524K threshold without firing compression, and were rejected upstream as ExceededContextWindow.

Verified empirically against 2 op snapshots in the same topic that hit the failure mode (LOBE-8964): harness counted 267K, deepseek measured 649K — a 380K (58.8%) gap. ~92% of that gap is fixable by accounting for the missing fields; the remaining ~8% is `tokenx` vs provider tokenizer drift, compensated by a 1.25× multiplier on the trigger path.

Changes:
- New `@lobechat/context-engine/tokenAccounting` module exporting `countContextTokens({messages, tools, options})`. Returns structured per-source + per-message + per-tool breakdown — usable both by the compression trigger and by UI panels showing "context by type".
- `shouldCompress` in agent-runtime delegates to `countContextTokens`, applies the 1.25× drift multiplier on `adjustedTotal` for the trigger decision, and exposes the raw count via `currentTokenCount`. Signature now takes `UIChatMessage[]` directly.
- Removed deprecated `calculateMessageTokens` / `estimateTokens` / `TokenCountMessage` from agent-runtime — the new module supersedes them. `createAgentExecutors.ts` updated to call `countContextTokens` directly for post-compression telemetry.
- Added `raw-md` plugin to agent-runtime vitest config (needed once context-engine is imported transitively, since the import graph pulls in `@lobechat/agent-templates` `.md` files).

What's intentionally NOT counted (DB-only fields not sent to provider): `plugin`, `pluginState`, `chunksList`, `extra`, `fileList`, etc. Counting these would over-estimate and trigger compression too early.

Tests:
- 19 new unit tests for `countContextTokens` covering content / tool_calls / reasoning / tool_call_id / tool definitions / fast-path / aggregation / DB-only field exclusion.
- `tokenCounter.test.ts` updated for new drift semantics + UIChatMessage signature; one boundary case now triggers compression (intentional — the drift multiplier kicks in at the threshold).

Refs: LOBE-8964 (ECW edge boundary), LOBE-8972 (ECW umbrella), LOBE-8973 (openrouter `:free` ctx), LOBE-8976 (compression diagnostics).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-13 19:20:04 +00:00 · 2026-05-15 13:22:19 +08:00
parent da7e18281d
commit 6e6970f1b2
9 changed files with 844 additions and 193 deletions
@@ -1,110 +1,24 @@
+import type { UIChatMessage } from '@lobechat/types';
 import { describe, expect, it } from 'vitest';

 import {
-  calculateMessageTokens,
  DEFAULT_MAX_CONTEXT,
  DEFAULT_THRESHOLD_RATIO,
-  estimateTokens,
  getCompressionThreshold,
  shouldCompress,
 } from './tokenCounter';

+// Fixture factory: callers supply `role` plus whichever fields the token
+// accounting reads; content, id and both timestamps get placeholder values.
+const mkMsg = (m: Partial<UIChatMessage> & { role: UIChatMessage['role'] }): UIChatMessage => {
+  const placeholders = { content: '', createdAt: 0, id: 'm', updatedAt: 0 };
+  // Caller-provided fields win over the placeholders.
+  return { ...placeholders, ...m } as UIChatMessage;
+};
+
 describe('tokenCounter', () => {
-  describe('estimateTokens', () => {
-    it('should estimate tokens for string content', () => {
-      const tokens = estimateTokens('Hello, world!');
-      expect(tokens).toBeGreaterThan(0);
-    });
-
-    it('should return 0 for empty string', () => {
-      expect(estimateTokens('')).toBe(0);
-    });
-
-    it('should handle null/undefined content', () => {
-      expect(estimateTokens(null)).toBe(0);
-      expect(estimateTokens(undefined)).toBe(0);
-    });
-
-    it('should handle object content by JSON stringifying', () => {
-      const tokens = estimateTokens({ key: 'value', nested: { a: 1 } });
-      expect(tokens).toBeGreaterThan(0);
-    });
-
-    it('should handle array content', () => {
-      const tokens = estimateTokens(['item1', 'item2', 'item3']);
-      expect(tokens).toBeGreaterThan(0);
-    });
-  });
-
-  describe('calculateMessageTokens', () => {
-    it('should use totalOutputTokens for assistant messages when available', () => {
-      const messages = [
-        {
-          content: 'This content should be ignored',
-          metadata: { usage: { totalOutputTokens: 100 } },
-          role: 'assistant',
-        },
-      ];
-      expect(calculateMessageTokens(messages)).toBe(100);
-    });
-
-    it('should estimate tokens for assistant messages without usage data', () => {
-      const messages = [{ content: 'Hello from assistant', role: 'assistant' }];
-      const tokens = calculateMessageTokens(messages);
-      expect(tokens).toBeGreaterThan(0);
-      // Should be estimated, not 0
-      expect(tokens).not.toBe(100);
-    });
-
-    it('should estimate tokens for user messages', () => {
-      const messages = [{ content: 'Hello from user', role: 'user' }];
-      const tokens = calculateMessageTokens(messages);
-      expect(tokens).toBeGreaterThan(0);
-    });
-
-    it('should estimate tokens for system messages', () => {
-      const messages = [{ content: 'System prompt', role: 'system' }];
-      const tokens = calculateMessageTokens(messages);
-      expect(tokens).toBeGreaterThan(0);
-    });
-
-    it('should sum tokens from multiple messages', () => {
-      const messages = [
-        { content: 'Hello', role: 'user' },
-        { content: 'Hi there!', metadata: { usage: { totalOutputTokens: 50 } }, role: 'assistant' },
-        { content: 'How are you?', role: 'user' },
-      ];
-      const tokens = calculateMessageTokens(messages);
-      // Should be 50 (assistant) + estimated tokens for user messages
-      expect(tokens).toBeGreaterThan(50);
-    });
-
-    it('should handle empty messages array', () => {
-      expect(calculateMessageTokens([])).toBe(0);
-    });
-
-    it('should handle messages with empty content', () => {
-      const messages = [
-        { content: '', role: 'user' },
-        { content: undefined, role: 'assistant' },
-      ];
-      expect(calculateMessageTokens(messages)).toBe(0);
-    });
-
-    it('should skip assistant usage with 0 tokens and estimate instead', () => {
-      const messages = [
-        {
-          content: 'Some content',
-          metadata: { usage: { totalOutputTokens: 0 } },
-          role: 'assistant',
-        },
-      ];
-      const tokens = calculateMessageTokens(messages);
-      // Should estimate since totalOutputTokens is 0
-      expect(tokens).toBeGreaterThan(0);
-    });
-  });
-
  describe('getCompressionThreshold', () => {
    it('should use default values', () => {
      const threshold = getCompressionThreshold();
@@ -141,8 +55,7 @@ describe('tokenCounter', () => {

  describe('shouldCompress', () => {
    it('should return needsCompression=false when under threshold', () => {
-      const messages = [{ content: 'Hi', role: 'user' }];
-      const result = shouldCompress(messages);
+      const result = shouldCompress([mkMsg({ role: 'user', content: 'Hi' })]);

      expect(result.needsCompression).toBe(false);
      expect(result.currentTokenCount).toBeGreaterThan(0);
@@ -150,48 +63,62 @@ describe('tokenCounter', () => {
    });

    it('should return needsCompression=true when over threshold', () => {
-      // Create a message with usage that exceeds threshold
-      const messages = [
-        {
-          content: '',
-          metadata: { usage: { totalOutputTokens: 70_000 } },
+      const result = shouldCompress([
+        mkMsg({
          role: 'assistant',
-        },
-      ];
-      const result = shouldCompress(messages);
+          metadata: { usage: { totalOutputTokens: 70_000 } as any } as any,
+        }),
+      ]);

      expect(result.needsCompression).toBe(true);
      expect(result.currentTokenCount).toBe(70_000);
      expect(result.threshold).toBe(64_000); // 128k * 0.5
    });

-    it('should return needsCompression=false when exactly at threshold', () => {
-      const messages = [
-        {
-          content: '',
-          metadata: { usage: { totalOutputTokens: 64_000 } },
+    it('should return needsCompression=true when raw count is at threshold (drift pushes over)', () => {
+      // 1.25× default drift multiplier means raw==threshold → adjusted > threshold
+      // → compression fires. This is intentional: we want to compress before the
+      // upstream tokenizer overflows the model's context window.
+      const result = shouldCompress([
+        mkMsg({
          role: 'assistant',
-        },
-      ];
-      const result = shouldCompress(messages);
+          metadata: { usage: { totalOutputTokens: 64_000 } as any } as any,
+        }),
+      ]);
+
+      expect(result.needsCompression).toBe(true);
+      expect(result.currentTokenCount).toBe(64_000);
+    });
+
+    it('should NOT trigger at threshold when driftMultiplier is 1', () => {
+      // Disabling drift restores strict "raw > threshold" semantics
+      const result = shouldCompress(
+        [
+          mkMsg({
+            role: 'assistant',
+            metadata: { usage: { totalOutputTokens: 64_000 } as any } as any,
+          }),
+        ],
+        { driftMultiplier: 1 },
+      );

-      // Exactly at threshold should not trigger compression
      expect(result.needsCompression).toBe(false);
      expect(result.currentTokenCount).toBe(64_000);
    });

    it('should use custom options', () => {
-      const messages = [
+      const result = shouldCompress(
+        [
+          mkMsg({
+            role: 'assistant',
+            metadata: { usage: { totalOutputTokens: 50_000 } as any } as any,
+          }),
+        ],
        {
-          content: '',
-          metadata: { usage: { totalOutputTokens: 50_000 } },
-          role: 'assistant',
+          maxWindowToken: 60_000,
+          thresholdRatio: 0.75,
        },
-      ];
-      const result = shouldCompress(messages, {
-        maxWindowToken: 60_000,
-        thresholdRatio: 0.75,
-      });
+      );

      // threshold = 60k * 0.75 = 45k, current = 50k > 45k
      expect(result.needsCompression).toBe(true);
@@ -1,13 +1,26 @@
-import { estimateTokenCount } from 'tokenx';
+import { countContextTokens, DEFAULT_DRIFT_MULTIPLIER } from '@lobechat/context-engine';
+import type { UIChatMessage } from '@lobechat/types';

 /**
 * Options for token counting and compression threshold calculation
 */
 export interface TokenCountOptions {
+  /**
+   * Optional drift multiplier override forwarded to {@link countContextTokens}.
+   * Default {@link DEFAULT_DRIFT_MULTIPLIER} (1.25). Pass `1` to disable the
+   * adjustment so the trigger compares the raw count against the threshold.
+   */
+  driftMultiplier?: number;
  /** Model's max context window token count */
  maxWindowToken?: number;
-  /** Threshold ratio for triggering compression, default 0.75 */
+  /** Threshold ratio for triggering compression, default 0.5 */
  thresholdRatio?: number;
+  /**
+   * Optional top-level tool definitions for the upcoming LLM call. When
+   * provided, tool definition tokens are counted toward the budget — matches
+   * what the provider actually charges. Pass the same `tools` array that will
+   * be sent in the request payload.
+   */
+  tools?: unknown[];
 }

 /** Default max context window (128k tokens) */
@@ -16,60 +29,8 @@ export const DEFAULT_MAX_CONTEXT = 128_000;
 /** Default threshold ratio (50% of max context) */
 export const DEFAULT_THRESHOLD_RATIO = 0.5;

-/**
- * Message interface for token counting
- */
-export interface TokenCountMessage {
-  content?: string | unknown;
-  metadata?: {
-    usage?: {
-      totalOutputTokens?: number;
-    };
-  } | null;
-  role: string;
-}
-
-/**
- * Estimate token count for text content using tokenx
- * @param content - Text content or object to estimate tokens for
- * @returns Estimated token count
- */
-export function estimateTokens(content: string | unknown): number {
-  // Handle null/undefined early
-  if (content === null || content === undefined) return 0;
-
-  const text = typeof content === 'string' ? content : JSON.stringify(content);
-  if (!text) return 0;
-  return estimateTokenCount(text);
-}
-
-/**
- * Calculate total token count for a list of messages
- * - Assistant messages: Use metadata.usage.totalOutputTokens if available (exact value)
- * - User/System messages: Use tokenx estimation
- *
- * @param messages - List of messages to count tokens for
- * @returns Total token count
- */
-export function calculateMessageTokens(messages: TokenCountMessage[]): number {
-  return messages.reduce((total, msg) => {
-    // For assistant messages, prefer the recorded token count from usage metadata
-    if (msg.role === 'assistant') {
-      const outputTokens = msg.metadata?.usage?.totalOutputTokens;
-      if (outputTokens && outputTokens > 0) {
-        return total + outputTokens;
-      }
-    }
-
-    // For user/system messages or assistant messages without usage data, estimate tokens
-    return total + estimateTokens(msg.content);
-  }, 0);
-}
-
 /**
 * Calculate the compression threshold based on max context window
- * @param options - Token count options
- * @returns Compression threshold in tokens
 */
 export function getCompressionThreshold(options: TokenCountOptions = {}): number {
  const maxContext = options.maxWindowToken ?? DEFAULT_MAX_CONTEXT;
@@ -81,30 +42,43 @@ export function getCompressionThreshold(options: TokenCountOptions = {}): number
 * Result of compression check
 */
 export interface CompressionCheckResult {
-  /** Current total token count */
+  /**
+   * Best raw estimate of current input tokens (sum of message content +
+   * tool calls + thought signatures + reasoning + tool_call_id + tool
+   * definitions). This is the unadjusted total: no drift multiplier applied.
+   */
  currentTokenCount: number;
-  /** Whether compression is needed */
+  /**
+   * `true` when the drift-adjusted count exceeds `threshold`. The adjusted
+   * count is not exposed on this result; it is the raw count scaled by the
+   * drift multiplier (default 1.25×) to compensate for the gap between
+   * `tokenx`'s heuristic and provider tokenizers, so compression fires before
+   * upstream tokenizers actually overflow the model's context window.
+   */
  needsCompression: boolean;
-  /** Compression threshold */
+  /** Compression threshold (`maxWindowToken × thresholdRatio`) */
  threshold: number;
 }

 /**
- * Check if messages need compression based on token count
- * @param messages - List of messages to check
- * @param options - Token count options
- * @returns Compression check result
+ * Check if messages need compression based on token count.
+ *
+ * Uses {@link countContextTokens} under the hood, so the input estimate
+ * accounts for tool calls, reasoning, and tool definitions in addition to
+ * `content` (see LOBE-8964 for the calibration data).
+ *
+ * The trigger compares the drift-adjusted total against the threshold, while
+ * the returned `currentTokenCount` is the unadjusted raw total.
+ *
+ * @param messages - Conversation messages to measure
+ * @param options - Window size, threshold ratio, drift multiplier override and
+ *   optional tool definitions
+ * @returns The raw token count, the threshold, and whether compression should fire
 */
 export function shouldCompress(
-  messages: TokenCountMessage[],
+  messages: UIChatMessage[],
  options: TokenCountOptions = {},
 ): CompressionCheckResult {
-  const currentTokenCount = calculateMessageTokens(messages);
+  const accounting = countContextTokens({
+    messages,
+    options: { driftMultiplier: options.driftMultiplier ?? DEFAULT_DRIFT_MULTIPLIER },
+    tools: options.tools,
+  });
  const threshold = getCompressionThreshold(options);

  return {
-    currentTokenCount,
-    needsCompression: currentTokenCount > threshold,
+    currentTokenCount: accounting.rawTotal,
+    needsCompression: accounting.adjustedTotal > threshold,
    threshold,
  };
 }
@@ -1,6 +1,14 @@
 import { defineConfig } from 'vitest/config';

 export default defineConfig({
+  plugins: [
+    {
+      name: 'raw-md',
+      transform(_, id) {
+        if (id.endsWith('.md')) return { code: 'export default ""', map: null };
+      },
+    },
+  ],
  test: {
    coverage: {
      exclude: [
@@ -24,6 +24,7 @@
    "debug": "^4.4.3",
    "es-toolkit": "^1.43.0",
    "immer": "^10.2.0",
+    "tokenx": "^1.2.1",
    "ts-md5": "^2.0.1",
    "unist-builder": "^4.0.0",
    "xast-util-to-xml": "^4.0.0",
@@ -16,6 +16,16 @@ export { ContextEngine } from './pipeline';

 // Context Providers
 export * from './providers';
+
+// Token accounting (compression triggers + UI breakdown)
+export type {
+  ContextTokenAccounting,
+  CountContextTokensParams,
+  MessageTokenBreakdown,
+  TokenSourceType,
+  ToolDefinitionTokenBreakdown,
+} from './tokenAccounting';
+export { countContextTokens, DEFAULT_DRIFT_MULTIPLIER } from './tokenAccounting';
 // Processors
 export type { PlaceholderValue, PlaceholderValueMap } from './processors';
 export {
@@ -0,0 +1,433 @@
+import type { UIChatMessage } from '@lobechat/types';
+import { describe, expect, it } from 'vitest';
+
+import { countContextTokens, DEFAULT_DRIFT_MULTIPLIER } from '../index';
+
+// Message fixture builder. UIChatMessage carries many optional fields, so a
+// test passes `role` and only the fields relevant to token accounting; the
+// required content / id / timestamp fields are filled with placeholders.
+const mkMsg = (m: Partial<UIChatMessage> & { role: UIChatMessage['role'] }): UIChatMessage => {
+  const skeleton = { content: '', createdAt: 0, id: 'm', updatedAt: 0 };
+  // Spread order matters: fields in `m` override the skeleton.
+  return { ...skeleton, ...m } as UIChatMessage;
+};
+
+describe('countContextTokens', () => {
+  describe('basic shape & defaults', () => {
+    it('returns zero accounting for empty input', () => {
+      const result = countContextTokens({ messages: [] });
+
+      expect(result.rawTotal).toBe(0);
+      expect(result.adjustedTotal).toBe(0);
+      expect(result.driftMultiplier).toBe(DEFAULT_DRIFT_MULTIPLIER);
+      expect(result.messages).toEqual([]);
+      expect(result.tools).toEqual([]);
+      expect(result.bySource).toEqual({
+        content: 0,
+        reasoning: 0,
+        thoughtSignature: 0,
+        toolCallId: 0,
+        toolCalls: 0,
+        toolDefinition: 0,
+      });
+    });
+
+    it('respects a custom driftMultiplier', () => {
+      const msgs: UIChatMessage[] = [mkMsg({ role: 'user', content: 'hello world '.repeat(100) })];
+      const r1 = countContextTokens({ messages: msgs });
+      const r2 = countContextTokens({ messages: msgs, options: { driftMultiplier: 1 } });
+
+      expect(r1.rawTotal).toBe(r2.rawTotal);
+      expect(r2.adjustedTotal).toBe(r2.rawTotal); // 1.0 means no adjustment
+      expect(r1.adjustedTotal).toBe(Math.ceil(r1.rawTotal * DEFAULT_DRIFT_MULTIPLIER));
+    });
+
+    it('produces one breakdown entry per message in original order', () => {
+      const msgs: UIChatMessage[] = [
+        mkMsg({ role: 'user', content: 'a' }),
+        mkMsg({ role: 'assistant', content: 'b' }),
+        mkMsg({ role: 'tool', content: 'c' }),
+      ];
+      const r = countContextTokens({ messages: msgs });
+
+      expect(r.messages).toHaveLength(3);
+      expect(r.messages.map((m) => [m.index, m.role])).toEqual([
+        [0, 'user'],
+        [1, 'assistant'],
+        [2, 'tool'],
+      ]);
+    });
+  });
+
+  describe('content counting', () => {
+    it('counts user message content', () => {
+      const r = countContextTokens({
+        messages: [mkMsg({ role: 'user', content: 'hello world '.repeat(50) })],
+      });
+      expect(r.bySource.content).toBeGreaterThan(0);
+      expect(r.messages[0].bySource.content).toBe(r.bySource.content);
+      expect(r.messages[0].total).toBe(r.messages[0].bySource.content);
+    });
+
+    it('uses recorded usage.totalOutputTokens for assistant when present', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: 'short text', // would estimate to a small count
+            metadata: {
+              usage: { totalOutputTokens: 5000 } as any,
+            } as any,
+          }),
+        ],
+      });
+      expect(r.bySource.content).toBe(5000);
+      expect(r.messages[0].bySource.content).toBe(5000);
+    });
+
+    it('falls back to estimating content when usage is missing or zero', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: 'long text that needs estimating '.repeat(100),
+            metadata: { usage: { totalOutputTokens: 0 } as any } as any,
+          }),
+        ],
+      });
+      expect(r.bySource.content).toBeGreaterThan(0);
+    });
+  });
+
+  describe('tool calls (assistant.tools)', () => {
+    it('counts tool call payloads on assistant messages', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            tools: [
+              {
+                apiName: 'searchWeb',
+                arguments: '{"query": "very long query string that takes some tokens"}',
+                id: 'call_abc123',
+                identifier: 'search-plugin',
+                type: 'default',
+              },
+            ] as any,
+          }),
+        ],
+      });
+
+      expect(r.bySource.toolCalls).toBeGreaterThan(0);
+      expect(r.messages[0].bySource.toolCalls).toBe(r.bySource.toolCalls);
+    });
+
+    it('does NOT count tools on non-assistant messages', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'user',
+            content: '',
+            // user messages with `tools` shouldn't be a thing, but if it slips
+            // through it must not be counted toward toolCalls.
+            tools: [
+              { apiName: 'x', arguments: '{}', id: '1', identifier: 'p', type: 'default' },
+            ] as any,
+          }),
+        ],
+      });
+      expect(r.bySource.toolCalls).toBe(0);
+    });
+
+    it('does NOT count tool calls when assistant has recorded usage (fast-path)', () => {
+      // The assistant fast-path attributes recorded output tokens to `content`
+      // because the recorded count already includes generated tool_calls.
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            metadata: { usage: { totalOutputTokens: 1234 } as any } as any,
+            tools: [
+              {
+                apiName: 'foo',
+                arguments: '{"a":1}',
+                id: 'c1',
+                identifier: 'p',
+                thoughtSignature: 'sig-skipped-on-fast-path'.repeat(20),
+                type: 'default',
+              },
+            ] as any,
+          }),
+        ],
+      });
+      expect(r.bySource.content).toBe(1234);
+      expect(r.bySource.toolCalls).toBe(0);
+      expect(r.bySource.thoughtSignature).toBe(0);
+    });
+  });
+
+  describe('thoughtSignature on tool calls (Gemini)', () => {
+    it('counts thoughtSignature separately from toolCalls', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            tools: [
+              {
+                apiName: 'searchWeb',
+                arguments: '{"query":"x"}',
+                id: 'call_1',
+                identifier: 'p',
+                thoughtSignature: 'opaque signature payload '.repeat(40),
+                type: 'default',
+              },
+            ] as any,
+          }),
+        ],
+      });
+      expect(r.bySource.toolCalls).toBeGreaterThan(0);
+      expect(r.bySource.thoughtSignature).toBeGreaterThan(0);
+      // Buckets must not overlap — thoughtSignature should not be added to toolCalls
+      const tcOnlyArgs = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            tools: [
+              {
+                apiName: 'searchWeb',
+                arguments: '{"query":"x"}',
+                id: 'call_1',
+                identifier: 'p',
+                type: 'default',
+              },
+            ] as any,
+          }),
+        ],
+      });
+      expect(r.bySource.toolCalls).toBe(tcOnlyArgs.bySource.toolCalls);
+    });
+
+    it('sums thoughtSignature across multiple tool calls', () => {
+      const callA = {
+        apiName: 'a',
+        arguments: '{}',
+        id: '1',
+        identifier: 'p',
+        thoughtSignature: 'sig-A '.repeat(30),
+        type: 'default',
+      };
+      const callB = {
+        apiName: 'b',
+        arguments: '{}',
+        id: '2',
+        identifier: 'p',
+        thoughtSignature: 'sig-B '.repeat(30),
+        type: 'default',
+      };
+      // Same assistant message shape, varying only the attached tool calls.
+      const countWith = (calls: unknown[]) =>
+        countContextTokens({
+          messages: [mkMsg({ role: 'assistant', content: '', tools: calls as any })],
+        });
+
+      const single = countWith([callA]);
+      const r = countWith([callA, callB]);
+
+      expect(single.bySource.thoughtSignature).toBeGreaterThan(0);
+      // A bare `> 0` check would pass even if only the first signature were
+      // counted; the second signature must push the bucket above the
+      // one-signature baseline.
+      expect(r.bySource.thoughtSignature).toBeGreaterThan(single.bySource.thoughtSignature);
+      expect(r.messages[0].bySource.thoughtSignature).toBe(r.bySource.thoughtSignature);
+    });
+
+    it('does not count thoughtSignature when absent', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            tools: [
+              { apiName: 'a', arguments: '{}', id: '1', identifier: 'p', type: 'default' },
+            ] as any,
+          }),
+        ],
+      });
+      expect(r.bySource.thoughtSignature).toBe(0);
+      expect(r.messages[0].bySource.thoughtSignature).toBeUndefined();
+    });
+  });
+
+  describe('reasoning trace', () => {
+    it('counts ModelReasoning.content on assistant messages', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            reasoning: { content: 'long reasoning chain '.repeat(50) },
+          }),
+        ],
+      });
+      expect(r.bySource.reasoning).toBeGreaterThan(0);
+    });
+
+    it('handles reasoning passed as a plain string', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            reasoning: 'plain string reasoning' as any,
+          }),
+        ],
+      });
+      expect(r.bySource.reasoning).toBeGreaterThan(0);
+    });
+
+    it('skips reasoning when fast-path recorded usage is present', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            metadata: { usage: { totalOutputTokens: 100 } as any } as any,
+            reasoning: { content: 'this should not be re-counted'.repeat(50) },
+          }),
+        ],
+      });
+      expect(r.bySource.reasoning).toBe(0);
+    });
+  });
+
+  describe('tool_call_id (tool messages)', () => {
+    it('counts tool_call_id regardless of role', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'tool',
+            content: '{"result":"ok"}',
+            tool_call_id: 'call_abc123_xyz',
+          }),
+        ],
+      });
+      expect(r.bySource.toolCallId).toBeGreaterThan(0);
+      expect(r.bySource.content).toBeGreaterThan(0);
+    });
+
+    it('still counts tool_call_id on assistant fast-path', () => {
+      // tool_call_id can appear on assistant in some flows; the fast-path
+      // covers content/reasoning/toolCalls but tool_call_id is a separate
+      // field that's always added.
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            role: 'assistant',
+            content: '',
+            metadata: { usage: { totalOutputTokens: 100 } as any } as any,
+            tool_call_id: 'call_xyz',
+          }),
+        ],
+      });
+      expect(r.bySource.toolCallId).toBeGreaterThan(0);
+    });
+  });
+
+  describe('tool definitions (top-level tools[])', () => {
+    it('counts each tool definition and exposes a per-tool breakdown', () => {
+      const tools = [
+        { function: { name: 'search', parameters: { type: 'object' } }, type: 'function' },
+        { function: { name: 'lookup', parameters: { type: 'object' } }, type: 'function' },
+      ];
+      const r = countContextTokens({ messages: [], tools });
+
+      expect(r.tools).toHaveLength(2);
+      expect(r.tools.map((t) => t.name)).toEqual(['search', 'lookup']);
+      expect(r.tools.every((t) => t.total > 0)).toBe(true);
+      expect(r.bySource.toolDefinition).toBe(r.tools.reduce((s, t) => s + t.total, 0));
+    });
+
+    it('falls back to top-level name when function.name is absent', () => {
+      const r = countContextTokens({
+        messages: [],
+        tools: [{ name: 'plain_tool', schema: {} }],
+      });
+      expect(r.tools[0].name).toBe('plain_tool');
+    });
+
+    it('uses "unknown" for tools with no resolvable name', () => {
+      const r = countContextTokens({
+        messages: [],
+        tools: [{ description: 'nameless' }],
+      });
+      expect(r.tools[0].name).toBe('unknown');
+    });
+  });
+
+  describe('does NOT count DB-only fields', () => {
+    it('ignores plugin / pluginState / extra / chunksList / metadata extras', () => {
+      // Fields that are actually sent to the provider for a tool message.
+      const sentFields = {
+        role: 'tool' as const,
+        content: 'real_content',
+        tool_call_id: 'tcid',
+      };
+      const baseline = countContextTokens({ messages: [mkMsg(sentFields)] });
+
+      const r = countContextTokens({
+        messages: [
+          mkMsg({
+            ...sentFields,
+            // All of these are DB-only; counting them would over-estimate.
+            plugin: {
+              apiName: 'x',
+              arguments: 'a'.repeat(5000),
+              identifier: 'p',
+              type: 'default',
+            } as any,
+            pluginState: { output: 'b'.repeat(5000), success: true } as any,
+            chunksList: [{ id: 'c'.repeat(5000) }] as any,
+            extra: { translate: 'd'.repeat(5000) } as any,
+          }),
+        ],
+      });
+      // Only content + tool_call_id should contribute; the other fields' bulk
+      // must not show up.
+      const expectedSources = new Set<string>(['content', 'toolCallId']);
+      for (const k of Object.keys(r.messages[0].bySource)) {
+        expect(expectedSources.has(k)).toBe(true);
+      }
+      // Checking the keys alone would miss DB-only bulk leaking into an
+      // existing bucket (e.g. `content`), so the counts themselves must match
+      // the message that carries no DB-only fields.
+      expect(r.messages[0].bySource).toEqual(baseline.messages[0].bySource);
+      expect(r.rawTotal).toBe(baseline.rawTotal);
+    });
+  });
+
+  describe('aggregation', () => {
+    it('sums bySource across multiple messages and tools', () => {
+      const r = countContextTokens({
+        messages: [
+          mkMsg({ role: 'user', content: 'first '.repeat(30) }),
+          mkMsg({
+            role: 'assistant',
+            content: 'second '.repeat(30),
+            tools: [
+              { apiName: 'a', arguments: '{}', id: '1', identifier: 'p', type: 'default' },
+            ] as any,
+            reasoning: { content: 'reason '.repeat(30) },
+          }),
+          mkMsg({ role: 'tool', content: '{"r":1}', tool_call_id: 'cid' }),
+        ],
+        tools: [
+          { function: { name: 'tool_a' }, type: 'function' },
+          { function: { name: 'tool_b' }, type: 'function' },
+        ],
+      });
+
+      const sumOfBySource = Object.values(r.bySource).reduce((s, v) => s + v, 0);
+      expect(r.rawTotal).toBe(sumOfBySource);
+
+      const sumOfMessageTotals = r.messages.reduce((s, m) => s + m.total, 0);
+      const messagesContrib = sumOfMessageTotals;
+      const toolsContrib = r.bySource.toolDefinition;
+      expect(r.rawTotal).toBe(messagesContrib + toolsContrib);
+
+      expect(r.adjustedTotal).toBe(Math.ceil(r.rawTotal * DEFAULT_DRIFT_MULTIPLIER));
+    });
+  });
+});
@@ -0,0 +1,193 @@
+import { estimateTokenCount } from 'tokenx';
+
+import type {
+  ContextTokenAccounting,
+  CountContextTokensParams,
+  MessageTokenBreakdown,
+  TokenSourceType,
+  ToolDefinitionTokenBreakdown,
+} from './types';
+
+export const DEFAULT_DRIFT_MULTIPLIER = 1.25; // tokenx drift (~10–15%) plus ~10% headroom
+
+const ZERO_BY_SOURCE = (): Record<TokenSourceType, number> => ({
+  content: 0,
+  reasoning: 0,
+  thoughtSignature: 0,
+  toolCallId: 0,
+  toolCalls: 0,
+  toolDefinition: 0,
+}); // factory rather than a shared constant: every call yields a fresh, zero-filled record
+
+const estimate = (value: unknown): number => {
+  if (value === null || value === undefined) return 0; // nothing to measure
+  const payload = typeof value !== 'string' ? JSON.stringify(value) : value;
+  return payload ? estimateTokenCount(payload) : 0; // empty / un-serializable input → 0
+};
+
+const bumpSource = (
+  totals: Partial<Record<TokenSourceType, number>>,
+  source: TokenSourceType,
+  tokens: number,
+) => {
+  if (tokens <= 0) return; // nothing to record: zero-token sources stay absent from the map
+  totals[source] = tokens + (totals[source] ?? 0);
+};
+
+/**
+ * Account every token that will be sent to the provider for one chat request,
+ * broken down by source category and per-item.
+ *
+ * **What's counted (and why)**
+ * | source             | field on UIChatMessage                                     | sent to provider as              |
+ * |--------------------|------------------------------------------------------------|----------------------------------|
+ * | `content`          | `msg.content`                                              | `message.content`                |
+ * | `toolCalls`        | `msg.tools[]` (lobe internal, not OpenAI's `tool_calls`)   | `message.tool_calls`             |
+ * | `thoughtSignature` | `msg.tools[N].thoughtSignature` (Gemini-specific)          | echoed back per tool call        |
+ * | `reasoning`        | `msg.reasoning.content` / `msg.reasoning` (string variant) | echoed back next turn (thinking) |
+ * | `toolCallId`       | `msg.tool_call_id`                                         | `message.tool_call_id`           |
+ * | `toolDefinition`   | top-level `tools[]` param                                  | request `tools` array            |
+ *
+ * **What's NOT counted (intentionally)** — these are DB-only fields the
+ * harness stores but doesn't ship to the provider:
+ *
+ *   `plugin`, `pluginState`, `pluginIntervention`, `pluginError`, `chunksList`,
+ *   `editorData`, `extra`, `fileList`, `imageList`, `videoList`, `metadata`
+ *   (other than `metadata.usage.totalOutputTokens` shortcut for assistant)
+ *
+ * Counting them would over-estimate and trigger compression too early.
+ *
+ * **Token estimation accuracy**
+ *
+ * Uses the `tokenx` heuristic estimator (~96% accuracy on typical English text).
+ * For agent conversations heavy in JSON / code / mixed CJK, `tokenx` typically
+ * under-counts by 10–15% vs provider tokenizers. The default
+ * `driftMultiplier: 1.25` compensates for that drift PLUS leaves ~10% headroom
+ * so callers using the result as a compression trigger fire before the upstream
+ * tokenizer reaches its limit.
+ *
+ * **Assistant fast-path**
+ *
+ * If an assistant message has `metadata.usage.totalOutputTokens > 0`, that
+ * recorded provider-side count is used for `content` (skipping per-field
+ * estimation for that message), since it already covers the assistant's
+ * content + tool_calls + reasoning that the provider tokenized. Other sources
+ * (incoming tool messages' `tool_call_id`, etc.) are still added separately.
+ *
+ * @example
+ * const accounting = countContextTokens({
+ *   messages: state.messages,
+ *   tools: payload.tools,
+ * });
+ *
+ * // Compression trigger:
+ * if (accounting.adjustedTotal > threshold) compress();
+ *
+ * // UI "context by type" panel:
+ * accounting.bySource;
+ * // → { content: 267058, reasoning: 110107, thoughtSignature: 0, toolCallId: 758, toolCalls: 201762, toolDefinition: 14339 }
+ *
+ * // UI per-message inspector:
+ * accounting.messages;
+ * // → [{ index: 0, role: 'user', bySource: { content: 1234 }, total: 1234 }, ...]
+ */
+export const countContextTokens = ({
+  messages,
+  tools = [],
+  options,
+}: CountContextTokensParams): ContextTokenAccounting => {
+  const driftMultiplier = options?.driftMultiplier ?? DEFAULT_DRIFT_MULTIPLIER;
+
+  const messageBreakdowns = messages.map((msg, index): MessageTokenBreakdown => {
+    const perSource: Partial<Record<TokenSourceType, number>> = {};
+    const isAssistant = msg.role === 'assistant';
+
+    // Provider-reported output usage is only consulted for assistant turns. It
+    // already covers the content, tool calls and reasoning generated in that
+    // turn, so when it is positive it replaces per-field estimation entirely.
+    const reportedOutputTokens =
+      (isAssistant ? msg.metadata?.usage?.totalOutputTokens : undefined) ?? 0;
+
+    if (reportedOutputTokens > 0) {
+      bumpSource(perSource, 'content', reportedOutputTokens);
+    } else {
+      // No usable usage record: estimate each provider-bound field on its own.
+      bumpSource(perSource, 'content', estimate(msg.content));
+
+      // Tool calls live on `msg.tools` (lobe's internal shape, not OpenAI's
+      // `tool_calls`). Only the fields that are projected into the outgoing
+      // request are measured: id, apiName, arguments and type. Internal-only
+      // fields (intervention, source, executor, result_msg_id) never ship.
+      const toolCalls = isAssistant && Array.isArray(msg.tools) ? msg.tools : [];
+      const toolCallTokens = toolCalls.reduce(
+        (sum, call) =>
+          sum +
+          estimate(call.id) +
+          estimate(call.apiName) +
+          estimate(call.arguments) +
+          estimate(call.type),
+        0,
+      );
+      bumpSource(perSource, 'toolCalls', toolCallTokens);
+
+      // Gemini's `thoughtSignature` is preserved by ToolCallProcessor and
+      // forwarded by the Google context builder. It is provider-specific and
+      // can be sizeable on every call, hence its own bucket.
+      const sigTokens = toolCalls.reduce((sum, call) => sum + estimate(call.thoughtSignature), 0);
+      bumpSource(perSource, 'thoughtSignature', sigTokens);
+
+      // Reasoning trace: thinking-mode models get this echoed back next turn.
+      // It is either a plain string or an object carrying `content`.
+      const { reasoning } = msg;
+      if (reasoning) {
+        const trace = typeof reasoning === 'string' ? reasoning : reasoning.content;
+        bumpSource(perSource, 'reasoning', estimate(trace));
+      }
+    }
+
+    // `tool_call_id` sits on `tool` role messages rather than assistant ones,
+    // so it is counted whether or not the usage shortcut applied.
+    if (msg.tool_call_id) {
+      bumpSource(perSource, 'toolCallId', estimate(msg.tool_call_id));
+    }
+
+    const total = Object.values(perSource).reduce<number>((sum, v) => sum + (v ?? 0), 0);
+
+    return { bySource: perSource, index, role: msg.role, total };
+  });
+
+  // Tool definitions: each one is measured whole, named on a best-effort basis.
+  const toolBreakdowns = tools.map((definition): ToolDefinitionTokenBreakdown => {
+    const named = definition as { function?: { name?: string }; name?: string };
+    const name = named.function?.name ?? named.name ?? 'unknown';
+    return { name, total: estimate(definition) };
+  });
+
+  // Aggregate: fold every message's buckets together, then set the tool-definition bucket.
+  const bySource = ZERO_BY_SOURCE();
+  messageBreakdowns.forEach((breakdown) => {
+    (Object.keys(breakdown.bySource) as TokenSourceType[]).forEach((source) => {
+      bySource[source] += breakdown.bySource[source] ?? 0;
+    });
+  });
+  bySource.toolDefinition = toolBreakdowns.reduce((sum, def) => sum + def.total, 0);
+
+  const rawTotal = Object.values(bySource).reduce((sum, tokens) => sum + tokens, 0);
+
+  return {
+    adjustedTotal: Math.ceil(rawTotal * driftMultiplier),
+    bySource,
+    driftMultiplier,
+    messages: messageBreakdowns,
+    rawTotal,
+    tools: toolBreakdowns,
+  };
+};
+
+export type {
+  ContextTokenAccounting,
+  CountContextTokensParams,
+  MessageTokenBreakdown,
+  TokenSourceType,
+  ToolDefinitionTokenBreakdown,
+} from './types';
@@ -0,0 +1,103 @@
+import type { UIChatMessage } from '@lobechat/types';
+
+/**
+ * Source category each token belongs to.
+ *
+ * - `content`           — `msg.content` (the text body sent to provider); for assistant
+ *                         messages with recorded output usage, that whole count lands here
+ * - `toolCalls`         — assistant's tool call payloads (`msg.tools[]`, equivalent
+ *                         to OpenAI `tool_calls` once transformed): `id`, `apiName`,
+ *                         `arguments`, `type` are sent to provider
+ * - `thoughtSignature`  — Gemini-specific opaque signature attached to each tool
+ *                         call (`msg.tools[N].thoughtSignature`); preserved by
+ *                         `ToolCallProcessor` and forwarded to Google's API; can
+ *                         be sizeable on every function call
+ * - `reasoning`         — thinking-mode trace (`msg.reasoning.content`, or `msg.reasoning`
+ *                         itself when it is a string); deepseek / o1 / claude-thinking
+ *                         models echo this back into next turn's input
+ * - `toolCallId`        — tool message's `tool_call_id` linking back to the assistant tool call
+ * - `toolDefinition`    — top-level `tools[]` array sent alongside messages
+ *                         (function schema + description)
+ */
+export type TokenSourceType =
+  | 'content'
+  | 'toolCalls'
+  | 'thoughtSignature'
+  | 'reasoning'
+  | 'toolCallId'
+  | 'toolDefinition';
+
+/**
+ * Token breakdown for a single message; `bySource` lists only the sources that contributed.
+ */
+export interface MessageTokenBreakdown {
+  /** Token counts split by source. Absent keys = 0 tokens; `toolDefinition` never appears here. */
+  bySource: Partial<Record<TokenSourceType, number>>;
+  /** Zero-based index of the message in the input `messages` array */
+  index: number;
+  /** Echoed from the message for UI grouping */
+  role: UIChatMessage['role'];
+  /** Sum of bySource values (raw, before any drift multiplier) */
+  total: number;
+}
+
+/**
+ * Per tool-definition breakdown, e.g. for UI highlighting the tools that cost the most context.
+ */
+export interface ToolDefinitionTokenBreakdown {
+  /** Best-effort tool name (function.name → name → 'unknown') */
+  name: string;
+  /** Estimated tokens for the whole definition (raw, before any drift multiplier) */
+  total: number;
+}
+
+/**
+ * Result of {@link countContextTokens}. Provides both a per-source aggregate
+ * (for "show me input by type" UI) and per-item breakdowns (for "show me which
+ * messages / tools dominate" UI), plus the drift-adjusted total to feed to
+ * compression triggers.
+ */
+export interface ContextTokenAccounting {
+  /** Drift-adjusted total — equals `Math.ceil(rawTotal * driftMultiplier)` */
+  adjustedTotal: number;
+  /** Token totals grouped by source (always present, zero when nothing of that source) */
+  bySource: Record<TokenSourceType, number>;
+  /** The drift multiplier actually applied (the caller's option, or the default) */
+  driftMultiplier: number;
+  /** Per-message breakdown (length = messages.length, in input order) */
+  messages: MessageTokenBreakdown[];
+  /** Sum of every `bySource` bucket (messages + tool definitions) before drift adjustment */
+  rawTotal: number;
+  /** Per-tool-definition breakdown (length = tools.length, in input order) */
+  tools: ToolDefinitionTokenBreakdown[];
+}
+
+/**
+ * Input shape for {@link countContextTokens}.
+ */
+export interface CountContextTokensParams {
+  /** Conversation messages — typically the same array fed into the compression check */
+  messages: UIChatMessage[];
+  /**
+   * Optional behavior tweaks; omit to use the defaults
+   */
+  options?: {
+    /**
+     * Multiplier applied to the raw total to compensate for `tokenx`'s
+     * systematic under-count vs provider-side tokenizers (deepseek / openai /
+     * anthropic). Empirically ~1.10–1.15× for typical mixed CJK/EN/JSON content;
+     * default 1.25 leaves an extra ~10% safety margin so compression triggers
+     * before the upstream tokenizer reaches the model's context limit.
+     *
+     * @default 1.25
+     */
+    driftMultiplier?: number;
+  };
+  /**
+   * Top-level tool definitions sent to the provider in the same request. Pass an empty array
+   * (or omit) when the call has no tools. The shape is intentionally `unknown[]` — anything
+   * serializable works because we just stringify and estimate; the per-tool name is read
+   * from `function.name`, falling back to `name`, then `'unknown'`.
+   */
+  tools?: unknown[];
+}
@@ -18,9 +18,9 @@ import type {
  SubAgentResultPayload,
  SubAgentsBatchResultPayload,
 } from '@lobechat/agent-runtime';
-import { calculateMessageTokens, UsageCounter } from '@lobechat/agent-runtime';
+import { UsageCounter } from '@lobechat/agent-runtime';
 import { isDesktop } from '@lobechat/const';
-import type { ToolsEngine } from '@lobechat/context-engine';
+import { countContextTokens, type ToolsEngine } from '@lobechat/context-engine';
 import { chainCompressContext } from '@lobechat/prompts';
 import {
  type ChatMessageError,
@@ -2827,7 +2827,9 @@ export const createAgentExecutors = (context: {
        events.push({ type: 'compression_complete', groupId, parentMessageId });

        // Calculate new token count
-        const compressedTokenCount = calculateMessageTokens(compressedMessages);
+        const compressedTokenCount = countContextTokens({
+          messages: compressedMessages,
+        }).rawTotal;

        return {
          events,