mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-13 19:20:04 +00:00
🐛 fix(context-engine): account for tool_calls + reasoning + tool defs in compression budget (#14813)
🐛 fix(context-engine): account for tool_calls + reasoning + tool defs in compression budget

The pre-compression token check (`shouldCompress`) only counted `msg.content`, which under-counted typical agent conversations by ~58% — tool_calls (~33% of payload), reasoning traces (~17%), and top-level tool definitions (~2%) were all silently ignored. As a result, conversations that the provider tokenizer measured at ~656K passed the harness's 524K threshold without firing compression, and were rejected upstream as ExceededContextWindow.

Verified empirically against 2 op snapshots in the same topic that hit the failure mode (LOBE-8964): harness counted 267K, deepseek measured 649K — a 380K (58.8%) gap. ~92% of that gap is fixable by accounting for the missing fields; the remaining ~8% is `tokenx` vs provider tokenizer drift, compensated by a 1.25× multiplier on the trigger path.

Changes:
- New `@lobechat/context-engine/tokenAccounting` module exporting `countContextTokens({messages, tools, options})`. Returns structured per-source + per-message + per-tool breakdown — usable both by the compression trigger and by UI panels showing "context by type".
- `shouldCompress` in agent-runtime delegates to `countContextTokens`, applies the 1.25× drift multiplier on `adjustedTotal` for the trigger decision, exposes raw count via `currentTokenCount`. Signature now takes `UIChatMessage[]` directly.
- Removed deprecated `calculateMessageTokens` / `estimateTokens` / `TokenCountMessage` from agent-runtime — the new module supersedes them. `createAgentExecutors.ts` updated to call `countContextTokens` directly for post-compression telemetry.
- Added `raw-md` plugin to agent-runtime vitest config (needed once context-engine is imported transitively, since the import graph pulls in `@lobechat/agent-templates` `.md` files).

What's intentionally NOT counted (DB-only fields not sent to provider): `plugin`, `pluginState`, `chunksList`, `extra`, `fileList`, etc. Counting these would over-estimate and trigger compression too early.

Tests:
- 19 new unit tests for `countContextTokens` covering content / tool_calls / reasoning / tool_call_id / tool definitions / fast-path / aggregation / DB-only field exclusion.
- `tokenCounter.test.ts` updated for new drift semantics + UIChatMessage signature; one boundary case now triggers compression (intentional — the drift multiplier kicks in at the threshold).

Refs: LOBE-8964 (ECW edge boundary), LOBE-8972 (ECW umbrella), LOBE-8973 (openrouter `:free` ctx), LOBE-8976 (compression diagnostics).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,110 +1,24 @@
|
||||
import type { UIChatMessage } from '@lobechat/types';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
calculateMessageTokens,
|
||||
DEFAULT_MAX_CONTEXT,
|
||||
DEFAULT_THRESHOLD_RATIO,
|
||||
estimateTokens,
|
||||
getCompressionThreshold,
|
||||
shouldCompress,
|
||||
} from './tokenCounter';
|
||||
|
||||
/**
 * Builds a `UIChatMessage` fixture for the `shouldCompress` tests.
 *
 * Placeholder values are supplied for `content`, `createdAt`, `id` and
 * `updatedAt`; the caller's fields are spread on top and win on conflict, so a
 * test only spells out the properties that `shouldCompress` /
 * `countContextTokens` actually read.
 *
 * @param m - Fields to set on the fixture; `role` is required
 * @returns The merged object, cast to `UIChatMessage` (it is not a fully
 *   populated message)
 */
const mkMsg = (m: Partial<UIChatMessage> & { role: UIChatMessage['role'] }): UIChatMessage =>
  ({
    content: '',
    createdAt: 0,
    id: 'm',
    updatedAt: 0,
    ...m,
  }) as UIChatMessage;
|
||||
|
||||
describe('tokenCounter', () => {
|
||||
describe('estimateTokens', () => {
|
||||
it('should estimate tokens for string content', () => {
|
||||
const tokens = estimateTokens('Hello, world!');
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should return 0 for empty string', () => {
|
||||
expect(estimateTokens('')).toBe(0);
|
||||
});
|
||||
|
||||
it('should handle null/undefined content', () => {
|
||||
expect(estimateTokens(null)).toBe(0);
|
||||
expect(estimateTokens(undefined)).toBe(0);
|
||||
});
|
||||
|
||||
it('should handle object content by JSON stringifying', () => {
|
||||
const tokens = estimateTokens({ key: 'value', nested: { a: 1 } });
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should handle array content', () => {
|
||||
const tokens = estimateTokens(['item1', 'item2', 'item3']);
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('calculateMessageTokens', () => {
|
||||
it('should use totalOutputTokens for assistant messages when available', () => {
|
||||
const messages = [
|
||||
{
|
||||
content: 'This content should be ignored',
|
||||
metadata: { usage: { totalOutputTokens: 100 } },
|
||||
role: 'assistant',
|
||||
},
|
||||
];
|
||||
expect(calculateMessageTokens(messages)).toBe(100);
|
||||
});
|
||||
|
||||
it('should estimate tokens for assistant messages without usage data', () => {
|
||||
const messages = [{ content: 'Hello from assistant', role: 'assistant' }];
|
||||
const tokens = calculateMessageTokens(messages);
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
// Should be estimated, not 0
|
||||
expect(tokens).not.toBe(100);
|
||||
});
|
||||
|
||||
it('should estimate tokens for user messages', () => {
|
||||
const messages = [{ content: 'Hello from user', role: 'user' }];
|
||||
const tokens = calculateMessageTokens(messages);
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should estimate tokens for system messages', () => {
|
||||
const messages = [{ content: 'System prompt', role: 'system' }];
|
||||
const tokens = calculateMessageTokens(messages);
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should sum tokens from multiple messages', () => {
|
||||
const messages = [
|
||||
{ content: 'Hello', role: 'user' },
|
||||
{ content: 'Hi there!', metadata: { usage: { totalOutputTokens: 50 } }, role: 'assistant' },
|
||||
{ content: 'How are you?', role: 'user' },
|
||||
];
|
||||
const tokens = calculateMessageTokens(messages);
|
||||
// Should be 50 (assistant) + estimated tokens for user messages
|
||||
expect(tokens).toBeGreaterThan(50);
|
||||
});
|
||||
|
||||
it('should handle empty messages array', () => {
|
||||
expect(calculateMessageTokens([])).toBe(0);
|
||||
});
|
||||
|
||||
it('should handle messages with empty content', () => {
|
||||
const messages = [
|
||||
{ content: '', role: 'user' },
|
||||
{ content: undefined, role: 'assistant' },
|
||||
];
|
||||
expect(calculateMessageTokens(messages)).toBe(0);
|
||||
});
|
||||
|
||||
it('should skip assistant usage with 0 tokens and estimate instead', () => {
|
||||
const messages = [
|
||||
{
|
||||
content: 'Some content',
|
||||
metadata: { usage: { totalOutputTokens: 0 } },
|
||||
role: 'assistant',
|
||||
},
|
||||
];
|
||||
const tokens = calculateMessageTokens(messages);
|
||||
// Should estimate since totalOutputTokens is 0
|
||||
expect(tokens).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getCompressionThreshold', () => {
|
||||
it('should use default values', () => {
|
||||
const threshold = getCompressionThreshold();
|
||||
@@ -141,8 +55,7 @@ describe('tokenCounter', () => {
|
||||
|
||||
describe('shouldCompress', () => {
|
||||
it('should return needsCompression=false when under threshold', () => {
|
||||
const messages = [{ content: 'Hi', role: 'user' }];
|
||||
const result = shouldCompress(messages);
|
||||
const result = shouldCompress([mkMsg({ role: 'user', content: 'Hi' })]);
|
||||
|
||||
expect(result.needsCompression).toBe(false);
|
||||
expect(result.currentTokenCount).toBeGreaterThan(0);
|
||||
@@ -150,48 +63,62 @@ describe('tokenCounter', () => {
|
||||
});
|
||||
|
||||
it('should return needsCompression=true when over threshold', () => {
|
||||
// Create a message with usage that exceeds threshold
|
||||
const messages = [
|
||||
{
|
||||
content: '',
|
||||
metadata: { usage: { totalOutputTokens: 70_000 } },
|
||||
const result = shouldCompress([
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
},
|
||||
];
|
||||
const result = shouldCompress(messages);
|
||||
metadata: { usage: { totalOutputTokens: 70_000 } as any } as any,
|
||||
}),
|
||||
]);
|
||||
|
||||
expect(result.needsCompression).toBe(true);
|
||||
expect(result.currentTokenCount).toBe(70_000);
|
||||
expect(result.threshold).toBe(64_000); // 128k * 0.5
|
||||
});
|
||||
|
||||
it('should return needsCompression=false when exactly at threshold', () => {
|
||||
const messages = [
|
||||
{
|
||||
content: '',
|
||||
metadata: { usage: { totalOutputTokens: 64_000 } },
|
||||
it('should return needsCompression=true when raw count is at threshold (drift pushes over)', () => {
|
||||
// 1.25× default drift multiplier means raw==threshold → adjusted > threshold
|
||||
// → compression fires. This is intentional: we want to compress before the
|
||||
// upstream tokenizer overflows the model's context window.
|
||||
const result = shouldCompress([
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
},
|
||||
];
|
||||
const result = shouldCompress(messages);
|
||||
metadata: { usage: { totalOutputTokens: 64_000 } as any } as any,
|
||||
}),
|
||||
]);
|
||||
|
||||
expect(result.needsCompression).toBe(true);
|
||||
expect(result.currentTokenCount).toBe(64_000);
|
||||
});
|
||||
|
||||
it('should NOT trigger at threshold when driftMultiplier is 1', () => {
|
||||
// Disabling drift restores strict "raw > threshold" semantics
|
||||
const result = shouldCompress(
|
||||
[
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
metadata: { usage: { totalOutputTokens: 64_000 } as any } as any,
|
||||
}),
|
||||
],
|
||||
{ driftMultiplier: 1 },
|
||||
);
|
||||
|
||||
// Exactly at threshold should not trigger compression
|
||||
expect(result.needsCompression).toBe(false);
|
||||
expect(result.currentTokenCount).toBe(64_000);
|
||||
});
|
||||
|
||||
it('should use custom options', () => {
|
||||
const messages = [
|
||||
const result = shouldCompress(
|
||||
[
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
metadata: { usage: { totalOutputTokens: 50_000 } as any } as any,
|
||||
}),
|
||||
],
|
||||
{
|
||||
content: '',
|
||||
metadata: { usage: { totalOutputTokens: 50_000 } },
|
||||
role: 'assistant',
|
||||
maxWindowToken: 60_000,
|
||||
thresholdRatio: 0.75,
|
||||
},
|
||||
];
|
||||
const result = shouldCompress(messages, {
|
||||
maxWindowToken: 60_000,
|
||||
thresholdRatio: 0.75,
|
||||
});
|
||||
);
|
||||
|
||||
// threshold = 60k * 0.75 = 45k, current = 50k > 45k
|
||||
expect(result.needsCompression).toBe(true);
|
||||
|
||||
@@ -1,13 +1,26 @@
|
||||
import { estimateTokenCount } from 'tokenx';
|
||||
import { countContextTokens, DEFAULT_DRIFT_MULTIPLIER } from '@lobechat/context-engine';
|
||||
import type { UIChatMessage } from '@lobechat/types';
|
||||
|
||||
/**
|
||||
* Options for token counting and compression threshold calculation
|
||||
*/
|
||||
export interface TokenCountOptions {
|
||||
/**
|
||||
* Optional drift multiplier override forwarded to {@link countContextTokens}.
|
||||
* Default {@link DEFAULT_DRIFT_MULTIPLIER} (1.25).
|
||||
*/
|
||||
driftMultiplier?: number;
|
||||
/** Model's max context window token count */
|
||||
maxWindowToken?: number;
|
||||
/** Threshold ratio for triggering compression, default 0.75 */
|
||||
/** Threshold ratio for triggering compression, default 0.5 */
|
||||
thresholdRatio?: number;
|
||||
/**
|
||||
* Optional top-level tool definitions for the upcoming LLM call. When
|
||||
* provided, tool definition tokens are counted toward the budget — matches
|
||||
* what the provider actually charges. Pass the same `tools` array that will
|
||||
* be sent in the request payload.
|
||||
*/
|
||||
tools?: unknown[];
|
||||
}
|
||||
|
||||
/** Default max context window (128k tokens) */
|
||||
@@ -16,60 +29,8 @@ export const DEFAULT_MAX_CONTEXT = 128_000;
|
||||
/** Default threshold ratio (50% of max context) */
|
||||
export const DEFAULT_THRESHOLD_RATIO = 0.5;
|
||||
|
||||
/**
 * Minimal message shape required for token counting.
 */
export interface TokenCountMessage {
  /**
   * Message payload. Declared as `unknown` because `string | unknown` already
   * collapses to `unknown`; callers may pass a string or any other value.
   */
  content?: unknown;
  /** Optional metadata; `usage.totalOutputTokens` carries a recorded token count. */
  metadata?: {
    usage?: {
      totalOutputTokens?: number;
    };
  } | null;
  /** Message role, e.g. `'user'`, `'assistant'` or `'system'`. */
  role: string;
}
|
||||
|
||||
/**
 * Estimate the token count of a piece of content using `tokenx`.
 *
 * Strings are measured as-is; any other value is serialized with
 * `JSON.stringify` before being measured.
 *
 * @param content - Text content, or an arbitrary value to serialize
 * @returns Estimated token count; `0` for `null`, `undefined`, an empty
 *   string, or a value that `JSON.stringify` cannot represent (it yields
 *   `undefined` for e.g. functions)
 */
export function estimateTokens(content: string | unknown): number {
  // `== null` matches both null and undefined.
  if (content == null) return 0;

  const text = typeof content === 'string' ? content : JSON.stringify(content);

  // Falsy covers both the empty string and an `undefined` serialization result.
  return text ? estimateTokenCount(text) : 0;
}
|
||||
|
||||
/**
 * Calculate the total token count for a list of messages.
 *
 * - Assistant messages use `metadata.usage.totalOutputTokens` when it is a
 *   positive number (the recorded value).
 * - Every other message, and assistant messages without a positive recorded
 *   value, fall back to estimating `content` via `estimateTokens`.
 *
 * @param messages - Messages to count tokens for
 * @returns Sum of the per-message token counts; `0` for an empty list
 */
export function calculateMessageTokens(messages: TokenCountMessage[]): number {
  return messages.reduce((total, msg) => {
    if (msg.role === 'assistant') {
      const outputTokens = msg.metadata?.usage?.totalOutputTokens;
      // A missing or zero count is treated as "not recorded" and estimated instead.
      if (outputTokens && outputTokens > 0) return total + outputTokens;
    }

    return total + estimateTokens(msg.content);
  }, 0);
}
|
||||
|
||||
/**
|
||||
* Calculate the compression threshold based on max context window
|
||||
* @param options - Token count options
|
||||
* @returns Compression threshold in tokens
|
||||
*/
|
||||
export function getCompressionThreshold(options: TokenCountOptions = {}): number {
|
||||
const maxContext = options.maxWindowToken ?? DEFAULT_MAX_CONTEXT;
|
||||
@@ -81,30 +42,43 @@ export function getCompressionThreshold(options: TokenCountOptions = {}): number
|
||||
* Result of compression check
|
||||
*/
|
||||
export interface CompressionCheckResult {
|
||||
/** Current total token count */
|
||||
/**
|
||||
* Best raw estimate of current input tokens (sum of message content +
|
||||
* tool calls + reasoning + tool_call_id + tool definitions).
|
||||
*/
|
||||
currentTokenCount: number;
|
||||
/** Whether compression is needed */
|
||||
/**
|
||||
* `true` when `adjustedTokenCount > threshold`. The adjusted count includes
|
||||
* a drift multiplier (default 1.25×) to compensate for the gap between
|
||||
* `tokenx`'s heuristic and provider tokenizers, so compression fires before
|
||||
* upstream tokenizers actually overflow the model's context window.
|
||||
*/
|
||||
needsCompression: boolean;
|
||||
/** Compression threshold */
|
||||
/** Compression threshold (`maxWindowToken × thresholdRatio`) */
|
||||
threshold: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if messages need compression based on token count
|
||||
* @param messages - List of messages to check
|
||||
* @param options - Token count options
|
||||
* @returns Compression check result
|
||||
* Check if messages need compression based on token count.
|
||||
*
|
||||
* Uses {@link countContextTokens} under the hood, so the input estimate
|
||||
* accounts for tool calls, reasoning, and tool definitions in addition to
|
||||
* `content` (see LOBE-8964 for the calibration data).
|
||||
*/
|
||||
export function shouldCompress(
|
||||
messages: TokenCountMessage[],
|
||||
messages: UIChatMessage[],
|
||||
options: TokenCountOptions = {},
|
||||
): CompressionCheckResult {
|
||||
const currentTokenCount = calculateMessageTokens(messages);
|
||||
const accounting = countContextTokens({
|
||||
messages,
|
||||
options: { driftMultiplier: options.driftMultiplier ?? DEFAULT_DRIFT_MULTIPLIER },
|
||||
tools: options.tools,
|
||||
});
|
||||
const threshold = getCompressionThreshold(options);
|
||||
|
||||
return {
|
||||
currentTokenCount,
|
||||
needsCompression: currentTokenCount > threshold,
|
||||
currentTokenCount: accounting.rawTotal,
|
||||
needsCompression: accounting.adjustedTotal > threshold,
|
||||
threshold,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [
|
||||
{
|
||||
name: 'raw-md',
|
||||
transform(_, id) {
|
||||
if (id.endsWith('.md')) return { code: 'export default ""', map: null };
|
||||
},
|
||||
},
|
||||
],
|
||||
test: {
|
||||
coverage: {
|
||||
exclude: [
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
"debug": "^4.4.3",
|
||||
"es-toolkit": "^1.43.0",
|
||||
"immer": "^10.2.0",
|
||||
"tokenx": "^1.2.1",
|
||||
"ts-md5": "^2.0.1",
|
||||
"unist-builder": "^4.0.0",
|
||||
"xast-util-to-xml": "^4.0.0",
|
||||
|
||||
@@ -16,6 +16,16 @@ export { ContextEngine } from './pipeline';
|
||||
|
||||
// Context Providers
|
||||
export * from './providers';
|
||||
|
||||
// Token accounting (compression triggers + UI breakdown)
|
||||
export type {
|
||||
ContextTokenAccounting,
|
||||
CountContextTokensParams,
|
||||
MessageTokenBreakdown,
|
||||
TokenSourceType,
|
||||
ToolDefinitionTokenBreakdown,
|
||||
} from './tokenAccounting';
|
||||
export { countContextTokens, DEFAULT_DRIFT_MULTIPLIER } from './tokenAccounting';
|
||||
// Processors
|
||||
export type { PlaceholderValue, PlaceholderValueMap } from './processors';
|
||||
export {
|
||||
|
||||
@@ -0,0 +1,433 @@
|
||||
import type { UIChatMessage } from '@lobechat/types';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { countContextTokens, DEFAULT_DRIFT_MULTIPLIER } from '../index';
|
||||
|
||||
/**
 * Minimal `UIChatMessage` fixture builder for the token-accounting tests.
 *
 * `UIChatMessage` has many optional fields; this fills in placeholder values
 * for `content`, `createdAt`, `id` and `updatedAt` and spreads the caller's
 * fields on top, so each test sets only the fields that affect token
 * accounting.
 *
 * @param m - Fields to set on the fixture; `role` is required
 * @returns The merged object, cast to `UIChatMessage`
 */
const mkMsg = (m: Partial<UIChatMessage> & { role: UIChatMessage['role'] }): UIChatMessage =>
  ({
    content: '',
    createdAt: 0,
    id: 'm',
    updatedAt: 0,
    ...m,
  }) as UIChatMessage;
|
||||
|
||||
describe('countContextTokens', () => {
|
||||
describe('basic shape & defaults', () => {
|
||||
it('returns zero accounting for empty input', () => {
|
||||
const result = countContextTokens({ messages: [] });
|
||||
|
||||
expect(result.rawTotal).toBe(0);
|
||||
expect(result.adjustedTotal).toBe(0);
|
||||
expect(result.driftMultiplier).toBe(DEFAULT_DRIFT_MULTIPLIER);
|
||||
expect(result.messages).toEqual([]);
|
||||
expect(result.tools).toEqual([]);
|
||||
expect(result.bySource).toEqual({
|
||||
content: 0,
|
||||
reasoning: 0,
|
||||
thoughtSignature: 0,
|
||||
toolCallId: 0,
|
||||
toolCalls: 0,
|
||||
toolDefinition: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('respects a custom driftMultiplier', () => {
|
||||
const msgs: UIChatMessage[] = [mkMsg({ role: 'user', content: 'hello world '.repeat(100) })];
|
||||
const r1 = countContextTokens({ messages: msgs });
|
||||
const r2 = countContextTokens({ messages: msgs, options: { driftMultiplier: 1 } });
|
||||
|
||||
expect(r1.rawTotal).toBe(r2.rawTotal);
|
||||
expect(r2.adjustedTotal).toBe(r2.rawTotal); // 1.0 means no adjustment
|
||||
expect(r1.adjustedTotal).toBe(Math.ceil(r1.rawTotal * DEFAULT_DRIFT_MULTIPLIER));
|
||||
});
|
||||
|
||||
it('produces one breakdown entry per message in original order', () => {
|
||||
const msgs: UIChatMessage[] = [
|
||||
mkMsg({ role: 'user', content: 'a' }),
|
||||
mkMsg({ role: 'assistant', content: 'b' }),
|
||||
mkMsg({ role: 'tool', content: 'c' }),
|
||||
];
|
||||
const r = countContextTokens({ messages: msgs });
|
||||
|
||||
expect(r.messages).toHaveLength(3);
|
||||
expect(r.messages.map((m) => [m.index, m.role])).toEqual([
|
||||
[0, 'user'],
|
||||
[1, 'assistant'],
|
||||
[2, 'tool'],
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('content counting', () => {
|
||||
it('counts user message content', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [mkMsg({ role: 'user', content: 'hello world '.repeat(50) })],
|
||||
});
|
||||
expect(r.bySource.content).toBeGreaterThan(0);
|
||||
expect(r.messages[0].bySource.content).toBe(r.bySource.content);
|
||||
expect(r.messages[0].total).toBe(r.messages[0].bySource.content);
|
||||
});
|
||||
|
||||
it('uses recorded usage.totalOutputTokens for assistant when present', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: 'short text', // would estimate to a small count
|
||||
metadata: {
|
||||
usage: { totalOutputTokens: 5000 } as any,
|
||||
} as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.content).toBe(5000);
|
||||
expect(r.messages[0].bySource.content).toBe(5000);
|
||||
});
|
||||
|
||||
it('falls back to estimating content when usage is missing or zero', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: 'long text that needs estimating '.repeat(100),
|
||||
metadata: { usage: { totalOutputTokens: 0 } as any } as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.content).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('tool calls (assistant.tools)', () => {
|
||||
it('counts tool call payloads on assistant messages', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
tools: [
|
||||
{
|
||||
apiName: 'searchWeb',
|
||||
arguments: '{"query": "very long query string that takes some tokens"}',
|
||||
id: 'call_abc123',
|
||||
identifier: 'search-plugin',
|
||||
type: 'default',
|
||||
},
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
expect(r.bySource.toolCalls).toBeGreaterThan(0);
|
||||
expect(r.messages[0].bySource.toolCalls).toBe(r.bySource.toolCalls);
|
||||
});
|
||||
|
||||
it('does NOT count tools on non-assistant messages', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'user',
|
||||
content: '',
|
||||
// user messages with `tools` shouldn't be a thing, but if it slips
|
||||
// through it must not be counted toward toolCalls.
|
||||
tools: [
|
||||
{ apiName: 'x', arguments: '{}', id: '1', identifier: 'p', type: 'default' },
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.toolCalls).toBe(0);
|
||||
});
|
||||
|
||||
it('does NOT count tool calls when assistant has recorded usage (fast-path)', () => {
|
||||
// The assistant fast-path attributes recorded output tokens to `content`
|
||||
// because the recorded count already includes generated tool_calls.
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
metadata: { usage: { totalOutputTokens: 1234 } as any } as any,
|
||||
tools: [
|
||||
{
|
||||
apiName: 'foo',
|
||||
arguments: '{"a":1}',
|
||||
id: 'c1',
|
||||
identifier: 'p',
|
||||
thoughtSignature: 'sig-skipped-on-fast-path'.repeat(20),
|
||||
type: 'default',
|
||||
},
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.content).toBe(1234);
|
||||
expect(r.bySource.toolCalls).toBe(0);
|
||||
expect(r.bySource.thoughtSignature).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('thoughtSignature on tool calls (Gemini)', () => {
|
||||
it('counts thoughtSignature separately from toolCalls', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
tools: [
|
||||
{
|
||||
apiName: 'searchWeb',
|
||||
arguments: '{"query":"x"}',
|
||||
id: 'call_1',
|
||||
identifier: 'p',
|
||||
thoughtSignature: 'opaque signature payload '.repeat(40),
|
||||
type: 'default',
|
||||
},
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.toolCalls).toBeGreaterThan(0);
|
||||
expect(r.bySource.thoughtSignature).toBeGreaterThan(0);
|
||||
// Buckets must not overlap — thoughtSignature should not be added to toolCalls
|
||||
const tcOnlyArgs = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
tools: [
|
||||
{
|
||||
apiName: 'searchWeb',
|
||||
arguments: '{"query":"x"}',
|
||||
id: 'call_1',
|
||||
identifier: 'p',
|
||||
type: 'default',
|
||||
},
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.toolCalls).toBe(tcOnlyArgs.bySource.toolCalls);
|
||||
});
|
||||
|
||||
it('sums thoughtSignature across multiple tool calls', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
tools: [
|
||||
{
|
||||
apiName: 'a',
|
||||
arguments: '{}',
|
||||
id: '1',
|
||||
identifier: 'p',
|
||||
thoughtSignature: 'sig-A '.repeat(30),
|
||||
type: 'default',
|
||||
},
|
||||
{
|
||||
apiName: 'b',
|
||||
arguments: '{}',
|
||||
id: '2',
|
||||
identifier: 'p',
|
||||
thoughtSignature: 'sig-B '.repeat(30),
|
||||
type: 'default',
|
||||
},
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
// Two distinct signatures both contribute
|
||||
expect(r.bySource.thoughtSignature).toBeGreaterThan(0);
|
||||
expect(r.messages[0].bySource.thoughtSignature).toBe(r.bySource.thoughtSignature);
|
||||
});
|
||||
|
||||
it('does not count thoughtSignature when absent', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
tools: [
|
||||
{ apiName: 'a', arguments: '{}', id: '1', identifier: 'p', type: 'default' },
|
||||
] as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.thoughtSignature).toBe(0);
|
||||
expect(r.messages[0].bySource.thoughtSignature).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('reasoning trace', () => {
|
||||
it('counts ModelReasoning.content on assistant messages', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
reasoning: { content: 'long reasoning chain '.repeat(50) },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.reasoning).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('handles reasoning passed as a plain string', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
reasoning: 'plain string reasoning' as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.reasoning).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('skips reasoning when fast-path recorded usage is present', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
metadata: { usage: { totalOutputTokens: 100 } as any } as any,
|
||||
reasoning: { content: 'this should not be re-counted'.repeat(50) },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.reasoning).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('tool_call_id (tool messages)', () => {
|
||||
it('counts tool_call_id regardless of role', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'tool',
|
||||
content: '{"result":"ok"}',
|
||||
tool_call_id: 'call_abc123_xyz',
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.toolCallId).toBeGreaterThan(0);
|
||||
expect(r.bySource.content).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('still counts tool_call_id on assistant fast-path', () => {
|
||||
// tool_call_id can appear on assistant in some flows; the fast-path
|
||||
// covers content/reasoning/toolCalls but tool_call_id is a separate
|
||||
// field that's always added.
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
metadata: { usage: { totalOutputTokens: 100 } as any } as any,
|
||||
tool_call_id: 'call_xyz',
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(r.bySource.toolCallId).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('tool definitions (top-level tools[])', () => {
|
||||
it('counts each tool definition and exposes a per-tool breakdown', () => {
|
||||
const tools = [
|
||||
{ function: { name: 'search', parameters: { type: 'object' } }, type: 'function' },
|
||||
{ function: { name: 'lookup', parameters: { type: 'object' } }, type: 'function' },
|
||||
];
|
||||
const r = countContextTokens({ messages: [], tools });
|
||||
|
||||
expect(r.tools).toHaveLength(2);
|
||||
expect(r.tools.map((t) => t.name)).toEqual(['search', 'lookup']);
|
||||
expect(r.tools.every((t) => t.total > 0)).toBe(true);
|
||||
expect(r.bySource.toolDefinition).toBe(r.tools.reduce((s, t) => s + t.total, 0));
|
||||
});
|
||||
|
||||
it('falls back to top-level name when function.name is absent', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [],
|
||||
tools: [{ name: 'plain_tool', schema: {} }],
|
||||
});
|
||||
expect(r.tools[0].name).toBe('plain_tool');
|
||||
});
|
||||
|
||||
it('uses "unknown" for tools with no resolvable name', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [],
|
||||
tools: [{ description: 'nameless' }],
|
||||
});
|
||||
expect(r.tools[0].name).toBe('unknown');
|
||||
});
|
||||
});
|
||||
|
||||
describe('does NOT count DB-only fields', () => {
|
||||
it('ignores plugin / pluginState / extra / chunksList / metadata extras', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({
|
||||
role: 'tool',
|
||||
content: 'real_content',
|
||||
tool_call_id: 'tcid',
|
||||
// All of these are DB-only; counting them would over-estimate.
|
||||
plugin: {
|
||||
apiName: 'x',
|
||||
arguments: 'a'.repeat(5000),
|
||||
identifier: 'p',
|
||||
type: 'default',
|
||||
} as any,
|
||||
pluginState: { output: 'b'.repeat(5000), success: true } as any,
|
||||
chunksList: [{ id: 'c'.repeat(5000) }] as any,
|
||||
extra: { translate: 'd'.repeat(5000) } as any,
|
||||
}),
|
||||
],
|
||||
});
|
||||
// Only content + tool_call_id should contribute; the other fields' bulk
|
||||
// must not show up.
|
||||
const expectedSources = new Set<string>(['content', 'toolCallId']);
|
||||
for (const k of Object.keys(r.messages[0].bySource)) {
|
||||
expect(expectedSources.has(k)).toBe(true);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('aggregation', () => {
|
||||
it('sums bySource across multiple messages and tools', () => {
|
||||
const r = countContextTokens({
|
||||
messages: [
|
||||
mkMsg({ role: 'user', content: 'first '.repeat(30) }),
|
||||
mkMsg({
|
||||
role: 'assistant',
|
||||
content: 'second '.repeat(30),
|
||||
tools: [
|
||||
{ apiName: 'a', arguments: '{}', id: '1', identifier: 'p', type: 'default' },
|
||||
] as any,
|
||||
reasoning: { content: 'reason '.repeat(30) },
|
||||
}),
|
||||
mkMsg({ role: 'tool', content: '{"r":1}', tool_call_id: 'cid' }),
|
||||
],
|
||||
tools: [
|
||||
{ function: { name: 'tool_a' }, type: 'function' },
|
||||
{ function: { name: 'tool_b' }, type: 'function' },
|
||||
],
|
||||
});
|
||||
|
||||
const sumOfBySource = Object.values(r.bySource).reduce((s, v) => s + v, 0);
|
||||
expect(r.rawTotal).toBe(sumOfBySource);
|
||||
|
||||
const sumOfMessageTotals = r.messages.reduce((s, m) => s + m.total, 0);
|
||||
const messagesContrib = sumOfMessageTotals;
|
||||
const toolsContrib = r.bySource.toolDefinition;
|
||||
expect(r.rawTotal).toBe(messagesContrib + toolsContrib);
|
||||
|
||||
expect(r.adjustedTotal).toBe(Math.ceil(r.rawTotal * DEFAULT_DRIFT_MULTIPLIER));
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,193 @@
|
||||
import { estimateTokenCount } from 'tokenx';
|
||||
|
||||
import type {
|
||||
ContextTokenAccounting,
|
||||
CountContextTokensParams,
|
||||
MessageTokenBreakdown,
|
||||
TokenSourceType,
|
||||
ToolDefinitionTokenBreakdown,
|
||||
} from './types';
|
||||
|
||||
/**
 * Multiplier applied to the raw token estimate to produce `adjustedTotal`
 * when the caller does not supply `options.driftMultiplier`.
 */
export const DEFAULT_DRIFT_MULTIPLIER = 1.25;
|
||||
|
||||
/**
 * Creates a fresh per-source tally with every token source bucket set to 0.
 * A new object is built on each call, so the result can be mutated safely.
 */
const ZERO_BY_SOURCE = (): Record<TokenSourceType, number> => {
  const zeroed: Record<TokenSourceType, number> = {
    content: 0,
    reasoning: 0,
    thoughtSignature: 0,
    toolCallId: 0,
    toolCalls: 0,
    toolDefinition: 0,
  };
  return zeroed;
};
|
||||
|
||||
/**
 * Estimates the token count of an arbitrary value.
 *
 * - `null` / `undefined` count as 0.
 * - Strings are estimated as-is.
 * - Anything else is passed through `JSON.stringify` first.
 *
 * An empty or missing serialization also counts as 0.
 */
const estimate = (value: unknown): number => {
  if (value === null || value === undefined) return 0;

  let serialized: string | undefined;
  if (typeof value === 'string') {
    serialized = value;
  } else {
    serialized = JSON.stringify(value);
  }

  if (!serialized) return 0;
  return estimateTokenCount(serialized);
};
|
||||
|
||||
/**
 * Adds `amount` to the `key` bucket of `bySource`, creating the bucket on
 * first use. Amounts that are zero or negative are ignored, so a bucket is
 * never created just to hold 0.
 */
const bumpSource = (
  bySource: Partial<Record<TokenSourceType, number>>,
  key: TokenSourceType,
  amount: number,
) => {
  const isNonPositive = amount <= 0;
  if (isNonPositive) return;

  const previous = bySource[key];
  bySource[key] = previous == null ? amount : previous + amount;
};
|
||||
|
||||
/**
 * Account every token that will be sent to the provider for one chat request,
 * broken down by source category and per-item.
 *
 * **What's counted (and why)**
 * | source             | field on UIChatMessage                                     | sent to provider as              |
 * |--------------------|------------------------------------------------------------|----------------------------------|
 * | `content`          | `msg.content`                                              | `message.content`                |
 * | `toolCalls`        | `msg.tools[]` (lobe internal, not OpenAI's `tool_calls`)   | `message.tool_calls`             |
 * | `thoughtSignature` | `msg.tools[N].thoughtSignature` (Gemini-specific)          | echoed back per tool call        |
 * | `reasoning`        | `msg.reasoning.content` / `msg.reasoning` (string variant) | echoed back next turn (thinking) |
 * | `toolCallId`       | `msg.tool_call_id`                                         | `message.tool_call_id`           |
 * | `toolDefinition`   | top-level `tools[]` param                                  | request `tools` array            |
 *
 * **What's NOT counted (intentionally)** — these are DB-only fields the
 * harness stores but doesn't ship to the provider:
 *
 * `plugin`, `pluginState`, `pluginIntervention`, `pluginError`, `chunksList`,
 * `editorData`, `extra`, `fileList`, `imageList`, `videoList`, `metadata`
 * (other than the `metadata.usage.totalOutputTokens` shortcut for assistant)
 *
 * Counting them would over-estimate and trigger compression too early.
 *
 * **Token estimation accuracy**
 *
 * Uses the `tokenx` heuristic estimator (~96% accuracy on typical English text).
 * For agent conversations heavy in JSON / code / mixed CJK, `tokenx` typically
 * under-counts by 10–15% vs provider tokenizers. The default
 * `driftMultiplier: 1.25` compensates for that drift PLUS leaves ~10% headroom
 * so callers using the result as a compression trigger fire before the upstream
 * tokenizer reaches its limit.
 *
 * **Assistant fast-path**
 *
 * If an assistant message has `metadata.usage.totalOutputTokens > 0`, that
 * recorded provider-side count is used for `content` (skipping per-field
 * estimation for that message), since it already covers the assistant's
 * content + tool_calls + reasoning that the provider tokenized. Other sources
 * (incoming tool messages' `tool_call_id`, etc.) are still added separately.
 *
 * **Tool definitions**
 *
 * Each entry of `tools` is stringified and estimated as a whole. Its display
 * name is looked up as `function.name`, then `name`, then `'unknown'`. Nullish
 * entries are tolerated: they are reported as `'unknown'` with 0 tokens.
 *
 * @param params.messages - conversation messages to account for
 * @param params.tools - top-level tool definitions sent with the request (default `[]`)
 * @param params.options - optional tweaks; `driftMultiplier` overrides the default 1.25
 * @returns aggregate totals (`rawTotal`, `adjustedTotal`, `bySource`) plus
 *   per-message and per-tool breakdowns in input order
 *
 * @example
 * const accounting = countContextTokens({
 *   messages: state.messages,
 *   tools: payload.tools,
 * });
 *
 * // Compression trigger:
 * if (accounting.adjustedTotal > threshold) compress();
 *
 * // UI "context by type" panel (every source key is always present):
 * accounting.bySource;
 * // → { content: 267058, reasoning: 110107, thoughtSignature: 0, toolCallId: 758, toolCalls: 201762, toolDefinition: 14339 }
 *
 * // UI per-message inspector (only non-zero sources appear per message):
 * accounting.messages;
 * // → [{ index: 0, role: 'user', bySource: { content: 1234 }, total: 1234 }, ...]
 */
export const countContextTokens = ({
  messages,
  tools = [],
  options,
}: CountContextTokensParams): ContextTokenAccounting => {
  const driftMultiplier = options?.driftMultiplier ?? DEFAULT_DRIFT_MULTIPLIER;

  const messageBreakdowns: MessageTokenBreakdown[] = messages.map((msg, index) => {
    const bySource: Partial<Record<TokenSourceType, number>> = {};

    // Assistant fast-path: recorded usage covers content + tool_calls + reasoning
    // produced by THIS turn's generation. Use it directly when available.
    const recordedOutputTokens =
      msg.role === 'assistant' ? msg.metadata?.usage?.totalOutputTokens : undefined;

    if (recordedOutputTokens && recordedOutputTokens > 0) {
      bumpSource(bySource, 'content', recordedOutputTokens);
    } else {
      // Per-field estimation
      bumpSource(bySource, 'content', estimate(msg.content));

      // Tool calls: lobe stores these on `msg.tools` (NOT OpenAI's `tool_calls`).
      // We project to what's actually sent: id + apiName + arguments + type,
      // skipping internal-only fields (intervention, source, executor,
      // result_msg_id) which don't ship to the provider.
      // Gemini's `thoughtSignature` is preserved by ToolCallProcessor and
      // forwarded by the Google context builder — count it under its own
      // bucket since it's provider-specific and can be sizeable on every call.
      if (msg.role === 'assistant' && Array.isArray(msg.tools) && msg.tools.length > 0) {
        let callTokens = 0;
        let signatureTokens = 0;
        for (const call of msg.tools) {
          callTokens +=
            estimate(call.id) +
            estimate(call.apiName) +
            estimate(call.arguments) +
            estimate(call.type);
          signatureTokens += estimate(call.thoughtSignature);
        }
        bumpSource(bySource, 'toolCalls', callTokens);
        bumpSource(bySource, 'thoughtSignature', signatureTokens);
      }

      // Reasoning trace (thinking-mode models echo this back next turn)
      const reasoning = msg.reasoning;
      if (reasoning) {
        const reasoningContent = typeof reasoning === 'string' ? reasoning : reasoning.content;
        bumpSource(bySource, 'reasoning', estimate(reasoningContent));
      }
    }

    // tool_call_id is sent regardless of fast-path (it's on `tool` role messages,
    // not assistant)
    if (msg.tool_call_id) {
      bumpSource(bySource, 'toolCallId', estimate(msg.tool_call_id));
    }

    const total = Object.values(bySource).reduce<number>((sum, n) => sum + (n ?? 0), 0);

    return { bySource, index, role: msg.role, total };
  });

  // Tool definitions. Entries are `unknown`, so the name lookup must not assume
  // an object: a null/undefined entry would otherwise throw on property access.
  const toolBreakdowns: ToolDefinitionTokenBreakdown[] = tools.map((tool) => {
    const named = tool as { function?: { name?: string }; name?: string } | null | undefined;
    return {
      name: named?.function?.name ?? named?.name ?? 'unknown',
      total: estimate(tool),
    };
  });

  // Aggregate per-message buckets into the always-fully-keyed result
  const bySource = ZERO_BY_SOURCE();
  for (const breakdown of messageBreakdowns) {
    for (const [source, tokens] of Object.entries(breakdown.bySource)) {
      bySource[source as TokenSourceType] += tokens ?? 0;
    }
  }
  bySource.toolDefinition = toolBreakdowns.reduce((sum, t) => sum + t.total, 0);

  const rawTotal = Object.values(bySource).reduce((sum, n) => sum + n, 0);

  return {
    adjustedTotal: Math.ceil(rawTotal * driftMultiplier),
    bySource,
    driftMultiplier,
    messages: messageBreakdowns,
    rawTotal,
    tools: toolBreakdowns,
  };
};
|
||||
|
||||
export type {
|
||||
ContextTokenAccounting,
|
||||
CountContextTokensParams,
|
||||
MessageTokenBreakdown,
|
||||
TokenSourceType,
|
||||
ToolDefinitionTokenBreakdown,
|
||||
} from './types';
|
||||
@@ -0,0 +1,103 @@
|
||||
import type { UIChatMessage } from '@lobechat/types';
|
||||
|
||||
/**
 * Category a counted token is attributed to.
 *
 * - `content`          — the message text body (`msg.content`) sent to the provider
 * - `toolCalls`        — an assistant's tool call payloads (`msg.tools[]`, the
 *                        equivalent of OpenAI `tool_calls` once transformed);
 *                        `id`, `apiName`, `arguments` and `type` are sent
 * - `thoughtSignature` — Gemini-specific opaque signature on each tool call
 *                        (`msg.tools[N].thoughtSignature`); preserved by
 *                        `ToolCallProcessor` and forwarded to Google's API, and
 *                        can be sizeable on every function call
 * - `reasoning`        — thinking-mode trace (`msg.reasoning.content`, or
 *                        `msg.reasoning` itself when it is a string); deepseek /
 *                        o1 / claude-thinking models echo this back into the
 *                        next turn's input
 * - `toolCallId`       — a tool message's `tool_call_id`, linking it back to the
 *                        assistant tool call
 * - `toolDefinition`   — the top-level `tools[]` array sent alongside messages
 *                        (function schema + description)
 */
export type TokenSourceType =
  | 'content'
  | 'toolCalls'
  | 'thoughtSignature'
  | 'reasoning'
  | 'toolCallId'
  | 'toolDefinition';
|
||||
|
||||
/**
 * Token accounting for a single message. Sources that contributed nothing
 * are omitted from `bySource`.
 */
export interface MessageTokenBreakdown {
  /**
   * Tokens per source for this message; a missing key means 0 tokens.
   */
  bySource: Partial<Record<TokenSourceType, number>>;
  /**
   * Position of the message in the original `messages` array.
   */
  index: number;
  /**
   * The message's role, echoed so the UI can group by it.
   */
  role: UIChatMessage['role'];
  /**
   * Sum of all `bySource` values.
   */
  total: number;
}
|
||||
|
||||
/**
 * Token accounting for a single tool definition, so the UI can highlight
 * which tools take the largest share of the context budget.
 */
export interface ToolDefinitionTokenBreakdown {
  /**
   * Best-effort tool name: `function.name`, else `name`, else `'unknown'`.
   */
  name: string;
  /**
   * Estimated tokens for the whole tool definition.
   */
  total: number;
}
|
||||
|
||||
/**
 * What {@link countContextTokens} returns: a per-source aggregate (for a
 * "show me input by type" UI), per-item breakdowns (for a "show me which
 * messages / tools dominate" UI), and the drift-adjusted total meant for
 * compression triggers.
 */
export interface ContextTokenAccounting {
  /**
   * Drift-adjusted total: `Math.ceil(rawTotal * driftMultiplier)`.
   */
  adjustedTotal: number;
  /**
   * Totals per source. Every source key is present; unused ones are 0.
   */
  bySource: Record<TokenSourceType, number>;
  /**
   * The drift multiplier that was actually applied.
   */
  driftMultiplier: number;
  /**
   * One breakdown per input message (length = `messages.length`).
   */
  messages: MessageTokenBreakdown[];
  /**
   * Sum of all raw token counts, before drift adjustment.
   */
  rawTotal: number;
  /**
   * One breakdown per tool definition (length = `tools.length`).
   */
  tools: ToolDefinitionTokenBreakdown[];
}
|
||||
|
||||
/**
 * Arguments accepted by {@link countContextTokens}.
 */
export interface CountContextTokensParams {
  /**
   * Conversation messages — typically the same array fed into the
   * compression check.
   */
  messages: UIChatMessage[];
  /**
   * Optional behavior tweaks.
   */
  options?: {
    /**
     * Factor the raw total is multiplied by, to compensate for `tokenx`'s
     * systematic under-count vs provider-side tokenizers (deepseek / openai /
     * anthropic). Empirically the drift is ~1.10–1.15× for typical mixed
     * CJK/EN/JSON content; the default of 1.25 adds roughly 10% safety margin
     * so compression triggers before the upstream tokenizer reaches the
     * model's context limit.
     *
     * @default 1.25
     */
    driftMultiplier?: number;
  };
  /**
   * Top-level tool definitions sent to the provider in the same request.
   * Omit, or pass an empty array, when the call has no tools. Typed as
   * `unknown[]` on purpose: anything serializable works, because each entry
   * is just stringified and estimated.
   */
  tools?: unknown[];
}
|
||||
@@ -18,9 +18,9 @@ import type {
|
||||
SubAgentResultPayload,
|
||||
SubAgentsBatchResultPayload,
|
||||
} from '@lobechat/agent-runtime';
|
||||
import { calculateMessageTokens, UsageCounter } from '@lobechat/agent-runtime';
|
||||
import { UsageCounter } from '@lobechat/agent-runtime';
|
||||
import { isDesktop } from '@lobechat/const';
|
||||
import type { ToolsEngine } from '@lobechat/context-engine';
|
||||
import { countContextTokens, type ToolsEngine } from '@lobechat/context-engine';
|
||||
import { chainCompressContext } from '@lobechat/prompts';
|
||||
import {
|
||||
type ChatMessageError,
|
||||
@@ -2827,7 +2827,9 @@ export const createAgentExecutors = (context: {
|
||||
events.push({ type: 'compression_complete', groupId, parentMessageId });
|
||||
|
||||
// Calculate new token count
|
||||
const compressedTokenCount = calculateMessageTokens(compressedMessages);
|
||||
const compressedTokenCount = countContextTokens({
|
||||
messages: compressedMessages,
|
||||
}).rawTotal;
|
||||
|
||||
return {
|
||||
events,
|
||||
|
||||
Reference in New Issue
Block a user