Compare commits

...

4 Commits

Author SHA1 Message Date
YuTengjing 4b2447baba refactor: simplify createTokenSpeedCalculator 2025-10-05 12:18:31 +08:00
YuTengjing b80f52bfd9 🔧 fix: remove unnecessary console log in createRuntime function 2025-10-04 10:33:29 +08:00
YuTengjing 0b3b913b85 🔧 fix: correct and simplify tps calc 2025-10-04 10:30:39 +08:00
YuTengjing 27dcab86cf chore: remove legacy files 2025-10-04 10:27:32 +08:00
7 changed files with 14 additions and 528 deletions
@@ -181,7 +181,6 @@ export const createRouterRuntime = ({
for (const runtimeItem of runtimes) {
const models = runtimeItem.models || [];
if (models.includes(model)) {
console.log(`get runtime ${runtimeItem.id} ${model}`);
return runtimeItem.runtime;
}
}
@@ -384,7 +384,6 @@ export const createTokenSpeedCalculator = (
}: { enableStreaming?: boolean; inputStartAt?: number; streamStack?: StreamContext } = {},
) => {
let outputStartAt: number | undefined;
let outputThinking: boolean | undefined;
const process = (chunk: StreamProtocolChunk) => {
let result = [chunk];
@@ -393,37 +392,25 @@ export const createTokenSpeedCalculator = (
outputStartAt = Date.now();
}
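/**
* Some providers emit a chunk whose content is an empty string before they actually start outputting reasoning.
* In such a chunk reasoning may be null, which would make the "is thinking content being output" check wrong, so null and empty strings are filtered out.
* The chunk may also be some special token, so the outputStartAt logic is left unchanged.
*/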
if (
outputThinking === undefined &&
(chunk.type === 'text' || chunk.type === 'reasoning') &&
typeof chunk.data === 'string' &&
chunk.data.length > 0
) {
outputThinking = chunk.type === 'reasoning';
}
// if the chunk is the stop chunk, set as output finish
if (inputStartAt && outputStartAt && chunk.type === 'usage') {
const totalOutputTokens =
chunk.data?.totalOutputTokens ??
(chunk.data?.outputTextTokens ?? 0) + (chunk.data?.outputImageTokens ?? 0);
const reasoningTokens = chunk.data?.outputReasoningTokens ?? 0;
const outputTokens =
(outputThinking ?? false)
? totalOutputTokens
: Math.max(0, totalOutputTokens - reasoningTokens);
// TPS should always include all generated tokens (including reasoning tokens)
// because it measures generation speed, not just visible content
const usage = chunk.data as ModelUsage;
const outputTokens = usage?.totalOutputTokens ?? 0;
const now = Date.now();
const elapsed = now - (enableStreaming ? outputStartAt : inputStartAt);
const duration = now - outputStartAt;
const latency = now - inputStartAt;
const ttft = outputStartAt - inputStartAt;
const tps = elapsed === 0 ? undefined : (outputTokens / elapsed) * 1000;
result.push({
data: {
duration: now - outputStartAt,
latency: now - inputStartAt,
tps: elapsed === 0 ? undefined : (outputTokens / elapsed) * 1000,
ttft: outputStartAt - inputStartAt,
duration,
latency,
tps,
ttft,
} as ModelSpeed,
id: TOKEN_SPEED_CHUNK_ID,
type: 'speed',
@@ -8,4 +8,3 @@ export * from './textToImage';
export * from './toolsCalling';
export * from './tts';
export * from './type';
export * from './usage';
-27
View File
@@ -1,27 +0,0 @@
/**
 * Normalized token-usage breakdown for a single model call.
 * Every field is optional; a field that is absent carries no count.
 */
export interface ModelTokensUsage {
/** Prediction tokens that were accepted. */
acceptedPredictionTokens?: number;
/** Audio tokens on the input (prompt) side. */
inputAudioTokens?: number;
/** Input tokens that were not served from the prompt cache. */
inputCacheMissTokens?: number;
/** Input tokens that were served from the prompt cache. */
inputCachedTokens?: number;
/**
 * currently only pplx has citation_tokens
 */
inputCitationTokens?: number;
/**
 * user prompt image
 */
inputImageTokens?: number;
/**
 * user prompt input
 */
inputTextTokens?: number;
/** NOTE(review): presumably tokens written into the prompt cache — confirm against the producers of this field. */
inputWriteCacheTokens?: number;
/** Audio tokens on the output (completion) side. */
outputAudioTokens?: number;
/** Image tokens on the output (completion) side. */
outputImageTokens?: number;
/** Reasoning tokens on the output (completion) side. */
outputReasoningTokens?: number;
/** Text tokens on the output (completion) side. */
outputTextTokens?: number;
/** Prediction tokens that were rejected. */
rejectedPredictionTokens?: number;
/** Sum of all input tokens. */
totalInputTokens?: number;
/** Sum of all output tokens. */
totalOutputTokens?: number;
/** Overall token count for the call. */
totalTokens?: number;
}
@@ -1,351 +0,0 @@
import OpenAI from 'openai';
import { describe, expect, it } from 'vitest';
import { convertResponseUsage, convertUsage } from './usageConverter';
/**
 * Unit tests for the usage converters.
 * Each `convertUsage` case feeds an OpenAI-style usage payload and asserts the normalized result;
 * the final case covers `convertResponseUsage`.
 */
describe('convertUsage', () => {
it('should convert basic OpenAI usage data correctly', () => {
// Arrange
const openaiUsage: OpenAI.Completions.CompletionUsage = {
prompt_tokens: 100,
completion_tokens: 50,
total_tokens: 150,
};
// Act
const result = convertUsage(openaiUsage);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
totalInputTokens: 100,
totalOutputTokens: 50,
outputTextTokens: 50,
totalTokens: 150,
});
});
it('should handle PPLX citation tokens correctly', () => {
// Arrange
const pplxUsage = {
prompt_tokens: 80,
citation_tokens: 20,
completion_tokens: 50,
total_tokens: 150,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(pplxUsage);
// Assert
expect(result).toEqual({
inputTextTokens: 80,
inputCitationTokens: 20,
totalInputTokens: 100,
totalOutputTokens: 50,
outputTextTokens: 50,
totalTokens: 170, // 150 + 20 (citation tokens)
});
});
it('should handle cached tokens correctly', () => {
// Arrange
const usageWithCache = {
prompt_tokens: 100,
prompt_cache_hit_tokens: 30,
prompt_cache_miss_tokens: 70,
completion_tokens: 50,
total_tokens: 150,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithCache);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
inputCachedTokens: 30,
inputCacheMissTokens: 70,
totalInputTokens: 100,
totalOutputTokens: 50,
outputTextTokens: 50,
totalTokens: 150,
});
});
it('should handle cached tokens using prompt_tokens_details', () => {
// Arrange
const usageWithTokenDetails = {
prompt_tokens: 100,
prompt_tokens_details: {
cached_tokens: 30,
},
completion_tokens: 50,
total_tokens: 150,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithTokenDetails);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
inputCachedTokens: 30,
inputCacheMissTokens: 70, // 100 - 30
totalInputTokens: 100,
totalOutputTokens: 50,
outputTextTokens: 50,
totalTokens: 150,
});
});
it('should handle audio tokens in input correctly', () => {
// Arrange
const usageWithAudioInput = {
prompt_tokens: 100,
prompt_tokens_details: {
audio_tokens: 20,
},
completion_tokens: 50,
total_tokens: 150,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithAudioInput);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
inputAudioTokens: 20,
totalInputTokens: 100,
totalOutputTokens: 50,
outputTextTokens: 50,
totalTokens: 150,
});
});
it('should handle detailed output tokens correctly', () => {
// Arrange
const usageWithOutputDetails = {
prompt_tokens: 100,
completion_tokens: 100,
completion_tokens_details: {
reasoning_tokens: 30,
audio_tokens: 20,
},
total_tokens: 200,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithOutputDetails);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
totalInputTokens: 100,
totalOutputTokens: 100,
outputReasoningTokens: 30,
outputAudioTokens: 20,
outputTextTokens: 50, // 100 - 30 - 20
totalTokens: 200,
});
});
it('should handle prediction tokens correctly', () => {
// Arrange
const usageWithPredictions = {
prompt_tokens: 100,
completion_tokens: 80,
completion_tokens_details: {
accepted_prediction_tokens: 30,
rejected_prediction_tokens: 10,
},
total_tokens: 180,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithPredictions);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
totalInputTokens: 100,
totalOutputTokens: 80,
outputTextTokens: 80,
acceptedPredictionTokens: 30,
rejectedPredictionTokens: 10,
totalTokens: 180,
});
});
it('should handle complex usage with all fields correctly', () => {
// Arrange
const complexUsage = {
prompt_tokens: 150,
prompt_tokens_details: {
audio_tokens: 50,
cached_tokens: 40,
},
citation_tokens: 30,
completion_tokens: 120,
completion_tokens_details: {
reasoning_tokens: 40,
audio_tokens: 30,
accepted_prediction_tokens: 20,
rejected_prediction_tokens: 5,
},
total_tokens: 300,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(complexUsage);
// Assert
expect(result).toEqual({
inputTextTokens: 150,
inputAudioTokens: 50,
inputCachedTokens: 40,
inputCacheMissTokens: 140, // 180 - 40 (totalInputTokens - cachedTokens)
inputCitationTokens: 30,
totalInputTokens: 180, // 150 + 30
outputTextTokens: 50, // 120 - 40 - 30
outputReasoningTokens: 40,
outputAudioTokens: 30,
totalOutputTokens: 120,
acceptedPredictionTokens: 20,
rejectedPredictionTokens: 5,
totalTokens: 330, // 300 + 30 (citation_tokens)
});
});
it('should omit zero or undefined values in the final output', () => {
// Arrange
const usageWithZeros = {
prompt_tokens: 100,
completion_tokens: 50,
total_tokens: 150,
completion_tokens_details: {
reasoning_tokens: 0,
audio_tokens: undefined,
},
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithZeros);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
totalInputTokens: 100,
totalOutputTokens: 50,
outputTextTokens: 50,
totalTokens: 150,
});
// These should not be present in the result
expect(result).not.toHaveProperty('outputReasoningTokens');
expect(result).not.toHaveProperty('outputAudioTokens');
});
it('should handle XAI provider correctly where completion_tokens does not include reasoning_tokens', () => {
// Arrange
const xaiUsage: OpenAI.Completions.CompletionUsage = {
prompt_tokens: 6103,
completion_tokens: 66, // this does not include reasoning_tokens
total_tokens: 6550,
prompt_tokens_details: {
audio_tokens: 0,
cached_tokens: 0,
},
completion_tokens_details: {
accepted_prediction_tokens: 0,
audio_tokens: 0,
reasoning_tokens: 381, // these are additional reasoning tokens
rejected_prediction_tokens: 0,
},
};
// Act
const xaiResult = convertUsage(xaiUsage, 'xai');
// Assert
expect(xaiResult).toMatchObject({
totalInputTokens: 6103,
totalOutputTokens: 447, // 66 + 381; xai prices reasoning_tokens the same as completion_tokens
outputTextTokens: 66, // reasoning_tokens is not subtracted
outputReasoningTokens: 381,
totalTokens: 6550,
});
// Exercise other providers (default behavior)
const defaultResult = convertUsage(xaiUsage);
// Default behavior: outputTextTokens should be completion_tokens - reasoning_tokens - audio_tokens = 66 - 381 - 0 = -315
expect(defaultResult.outputTextTokens).toBe(-315);
expect(defaultResult).toMatchObject({
totalInputTokens: 6103,
totalOutputTokens: 66,
outputTextTokens: -315, // a negative value really does show up in the result
outputReasoningTokens: 381,
totalTokens: 6550,
});
});
it('should handle output image tokens correctly', () => {
// Arrange
const usageWithImage = {
prompt_tokens: 100,
completion_tokens: 200,
completion_tokens_details: {
image_tokens: 60,
reasoning_tokens: 30,
},
total_tokens: 300,
} as OpenAI.Completions.CompletionUsage;
// Act
const result = convertUsage(usageWithImage);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
totalInputTokens: 100,
totalOutputTokens: 200,
outputImageTokens: 60,
outputReasoningTokens: 30,
outputTextTokens: 110, // 200 - 60 - 30
totalTokens: 300,
});
});
it('should handle response output image tokens correctly for ResponseUsage', () => {
// Arrange
const responseUsage = {
input_tokens: 100,
input_tokens_details: {
cached_tokens: 0,
},
output_tokens: 200,
output_tokens_details: {
image_tokens: 60,
reasoning_tokens: 30,
},
total_tokens: 300,
} as OpenAI.Responses.ResponseUsage;
// Act
const result = convertResponseUsage(responseUsage);
// Assert
expect(result).toEqual({
inputTextTokens: 100,
inputCacheMissTokens: 100, // 100 - 0
totalInputTokens: 100,
totalOutputTokens: 200,
outputImageTokens: 60,
outputReasoningTokens: 30,
outputTextTokens: 170, // 200 - 30 (image tokens are not subtracted for ResponseUsage)
totalTokens: 300,
});
});
});
@@ -1,122 +0,0 @@
import { ModelTokensUsage } from '@lobechat/types';
import OpenAI from 'openai';
/**
 * Normalizes an OpenAI chat-completions style usage payload into `ModelTokensUsage`.
 *
 * Provider quirks handled here:
 * - `citation_tokens` (currently only pplx reports it) is added to both the input total
 *   and the overall total.
 * - Cache hits are read from `prompt_cache_hit_tokens`, falling back to
 *   `prompt_tokens_details.cached_tokens`.
 * - For `provider === 'xai'`, `completion_tokens` does not include `reasoning_tokens`:
 *   reasoning is added to the output total and is not subtracted from the text tokens.
 *
 * Only truthy values are kept, so `0`, `undefined` and `NaN` never appear in the result.
 * Negative counts are truthy and are therefore preserved.
 *
 * @param usage - raw usage object taken from the provider response
 * @param provider - optional provider id; only `'xai'` changes the computation
 * @returns usage object containing only the truthy token counts
 */
export const convertUsage = (
  usage: OpenAI.Completions.CompletionUsage,
  provider?: string,
): ModelTokensUsage => {
  // Some providers attach non-standard fields, hence the loosely typed view.
  const loose = usage as any;
  const promptDetails = usage.prompt_tokens_details;
  const completionDetails = usage.completion_tokens_details;

  // ---- input side ----
  const promptTokens = usage.prompt_tokens || 0;
  // Currently only pplx reports citation_tokens.
  const citationTokens = loose.citation_tokens || 0;
  const inputTotal = citationTokens + promptTokens;

  const cacheHits = loose.prompt_cache_hit_tokens || promptDetails?.cached_tokens;
  // Without any cache information the subtraction yields NaN, which the
  // truthiness filter at the bottom discards.
  const cacheMisses = loose.prompt_cache_miss_tokens || inputTotal - cacheHits;

  // ---- output side ----
  const completionTokens = usage.completion_tokens;
  const reasoningTokens = completionDetails?.reasoning_tokens || 0;
  const audioTokens = completionDetails?.audio_tokens || 0;
  const imageTokens = (completionDetails as any)?.image_tokens || 0;

  let textTokens: number;
  let outputTotal: number;
  if (provider === 'xai') {
    // XAI's completion_tokens excludes reasoning_tokens, so it needs special handling.
    textTokens = completionTokens - audioTokens;
    outputTotal = completionTokens + reasoningTokens;
  } else {
    textTokens = completionTokens - reasoningTokens - audioTokens - imageTokens;
    outputTotal = completionTokens;
  }

  const data = {
    acceptedPredictionTokens: completionDetails?.accepted_prediction_tokens,
    inputAudioTokens: promptDetails?.audio_tokens,
    inputCacheMissTokens: cacheMisses,
    inputCachedTokens: cacheHits,
    inputCitationTokens: citationTokens,
    inputTextTokens: promptTokens,
    outputAudioTokens: audioTokens,
    outputImageTokens: imageTokens,
    outputReasoningTokens: reasoningTokens,
    outputTextTokens: textTokens,
    rejectedPredictionTokens: completionDetails?.rejected_prediction_tokens,
    totalInputTokens: inputTotal,
    totalOutputTokens: outputTotal,
    totalTokens: citationTokens + usage.total_tokens,
  } satisfies ModelTokensUsage;

  // Copy over only the truthy counts, keeping the declaration order above.
  const compact: Partial<typeof data> = {};
  for (const key of Object.keys(data) as (keyof typeof data)[]) {
    const value = data[key];
    if (value) compact[key] = value;
  }

  return compact;
};
/**
 * Normalizes an OpenAI Responses-API usage payload into `ModelTokensUsage`.
 *
 * `ResponseUsage` offers no breakdown for audio, citation or prediction tokens, so those
 * fields are never emitted. Input text tokens equal the input total, and output text
 * tokens are the output total minus reasoning tokens (image tokens are reported
 * separately but not subtracted).
 *
 * Only truthy counts are kept, so zero-valued fields are omitted from the result.
 *
 * @param usage - raw usage object from a Responses-API result
 * @returns usage object containing only the truthy token counts
 */
export const convertResponseUsage = (usage: OpenAI.Responses.ResponseUsage): ModelTokensUsage => {
  // Primary values, defaulting to 0 when the provider omits them.
  const inputTotal = usage.input_tokens || 0;
  const cachedInput = usage.input_tokens_details?.cached_tokens || 0;
  const outputTotal = usage.output_tokens || 0;
  const reasoning = usage.output_tokens_details?.reasoning_tokens || 0;
  const image = (usage.output_tokens_details as any)?.image_tokens || 0;
  const grandTotal = usage.total_tokens || 0;

  // Candidate fields in the order they should appear in the result.
  const candidates: [keyof ModelTokensUsage, number][] = [
    ['inputCacheMissTokens', inputTotal - cachedInput],
    ['inputCachedTokens', cachedInput],
    ['inputTextTokens', inputTotal],
    ['outputImageTokens', image],
    ['outputReasoningTokens', reasoning],
    ['outputTextTokens', outputTotal - reasoning],
    ['totalInputTokens', inputTotal],
    ['totalOutputTokens', outputTotal],
    ['totalTokens', grandTotal],
  ];

  // Keep only truthy counts (this drops 0, matching convertUsage's filtering).
  const result: Partial<ModelTokensUsage> = {};
  for (const [field, count] of candidates) {
    if (count) result[field] = count;
  }

  return result as ModelTokensUsage;
};
+1
View File
@@ -61,6 +61,7 @@ export interface ModelTokensUsage {
rejectedPredictionTokens?: number;
// Total tokens
// TODO: make all following fields required
totalInputTokens?: number;
totalOutputTokens?: number;
totalTokens?: number;