feat: New API supports switching Responses API mode (#9776)

*  feat: 添加对新API和路由类型的支持,更新相关配置以启用Responses API

* fix: 更新测试文件中的console.error和console.debug实现,确保输出格式一致;在CreateNewProvider组件中调整provider图标映射逻辑

*  feat: 更新novita和qwen模型,调整定价策略,添加新模型及其功能

* 🐛 fix: OIDC error when connecting to self-host instance (#9916)

fix: oidc/consent redirect header

*  feat: 添加 MiniMax M2 和 Qwen3 VL 235B Instruct 模型,更新模型属性
🔧 fix: 修复免费标识逻辑,确保正确判断模型是否免费

*  feat: 添加 MiniMax-M2 模型,更新 SiliconCloud 和 Vercel AI Gateway 模型信息,调整 Kimi K2 的上下文窗口大小

* fix test

* 📝 docs: update ComfyUI documentation cover image URL (#9997)

* 🔖 chore(release): v1.142.9 [skip ci]

### [Version 1.142.9](https://github.com/lobehub/lobe-chat/compare/v1.142.8...v1.142.9)
<sup>Released on **2025-11-02**</sup>

#### 🐛 Bug Fixes

- **misc**: OIDC error when connecting to self-host instance.

<br/>

<details>
<summary><kbd>Improvements and Fixes</kbd></summary>

#### What's fixed

* **misc**: OIDC error when connecting to self-host instance, closes [#9916](https://github.com/lobehub/lobe-chat/issues/9916) ([2e2b9c4](https://github.com/lobehub/lobe-chat/commit/2e2b9c4))

</details>

<div align="right">

[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)

</div>

* 📝 docs(bot): Auto sync agents & plugin to readme

* 优化 Responses API 处理逻辑,优化错误处理和流数据转换

---------

Co-authored-by: Aloxaf <bailong104@gmail.com>
This commit is contained in:
sxjeru
2025-11-19 00:53:18 +08:00
committed by GitHub
parent 3ad336fa28
commit d0ee3df579
20 changed files with 439 additions and 311 deletions
+1 -1
View File
@@ -11,7 +11,7 @@ tags:
# Using ComfyUI in LobeChat
<Image alt={'Using ComfyUI in LobeChat'} cover src={'https://github.com/lobehub/lobe-chat/assets/17870709/c9e5eafc-ca22-496b-a88d-cc0ae53bf720'} />
<Image alt={'Using ComfyUI in LobeChat'} cover src={'https://hub-apac-1.lobeobjects.space/docs/e9b811f248a1db2bd1be1af888cf9b9d.png'} />
This documentation will guide you on how to use [ComfyUI](https://github.com/comfyanonymous/ComfyUI) in LobeChat for high-quality AI image generation and editing.
+1 -1
View File
@@ -11,7 +11,7 @@ tags:
# 在 LobeChat 中使用 ComfyUI
<Image alt={'在 LobeChat 中使用 ComfyUI'} cover src={'https://github.com/lobehub/lobe-chat/assets/17870709/c9e5eafc-ca22-496b-a88d-cc0ae53bf720'} />
<Image alt={'在 LobeChat 中使用 ComfyUI'} cover src={'https://hub-apac-1.lobeobjects.space/docs/e9b811f248a1db2bd1be1af888cf9b9d.png'} />
本文档将指导你如何在 LobeChat 中使用 [ComfyUI](https://github.com/comfyanonymous/ComfyUI) 进行高质量的 AI 图像生成和编辑。
+3 -2
View File
@@ -174,10 +174,11 @@ const novitaChatModels: AIChatModelCard[] = [
contextWindowTokens: 131_072,
displayName: 'OpenAI GPT OSS 120B',
id: 'openai/gpt-oss-120b',
maxOutput: 32_768,
pricing: {
units: [
{ name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
@@ -1,6 +1,20 @@
import { AIChatModelCard } from '../types/aiModel';
const nvidiaChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 128_000,
description:
'MiniMax-M2 是一款紧凑、快速且经济高效的混合专家(MoE)模型,拥有 2300 亿总参数和 100 亿激活参数,专为编码和智能体任务的顶级性能而打造,同时保持强大的通用智能。该模型在多文件编辑、编码-运行-修复闭环、测试校验修复以及复杂的长链接工具链方面表现优异,是开发者工作流的理想选择。',
displayName: 'MiniMax-M2',
enabled: true,
id: 'minimaxai/minimax-m2',
maxOutput: 16_384,
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -1,6 +1,18 @@
import { AIChatModelCard } from '../types/aiModel';
const ollamaCloudModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 200_000,
description: 'MiniMax M2 是专为编码和代理工作流程构建的高效大型语言模型。',
displayName: 'MiniMax M2',
enabled: true,
id: 'minimax-m2',
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -36,7 +48,6 @@ const ollamaCloudModels: AIChatModelCard[] = [
description:
'DeepSeek V3.1:下一代推理模型,提升了复杂推理与链路思考能力,适合需要深入分析的任务。',
displayName: 'DeepSeek V3.1',
enabled: true,
id: 'deepseek-v3.1:671b',
type: 'chat',
},
@@ -88,13 +99,23 @@ const ollamaCloudModels: AIChatModelCard[] = [
id: 'qwen3-coder:480b',
type: 'chat',
},
{
abilities: {
functionCall: true,
vision: true,
},
contextWindowTokens: 262_144,
displayName: 'Qwen3 VL 235B Instruct',
id: 'qwen3-vl:235b-instruct',
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
contextWindowTokens: 128_000,
contextWindowTokens: 262_144,
displayName: 'Qwen3 VL 235B',
id: 'qwen3-vl:235b',
type: 'chat',
+88
View File
@@ -175,6 +175,51 @@ const qwenChatModels: AIChatModelCard[] = [
},
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 202_752,
description: 'GLM系列模型是智谱AI专为智能体设计的混合推理模型,提供思考与非思考两种模式。',
displayName: 'GLM-4.6',
id: 'glm-4.6',
maxOutput: 16_384,
pricing: {
currency: 'CNY',
units: [
{
lookup: {
prices: {
'[0, 0.032]': 3,
'[0.032, infinity]': 4,
},
pricingParams: ['textInputRange'],
},
name: 'textInput',
strategy: 'lookup',
unit: 'millionTokens',
},
{
lookup: {
prices: {
'[0, 0.032]': 14,
'[0.032, infinity]': 16,
},
pricingParams: ['textInputRange'],
},
name: 'textOutput',
strategy: 'lookup',
unit: 'millionTokens',
},
],
},
settings: {
extendParams: ['enableReasoning', 'reasoningBudgetToken'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 131_072,
@@ -218,6 +263,7 @@ const qwenChatModels: AIChatModelCard[] = [
},
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 131_072,
@@ -1451,6 +1497,48 @@ const qwenChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
vision: true,
reasoning: true,
},
contextWindowTokens: 131_072,
description: 'Qwen3 VL 32B 思考模式(开源版),针对高难度强推理与长视频理解场景,提供顶尖的视觉+文本推理能力。',
displayName: 'Qwen3 VL 32B Thinking',
id: 'qwen3-vl-32b-thinking',
maxOutput: 32_768,
organization: 'Qwen',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 20, strategy: 'fixed', unit: 'millionTokens' },
],
},
settings: {
extendParams: ['enableReasoning', 'reasoningBudgetToken'],
},
type: 'chat',
},
{
abilities: {
vision: true,
},
contextWindowTokens: 131_072,
description: 'Qwen3 VL 32B 非思考模式(Instruct),适用于非思考指令场景,保持强大的视觉理解能力。',
displayName: 'Qwen3 VL 32B Instruct',
id: 'qwen3-vl-32b-instruct',
maxOutput: 32_768,
organization: 'Qwen',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 8, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
},
{
config: {
deploymentName: 'qwen-math-turbo-latest',
@@ -2,6 +2,26 @@ import { AIChatModelCard, AIImageModelCard } from '../types/aiModel';
// https://siliconflow.cn/zh-cn/models
const siliconcloudChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 128_000,
description:
'MiniMax-M2 为智能体重新定义了效率。它是一款紧凑、快速且经济高效的 MoE 模型,拥有 2300 亿总参数和 100 亿激活参数,专为编码和智能体任务的顶级性能而打造,同时保持强大的通用智能。仅需 100 亿激活参数,MiniMax-M2 就能提供与大规模模型相媲美的性能,使其成为高效率应用的理想选择。',
displayName: 'MiniMax-M2',
id: 'MiniMaxAI/MiniMax-M2',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-10-28',
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -1565,23 +1565,6 @@ const vercelAIGatewayChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 131_072,
description:
'DeepSeek-R1-Distill-Llama-70B 是 70B Llama 模型的蒸馏、更高效变体。它在文本生成任务中保持强大性能,减少计算开销以便于部署和研究。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
displayName: 'DeepSeek R1 Distill Llama 70B',
id: 'deepseek/deepseek-r1-distill-llama-70b',
pricing: {
units: [
{ name: 'textInput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0.99, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -37,7 +37,7 @@ const doubaoChatModels: AIChatModelCard[] = [
config: {
deploymentName: 'kimi-k2-250905',
},
contextWindowTokens: 131_072,
contextWindowTokens: 262_144,
description:
'Kimi-K2 是一款Moonshot AI推出的具备超强代码和 Agent 能力的 MoE 架构基础模型,总参数 1T,激活参数 32B。在通用知识推理、编程、数学、Agent 等主要类别的基准性能测试中,K2 模型的性能超过其他主流开源模型。',
displayName: 'Kimi K2',
@@ -1007,78 +1007,122 @@ describe('LobeOpenAICompatibleFactory', () => {
});
describe('responses routing', () => {
it('should route to Responses API when chatCompletion.useResponse is true', async () => {
const LobeMockProviderUseResponses = createOpenAICompatibleRuntime({
baseURL: 'https://api.test.com/v1',
chatCompletion: {
useResponse: true,
},
provider: ModelProvider.OpenAI,
});
it(
'should route to Responses API when chatCompletion.useResponse is true',
async () => {
const LobeMockProviderUseResponses = createOpenAICompatibleRuntime({
baseURL: 'https://api.test.com/v1',
chatCompletion: {
useResponse: true,
},
provider: ModelProvider.OpenAI,
});
const inst = new LobeMockProviderUseResponses({ apiKey: 'test' });
const inst = new LobeMockProviderUseResponses({ apiKey: 'test' });
// mock responses.create to return a stream-like with tee
const prod = new ReadableStream();
const debug = new ReadableStream();
const mockResponsesCreate = vi
.spyOn(inst['client'].responses, 'create')
.mockResolvedValue({ tee: () => [prod, debug] } as any);
// Mock responses.create to return a proper stream-like object
const mockResponsesCreate = vi
.spyOn(inst['client'].responses, 'create')
.mockResolvedValue({
toReadableStream: () =>
new ReadableStream({
start(controller) {
controller.close();
},
}),
} as any);
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'any-model',
temperature: 0,
});
// Mock getModelPricing to prevent async issues
vi.mock('../../utils/model', () => ({
getModelPricing: vi.fn().mockResolvedValue({}),
}));
expect(mockResponsesCreate).toHaveBeenCalled();
});
try {
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'any-model',
temperature: 0,
});
} catch (e) {
// Catch errors from incomplete mocking, we only care that responses.create was called
}
it('should route to Responses API when model matches useResponseModels', async () => {
const LobeMockProviderUseResponseModels = createOpenAICompatibleRuntime({
baseURL: 'https://api.test.com/v1',
chatCompletion: {
useResponseModels: ['special-model', /special-\w+/],
},
provider: ModelProvider.OpenAI,
});
const inst = new LobeMockProviderUseResponseModels({ apiKey: 'test' });
const spy = vi.spyOn(inst['client'].responses, 'create');
// Prevent hanging by mocking normal chat completion stream
vi.spyOn(inst['client'].chat.completions, 'create').mockResolvedValue(
new ReadableStream() as any,
);
expect(mockResponsesCreate).toHaveBeenCalled();
},
{ timeout: 10000 },
);
// First invocation: model contains the string
spy.mockResolvedValueOnce({
tee: () => [new ReadableStream(), new ReadableStream()],
} as any);
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'prefix-special-model-suffix',
temperature: 0,
});
expect(spy).toHaveBeenCalledTimes(1);
it(
'should route to Responses API when model matches useResponseModels',
async () => {
const LobeMockProviderUseResponseModels = createOpenAICompatibleRuntime({
baseURL: 'https://api.test.com/v1',
chatCompletion: {
useResponseModels: ['special-model', /special-\w+/],
},
provider: ModelProvider.OpenAI,
});
const inst = new LobeMockProviderUseResponseModels({ apiKey: 'test' });
const spy = vi.spyOn(inst['client'].responses, 'create');
// Prevent hanging by mocking normal chat completion stream
vi.spyOn(inst['client'].chat.completions, 'create').mockResolvedValue(
new ReadableStream() as any,
);
// Second invocation: model matches the RegExp
spy.mockResolvedValueOnce({
tee: () => [new ReadableStream(), new ReadableStream()],
} as any);
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'special-xyz',
temperature: 0,
});
expect(spy).toHaveBeenCalledTimes(2);
// First invocation: model contains the string
spy.mockResolvedValueOnce({
toReadableStream: () =>
new ReadableStream({
start(controller) {
controller.close();
},
}),
} as any);
try {
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'prefix-special-model-suffix',
temperature: 0,
});
} catch (e) {
// Catch errors from incomplete mocking
}
expect(spy).toHaveBeenCalledTimes(1);
// Third invocation: model does not match any useResponseModels patterns
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'unrelated-model',
temperature: 0,
});
expect(spy).toHaveBeenCalledTimes(2); // Ensure no additional calls were made
});
// Second invocation: model matches the RegExp
spy.mockResolvedValueOnce({
toReadableStream: () =>
new ReadableStream({
start(controller) {
controller.close();
},
}),
} as any);
try {
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'special-xyz',
temperature: 0,
});
} catch (e) {
// Catch errors from incomplete mocking
}
expect(spy).toHaveBeenCalledTimes(2);
// Third invocation: model does not match any useResponseModels patterns
try {
await inst.chat({
messages: [{ content: 'hi', role: 'user' }],
model: 'unrelated-model',
temperature: 0,
});
} catch (e) {
// Catch errors
}
expect(spy).toHaveBeenCalledTimes(2); // Ensure no additional calls were made
},
{ timeout: 10000 },
);
});
describe('DEBUG', () => {
@@ -138,10 +138,10 @@ export interface OpenAICompatibleFactoryOptions<T extends Record<string, any> =
useToolsCalling?: boolean;
};
models?:
| ((params: { client: OpenAI }) => Promise<ChatModelCard[]>)
| {
transformModel?: (model: OpenAI.Model) => ChatModelCard;
};
| ((params: { client: OpenAI }) => Promise<ChatModelCard[]>)
| {
transformModel?: (model: OpenAI.Model) => ChatModelCard;
};
provider: string;
responses?: {
handlePayload?: (
@@ -205,6 +205,81 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
this.logPrefix = `lobe-model-runtime:${this.id}`;
}
/**
 * Decide whether a request should go through the Responses API instead of
 * the Chat Completions API.
 *
 * The rules are checked in order and the first one that applies wins:
 *   1. `userApiMode === 'responses'`            -> Responses API
 *   2. any other defined `userApiMode`          -> Chat Completions API
 *   3. truthy `responseApi`                     -> Responses API
 *   4. truthy `flagUseResponse`                 -> Responses API
 *   5. `model` matches `flagUseResponseModels`  -> Responses API
 *      (substring match for strings, `RegExp.test` for regular expressions)
 *   6. `model` is in the built-in `responsesAPIModels` set -> Responses API
 *   otherwise                                   -> Chat Completions API
 *
 * Every decision is reported on the `<logPrefix>:shouldUseResponsesAPI`
 * debug channel.
 *
 * @param params - Inputs for the routing decision
 * @returns `true` when the Responses API should be used, `false` otherwise
 */
private shouldUseResponsesAPI(params: {
  /** Label inserted into debug messages (e.g. 'chat', 'generateObject', 'tool calling') */
  context?: string;
  /** `useResponse` flag resolved from factory/instance options */
  flagUseResponse?: boolean;
  /** Model patterns resolved from factory/instance options */
  flagUseResponseModels?: Array<string | RegExp>;
  /** Model ID being requested */
  model?: string;
  /** Explicit per-call request for the Responses API */
  responseApi?: boolean;
  /** API mode chosen by the user; overrides everything else when defined */
  userApiMode?: string;
}): boolean {
  const trace = debug(`${this.logPrefix}:shouldUseResponsesAPI`);
  const { flagUseResponse, flagUseResponseModels, model, responseApi, userApiMode } = params;
  // Only an absent (undefined) context falls back to the generic label.
  const label = params.context === undefined ? 'operation' : params.context;

  // Rules 1 and 2: a user-selected mode is final, whichever way it points.
  if (userApiMode !== undefined) {
    const explicitResponses = userApiMode === 'responses';
    if (explicitResponses) {
      trace('using Responses API: explicit userApiMode=%s', userApiMode);
    } else {
      trace('using Chat Completions API: userApiMode=%s', userApiMode);
    }
    return explicitResponses;
  }

  // Rule 3: the caller asked for the Responses API on this call.
  if (responseApi) {
    trace('using Responses API: explicit responseApi flag for %s', label);
    return true;
  }

  // Rule 4: blanket opt-in from factory/instance configuration.
  if (flagUseResponse) {
    trace('using Responses API: flagUseResponse=true for %s', label);
    return true;
  }

  // Rules 5 and 6 both need a model ID to look at.
  if (model) {
    const matchesPattern = (pattern: string | RegExp): boolean =>
      typeof pattern === 'string' ? model.includes(pattern) : pattern.test(model);

    if (flagUseResponseModels?.length && flagUseResponseModels.some(matchesPattern)) {
      trace('using Responses API: model %s matches useResponseModels config', model);
      return true;
    }

    if (responsesAPIModels.has(model)) {
      trace('using Responses API: model %s in built-in responsesAPIModels', model);
      return true;
    }
  }

  trace('using Chat Completions API for %s', label);
  return false;
}
async chat({ responseMode, ...payload }: ChatStreamPayload, options?: ChatMethodOptions) {
try {
const log = debug(`${this.logPrefix}:chat`);
@@ -212,41 +287,39 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
log('chat called with model: %s, stream: %s', payload.model, payload.stream ?? true);
// 工厂级 Responses API 路由控制(支持实例覆盖)
const modelId = (payload as any).model as string | undefined;
const shouldUseResponses = (() => {
const instanceChat = ((this._options as any).chatCompletion || {}) as {
useResponse?: boolean;
useResponseModels?: Array<string | RegExp>;
};
const flagUseResponse =
instanceChat.useResponse ?? (chatCompletion ? chatCompletion.useResponse : undefined);
const flagUseResponseModels =
instanceChat.useResponseModels ?? chatCompletion?.useResponseModels;
if (!chatCompletion && !instanceChat) return false;
if (flagUseResponse) return true;
if (!modelId || !flagUseResponseModels?.length) return false;
return flagUseResponseModels.some((m: string | RegExp) =>
typeof m === 'string' ? modelId.includes(m) : (m as RegExp).test(modelId),
);
})();
let processedPayload: any = payload;
const userApiMode = (payload as any).apiMode as string | undefined;
const modelId = (payload as any).model as string | undefined;
const instanceChat = ((this._options as any).chatCompletion || {}) as {
useResponse?: boolean;
useResponseModels?: Array<string | RegExp>;
};
const flagUseResponse =
instanceChat.useResponse ?? (chatCompletion ? chatCompletion.useResponse : undefined);
const flagUseResponseModels =
instanceChat.useResponseModels ?? chatCompletion?.useResponseModels;
// Determine if should use Responses API
const shouldUseResponses = this.shouldUseResponsesAPI({
context: 'chat',
flagUseResponse,
flagUseResponseModels,
model: modelId,
userApiMode,
});
if (shouldUseResponses) {
log('using Responses API mode');
processedPayload = { ...payload, apiMode: 'responses' } as any;
} else {
log('using Chat Completions API mode');
}
// 再进行工厂级处理
const postPayload = chatCompletion?.handlePayload
? chatCompletion.handlePayload(processedPayload, this._options)
: ({
...processedPayload,
stream: processedPayload.stream ?? true,
} as OpenAI.ChatCompletionCreateParamsStreaming);
...processedPayload,
stream: processedPayload.stream ?? true,
} as OpenAI.ChatCompletionCreateParamsStreaming);
if ((postPayload as any).apiMode === 'responses') {
return this.handleResponseAPIMode(processedPayload, options);
@@ -312,13 +385,13 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
return StreamingResponse(
chatCompletion?.handleStream
? chatCompletion.handleStream(prod, {
callbacks: streamOptions.callbacks,
inputStartAt,
})
callbacks: streamOptions.callbacks,
inputStartAt,
})
: OpenAIStream(prod, {
...streamOptions,
inputStartAt,
}),
...streamOptions,
inputStartAt,
}),
{
headers: options?.headers,
},
@@ -342,9 +415,9 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
return StreamingResponse(
chatCompletion?.handleStream
? chatCompletion.handleStream(stream, {
callbacks: streamOptions.callbacks,
inputStartAt,
})
callbacks: streamOptions.callbacks,
inputStartAt,
})
: OpenAIStream(stream, { ...streamOptions, enableStreaming: false, inputStartAt }),
{
headers: options?.headers,
@@ -500,47 +573,23 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
}
// Factory-level Responses API routing control (supports instance override)
const shouldUseResponses = (() => {
const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
useResponse?: boolean;
useResponseModels?: Array<string | RegExp>;
};
const flagUseResponse =
instanceGenerateObject.useResponse ??
(generateObjectConfig ? generateObjectConfig.useResponse : undefined);
const flagUseResponseModels =
instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;
const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
useResponse?: boolean;
useResponseModels?: Array<string | RegExp>;
};
const flagUseResponse =
instanceGenerateObject.useResponse ??
(generateObjectConfig ? generateObjectConfig.useResponse : undefined);
const flagUseResponseModels =
instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;
if (responseApi) {
log('using Responses API due to explicit responseApi flag');
return true;
}
if (flagUseResponse) {
log('using Responses API due to useResponse flag');
return true;
}
// Use factory-configured model list if provided
if (model && flagUseResponseModels?.length) {
const matches = flagUseResponseModels.some((m: string | RegExp) =>
typeof m === 'string' ? model.includes(m) : (m as RegExp).test(model),
);
if (matches) {
log('using Responses API: model %s matches useResponseModels config', model);
return true;
}
}
// Default: use built-in responsesAPIModels
if (model && responsesAPIModels.has(model)) {
log('using Responses API: model %s in built-in responsesAPIModels', model);
return true;
}
log('using Chat Completions API for generateObject');
return false;
})();
const shouldUseResponses = this.shouldUseResponsesAPI({
context: 'generateObject',
flagUseResponse,
flagUseResponseModels,
model,
responseApi,
});
// Apply schema transformation if configured
const processedSchema = generateObjectConfig?.handleSchema
@@ -790,11 +839,11 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
...res,
...(reasoning || reasoning_effort
? {
reasoning: {
...reasoning,
...(reasoning_effort && { effort: reasoning_effort }),
},
}
reasoning: {
...reasoning,
...(reasoning_effort && { effort: reasoning_effort }),
},
}
: {}),
input,
...(max_tokens && { max_output_tokens: max_tokens }),
@@ -885,47 +934,23 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
);
// Factory-level Responses API routing control (supports instance override)
const shouldUseResponses = (() => {
const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
useResponse?: boolean;
useResponseModels?: Array<string | RegExp>;
};
const flagUseResponse =
instanceGenerateObject.useResponse ??
(generateObjectConfig ? generateObjectConfig.useResponse : undefined);
const flagUseResponseModels =
instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;
const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
useResponse?: boolean;
useResponseModels?: Array<string | RegExp>;
};
const flagUseResponse =
instanceGenerateObject.useResponse ??
(generateObjectConfig ? generateObjectConfig.useResponse : undefined);
const flagUseResponseModels =
instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;
if (responseApi) {
log('using Responses API due to explicit responseApi flag');
return true;
}
if (flagUseResponse) {
log('using Responses API due to useResponse flag');
return true;
}
// Use factory-configured model list if provided
if (model && flagUseResponseModels?.length) {
const matches = flagUseResponseModels.some((m: string | RegExp) =>
typeof m === 'string' ? model.includes(m) : (m as RegExp).test(model),
);
if (matches) {
log('using Responses API: model %s matches useResponseModels config', model);
return true;
}
}
// Default: use built-in responsesAPIModels
if (model && responsesAPIModels.has(model)) {
log('using Responses API: model %s in built-in responsesAPIModels', model);
return true;
}
log('using Chat Completions API for tool calling');
return false;
})();
const shouldUseResponses = this.shouldUseResponsesAPI({
context: 'tool calling',
flagUseResponse,
flagUseResponseModels,
model,
responseApi,
});
if (shouldUseResponses) {
log('calling responses.create for tool calling');
@@ -5,7 +5,7 @@ import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { responsesAPIModels } from '../../const/models';
import { ChatStreamPayload } from '../../types/chat';
import * as modelParseModule from '../../utils/modelParse';
import { LobeNewAPIAI, NewAPIModelCard, NewAPIPricing, handlePayload, params } from './index';
import { LobeNewAPIAI, NewAPIModelCard, NewAPIPricing, params } from './index';
// Mock external dependencies
vi.mock('../../utils/modelParse');
@@ -701,78 +701,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
});
});
// Direct unit tests for the exported `handlePayload` helper.
// Each case stubs `responsesAPIModels.has` so that exactly one of the
// helper's three conditions (set membership, 'gpt-' substring, /^o\d/ prefix)
// is responsible for the outcome.
// NOTE(review): the stub is assigned onto the shared `responsesAPIModels`
// object and is not restored inside this block — confirm cleanup happens in
// an outer afterEach.
describe('HandlePayload Function - Direct Testing', () => {
beforeEach(() => {
// Default stub: only 'o1-pro' is reported as a member of responsesAPIModels
(responsesAPIModels as any).has = vi.fn((model: string) => model === 'o1-pro');
});
// Set-membership condition: the stub returns true for 'o1-pro'.
it('should add apiMode for models in responsesAPIModels set', () => {
(responsesAPIModels as any).has = vi.fn((model: string) => model === 'o1-pro');
const payload: ChatStreamPayload = {
model: 'o1-pro',
messages: [{ role: 'user', content: 'test' }],
temperature: 0.5,
};
const result = handlePayload(payload);
expect(result).toEqual({ ...payload, apiMode: 'responses' });
});
// Set lookup forced to false, so the 'gpt-' substring condition must match.
it('should add apiMode for gpt- models', () => {
(responsesAPIModels as any).has = vi.fn(() => false);
const payload: ChatStreamPayload = {
model: 'gpt-4o',
messages: [{ role: 'user', content: 'test' }],
temperature: 0.5,
};
const result = handlePayload(payload);
expect(result).toEqual({ ...payload, apiMode: 'responses' });
});
// Set lookup forced to false; 'o1-mini' starts with 'o' followed by a digit.
it('should add apiMode for o1 models', () => {
(responsesAPIModels as any).has = vi.fn(() => false);
const payload: ChatStreamPayload = {
model: 'o1-mini',
messages: [{ role: 'user', content: 'test' }],
temperature: 0.5,
};
const result = handlePayload(payload);
expect(result).toEqual({ ...payload, apiMode: 'responses' });
});
// Same prefix condition with a different digit ('o3-turbo').
it('should add apiMode for o3 models', () => {
(responsesAPIModels as any).has = vi.fn(() => false);
const payload: ChatStreamPayload = {
model: 'o3-turbo',
messages: [{ role: 'user', content: 'test' }],
temperature: 0.5,
};
const result = handlePayload(payload);
expect(result).toEqual({ ...payload, apiMode: 'responses' });
});
// No condition matches: the payload must come back without an apiMode field.
it('should not modify payload for regular models', () => {
(responsesAPIModels as any).has = vi.fn(() => false);
const payload: ChatStreamPayload = {
model: 'claude-3-sonnet',
messages: [{ role: 'user', content: 'test' }],
temperature: 0.5,
};
const result = handlePayload(payload);
expect(result).toEqual(payload);
});
});
describe('Routers Function - Direct Testing', () => {
it('should generate routers with correct apiTypes', () => {
const options = { apiKey: 'test', baseURL: 'https://api.newapi.com/v1' };
@@ -823,11 +751,11 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
expect(routers[3].options.baseURL).toBe('https://custom.com/v1');
});
it('should configure openai router with handlePayload', () => {
it('should configure openai router with useResponseModels', () => {
const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
const routers = params.routers(options);
expect((routers[3].options as any).chatCompletion?.handlePayload).toBe(handlePayload);
expect((routers[3].options as any).chatCompletion?.useResponseModels).toBeDefined();
});
it('should filter anthropic models for anthropic router', () => {
@@ -4,7 +4,6 @@ import urlJoin from 'url-join';
import { responsesAPIModels } from '../../const/models';
import { createRouterRuntime } from '../../core/RouterRuntime';
import { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
import { ChatStreamPayload } from '../../types/chat';
import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
export interface NewAPIModelCard {
@@ -26,18 +25,6 @@ export interface NewAPIPricing {
supported_endpoint_types?: string[];
}
/**
 * Mark a chat payload for the OpenAI Responses API when its model calls for it.
 *
 * A model qualifies when any of the following holds:
 *   - it is a member of `responsesAPIModels`
 *   - its ID contains the substring 'gpt-'
 *   - its ID starts with 'o' followed by a digit
 *
 * @param payload - The outgoing chat payload
 * @returns A shallow copy of the payload with `apiMode: 'responses'` when the
 *          model qualifies; otherwise the same payload object, untouched
 */
export const handlePayload = (payload: ChatStreamPayload) => {
  const { model } = payload;
  const needsResponsesApi =
    responsesAPIModels.has(model) || model.includes('gpt-') || /^o\d/.test(model);

  return needsResponsesApi ? { ...payload, apiMode: 'responses' } : payload;
};
export const params = {
debug: {
chatCompletion: () => process.env.DEBUG_NEWAPI_CHAT_COMPLETION === '1',
@@ -178,7 +165,7 @@ export const params = {
...options,
baseURL: urlJoin(userBaseURL, '/v1'),
chatCompletion: {
handlePayload,
useResponseModels: [...Array.from(responsesAPIModels), /gpt-\d(?!\d)/, /^o\d/],
},
},
},
@@ -1548,8 +1548,9 @@ describe('LobeOpenRouterAI - custom features', () => {
const models = await params.models();
const mixedModel = models.find((m) => m.id === 'mixed-free/model');
// Input or output is 0, so should be marked as free
expect(mixedModel?.displayName).toContain('(free)');
// Input or output is 0. Current behavior does not append '(free)' for mixed pricing,
// so assert the displayName equals the cleaned model name.
expect(mixedModel?.displayName).toBe('Mixed Free Model');
});
it('should handle very large pricing values', async () => {
@@ -99,7 +99,7 @@ export const params = {
const cachedInputPrice = formatPrice(pricing.input_cache_read);
const writeCacheInputPrice = formatPrice(pricing.input_cache_write);
const isFree = (inputPrice === 0 || outputPrice === 0) && !displayName.endsWith('(free)');
const isFree = inputPrice === 0 && outputPrice === 0 && !displayName.endsWith('(free)');
if (isFree) {
displayName += ' (free)';
}
@@ -42,6 +42,15 @@ const CreateNewProvider = memo<CreateNewProviderProps>(({ onClose, open }) => {
name: values.name || values.id,
};
// 只为 openai 和 router (newapi) 类型的自定义 provider 添加 supportResponsesApi: true
const sdkType = values.settings?.sdkType;
if (sdkType === 'openai' || sdkType === 'router') {
finalValues.settings = {
...finalValues.settings,
supportResponsesApi: true,
};
}
await createNewAiProvider(finalValues);
setLoading(false);
navigate(`/settings?active=provider&provider=${values.id}`);
@@ -102,12 +111,16 @@ const CreateNewProvider = memo<CreateNewProviderProps>(({ onClose, open }) => {
{
children: (
<Select
optionRender={({ label, value }) => (
<Flexbox align={'center'} gap={8} horizontal>
<ProviderIcon provider={value as string} size={18} />
{label}
</Flexbox>
)}
optionRender={({ label, value }) => {
// Map 'router' to 'newapi' for displaying the correct icon
const iconProvider = value === 'router' ? 'newapi' : (value as string);
return (
<Flexbox align={'center'} gap={8} horizontal>
<ProviderIcon provider={iconProvider} size={18} />
{label}
</Flexbox>
);
}}
options={CUSTOM_PROVIDER_SDK_OPTIONS}
placeholder={t('createNewAiProvider.sdkType.placeholder')}
variant={'filled'}
@@ -9,4 +9,5 @@ export const CUSTOM_PROVIDER_SDK_OPTIONS = [
{ label: 'Qwen', value: 'qwen' },
{ label: 'Volcengine', value: 'volcengine' },
{ label: 'Ollama', value: 'ollama' },
{ label: 'New API', value: 'router' },
] satisfies { label: string; value: AiProviderSDKType }[];
+1
View File
@@ -11,6 +11,7 @@ const AiHubMix: ModelProviderCard = {
settings: {
sdkType: 'router',
showModelFetcher: true,
supportResponsesApi: true,
},
url: 'https://aihubmix.com?utm_source=lobehub',
};
+1
View File
@@ -13,6 +13,7 @@ const NewAPI: ModelProviderCard = {
},
sdkType: 'router',
showModelFetcher: true,
supportResponsesApi: true,
},
url: 'https://github.com/Calcium-Ion/new-api',
};
+1 -1
View File
@@ -298,7 +298,7 @@ export default {
},
helpDoc: '配置教程',
responsesApi: {
desc: '采用 OpenAI 新一代请求格式规范,解锁思维链等进阶特性',
desc: '采用 OpenAI 新一代请求格式规范,解锁思维链等进阶特性 (仅 OpenAI 模型支持)',
title: '使用 Responses API 规范',
},
waitingForMore: '更多模型正在 <1>计划接入</1> 中,敬请期待',