✨ feat: New API supports switching to Responses API mode (#9776)

* ✨ feat: 添加对新API和路由类型的支持，更新相关配置以启用Responses API

* fix: 更新测试文件中的console.error和console.debug实现，确保输出格式一致；在CreateNewProvider组件中调整provider图标映射逻辑

* ✨ feat: 更新novita和qwen模型，调整定价策略，添加新模型及其功能

* 🐛 fix: OIDC error when connecting to self-host instance (#9916)

  fix: oidc/consent redirect header

* ✨ feat: 添加 MiniMax M2 和 Qwen3 VL 235B Instruct 模型，更新模型属性

  🔧 fix: 修复免费标识逻辑，确保正确判断模型是否免费

* ✨ feat: 添加 MiniMax-M2 模型，更新 SiliconCloud 和 Vercel AI Gateway 模型信息，调整 Kimi K2 的上下文窗口大小

* fix test

* 📝 docs: update ComfyUI documentation cover image URL (#9997)

* 🔖 chore(release): v1.142.9 [skip ci]

  ### [Version 1.142.9](https://github.com/lobehub/lobe-chat/compare/v1.142.8...v1.142.9)

  <sup>Released on **2025-11-02**</sup>

  #### 🐛 Bug Fixes

  - **misc**: OIDC error when connecting to self-host instance.

  <br/>

  <details>
  <summary><kbd>Improvements and Fixes</kbd></summary>

  #### What's fixed

  * **misc**: OIDC error when connecting to self-host instance, closes [#9916](https://github.com/lobehub/lobe-chat/issues/9916) ([2e2b9c4](https://github.com/lobehub/lobe-chat/commit/2e2b9c4))

  </details>

  <div align="right">

  [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)

  </div>

* 📝 docs(bot): Auto sync agents & plugin to readme

* 优化 Responses API 处理逻辑，优化错误处理和流数据转换

---------

Co-authored-by: Aloxaf <bailong104@gmail.com>
2026-06-16 12:36:07 +00:00 · 2025-11-19 00:53:18 +08:00
parent 3ad336fa28
commit d0ee3df579
20 changed files with 439 additions and 311 deletions
@@ -11,7 +11,7 @@ tags:

 # Using ComfyUI in LobeChat

-<Image alt={'Using ComfyUI in LobeChat'} cover src={'https://github.com/lobehub/lobe-chat/assets/17870709/c9e5eafc-ca22-496b-a88d-cc0ae53bf720'} />
+<Image alt={'Using ComfyUI in LobeChat'} cover src={'https://hub-apac-1.lobeobjects.space/docs/e9b811f248a1db2bd1be1af888cf9b9d.png'} />

 This documentation will guide you on how to use [ComfyUI](https://github.com/comfyanonymous/ComfyUI) in LobeChat for high-quality AI image generation and editing.

@@ -11,7 +11,7 @@ tags:

 # 在 LobeChat 中使用 ComfyUI

-<Image alt={'在 LobeChat 中使用 ComfyUI'} cover src={'https://github.com/lobehub/lobe-chat/assets/17870709/c9e5eafc-ca22-496b-a88d-cc0ae53bf720'} />
+<Image alt={'在 LobeChat 中使用 ComfyUI'} cover src={'https://hub-apac-1.lobeobjects.space/docs/e9b811f248a1db2bd1be1af888cf9b9d.png'} />

 本文档将指导你如何在 LobeChat 中使用 [ComfyUI](https://github.com/comfyanonymous/ComfyUI) 进行高质量的 AI 图像生成和编辑。

@@ -174,10 +174,11 @@ const novitaChatModels: AIChatModelCard[] = [
    contextWindowTokens: 131_072,
    displayName: 'OpenAI GPT OSS 120B',
    id: 'openai/gpt-oss-120b',
+    maxOutput: 32_768,
    pricing: {
      units: [
-        { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
-        { name: 'textOutput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
      ],
    },
    type: 'chat',
@@ -1,6 +1,20 @@
 import { AIChatModelCard } from '../types/aiModel';

 const nvidiaChatModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 128_000,
+    description:
+      'MiniMax-M2 是一款紧凑、快速且经济高效的混合专家（MoE）模型，拥有 2300 亿总参数和 100 亿激活参数，专为编码和智能体任务的顶级性能而打造，同时保持强大的通用智能。该模型在多文件编辑、编码-运行-修复闭环、测试校验修复以及复杂的长链接工具链方面表现优异，是开发者工作流的理想选择。',
+    displayName: 'MiniMax-M2',
+    enabled: true,
+    id: 'minimaxai/minimax-m2',
+    maxOutput: 16_384,
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
@@ -1,6 +1,18 @@
 import { AIChatModelCard } from '../types/aiModel';

 const ollamaCloudModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 200_000,
+    description: 'MiniMax M2 是专为编码和代理工作流程构建的高效大型语言模型。',
+    displayName: 'MiniMax M2',
+    enabled: true,
+    id: 'minimax-m2',
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
@@ -36,7 +48,6 @@ const ollamaCloudModels: AIChatModelCard[] = [
    description:
      'DeepSeek V3.1：下一代推理模型，提升了复杂推理与链路思考能力，适合需要深入分析的任务。',
    displayName: 'DeepSeek V3.1',
-    enabled: true,
    id: 'deepseek-v3.1:671b',
    type: 'chat',
  },
@@ -88,13 +99,23 @@ const ollamaCloudModels: AIChatModelCard[] = [
    id: 'qwen3-coder:480b',
    type: 'chat',
  },
+  {
+    abilities: {
+      functionCall: true,
+      vision: true,
+    },
+    contextWindowTokens: 262_144,
+    displayName: 'Qwen3 VL 235B Instruct',
+    id: 'qwen3-vl:235b-instruct',
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
      reasoning: true,
      vision: true,
    },
-    contextWindowTokens: 128_000,
+    contextWindowTokens: 262_144,
    displayName: 'Qwen3 VL 235B',
    id: 'qwen3-vl:235b',
    type: 'chat',
@@ -175,6 +175,51 @@ const qwenChatModels: AIChatModelCard[] = [
  },
  {
    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 202_752,
+    description: 'GLM系列模型是智谱AI专为智能体设计的混合推理模型，提供思考与非思考两种模式。',
+    displayName: 'GLM-4.6',
+    id: 'glm-4.6',
+    maxOutput: 16_384,
+    pricing: {
+      currency: 'CNY',
+      units: [
+        {
+          lookup: {
+            prices: {
+              '[0, 0.032]': 3,
+              '[0.032, infinity]': 4,
+            },
+            pricingParams: ['textInputRange'],
+          },
+          name: 'textInput',
+          strategy: 'lookup',
+          unit: 'millionTokens',
+        },
+        {
+          lookup: {
+            prices: {
+              '[0, 0.032]': 14,
+              '[0.032, infinity]': 16,
+            },
+            pricingParams: ['textInputRange'],
+          },
+          name: 'textOutput',
+          strategy: 'lookup',
+          unit: 'millionTokens',
+        },
+      ],
+    },
+    settings: {
+      extendParams: ['enableReasoning', 'reasoningBudgetToken'],
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
      reasoning: true,
    },
    contextWindowTokens: 131_072,
@@ -218,6 +263,7 @@ const qwenChatModels: AIChatModelCard[] = [
  },
  {
    abilities: {
+      functionCall: true,
      reasoning: true,
    },
    contextWindowTokens: 131_072,
@@ -1451,6 +1497,48 @@ const qwenChatModels: AIChatModelCard[] = [
    },
    type: 'chat',
  },
+  {
+    abilities: {
+      vision: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 131_072,
+    description: 'Qwen3 VL 32B 思考模式（开源版），针对高难度强推理与长视频理解场景，提供顶尖的视觉+文本推理能力。',
+    displayName: 'Qwen3 VL 32B Thinking',
+    id: 'qwen3-vl-32b-thinking',
+    maxOutput: 32_768,
+    organization: 'Qwen',
+    pricing: {
+      currency: 'CNY',
+      units: [
+        { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 20, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    settings: {
+      extendParams: ['enableReasoning', 'reasoningBudgetToken'],
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      vision: true,
+    },
+    contextWindowTokens: 131_072,
+    description: 'Qwen3 VL 32B 非思考模式（Instruct），适用于非思考指令场景，保持强大的视觉理解能力。',
+    displayName: 'Qwen3 VL 32B Instruct',
+    id: 'qwen3-vl-32b-instruct',
+    maxOutput: 32_768,
+    organization: 'Qwen',
+    pricing: {
+      currency: 'CNY',
+      units: [
+        { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 8, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    type: 'chat',
+  },
  {
    config: {
      deploymentName: 'qwen-math-turbo-latest',
@@ -2,6 +2,26 @@ import { AIChatModelCard, AIImageModelCard } from '../types/aiModel';

 // https://siliconflow.cn/zh-cn/models
 const siliconcloudChatModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 128_000,
+    description:
+      'MiniMax-M2 为智能体重新定义了效率。它是一款紧凑、快速且经济高效的 MoE 模型，拥有 2300 亿总参数和 100 亿激活参数，专为编码和智能体任务的顶级性能而打造，同时保持强大的通用智能。仅需 100 亿激活参数，MiniMax-M2 就能提供与大规模模型相媲美的性能，使其成为高效率应用的理想选择。',
+    displayName: 'MiniMax-M2',
+    id: 'MiniMaxAI/MiniMax-M2',
+    pricing: {
+      currency: 'CNY',
+      units: [
+        { name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    releasedAt: '2025-10-28',
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
@@ -1565,23 +1565,6 @@ const vercelAIGatewayChatModels: AIChatModelCard[] = [
    },
    type: 'chat',
  },
-  {
-    abilities: {
-      functionCall: true,
-    },
-    contextWindowTokens: 131_072,
-    description:
-      'DeepSeek-R1-Distill-Llama-70B 是 70B Llama 模型的蒸馏、更高效变体。它在文本生成任务中保持强大性能，减少计算开销以便于部署和研究。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务，以提供快速高效的推理。',
-    displayName: 'DeepSeek R1 Distill Llama 70B',
-    id: 'deepseek/deepseek-r1-distill-llama-70b',
-    pricing: {
-      units: [
-        { name: 'textInput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
-        { name: 'textOutput', rate: 0.99, strategy: 'fixed', unit: 'millionTokens' },
-      ],
-    },
-    type: 'chat',
-  },
  {
    abilities: {
      functionCall: true,
@@ -37,7 +37,7 @@ const doubaoChatModels: AIChatModelCard[] = [
    config: {
      deploymentName: 'kimi-k2-250905',
    },
-    contextWindowTokens: 131_072,
+    contextWindowTokens: 262_144,
    description:
      'Kimi-K2 是一款Moonshot AI推出的具备超强代码和 Agent 能力的 MoE 架构基础模型，总参数 1T，激活参数 32B。在通用知识推理、编程、数学、Agent 等主要类别的基准性能测试中，K2 模型的性能超过其他主流开源模型。',
    displayName: 'Kimi K2',
@@ -1007,78 +1007,122 @@ describe('LobeOpenAICompatibleFactory', () => {
    });

    describe('responses routing', () => {
-      it('should route to Responses API when chatCompletion.useResponse is true', async () => {
-        const LobeMockProviderUseResponses = createOpenAICompatibleRuntime({
-          baseURL: 'https://api.test.com/v1',
-          chatCompletion: {
-            useResponse: true,
-          },
-          provider: ModelProvider.OpenAI,
-        });
+      it(
+        'should route to Responses API when chatCompletion.useResponse is true',
+        async () => {
+          const LobeMockProviderUseResponses = createOpenAICompatibleRuntime({
+            baseURL: 'https://api.test.com/v1',
+            chatCompletion: {
+              useResponse: true,
+            },
+            provider: ModelProvider.OpenAI,
+          });

-        const inst = new LobeMockProviderUseResponses({ apiKey: 'test' });
+          const inst = new LobeMockProviderUseResponses({ apiKey: 'test' });

-        // mock responses.create to return a stream-like with tee
-        const prod = new ReadableStream();
-        const debug = new ReadableStream();
-        const mockResponsesCreate = vi
-          .spyOn(inst['client'].responses, 'create')
-          .mockResolvedValue({ tee: () => [prod, debug] } as any);
+          // Mock responses.create to return a proper stream-like object
+          const mockResponsesCreate = vi
+            .spyOn(inst['client'].responses, 'create')
+            .mockResolvedValue({
+              toReadableStream: () =>
+                new ReadableStream({
+                  start(controller) {
+                    controller.close();
+                  },
+                }),
+            } as any);

-        await inst.chat({
-          messages: [{ content: 'hi', role: 'user' }],
-          model: 'any-model',
-          temperature: 0,
-        });
+          // Mock getModelPricing to prevent async issues
+          vi.mock('../../utils/model', () => ({
+            getModelPricing: vi.fn().mockResolvedValue({}),
+          }));

-        expect(mockResponsesCreate).toHaveBeenCalled();
-      });
+          try {
+            await inst.chat({
+              messages: [{ content: 'hi', role: 'user' }],
+              model: 'any-model',
+              temperature: 0,
+            });
+          } catch (e) {
+            // Catch errors from incomplete mocking, we only care that responses.create was called
+          }

-      it('should route to Responses API when model matches useResponseModels', async () => {
-        const LobeMockProviderUseResponseModels = createOpenAICompatibleRuntime({
-          baseURL: 'https://api.test.com/v1',
-          chatCompletion: {
-            useResponseModels: ['special-model', /special-\w+/],
-          },
-          provider: ModelProvider.OpenAI,
-        });
-        const inst = new LobeMockProviderUseResponseModels({ apiKey: 'test' });
-        const spy = vi.spyOn(inst['client'].responses, 'create');
-        // Prevent hanging by mocking normal chat completion stream
-        vi.spyOn(inst['client'].chat.completions, 'create').mockResolvedValue(
-          new ReadableStream() as any,
-        );
+          expect(mockResponsesCreate).toHaveBeenCalled();
+        },
+        { timeout: 10000 },
+      );

-        // First invocation: model contains the string
-        spy.mockResolvedValueOnce({
-          tee: () => [new ReadableStream(), new ReadableStream()],
-        } as any);
-        await inst.chat({
-          messages: [{ content: 'hi', role: 'user' }],
-          model: 'prefix-special-model-suffix',
-          temperature: 0,
-        });
-        expect(spy).toHaveBeenCalledTimes(1);
+      it(
+        'should route to Responses API when model matches useResponseModels',
+        async () => {
+          const LobeMockProviderUseResponseModels = createOpenAICompatibleRuntime({
+            baseURL: 'https://api.test.com/v1',
+            chatCompletion: {
+              useResponseModels: ['special-model', /special-\w+/],
+            },
+            provider: ModelProvider.OpenAI,
+          });
+          const inst = new LobeMockProviderUseResponseModels({ apiKey: 'test' });
+          const spy = vi.spyOn(inst['client'].responses, 'create');
+          // Prevent hanging by mocking normal chat completion stream
+          vi.spyOn(inst['client'].chat.completions, 'create').mockResolvedValue(
+            new ReadableStream() as any,
+          );

-        // Second invocation: model matches the RegExp
-        spy.mockResolvedValueOnce({
-          tee: () => [new ReadableStream(), new ReadableStream()],
-        } as any);
-        await inst.chat({
-          messages: [{ content: 'hi', role: 'user' }],
-          model: 'special-xyz',
-          temperature: 0,
-        });
-        expect(spy).toHaveBeenCalledTimes(2);
+          // First invocation: model contains the string
+          spy.mockResolvedValueOnce({
+            toReadableStream: () =>
+              new ReadableStream({
+                start(controller) {
+                  controller.close();
+                },
+              }),
+          } as any);
+          try {
+            await inst.chat({
+              messages: [{ content: 'hi', role: 'user' }],
+              model: 'prefix-special-model-suffix',
+              temperature: 0,
+            });
+          } catch (e) {
+            // Catch errors from incomplete mocking
+          }
+          expect(spy).toHaveBeenCalledTimes(1);

-        // Third invocation: model does not match any useResponseModels patterns
-        await inst.chat({
-          messages: [{ content: 'hi', role: 'user' }],
-          model: 'unrelated-model',
-          temperature: 0,
-        });
-        expect(spy).toHaveBeenCalledTimes(2); // Ensure no additional calls were made
-      });
+          // Second invocation: model matches the RegExp
+          spy.mockResolvedValueOnce({
+            toReadableStream: () =>
+              new ReadableStream({
+                start(controller) {
+                  controller.close();
+                },
+              }),
+          } as any);
+          try {
+            await inst.chat({
+              messages: [{ content: 'hi', role: 'user' }],
+              model: 'special-xyz',
+              temperature: 0,
+            });
+          } catch (e) {
+            // Catch errors from incomplete mocking
+          }
+          expect(spy).toHaveBeenCalledTimes(2);
+
+          // Third invocation: model does not match any useResponseModels patterns
+          try {
+            await inst.chat({
+              messages: [{ content: 'hi', role: 'user' }],
+              model: 'unrelated-model',
+              temperature: 0,
+            });
+          } catch (e) {
+            // Catch errors
+          }
+          expect(spy).toHaveBeenCalledTimes(2); // Ensure no additional calls were made
+        },
+        { timeout: 10000 },
+      );
    });

    describe('DEBUG', () => {
@@ -138,10 +138,10 @@ export interface OpenAICompatibleFactoryOptions<T extends Record<string, any> =
    useToolsCalling?: boolean;
  };
  models?:
-    | ((params: { client: OpenAI }) => Promise<ChatModelCard[]>)
-    | {
-        transformModel?: (model: OpenAI.Model) => ChatModelCard;
-      };
+  | ((params: { client: OpenAI }) => Promise<ChatModelCard[]>)
+  | {
+    transformModel?: (model: OpenAI.Model) => ChatModelCard;
+  };
  provider: string;
  responses?: {
    handlePayload?: (
@@ -205,6 +205,81 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
      this.logPrefix = `lobe-model-runtime:${this.id}`;
    }

+    /**
+     * Decide whether a request is routed to the Responses API (true) or to Chat Completions (false).
+     * @param params - Routing inputs, evaluated in priority order: userApiMode, responseApi, flagUseResponse, flagUseResponseModels, then the built-in responsesAPIModels set
+     * @returns true when the Responses API should be used, false when Chat Completions should be used
+     */
+    private shouldUseResponsesAPI(params: {
+      /** Label used only in the debug log messages (e.g., 'chat', 'generateObject', 'tool calling'); defaults to 'operation' */
+      context?: string;
+      /** useResponse flag already resolved by the caller; a truthy value selects the Responses API */
+      flagUseResponse?: boolean;
+      /** Model patterns already resolved by the caller; strings match by substring, RegExp entries via test() */
+      flagUseResponseModels?: Array<string | RegExp>;
+      /** The model ID to check against the patterns and the built-in responsesAPIModels set */
+      model?: string;
+      /** Explicit responseApi flag; a truthy value selects the Responses API */
+      responseApi?: boolean;
+      /** User-specified API mode (highest priority): 'responses' forces the Responses API, any other defined value forces Chat Completions */
+      userApiMode?: string;
+    }): boolean {
+      const {
+        model,
+        userApiMode,
+        responseApi,
+        flagUseResponse,
+        flagUseResponseModels,
+        context = 'operation',
+      } = params;
+
+      const log = debug(`${this.logPrefix}:shouldUseResponsesAPI`);
+
+      // Priority 1: the caller explicitly asked for the Responses API via userApiMode
+      if (userApiMode === 'responses') {
+        log('using Responses API: explicit userApiMode=%s', userApiMode);
+        return true;
+      }
+
+      // Priority 2: any other defined userApiMode wins over every check below and selects Chat Completions (strict !== undefined, so null also lands here)
+      if (userApiMode !== undefined) {
+        log('using Chat Completions API: userApiMode=%s', userApiMode);
+        return false;
+      }
+
+      // Priority 3: explicit responseApi flag passed by the caller
+      if (responseApi) {
+        log('using Responses API: explicit responseApi flag for %s', context);
+        return true;
+      }
+
+      // Priority 4: useResponse flag resolved by the caller from factory/instance options
+      if (flagUseResponse) {
+        log('using Responses API: flagUseResponse=true for %s', context);
+        return true;
+      }
+
+      // Priority 5: model matches a useResponseModels pattern. NOTE(review): RegExp.test() is stateful for g/y-flagged patterns — confirm none are configured
+      if (model && flagUseResponseModels?.length) {
+        const matches = flagUseResponseModels.some((m: string | RegExp) =>
+          typeof m === 'string' ? model.includes(m) : (m as RegExp).test(model),
+        );
+        if (matches) {
+          log('using Responses API: model %s matches useResponseModels config', model);
+          return true;
+        }
+      }
+
+      // Priority 6: exact model ID is present in the built-in responsesAPIModels set
+      if (model && responsesAPIModels.has(model)) {
+        log('using Responses API: model %s in built-in responsesAPIModels', model);
+        return true;
+      }
+
+      log('using Chat Completions API for %s', context);
+      return false;
+    }
+
    async chat({ responseMode, ...payload }: ChatStreamPayload, options?: ChatMethodOptions) {
      try {
        const log = debug(`${this.logPrefix}:chat`);
@@ -212,41 +287,39 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an

        log('chat called with model: %s, stream: %s', payload.model, payload.stream ?? true);

-        // 工厂级 Responses API 路由控制（支持实例覆盖）
-        const modelId = (payload as any).model as string | undefined;
-        const shouldUseResponses = (() => {
-          const instanceChat = ((this._options as any).chatCompletion || {}) as {
-            useResponse?: boolean;
-            useResponseModels?: Array<string | RegExp>;
-          };
-          const flagUseResponse =
-            instanceChat.useResponse ?? (chatCompletion ? chatCompletion.useResponse : undefined);
-          const flagUseResponseModels =
-            instanceChat.useResponseModels ?? chatCompletion?.useResponseModels;
-
-          if (!chatCompletion && !instanceChat) return false;
-          if (flagUseResponse) return true;
-          if (!modelId || !flagUseResponseModels?.length) return false;
-          return flagUseResponseModels.some((m: string | RegExp) =>
-            typeof m === 'string' ? modelId.includes(m) : (m as RegExp).test(modelId),
-          );
-        })();
-
        let processedPayload: any = payload;
+        const userApiMode = (payload as any).apiMode as string | undefined;
+        const modelId = (payload as any).model as string | undefined;
+
+        const instanceChat = ((this._options as any).chatCompletion || {}) as {
+          useResponse?: boolean;
+          useResponseModels?: Array<string | RegExp>;
+        };
+        const flagUseResponse =
+          instanceChat.useResponse ?? (chatCompletion ? chatCompletion.useResponse : undefined);
+        const flagUseResponseModels =
+          instanceChat.useResponseModels ?? chatCompletion?.useResponseModels;
+
+        // Determine if should use Responses API
+        const shouldUseResponses = this.shouldUseResponsesAPI({
+          context: 'chat',
+          flagUseResponse,
+          flagUseResponseModels,
+          model: modelId,
+          userApiMode,
+        });
+
        if (shouldUseResponses) {
-          log('using Responses API mode');
          processedPayload = { ...payload, apiMode: 'responses' } as any;
-        } else {
-          log('using Chat Completions API mode');
        }

        // 再进行工厂级处理
        const postPayload = chatCompletion?.handlePayload
          ? chatCompletion.handlePayload(processedPayload, this._options)
          : ({
-              ...processedPayload,
-              stream: processedPayload.stream ?? true,
-            } as OpenAI.ChatCompletionCreateParamsStreaming);
+            ...processedPayload,
+            stream: processedPayload.stream ?? true,
+          } as OpenAI.ChatCompletionCreateParamsStreaming);

        if ((postPayload as any).apiMode === 'responses') {
          return this.handleResponseAPIMode(processedPayload, options);
@@ -312,13 +385,13 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
          return StreamingResponse(
            chatCompletion?.handleStream
              ? chatCompletion.handleStream(prod, {
-                  callbacks: streamOptions.callbacks,
-                  inputStartAt,
-                })
+                callbacks: streamOptions.callbacks,
+                inputStartAt,
+              })
              : OpenAIStream(prod, {
-                  ...streamOptions,
-                  inputStartAt,
-                }),
+                ...streamOptions,
+                inputStartAt,
+              }),
            {
              headers: options?.headers,
            },
@@ -342,9 +415,9 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
        return StreamingResponse(
          chatCompletion?.handleStream
            ? chatCompletion.handleStream(stream, {
-                callbacks: streamOptions.callbacks,
-                inputStartAt,
-              })
+              callbacks: streamOptions.callbacks,
+              inputStartAt,
+            })
            : OpenAIStream(stream, { ...streamOptions, enableStreaming: false, inputStartAt }),
          {
            headers: options?.headers,
@@ -500,47 +573,23 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
      }

      // Factory-level Responses API routing control (supports instance override)
-      const shouldUseResponses = (() => {
-        const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
-          useResponse?: boolean;
-          useResponseModels?: Array<string | RegExp>;
-        };
-        const flagUseResponse =
-          instanceGenerateObject.useResponse ??
-          (generateObjectConfig ? generateObjectConfig.useResponse : undefined);
-        const flagUseResponseModels =
-          instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;
+      const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
+        useResponse?: boolean;
+        useResponseModels?: Array<string | RegExp>;
+      };
+      const flagUseResponse =
+        instanceGenerateObject.useResponse ??
+        (generateObjectConfig ? generateObjectConfig.useResponse : undefined);
+      const flagUseResponseModels =
+        instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;

-        if (responseApi) {
-          log('using Responses API due to explicit responseApi flag');
-          return true;
-        }
-
-        if (flagUseResponse) {
-          log('using Responses API due to useResponse flag');
-          return true;
-        }
-
-        // Use factory-configured model list if provided
-        if (model && flagUseResponseModels?.length) {
-          const matches = flagUseResponseModels.some((m: string | RegExp) =>
-            typeof m === 'string' ? model.includes(m) : (m as RegExp).test(model),
-          );
-          if (matches) {
-            log('using Responses API: model %s matches useResponseModels config', model);
-            return true;
-          }
-        }
-
-        // Default: use built-in responsesAPIModels
-        if (model && responsesAPIModels.has(model)) {
-          log('using Responses API: model %s in built-in responsesAPIModels', model);
-          return true;
-        }
-
-        log('using Chat Completions API for generateObject');
-        return false;
-      })();
+      const shouldUseResponses = this.shouldUseResponsesAPI({
+        context: 'generateObject',
+        flagUseResponse,
+        flagUseResponseModels,
+        model,
+        responseApi,
+      });

      // Apply schema transformation if configured
      const processedSchema = generateObjectConfig?.handleSchema
@@ -790,11 +839,11 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
        ...res,
        ...(reasoning || reasoning_effort
          ? {
-              reasoning: {
-                ...reasoning,
-                ...(reasoning_effort && { effort: reasoning_effort }),
-              },
-            }
+            reasoning: {
+              ...reasoning,
+              ...(reasoning_effort && { effort: reasoning_effort }),
+            },
+          }
          : {}),
        input,
        ...(max_tokens && { max_output_tokens: max_tokens }),
@@ -885,47 +934,23 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
      );

      // Factory-level Responses API routing control (supports instance override)
-      const shouldUseResponses = (() => {
-        const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
-          useResponse?: boolean;
-          useResponseModels?: Array<string | RegExp>;
-        };
-        const flagUseResponse =
-          instanceGenerateObject.useResponse ??
-          (generateObjectConfig ? generateObjectConfig.useResponse : undefined);
-        const flagUseResponseModels =
-          instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;
+      const instanceGenerateObject = ((this._options as any).generateObject || {}) as {
+        useResponse?: boolean;
+        useResponseModels?: Array<string | RegExp>;
+      };
+      const flagUseResponse =
+        instanceGenerateObject.useResponse ??
+        (generateObjectConfig ? generateObjectConfig.useResponse : undefined);
+      const flagUseResponseModels =
+        instanceGenerateObject.useResponseModels ?? generateObjectConfig?.useResponseModels;

-        if (responseApi) {
-          log('using Responses API due to explicit responseApi flag');
-          return true;
-        }
-
-        if (flagUseResponse) {
-          log('using Responses API due to useResponse flag');
-          return true;
-        }
-
-        // Use factory-configured model list if provided
-        if (model && flagUseResponseModels?.length) {
-          const matches = flagUseResponseModels.some((m: string | RegExp) =>
-            typeof m === 'string' ? model.includes(m) : (m as RegExp).test(model),
-          );
-          if (matches) {
-            log('using Responses API: model %s matches useResponseModels config', model);
-            return true;
-          }
-        }
-
-        // Default: use built-in responsesAPIModels
-        if (model && responsesAPIModels.has(model)) {
-          log('using Responses API: model %s in built-in responsesAPIModels', model);
-          return true;
-        }
-
-        log('using Chat Completions API for tool calling');
-        return false;
-      })();
+      const shouldUseResponses = this.shouldUseResponsesAPI({
+        context: 'tool calling',
+        flagUseResponse,
+        flagUseResponseModels,
+        model,
+        responseApi,
+      });

      if (shouldUseResponses) {
        log('calling responses.create for tool calling');
@@ -5,7 +5,7 @@ import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { responsesAPIModels } from '../../const/models';
 import { ChatStreamPayload } from '../../types/chat';
 import * as modelParseModule from '../../utils/modelParse';
-import { LobeNewAPIAI, NewAPIModelCard, NewAPIPricing, handlePayload, params } from './index';
+import { LobeNewAPIAI, NewAPIModelCard, NewAPIPricing, params } from './index';

 // Mock external dependencies
 vi.mock('../../utils/modelParse');
@@ -701,78 +701,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
    });
  });

-  describe('HandlePayload Function - Direct Testing', () => {
-    beforeEach(() => {
-      // Mock responsesAPIModels as a Set for testing
-      (responsesAPIModels as any).has = vi.fn((model: string) => model === 'o1-pro');
-    });
-
-    it('should add apiMode for models in responsesAPIModels set', () => {
-      (responsesAPIModels as any).has = vi.fn((model: string) => model === 'o1-pro');
-
-      const payload: ChatStreamPayload = {
-        model: 'o1-pro',
-        messages: [{ role: 'user', content: 'test' }],
-        temperature: 0.5,
-      };
-
-      const result = handlePayload(payload);
-      expect(result).toEqual({ ...payload, apiMode: 'responses' });
-    });
-
-    it('should add apiMode for gpt- models', () => {
-      (responsesAPIModels as any).has = vi.fn(() => false);
-
-      const payload: ChatStreamPayload = {
-        model: 'gpt-4o',
-        messages: [{ role: 'user', content: 'test' }],
-        temperature: 0.5,
-      };
-
-      const result = handlePayload(payload);
-      expect(result).toEqual({ ...payload, apiMode: 'responses' });
-    });
-
-    it('should add apiMode for o1 models', () => {
-      (responsesAPIModels as any).has = vi.fn(() => false);
-
-      const payload: ChatStreamPayload = {
-        model: 'o1-mini',
-        messages: [{ role: 'user', content: 'test' }],
-        temperature: 0.5,
-      };
-
-      const result = handlePayload(payload);
-      expect(result).toEqual({ ...payload, apiMode: 'responses' });
-    });
-
-    it('should add apiMode for o3 models', () => {
-      (responsesAPIModels as any).has = vi.fn(() => false);
-
-      const payload: ChatStreamPayload = {
-        model: 'o3-turbo',
-        messages: [{ role: 'user', content: 'test' }],
-        temperature: 0.5,
-      };
-
-      const result = handlePayload(payload);
-      expect(result).toEqual({ ...payload, apiMode: 'responses' });
-    });
-
-    it('should not modify payload for regular models', () => {
-      (responsesAPIModels as any).has = vi.fn(() => false);
-
-      const payload: ChatStreamPayload = {
-        model: 'claude-3-sonnet',
-        messages: [{ role: 'user', content: 'test' }],
-        temperature: 0.5,
-      };
-
-      const result = handlePayload(payload);
-      expect(result).toEqual(payload);
-    });
-  });
-
  describe('Routers Function - Direct Testing', () => {
    it('should generate routers with correct apiTypes', () => {
      const options = { apiKey: 'test', baseURL: 'https://api.newapi.com/v1' };
@@ -823,11 +751,11 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
      expect(routers[3].options.baseURL).toBe('https://custom.com/v1');
    });

-    it('should configure openai router with handlePayload', () => {
+    it('should configure openai router with useResponseModels', () => {
      const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
      const routers = params.routers(options);

-      expect((routers[3].options as any).chatCompletion?.handlePayload).toBe(handlePayload);
+      expect((routers[3].options as any).chatCompletion?.useResponseModels).toBeDefined();
    });

    it('should filter anthropic models for anthropic router', () => {
@@ -4,7 +4,6 @@ import urlJoin from 'url-join';
 import { responsesAPIModels } from '../../const/models';
 import { createRouterRuntime } from '../../core/RouterRuntime';
 import { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
-import { ChatStreamPayload } from '../../types/chat';
 import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';

 export interface NewAPIModelCard {
@@ -26,18 +25,6 @@ export interface NewAPIPricing {
  supported_endpoint_types?: string[];
 }

-export const handlePayload = (payload: ChatStreamPayload) => {
-  // Handle OpenAI responses API mode
-  if (
-    responsesAPIModels.has(payload.model) ||
-    payload.model.includes('gpt-') ||
-    /^o\d/.test(payload.model)
-  ) {
-    return { ...payload, apiMode: 'responses' };
-  }
-  return payload;
-};
-
 export const params = {
  debug: {
    chatCompletion: () => process.env.DEBUG_NEWAPI_CHAT_COMPLETION === '1',
@@ -178,7 +165,7 @@ export const params = {
          ...options,
          baseURL: urlJoin(userBaseURL, '/v1'),
          chatCompletion: {
-            handlePayload,
+            useResponseModels: [...Array.from(responsesAPIModels), /gpt-\d(?!\d)/, /^o\d/],
          },
        },
      },
@@ -1548,8 +1548,9 @@ describe('LobeOpenRouterAI - custom features', () => {
      const models = await params.models();

      const mixedModel = models.find((m) => m.id === 'mixed-free/model');
-      // Input or output is 0, so should be marked as free
-      expect(mixedModel?.displayName).toContain('(free)');
+      // Mixed pricing: only one of input/output is 0. '(free)' is appended only when both are 0,
+      // so assert the displayName equals the cleaned model name without the suffix.
+      expect(mixedModel?.displayName).toBe('Mixed Free Model');
    });

    it('should handle very large pricing values', async () => {
@@ -99,7 +99,7 @@ export const params = {
      const cachedInputPrice = formatPrice(pricing.input_cache_read);
      const writeCacheInputPrice = formatPrice(pricing.input_cache_write);

-      const isFree = (inputPrice === 0 || outputPrice === 0) && !displayName.endsWith('(free)');
+      const isFree = inputPrice === 0 && outputPrice === 0 && !displayName.endsWith('(free)');
      if (isFree) {
        displayName += ' (free)';
      }
@@ -42,6 +42,15 @@ const CreateNewProvider = memo<CreateNewProviderProps>(({ onClose, open }) => {
        name: values.name || values.id,
      };

+      // Enable supportResponsesApi only for custom providers with sdkType 'openai' or 'router' (newapi)
+      const sdkType = values.settings?.sdkType;
+      if (sdkType === 'openai' || sdkType === 'router') {
+        finalValues.settings = {
+          ...finalValues.settings,
+          supportResponsesApi: true,
+        };
+      }
+
      await createNewAiProvider(finalValues);
      setLoading(false);
      navigate(`/settings?active=provider&provider=${values.id}`);
@@ -102,12 +111,16 @@ const CreateNewProvider = memo<CreateNewProviderProps>(({ onClose, open }) => {
    {
      children: (
        <Select
-          optionRender={({ label, value }) => (
-            <Flexbox align={'center'} gap={8} horizontal>
-              <ProviderIcon provider={value as string} size={18} />
-              {label}
-            </Flexbox>
-          )}
+          optionRender={({ label, value }) => {
+            // Map 'router' to 'newapi' for displaying the correct icon
+            const iconProvider = value === 'router' ? 'newapi' : (value as string);
+            return (
+              <Flexbox align={'center'} gap={8} horizontal>
+                <ProviderIcon provider={iconProvider} size={18} />
+                {label}
+              </Flexbox>
+            );
+          }}
          options={CUSTOM_PROVIDER_SDK_OPTIONS}
          placeholder={t('createNewAiProvider.sdkType.placeholder')}
          variant={'filled'}
@@ -9,4 +9,5 @@ export const CUSTOM_PROVIDER_SDK_OPTIONS = [
  { label: 'Qwen', value: 'qwen' },
  { label: 'Volcengine', value: 'volcengine' },
  { label: 'Ollama', value: 'ollama' },
+  { label: 'New API', value: 'router' },
 ] satisfies { label: string; value: AiProviderSDKType }[];
@@ -11,6 +11,7 @@ const AiHubMix: ModelProviderCard = {
  settings: {
    sdkType: 'router',
    showModelFetcher: true,
+    supportResponsesApi: true,
  },
  url: 'https://aihubmix.com?utm_source=lobehub',
 };
@@ -13,6 +13,7 @@ const NewAPI: ModelProviderCard = {
    },
    sdkType: 'router',
    showModelFetcher: true,
+    supportResponsesApi: true,
  },
  url: 'https://github.com/Calcium-Ion/new-api',
 };
@@ -298,7 +298,7 @@ export default {
      },
      helpDoc: '配置教程',
      responsesApi: {
-        desc: '采用 OpenAI 新一代请求格式规范，解锁思维链等进阶特性',
+        desc: '采用 OpenAI 新一代请求格式规范，解锁思维链等进阶特性 (仅 OpenAI 模型支持)',
        title: '使用 Responses API 规范',
      },
      waitingForMore: '更多模型正在 <1>计划接入</1> 中，敬请期待',