🐛 fix(model-runtime): improve DeepSeek structured output (#15680)

2026-06-13 19:20:04 +00:00 · 2026-06-11 16:57:57 +08:00
parent 77e4d0492b
commit 5f4bec347b
17 changed files with 630 additions and 118 deletions
@@ -1032,6 +1032,43 @@ describe('aiChatRouter', () => {
      }
    });

+    it('maps raw provider 4xx errors to BAD_REQUEST instead of internal errors', async () => {
+      const { initModelRuntimeFromDB } = await import('@/server/modules/ModelRuntime');
+
+      // Raw SDK APIError shape: an Error carrying a numeric HTTP `status` but no errorType.
+      // The generateObject path rethrows upstream errors verbatim (e.g. a BYOK gateway
+      // rejecting response_format json_schema), so the router has to map the status itself.
+      const providerError = Object.assign(
+        new Error(
+          '400 Error from provider (DeepSeek): This response_format type is unavailable now',
+        ),
+        { status: 400 },
+      );
+      const mockGenerateObject = vi.fn().mockRejectedValue(providerError);
+
+      vi.mocked(initModelRuntimeFromDB).mockResolvedValue({
+        generateObject: mockGenerateObject,
+      } as any);
+
+      const caller = aiChatRouter.createCaller({ ...mockCtx, serverDB: {} } as any);
+
+      try {
+        await caller.outputJSON({
+          messages: [{ content: 'test', role: 'user' }],
+          model: 'deepseek-v4-flash-free',
+          provider: 'opencodezen',
+        });
+        throw new Error('Expected outputJSON to throw'); // not a TRPCError, so a missing rejection fails the assertions below
+      } catch (error) {
+        expect(error).toBeInstanceOf(TRPCError);
+        expect(error).toMatchObject({
+          cause: providerError, // the upstream error is kept as the cause
+          code: 'BAD_REQUEST',
+          message: providerError.message, // upstream message is surfaced unchanged
+        });
+      }
+    });
+
    it('should handle tools parameter when provided', async () => {
      const { initModelRuntimeFromDB } = await import('@/server/modules/ModelRuntime');

@@ -52,13 +52,29 @@ const getTRPCErrorCodeFromStatus = (status: number): TRPCErrorCode => {
 const createRuntimeTRPCError = (error: unknown): TRPCError | undefined => {
  const errorType = getRuntimeErrorType(error);
  const spec = getErrorCodeSpec(errorType);
-  if (!errorType || !spec) return;
+  if (errorType && spec) {
+    return new TRPCError({
+      cause: error,
+      code: getTRPCErrorCodeFromStatus(spec.httpStatus),
+      message: errorType,
+    });
+  }

-  return new TRPCError({
-    cause: error,
-    code: getTRPCErrorCodeFromStatus(spec.httpStatus),
-    message: errorType,
-  });
+  // Fallback for raw provider SDK errors (OpenAI/Anthropic APIError): they carry a numeric
+  // `status` but no errorType, because the generateObject path rethrows upstream errors verbatim.
+  // Only 4xx statuses are mapped here, keeping the upstream message; without this, tRPC reports
+  // them as INTERNAL_SERVER_ERROR and a user-channel 4xx (e.g. a BYOK provider rejecting the
+  // request) pollutes server 500 monitoring. 5xx or status-less errors fall through to `undefined`.
+  const status = (error as { status?: unknown } | undefined)?.status;
+  if (typeof status === 'number' && status >= 400 && status < 500) {
+    return new TRPCError({
+      cause: error,
+      code: getTRPCErrorCodeFromStatus(status),
+      message: error instanceof Error ? error.message : `Provider error (${status})`,
+    });
+  }
+
+  return undefined;
 };

 const aiChatProcedure = wsCompatProcedure.use(serverDatabase).use(async (opts) => {
@@ -5,6 +5,7 @@ import { toFile } from 'openai';

 import { disableStreamModels, systemToUserModels } from '../../const/models';
 import type { ChatStreamPayload, OpenAIChatMessage, UserMessageContentPart } from '../../types';
+import { isDeepSeekThinkingEligibleModel } from '../../utils/modelParse';
 import { parseDataUri } from '../../utils/uriParser';

 export type ExtendedChatCompletionContentPart = {
@@ -24,16 +25,6 @@ type ConvertMessageContentOptions = {
 const isDeepSeekModel = (model: string | undefined) =>
  typeof model === 'string' && model.toLowerCase().includes('deepseek');

-// DeepSeek thinking-mode eligible models require reasoning_content on every
-// assistant history message — otherwise the API rejects follow-up turns with
-// "The reasoning_content in the thinking mode must be passed back to the API."
-// See https://api-docs.deepseek.com/guides/thinking_mode#tool-calls
-const isDeepSeekThinkingEligibleModel = (model: string | undefined) => {
-  if (!model) return false;
-  const lower = model.toLowerCase();
-  return lower.includes('deepseek-reasoner') || lower.includes('deepseek-v4');
-};
-
 type OpenAICompatibleContentPart =
  | ExtendedChatCompletionContentPart
  | OpenAI.ChatCompletionContentPart
@@ -0,0 +1,146 @@
+// @vitest-environment node
+import type { Mock } from 'vitest';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+import { createOpenAICompatibleRuntime, isResponseFormatUnsupportedError } from './index';
+
+// Stub getModelPricing so it resolves to `undefined`: the real lookup reaches into model-bank's
+// async provider loading, which is irrelevant here (and unavailable in some monorepo test setups).
+vi.mock('../../utils/getModelPricing', () => ({
+  getModelPricing: vi.fn().mockResolvedValue(undefined),
+}));
+
+const TestRuntime = createOpenAICompatibleRuntime({ // no generateObject config is passed, so only the model id or the error retry can trigger tool calling
+  baseURL: 'https://api.test.com/v1',
+  provider: 'testprovider',
+});
+
+const generateObjectPayload = { // shared base payload; the tests below override `model` per scenario
+  messages: [{ content: 'Generate a handoff', role: 'user' as const }],
+  model: 'gpt-anything',
+  schema: {
+    name: 'task_topic_handoff', // asserted below as the forced tool_choice function name
+    schema: {
+      additionalProperties: false,
+      properties: { summary: { type: 'string' }, title: { type: 'string' } },
+      required: ['title', 'summary'],
+      type: 'object' as const,
+    },
+  },
+};
+
+const toolCallResponse = { // minimal chat.completions result: a single function tool call
+  choices: [
+    {
+      message: {
+        tool_calls: [
+          {
+            function: {
+              arguments: '{"summary":"Task completed","title":"Done"}', // JSON text the tests expect back as the parsed result
+              name: 'task_topic_handoff', // same name as the schema in generateObjectPayload
+            },
+            id: 'call_1',
+            type: 'function',
+          },
+        ],
+      },
+    },
+  ],
+};
+
+const responseFormatUnsupportedError = Object.assign( // mimics a raw SDK error: an Error message plus a numeric `status`
+  new Error('400 Error from provider (DeepSeek): This response_format type is unavailable now'),
+  { status: 400 },
+);
+
+describe('isResponseFormatUnsupportedError', () => {
+  it('should match DeepSeek json_schema rejection variants', () => {
+    expect(
+      isResponseFormatUnsupportedError(new Error('This response_format type is unavailable now')), // message on the error itself
+    ).toBe(true);
+    expect(
+      isResponseFormatUnsupportedError({
+        error: { // message nested under `.error`, with no top-level message
+          message:
+            'Failed to deserialize the JSON body into the target type: response_format: response_format.type `json_schema` is unavailable now at line 1 column 1193',
+        },
+      }),
+    ).toBe(true);
+  });
+
+  it('should not match unrelated errors', () => {
+    expect(isResponseFormatUnsupportedError(new Error('Insufficient Balance'))).toBe(false);
+    expect(isResponseFormatUnsupportedError(undefined)).toBe(false); // nullish input must not throw
+    expect(isResponseFormatUnsupportedError('response_format')).toBe(false); // a bare string has no message field, so the keyword alone is not a match
+  });
+});
+
+describe('generateObject tool-calling fallback', () => {
+  afterEach(() => {
+    vi.clearAllMocks(); // resets recorded calls; each test also builds and spies on its own instance
+  });
+
+  const createInstance = () => new TestRuntime({ apiKey: 'test' });
+
+  const getCreateMock = (instance: any) => instance.client.chat.completions.create as Mock; // only meaningful after vi.spyOn has replaced create() on this instance
+
+  it('should proactively use tool calling for DeepSeek-family models', async () => {
+    const instance = createInstance();
+    vi.spyOn((instance as any).client.chat.completions, 'create').mockResolvedValue(
+      toolCallResponse as any,
+    );
+
+    const result = await instance.generateObject({
+      ...generateObjectPayload,
+      model: 'deepseek-v4-flash',
+    });
+
+    const createMock = getCreateMock(instance);
+    expect(createMock).toHaveBeenCalledTimes(1); // a single request: no json_schema attempt is made first
+
+    const requestPayload = createMock.mock.calls[0][0];
+    expect(requestPayload.response_format).toBeUndefined();
+    expect(requestPayload.tool_choice).toEqual({
+      function: { name: 'task_topic_handoff' },
+      type: 'function',
+    });
+    expect(result).toEqual({ summary: 'Task completed', title: 'Done' });
+  });
+
+  it('should retry via tool calling when the provider rejects json_schema', async () => {
+    const instance = createInstance();
+    vi.spyOn((instance as any).client.chat.completions, 'create')
+      .mockRejectedValueOnce(responseFormatUnsupportedError) // first call: the json_schema request is rejected
+      .mockResolvedValueOnce(toolCallResponse as any); // second call: the tool-calling retry succeeds
+
+    // model id gives no hint that the upstream is DeepSeek (e.g. gateway alias)
+    const result = await instance.generateObject({ ...generateObjectPayload, model: 'big-pickle' });
+
+    const createMock = getCreateMock(instance);
+    expect(createMock).toHaveBeenCalledTimes(2);
+
+    const firstPayload = createMock.mock.calls[0][0];
+    expect(firstPayload.response_format).toEqual(expect.objectContaining({ type: 'json_schema' }));
+
+    const retryPayload = createMock.mock.calls[1][0];
+    expect(retryPayload.response_format).toBeUndefined();
+    expect(retryPayload.tool_choice).toEqual({
+      function: { name: 'task_topic_handoff' },
+      type: 'function',
+    });
+    expect(result).toEqual({ summary: 'Task completed', title: 'Done' });
+  });
+
+  it('should not retry on unrelated provider errors', async () => {
+    const instance = createInstance();
+    vi.spyOn((instance as any).client.chat.completions, 'create').mockRejectedValue(
+      Object.assign(new Error('Insufficient Balance'), { status: 402 }),
+    );
+
+    await expect(
+      instance.generateObject({ ...generateObjectPayload, model: 'big-pickle' }),
+    ).rejects.toMatchObject({ message: expect.stringContaining('Insufficient Balance') });
+
+    expect(getCreateMock(instance)).toHaveBeenCalledTimes(1); // the error surfaces without a tool-calling retry
+  });
+});
@@ -40,6 +40,7 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
 import { getModelPricing } from '../../utils/getModelPricing';
 import { handleOpenAIError } from '../../utils/handleOpenAIError';
+import { detectModelProvider } from '../../utils/modelParse';
 import { postProcessModelList } from '../../utils/postProcessModelList';
 import {
  assertContextWithinWindow,
@@ -62,6 +63,22 @@ export type { PollVideoStatusResult };
 export * from './createVideo';
 export * from './nonStreamToStream';

+/**
+ * Detect provider errors that reject `response_format: { type: 'json_schema' }`.
+ *
+ * Matching is purely textual: the strings found at `error.message` and
+ * `error.error.message` are joined with a newline and tested case-insensitively
+ * for `response_format` or `json_schema` followed, anywhere later in the text,
+ * by `unavailable` or `not support…`. The HTTP status is not inspected, so a
+ * caller that only wants to react to 400/422 responses must check it itself.
+ *
+ * Known message variants from the DeepSeek family (official API and gateways proxying it):
+ * - `This response_format type is unavailable now`
+ * - `response_format.type \`json_schema\` is unavailable now`
+ *
+ * @param error - Any thrown value; values without a string message yield `false`.
+ * @returns `true` when the message text looks like a json_schema rejection.
+ */
+export const isResponseFormatUnsupportedError = (error: unknown): boolean => {
+  const err = error as { error?: { message?: unknown }; message?: unknown };
+  const message = [err?.message, err?.error?.message]
+    .filter((value): value is string => typeof value === 'string')
+    .join('\n');
+  if (!message) return false;
+
+  return /(?:response_format|json_schema)[^]*?(?:unavailable|not +support)/i.test(message); // `[^]` also matches newlines, so the match may span both joined messages
+};
+
 // the model contains the following keywords is not a chat model, so we should filter them out
 export const CHAT_MODELS_BLOCK_LIST = [
  'embedding',
@@ -826,6 +843,73 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
      );
    }

+    /**
+     * Simulate schema-based structured output through a forced tool call, for
+     * providers that do not support `response_format: { type: 'json_schema' }`.
+     * Returns the parsed schema object — the same shape as the json_schema path.
+     *
+     * The schema is wrapped in a single function tool (named after the schema,
+     * or `structured_output` when it has no name) and `tool_choice` is pinned
+     * to that function. `generateObjectConfig.handleSchema`, when configured,
+     * is applied to the schema first.
+     *
+     * @param payload - Request payload; `payload.schema` must be defined, since
+     *   it is dereferenced with a non-null assertion here.
+     * @param options - Per-call options; `user`, `headers`, `signal` and
+     *   `onUsage` are read.
+     * @param usagePayload - Forwarded to `convertOpenAIUsage` when the response
+     *   reports usage.
+     * @returns The `JSON.parse`d arguments of the tool call whose function name
+     *   matches the tool (falling back to the first tool call), or `undefined`
+     *   when the response has no function tool call or its arguments are not
+     *   valid JSON.
+     *
+     * NOTE(review): `res.choices[0].message` is read without a guard, so a
+     * response with an empty `choices` array would throw instead of returning
+     * `undefined` — confirm callers are fine with that.
+     */
+    private async generateObjectViaToolCalling(
+      payload: GenerateObjectPayload,
+      options: GenerateObjectOptions | undefined,
+      usagePayload: Parameters<typeof convertOpenAIUsage>[1],
+    ) {
+      const log = debug(`${this.logPrefix}:generateObject`);
+      const { messages, schema, model } = payload;
+
+      // Apply schema transformation if configured
+      const processedSchema = generateObjectConfig?.handleSchema
+        ? { ...schema!, schema: generateObjectConfig.handleSchema(schema!.schema) }
+        : schema!;
+
+      const tool: ChatCompletionTool = {
+        function: {
+          description:
+            processedSchema.description ||
+            'Generate structured output according to the provided schema',
+          name: processedSchema.name || 'structured_output',
+          parameters: processedSchema.schema,
+        },
+        type: 'function',
+      };
+
+      const res = await this.client.chat.completions.create(
+        this.handleGenerateObjectPayload(payload, {
+          ...getGenerateObjectReasoningParams(payload),
+          messages,
+          model,
+          ...this.resolvePromptCacheKeyParams(model, options?.user),
+          tool_choice: { function: { name: tool.function.name }, type: 'function' }, // force the model to call exactly this tool
+          tools: [tool],
+          user: options?.user,
+        }) as OpenAI.ChatCompletionCreateParamsNonStreaming,
+        { headers: options?.headers, signal: options?.signal },
+      );
+
+      if (res.usage) {
+        await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload));
+      }
+
+      // Structural type keeps this compatible across openai SDK majors (v6
+      // widened tool_calls to a function/custom union).
+      const toolCalls = res.choices[0].message.tool_calls as
+        | { function?: { arguments: string; name: string } }[]
+        | undefined;
+      const toolCall =
+        toolCalls?.find((item) => item.function?.name === tool.function.name) ?? toolCalls?.[0]; // prefer the forced tool by name, else take the first call
+
+      if (!toolCall?.function) {
+        log('no tool call found in structured output response');
+        return undefined;
+      }
+
+      try {
+        return JSON.parse(toolCall.function.arguments);
+      } catch {
+        console.error('parse tool call arguments error:', toolCall);
+        return undefined;
+      }
+    }
+
    async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) {
      try {
        const { messages, schema, model, responseApi, tools } = payload;
@@ -848,54 +932,15 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an

        if (!schema) throw new Error('tools or schema is required');

-        // Use tool calling fallback if configured
-        if (generateObjectConfig?.useToolsCalling) {
+        // Use tool calling fallback if configured, or when the model is known to
+        // reject `response_format: { type: 'json_schema' }`. The DeepSeek API only
+        // supports `json_object` and replies `400 This response_format type is
+        // unavailable now` to json_schema requests, so DeepSeek-family models served
+        // through generic OpenAI-compatible providers (aggregator gateways, custom
+        // endpoints) must simulate structured output via forced tool calling.
+        if (generateObjectConfig?.useToolsCalling || detectModelProvider(model) === 'deepseek') {
          log('using tool calling fallback for structured output');
-
-          // Apply schema transformation if configured
-          const processedSchema = generateObjectConfig.handleSchema
-            ? { ...schema, schema: generateObjectConfig.handleSchema(schema.schema) }
-            : schema;
-
-          const tool: ChatCompletionTool = {
-            function: {
-              description:
-                processedSchema.description ||
-                'Generate structured output according to the provided schema',
-              name: processedSchema.name || 'structured_output',
-              parameters: processedSchema.schema,
-            },
-            type: 'function',
-          };
-
-          const res = await this.client.chat.completions.create(
-            this.handleGenerateObjectPayload(payload, {
-              ...getGenerateObjectReasoningParams(payload),
-              messages,
-              model,
-              ...this.resolvePromptCacheKeyParams(model, options?.user),
-              tool_choice: { function: { name: tool.function.name }, type: 'function' },
-              tools: [tool],
-              user: options?.user,
-            }) as OpenAI.ChatCompletionCreateParamsNonStreaming,
-            { headers: options?.headers, signal: options?.signal },
-          );
-
-          if (res.usage) {
-            await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload));
-          }
-
-          const toolCalls = res.choices[0].message.tool_calls!;
-
-          try {
-            return toolCalls.map((item) => ({
-              arguments: JSON.parse(item.function.arguments),
-              name: item.function.name,
-            }));
-          } catch {
-            console.error('parse tool call arguments error:', toolCalls);
-            return undefined;
-          }
+          return await this.generateObjectViaToolCalling(payload, options, usagePayload);
        }

        // Factory-level Responses API routing control (supports instance override)
@@ -955,17 +1000,29 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
        }

        log('calling chat.completions.create for structured output');
-        const res = await this.client.chat.completions.create(
-          this.handleGenerateObjectPayload(payload, {
-            ...getGenerateObjectReasoningParams(payload),
-            messages,
-            model,
-            response_format: { json_schema: processedSchema, type: 'json_schema' },
-            ...this.resolvePromptCacheKeyParams(model, options?.user),
-            user: options?.user,
-          }) as OpenAI.ChatCompletionCreateParamsNonStreaming,
-          { headers: options?.headers, signal: options?.signal },
-        );
+        let res: OpenAI.ChatCompletion;
+        try {
+          res = await this.client.chat.completions.create(
+            this.handleGenerateObjectPayload(payload, {
+              ...getGenerateObjectReasoningParams(payload),
+              messages,
+              model,
+              response_format: { json_schema: processedSchema, type: 'json_schema' },
+              ...this.resolvePromptCacheKeyParams(model, options?.user),
+              user: options?.user,
+            }) as OpenAI.ChatCompletionCreateParamsNonStreaming,
+            { headers: options?.headers, signal: options?.signal },
+          );
+        } catch (error) {
+          // Gateways can serve json_schema-incapable upstreams under arbitrary
+          // model ids (e.g. OpenCode Zen's `big-pickle` proxies DeepSeek), which
+          // the model-id detection above cannot catch. Retry once via forced
+          // tool calling when the upstream rejects the response_format type.
+          if (!isResponseFormatUnsupportedError(error)) throw error;
+
+          log('provider rejected json_schema response_format, retrying via tool calling');
+          return await this.generateObjectViaToolCalling(payload, options, usagePayload);
+        }
        if (res.usage) {
          await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload));
        }
@@ -2,11 +2,21 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

 import * as modelParse from '../../utils/modelParse';
-import { LobeAiHubMixAI } from './index';
+import { LobeAiHubMixAI, params } from './index';

 const mockFetch = vi.fn();
 global.fetch = mockFetch;

+type RouterForTest = { // only the router fields these tests read
+  apiType: string;
+  models?: string[];
+};
+
+const resolveRouters = (model?: string) => // params.routers may be a static array or a factory; both forms are normalised to a router list
+  (typeof params.routers === 'function'
+    ? params.routers({ apiKey: 'test' }, { model }) // the optional model is handed over as the runtime context
+    : params.routers) as RouterForTest[];
+
 describe('LobeAiHubMixAI', () => {
  let instance: InstanceType<typeof LobeAiHubMixAI>;

@@ -32,6 +42,32 @@ describe('LobeAiHubMixAI', () => {
    });
  });

+  describe('routers', () => {
+    it('should route the whole DeepSeek family to the deepseek runtime', () => {
+      // The generic openai fallback sends response_format json_schema for
+      // structured output, which DeepSeek upstreams reject — the deepseek
+      // runtime simulates it via tool calling instead.
+      const routers = resolveRouters(); // no runtime model supplied
+      const deepseekRouter = routers.find((router) => router.apiType === 'deepseek');
+
+      expect(deepseekRouter?.models).toEqual(
+        expect.arrayContaining([
+          'deepseek-chat',
+          'deepseek-reasoner',
+          'deepseek-v4-flash',
+          'deepseek-v4-pro',
+        ]),
+      );
+    });
+
+    it('should match gateway-specific DeepSeek ids missing from the static model list', () => {
+      const routers = resolveRouters('deepseek-v4-flash-free'); // the requested model reaches the routers factory via its runtime context
+      const deepseekRouter = routers.find((router) => router.apiType === 'deepseek');
+
+      expect(deepseekRouter?.models).toContain('deepseek-v4-flash-free');
+    });
+  });
+
  describe('chat', () => {
    it('should support chat method', async () => {
      vi.spyOn(instance as any, 'runWithFallback').mockResolvedValue(new Response());
@@ -5,6 +5,7 @@ import { responsesAPIModels } from '../../const/models';
 import { createRouterRuntime } from '../../core/RouterRuntime';
 import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
 import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
+import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';

 /**
 * Response schema for GET https://aihubmix.com/api/v1/models
@@ -190,7 +191,7 @@ export const params: CreateRouterRuntimeOptions = {
      clearTimeout(timeoutId);
    }
  },
-  routers: [
+  routers: (_options, runtimeContext) => [
    {
      apiType: 'anthropic',
      models: LOBE_DEFAULT_MODEL_LIST.map((m) => m.id).filter(
@@ -214,7 +215,15 @@ export const params: CreateRouterRuntimeOptions = {
    },
    {
      apiType: 'deepseek',
-      models: ['deepseek-chat', 'deepseek-reasoner'],
+      // Match the whole DeepSeek family (deepseek-v4*, deepseek-chat, ...), not
+      // just the two legacy ids — the deepseek runtime simulates structured
+      // output via tool calling, while the generic openai fallback sends
+      // response_format json_schema which DeepSeek upstreams reject.
+      models: resolveProviderRouteModels(
+        'deepseek',
+        LOBE_DEFAULT_MODEL_LIST,
+        runtimeContext?.model,
+      ),
      options: { baseURL: urlJoin(baseURL, '/v1') },
    },
    {
@@ -50,7 +50,10 @@ describe('LobeDeepSeekAnthropicAI generateObject', () => {
    vi.clearAllMocks();
  });

-  it('should use any tool choice by default to keep DeepSeek thinking mode enabled', async () => {
+  it('should use any tool choice by default for server-side thinking', async () => {
+    // DeepSeek's Anthropic-compatible endpoint rejects named tool_choice while
+    // thinking is active, but accepts `any`; V4 models can default to thinking
+    // enabled server-side.
    const result = await instance.generateObject(generateObjectPayload);

    const payload = getLastRequestPayload();
@@ -82,11 +85,36 @@ describe('LobeDeepSeekAnthropicAI generateObject', () => {
    expect(payload.tool_choice).toEqual({ name: 'task_topic_handoff', type: 'tool' });
  });

-  it('should map reasoning_effort to output_config.effort', async () => {
+  it('should use any tool choice when thinking is explicitly enabled', async () => {
+    await instance.generateObject({
+      ...generateObjectPayload,
+      thinking: { budget_tokens: 1024, type: 'enabled' },
+    } as any); // NOTE(review): cast presumably needed because `thinking` is not on the payload type — confirm
+
+    const payload = getLastRequestPayload();
+
+    expect(payload.thinking).toBeUndefined(); // the caller's thinking option must not appear in the outgoing request
+    expect(payload.tool_choice).toEqual({ type: 'any' }); // `any` rather than a named tool choice
+  });
+
+  it('should use any tool choice for thinking-only deepseek-reasoner', async () => {
+    await instance.generateObject({
+      ...generateObjectPayload,
+      model: 'deepseek-reasoner', // no thinking option is passed for this model
+    });
+
+    const payload = getLastRequestPayload();
+
+    expect(payload.thinking).toBeUndefined(); // nothing is injected into the outgoing request
+    expect(payload.tool_choice).toEqual({ type: 'any' }); // `any` rather than a named tool choice
+  });
+
+  it('should map reasoning_effort to output_config.effort when thinking is enabled', async () => {
    await instance.generateObject({
      ...generateObjectPayload,
      reasoning_effort: 'high',
-    });
+      thinking: { budget_tokens: 1024, type: 'enabled' },
+    } as any);

    const payload = getLastRequestPayload();

@@ -131,6 +159,66 @@ describe('DeepSeek OpenAI-compatible generateObject configuration', () => {
    expect(openAIParams.generateObject?.useToolsCalling).toBe(true);
  });

+  it('should disable thinking by default for V4 generateObject requests', () => {
+    // V4 defaults to thinking enabled server-side, which rejects the forced
+    // tool_choice used for structured output.
+    const requestPayload = {
+      messages: [{ role: 'user' as const, content: 'Hello' }],
+      model: 'deepseek-v4-flash',
+      reasoning_effort: 'high' as const, // expected to be stripped from the result
+    };
+
+    const result = openAIParams.generateObject!.handlePayload!(
+      { // caller payload without any `thinking` option
+        messages: [{ role: 'user', content: 'Hello' }],
+        model: 'deepseek-v4-flash',
+      },
+      requestPayload,
+      {},
+    );
+
+    expect(result).toEqual(expect.objectContaining({ thinking: { type: 'disabled' } }));
+    expect(result).not.toHaveProperty('reasoning_effort');
+  });
+
+  it('should disable thinking for provider-prefixed V4 generateObject requests', () => {
+    const requestPayload = {
+      messages: [{ role: 'user' as const, content: 'Hello' }],
+      model: 'Deepseek/deepseek-v4-pro',
+      reasoning_effort: 'high' as const, // expected to be stripped from the result
+    };
+
+    const result = openAIParams.generateObject!.handlePayload!(
+      {
+        messages: [{ role: 'user', content: 'Hello' }],
+        model: 'Deepseek/deepseek-v4-pro', // a provider prefix with different casing must still be treated as V4
+      },
+      requestPayload,
+      {},
+    );
+
+    expect(result).toEqual(expect.objectContaining({ thinking: { type: 'disabled' } }));
+    expect(result).not.toHaveProperty('reasoning_effort');
+  });
+
+  it('should not inject thinking parameter for thinking-only deepseek-reasoner', () => {
+    const requestPayload = {
+      messages: [{ role: 'user' as const, content: 'Hello' }],
+      model: 'deepseek-reasoner',
+    };
+
+    const result = openAIParams.generateObject!.handlePayload!(
+      { // caller payload without any `thinking` option
+        messages: [{ role: 'user', content: 'Hello' }],
+        model: 'deepseek-reasoner',
+      },
+      requestPayload,
+      {},
+    );
+
+    expect(result).not.toHaveProperty('thinking'); // neither an enabled nor a disabled flag is added
+  });
+
  it('should forward disabled thinking for generateObject DeepSeek requests', () => {
    const requestPayload = {
      messages: [{ role: 'user' as const, content: 'Hello' }],
@@ -5,9 +5,10 @@ import type OpenAI from 'openai';
 import { buildDefaultAnthropicPayload } from '../../core/anthropicCompatibleFactory';
 import type { ChatStreamPayload } from '../../types';
 import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
+import { isDeepSeekV4FamilyModel } from '../../utils/modelParse';
 import { sanitizeDeepSeekJsonPayload } from './sanitizePayload';

-const isDeepSeekV4Model = (model: string) => model.startsWith('deepseek-v4');
+export const isDeepSeekV4Model = (model: string | undefined) => isDeepSeekV4FamilyModel(model);
 const isEmptyContent = (content: unknown) =>
  content === '' || content === null || content === undefined;
 const hasReasoningContent = (reasoning: any) => typeof reasoning?.content === 'string';
@@ -5,6 +5,7 @@ import type { AnthropicGenerateObjectConfig } from '../../core/anthropicCompatib
 import { createAnthropicGenerateObject } from '../../core/anthropicCompatibleFactory/generateObject';
 import type { OpenAICompatibleFactoryOptions } from '../../core/openaiCompatibleFactory';
 import type { ChatStreamPayload, GenerateObjectOptions, GenerateObjectPayload } from '../../types';
+import { isDeepSeekV4Model } from './chatPayload';
 import { sanitizeDeepSeekJsonPayload } from './sanitizePayload';

 type GenerateObjectHandlePayload = NonNullable<
@@ -21,10 +22,11 @@ export const createDeepSeekAnthropicGenerateObject = async (
  options?: GenerateObjectOptions,
  pricing?: Pricing,
 ) => {
-  // DeepSeek V4 thinking mode rejects Anthropic's named schema tool choice,
-  // e.g. `{ type: "tool", name: "task_topic_handoff" }`, but accepts
-  // `{ type: "any" }`. If thinking is already disabled, keep the stricter
-  // named tool choice; otherwise use `any` without changing the thinking mode.
+  // DeepSeek's Anthropic-compatible endpoint rejects named schema tool_choice
+  // while thinking is active, but accepts `{ type: "any" }`. V4 models may
+  // default to thinking enabled server-side, so keep `any` unless the caller
+  // explicitly disabled thinking; with a single schema tool it still forces
+  // structured output.
  const thinkingDisabled = isGenerateObjectThinkingDisabled(payload);
  const requestParams: AnthropicGenerateObjectConfig['requestParams'] = {
    ...(!thinkingDisabled && payload.reasoning_effort
@@ -59,14 +61,29 @@ export const buildDeepSeekGenerateObjectPayload: GenerateObjectHandlePayload = (
  requestPayload,
 ) => {
  const { thinking } = payload;
-  const thinkingExplicitlyDisabled = thinking?.type === 'disabled';
+  const thinkingEnabled = thinking?.type === 'enabled';
  const payloadWithoutReasoningEffort = { ...requestPayload };
  delete (payloadWithoutReasoningEffort as { reasoning_effort?: unknown }).reasoning_effort;

+  // V4 models default to thinking enabled server-side, and thinking mode
+  // rejects the forced tool_choice used for structured output (mirrors the
+  // Anthropic-compatible endpoint behavior). Explicitly disable thinking
+  // unless the caller turned it on. deepseek-reasoner is thinking-only, so
+  // leave its thinking parameter untouched.
+  if (isDeepSeekV4Model(payload.model)) {
+    return sanitizeDeepSeekJsonPayload(
+      thinkingEnabled
+        ? { ...requestPayload, thinking: { type: 'enabled' } }
+        : { ...payloadWithoutReasoningEffort, thinking: { type: 'disabled' } },
+    );
+  }
+
+  const thinkingExplicitlyDisabled = thinking?.type === 'disabled';
+
  return sanitizeDeepSeekJsonPayload({
    ...(thinkingExplicitlyDisabled ? payloadWithoutReasoningEffort : requestPayload),
-    ...(thinking?.type === 'enabled' || thinkingExplicitlyDisabled
-      ? { thinking: { type: thinking.type } }
+    ...(thinkingEnabled || thinkingExplicitlyDisabled
+      ? { thinking: { type: thinking!.type } }
      : {}),
  });
 };
@@ -3,7 +3,6 @@ import { ModelProvider } from 'model-bank';
 import type { Mock } from 'vitest';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

-import { responsesAPIModels } from '../../const/models';
 import type { ChatStreamPayload } from '../../types/chat';
 import * as modelParseModule from '../../utils/modelParse';
 import type { NewAPIModelCard, NewAPIPricing } from './index';
@@ -25,7 +24,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
  let mockFetch: Mock;
  let mockProcessMultiProviderModelList: Mock;
  let mockDetectModelProvider: Mock;
-  let mockResponsesAPIModels: typeof responsesAPIModels;

  beforeEach(() => {
    // Setup fetch mock
@@ -35,7 +33,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
    // Setup utility function mocks
    mockProcessMultiProviderModelList = vi.mocked(modelParseModule.processMultiProviderModelList);
    mockDetectModelProvider = vi.mocked(modelParseModule.detectModelProvider);
-    mockResponsesAPIModels = responsesAPIModels;

    // Clear environment variables
    delete process.env.DEBUG_NEWAPI_CHAT_COMPLETION;
@@ -708,11 +705,20 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
      const options = { apiKey: 'test', baseURL: 'https://api.newapi.com/v1' };
      const routers = params.routers(options);

-      expect(routers).toHaveLength(4);
+      expect(routers).toHaveLength(5);
      expect(routers[0].apiType).toBe('anthropic');
      expect(routers[1].apiType).toBe('google');
      expect(routers[2].apiType).toBe('xai');
-      expect(routers[3].apiType).toBe('openai');
+      expect(routers[3].apiType).toBe('deepseek');
+      expect(routers[4].apiType).toBe('openai');
+    });
+
+    it('should configure deepseek router with /v1 path and openai sdkType', () => {
+      const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' }; // baseURL already ends in /v1
+      const routers = params.routers(options);
+
+      expect(routers[3].options.baseURL).toBe('https://custom.com/v1'); // index 3 is the deepseek router in the expected order
+      expect((routers[3].options as any).sdkType).toBe('openai'); // `as any`: presumably sdkType is not on the declared options type — confirm
    });

    it('should process baseURL by removing version paths', () => {
@@ -750,14 +756,14 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
      const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
      const routers = params.routers(options);

-      expect(routers[3].options.baseURL).toBe('https://custom.com/v1');
+      expect(routers[4].options.baseURL).toBe('https://custom.com/v1');
    });

    it('should configure openai router with useResponseModels', () => {
      const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
      const routers = params.routers(options);

-      expect((routers[3].options as any).chatCompletion?.useResponseModels).toBeDefined();
+      expect((routers[4].options as any).chatCompletion?.useResponseModels).toBeDefined();
    });

    it('should filter anthropic models for anthropic router', () => {
@@ -803,9 +809,9 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
      const options = { apiKey: 'test' }; // No baseURL
      const routers = params.routers(options);

-      expect(routers).toHaveLength(4);
+      expect(routers).toHaveLength(5);
      expect(routers[0].options.baseURL).toBe('');
-      expect(routers[3].options.baseURL).toBe('v1'); // urlJoin('', '/v1') returns 'v1'
+      expect(routers[4].options.baseURL).toBe('v1'); // urlJoin('', '/v1') returns 'v1'
    });
  });

@@ -5,6 +5,7 @@ import { responsesAPIModels } from '../../const/models';
 import { createRouterRuntime } from '../../core/RouterRuntime';
 import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
 import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
+import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';

 export interface NewAPIModelCard {
  created: number;
@@ -158,7 +159,7 @@ export const params = {

    return processMultiProviderModelList([...enrichedModelList, ...additionalModels], 'newapi');
  },
-  routers: (options) => {
+  routers: (options, runtimeContext?: { model?: string }) => {
    const userBaseURL = options.baseURL?.replace(/\/v\d+[a-z]*\/?$/, '') || '';

    return [
@@ -192,6 +193,19 @@ export const params = {
          baseURL: urlJoin(userBaseURL, '/v1'),
        },
      },
+      {
+        apiType: 'deepseek',
+        models: resolveProviderRouteModels(
+          'deepseek',
+          LOBE_DEFAULT_MODEL_LIST,
+          runtimeContext?.model,
+        ),
+        options: {
+          ...options,
+          baseURL: urlJoin(userBaseURL, '/v1'),
+          sdkType: 'openai',
+        },
+      },
      {
        apiType: 'openai',
        options: {
@@ -1,4 +1,4 @@
-import { ModelProvider } from 'model-bank';
+import { LOBE_DEFAULT_MODEL_LIST, ModelProvider } from 'model-bank';
 import type OpenAI from 'openai';

 import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory';
@@ -6,6 +6,7 @@ import { createRouterRuntime } from '../../core/RouterRuntime';
 import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
 import type { ChatStreamPayload } from '../../types';
 import { processMultiProviderModelList } from '../../utils/modelParse';
+import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';

 // ============================================================================
 // Constants
@@ -19,24 +20,24 @@ const MODELS_DEV_URL = 'https://models.dev/api.json';
 // ============================================================================

 interface ModelsDevModel {
-  id: string;
-  name?: string;
-  family?: string;
-  provider?: { npm?: string };
-  release_date?: string;
+  [key: string]: any;
  attachment?: boolean;
-  reasoning?: boolean;
-  tool_call?: boolean;
-  structured_output?: boolean;
-  modalities?: { input?: string[]; output?: string[] };
-  limit?: { context?: number; output?: number };
  cost?: {
    input?: number;
    output?: number;
    cache_read?: number;
    cache_write?: number;
  };
-  [key: string]: any;
+  family?: string;
+  id: string;
+  limit?: { context?: number; output?: number };
+  modalities?: { input?: string[]; output?: string[] };
+  name?: string;
+  provider?: { npm?: string };
+  reasoning?: boolean;
+  release_date?: string;
+  structured_output?: boolean;
+  tool_call?: boolean;
 }

 interface ModelsDevData {
@@ -272,14 +273,20 @@ export const sanitizeJsonSchema = (schema: any): any => {
        nested[k] = sanitizeJsonSchema(v);
      }
      result[key] = nested;
-    } else if (
-      ['allOf', 'anyOf', 'oneOf', 'prefixItems'].includes(key) &&
-      Array.isArray(value)
-    ) {
+    } else if (['allOf', 'anyOf', 'oneOf', 'prefixItems'].includes(key) && Array.isArray(value)) {
      result[key] = value.map(sanitizeJsonSchema);
    } else if (
-      ['items', 'additionalProperties', 'not', 'contains', 'if', 'then', 'else',
-       'unevaluatedItems', 'unevaluatedProperties'].includes(key)
+      [
+        'items',
+        'additionalProperties',
+        'not',
+        'contains',
+        'if',
+        'then',
+        'else',
+        'unevaluatedItems',
+        'unevaluatedProperties',
+      ].includes(key)
    ) {
      result[key] = sanitizeJsonSchema(value);
    } else {
@@ -431,7 +438,7 @@ export const params = {
      );
    }
  },
-  routers: async (options) => {
+  routers: async (options, runtimeContext?: { model?: string }) => {
    const baseURL = options.baseURL || GO_BASE_URL;

    const anthropicModels = await getAnthropicModels();
@@ -443,6 +450,16 @@ export const params = {
        models: anthropicModels,
        options: { ...options, baseURL: stripV1(baseURL) },
      },
+      // DeepSeek models via the deepseek runtime (OpenAI-compatible endpoint)
+      {
+        apiType: 'deepseek',
+        models: resolveProviderRouteModels(
+          'deepseek',
+          LOBE_DEFAULT_MODEL_LIST,
+          runtimeContext?.model,
+        ),
+        options: { ...options, baseURL, sdkType: 'openai' },
+      },
      // OpenAI-compatible fallback for all other models
      {
        apiType: 'openai',
@@ -0,0 +1,38 @@
+// @vitest-environment node
+import { describe, expect, it } from 'vitest';
+
+import { params } from './index';
+
+const resolveRouters = (model?: string) => // test helper: router list for an optional requested model
+  params.routers({ apiKey: 'test' }, { model }) as Array<{ // dummy apiKey; `model` goes in as the runtime context
+    apiType: string;
+    models?: string[]; // optional: the last test expects it undefined on the final openai router
+    options: { baseURL?: string; sdkType?: string };
+  }>; // cast to a minimal router shape for the assertions below
+
+describe('OpenCodeZen routers', () => { // covers the deepseek route and its position relative to the openai router
+  it('should route DeepSeek-family models to the deepseek runtime', () => {
+    // The generic openai fallback sends response_format json_schema for
+    // structured output, which DeepSeek upstreams reject — the deepseek
+    // runtime simulates it via tool calling instead.
+    const routers = resolveRouters('deepseek-v4-flash');
+    const deepseekRouter = routers.find((router) => router.apiType === 'deepseek'); // locate the route by apiType, not by index
+
+    expect(deepseekRouter?.models).toContain('deepseek-v4-flash');
+    expect(deepseekRouter?.options.sdkType).toBe('openai'); // the deepseek route is configured with sdkType 'openai'
+  });
+
+  it('should match gateway-specific DeepSeek ids missing from the static model list', () => {
+    const routers = resolveRouters('deepseek-v4-flash-free'); // "-free" suffixed id; per the test title, absent from the static list
+    const deepseekRouter = routers.find((router) => router.apiType === 'deepseek');
+
+    expect(deepseekRouter?.models).toContain('deepseek-v4-flash-free'); // the requested id itself must appear in the route's models
+  });
+
+  it('should keep the openai catch-all as the last router', () => {
+    const routers = resolveRouters('some-unknown-model'); // id that is not expected to match a provider-specific route
+
+    expect(routers.at(-1)?.apiType).toBe('openai');
+    expect(routers.at(-1)?.models).toBeUndefined(); // the final openai router declares no model list
+  });
+});
@@ -4,6 +4,7 @@ import { responsesAPIModels } from '../../const/models';
 import { createRouterRuntime } from '../../core/RouterRuntime';
 import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
 import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
+import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';

 const ZEN_BASE_URL = 'https://opencode.ai/zen/v1';

@@ -30,7 +31,7 @@ export const params = {
    const modelList = modelsPage.data || [];
    return processMultiProviderModelList(modelList, 'opencodezen');
  },
-  routers: (options) => {
+  routers: (options, runtimeContext?: { model?: string }) => {
    const baseURL = options.baseURL || ZEN_BASE_URL;
    return [
      // Anthropic router for Claude models
@@ -54,6 +55,20 @@ export const params = {
          },
        },
      },
+      // DeepSeek models via the deepseek runtime (OpenAI-compatible endpoint)
+      {
+        apiType: 'deepseek',
+        models: resolveProviderRouteModels(
+          'deepseek',
+          LOBE_DEFAULT_MODEL_LIST,
+          runtimeContext?.model,
+        ),
+        options: {
+          ...options,
+          baseURL,
+          sdkType: 'openai',
+        },
+      },
      // OpenAI-compatible fallback for all other models (Gemini, GLM, Kimi, MiniMax, Qwen, etc.)
      {
        apiType: 'openai',
@@ -0,0 +1,17 @@
+import { detectModelProvider } from '../../utils/modelParse';
+
+type ProviderKey = ReturnType<typeof detectModelProvider>; // derived from detectModelProvider's return type so the two cannot drift apart
+
+export const resolveProviderRouteModels = ( // builds the list of model ids for a provider-specific route
+  provider: ProviderKey, // provider that returned ids must be detected as
+  modelList: ReadonlyArray<{ id: string }>, // catalog to scan; only `id` is read
+  requestedModel?: string, // optional requested model id (callers in this change pass runtimeContext?.model)
+): string[] => {
+  const providerModels = modelList
+    .map((model) => model.id)
+    .filter((id) => detectModelProvider(id) === provider); // keep ids that detectModelProvider attributes to `provider`
+
+  if (!requestedModel || detectModelProvider(requestedModel) !== provider) return providerModels; // no requested id, or it is detected as another provider
+
+  return [...new Set([...providerModels, requestedModel])]; // append the requested id; Set drops it if already listed
+};
@@ -182,6 +182,13 @@ export const MODEL_OWNER_DETECTION_CONFIG = {
  zhipu: ['glm'],
 } as const;

+export const isDeepSeekV4FamilyModel = (model: string | undefined): boolean => // true when the id contains 'deepseek-v4', ignoring case
+  typeof model === 'string' && model.toLowerCase().includes('deepseek-v4'); // the typeof guard makes undefined yield false
+
+export const isDeepSeekThinkingEligibleModel = (model: string | undefined): boolean => // true for 'deepseek-reasoner' ids and V4-family ids
+  typeof model === 'string' && // undefined yields false
+  (model.toLowerCase().includes('deepseek-reasoner') || isDeepSeekV4FamilyModel(model)); // substring match is case-insensitive in both branches
+
 // Image model keyword configuration
 export const IMAGE_MODEL_KEYWORDS = [
  'dall-e',