🐛 fix(model-runtime): improve DeepSeek structured output (#15680)

This commit is contained in:
YuTengjing
2026-06-11 16:57:57 +08:00
committed by GitHub
parent 77e4d0492b
commit 5f4bec347b
17 changed files with 630 additions and 118 deletions
@@ -1032,6 +1032,43 @@ describe('aiChatRouter', () => {
}
});
it('maps raw provider 4xx errors to BAD_REQUEST instead of internal errors', async () => {
const { initModelRuntimeFromDB } = await import('@/server/modules/ModelRuntime');
// Raw SDK APIError shape: carries an HTTP status but no errorType — the
// generateObject path rethrows upstream errors verbatim (e.g. a BYOK
// gateway rejecting response_format json_schema).
const providerError = Object.assign(
new Error(
'400 Error from provider (DeepSeek): This response_format type is unavailable now',
),
{ status: 400 },
);
const mockGenerateObject = vi.fn().mockRejectedValue(providerError);
vi.mocked(initModelRuntimeFromDB).mockResolvedValue({
generateObject: mockGenerateObject,
} as any);
const caller = aiChatRouter.createCaller({ ...mockCtx, serverDB: {} } as any);
try {
await caller.outputJSON({
messages: [{ content: 'test', role: 'user' }],
model: 'deepseek-v4-flash-free',
provider: 'opencodezen',
});
throw new Error('Expected outputJSON to throw');
} catch (error) {
expect(error).toBeInstanceOf(TRPCError);
expect(error).toMatchObject({
cause: providerError,
code: 'BAD_REQUEST',
message: providerError.message,
});
}
});
it('should handle tools parameter when provided', async () => {
const { initModelRuntimeFromDB } = await import('@/server/modules/ModelRuntime');
+22 -6
View File
@@ -52,13 +52,29 @@ const getTRPCErrorCodeFromStatus = (status: number): TRPCErrorCode => {
const createRuntimeTRPCError = (error: unknown): TRPCError | undefined => {
const errorType = getRuntimeErrorType(error);
const spec = getErrorCodeSpec(errorType);
if (!errorType || !spec) return;
if (errorType && spec) {
return new TRPCError({
cause: error,
code: getTRPCErrorCodeFromStatus(spec.httpStatus),
message: errorType,
});
}
return new TRPCError({
cause: error,
code: getTRPCErrorCodeFromStatus(spec.httpStatus),
message: errorType,
});
// Raw provider SDK errors (OpenAI/Anthropic APIError) carry an HTTP status
// but no errorType — the generateObject path rethrows upstream errors
// verbatim. Without this mapping, tRPC classifies them as
// INTERNAL_SERVER_ERROR, so a user-channel 4xx (e.g. a BYOK provider
// rejecting the request) pollutes server 500 monitoring.
const status = (error as { status?: unknown } | undefined)?.status;
if (typeof status === 'number' && status >= 400 && status < 500) {
return new TRPCError({
cause: error,
code: getTRPCErrorCodeFromStatus(status),
message: error instanceof Error ? error.message : `Provider error (${status})`,
});
}
return undefined;
};
const aiChatProcedure = wsCompatProcedure.use(serverDatabase).use(async (opts) => {
@@ -5,6 +5,7 @@ import { toFile } from 'openai';
import { disableStreamModels, systemToUserModels } from '../../const/models';
import type { ChatStreamPayload, OpenAIChatMessage, UserMessageContentPart } from '../../types';
import { isDeepSeekThinkingEligibleModel } from '../../utils/modelParse';
import { parseDataUri } from '../../utils/uriParser';
export type ExtendedChatCompletionContentPart = {
@@ -24,16 +25,6 @@ type ConvertMessageContentOptions = {
const isDeepSeekModel = (model: string | undefined) =>
typeof model === 'string' && model.toLowerCase().includes('deepseek');
// DeepSeek thinking-mode eligible models require reasoning_content on every
// assistant history message — otherwise the API rejects follow-up turns with
// "The reasoning_content in the thinking mode must be passed back to the API."
// See https://api-docs.deepseek.com/guides/thinking_mode#tool-calls
const isDeepSeekThinkingEligibleModel = (model: string | undefined) => {
if (!model) return false;
const lower = model.toLowerCase();
return lower.includes('deepseek-reasoner') || lower.includes('deepseek-v4');
};
type OpenAICompatibleContentPart =
| ExtendedChatCompletionContentPart
| OpenAI.ChatCompletionContentPart
@@ -0,0 +1,146 @@
// @vitest-environment node
import type { Mock } from 'vitest';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { createOpenAICompatibleRuntime, isResponseFormatUnsupportedError } from './index';
// Pricing lookup reaches into model-bank's async provider loading, which is
// irrelevant to these tests (and unavailable in some monorepo test setups).
vi.mock('../../utils/getModelPricing', () => ({
getModelPricing: vi.fn().mockResolvedValue(undefined),
}));
const TestRuntime = createOpenAICompatibleRuntime({
baseURL: 'https://api.test.com/v1',
provider: 'testprovider',
});
const generateObjectPayload = {
messages: [{ content: 'Generate a handoff', role: 'user' as const }],
model: 'gpt-anything',
schema: {
name: 'task_topic_handoff',
schema: {
additionalProperties: false,
properties: { summary: { type: 'string' }, title: { type: 'string' } },
required: ['title', 'summary'],
type: 'object' as const,
},
},
};
const toolCallResponse = {
choices: [
{
message: {
tool_calls: [
{
function: {
arguments: '{"summary":"Task completed","title":"Done"}',
name: 'task_topic_handoff',
},
id: 'call_1',
type: 'function',
},
],
},
},
],
};
const responseFormatUnsupportedError = Object.assign(
new Error('400 Error from provider (DeepSeek): This response_format type is unavailable now'),
{ status: 400 },
);
describe('isResponseFormatUnsupportedError', () => {
it('should match DeepSeek json_schema rejection variants', () => {
expect(
isResponseFormatUnsupportedError(new Error('This response_format type is unavailable now')),
).toBe(true);
expect(
isResponseFormatUnsupportedError({
error: {
message:
'Failed to deserialize the JSON body into the target type: response_format: response_format.type `json_schema` is unavailable now at line 1 column 1193',
},
}),
).toBe(true);
});
it('should not match unrelated errors', () => {
expect(isResponseFormatUnsupportedError(new Error('Insufficient Balance'))).toBe(false);
expect(isResponseFormatUnsupportedError(undefined)).toBe(false);
expect(isResponseFormatUnsupportedError('response_format')).toBe(false);
});
});
describe('generateObject tool-calling fallback', () => {
afterEach(() => {
vi.clearAllMocks();
});
const createInstance = () => new TestRuntime({ apiKey: 'test' });
const getCreateMock = (instance: any) => instance.client.chat.completions.create as Mock;
it('should proactively use tool calling for DeepSeek-family models', async () => {
const instance = createInstance();
vi.spyOn((instance as any).client.chat.completions, 'create').mockResolvedValue(
toolCallResponse as any,
);
const result = await instance.generateObject({
...generateObjectPayload,
model: 'deepseek-v4-flash',
});
const createMock = getCreateMock(instance);
expect(createMock).toHaveBeenCalledTimes(1);
const requestPayload = createMock.mock.calls[0][0];
expect(requestPayload.response_format).toBeUndefined();
expect(requestPayload.tool_choice).toEqual({
function: { name: 'task_topic_handoff' },
type: 'function',
});
expect(result).toEqual({ summary: 'Task completed', title: 'Done' });
});
it('should retry via tool calling when the provider rejects json_schema', async () => {
const instance = createInstance();
vi.spyOn((instance as any).client.chat.completions, 'create')
.mockRejectedValueOnce(responseFormatUnsupportedError)
.mockResolvedValueOnce(toolCallResponse as any);
// model id gives no hint that the upstream is DeepSeek (e.g. gateway alias)
const result = await instance.generateObject({ ...generateObjectPayload, model: 'big-pickle' });
const createMock = getCreateMock(instance);
expect(createMock).toHaveBeenCalledTimes(2);
const firstPayload = createMock.mock.calls[0][0];
expect(firstPayload.response_format).toEqual(expect.objectContaining({ type: 'json_schema' }));
const retryPayload = createMock.mock.calls[1][0];
expect(retryPayload.response_format).toBeUndefined();
expect(retryPayload.tool_choice).toEqual({
function: { name: 'task_topic_handoff' },
type: 'function',
});
expect(result).toEqual({ summary: 'Task completed', title: 'Done' });
});
it('should not retry on unrelated provider errors', async () => {
const instance = createInstance();
vi.spyOn((instance as any).client.chat.completions, 'create').mockRejectedValue(
Object.assign(new Error('Insufficient Balance'), { status: 402 }),
);
await expect(
instance.generateObject({ ...generateObjectPayload, model: 'big-pickle' }),
).rejects.toMatchObject({ message: expect.stringContaining('Insufficient Balance') });
expect(getCreateMock(instance)).toHaveBeenCalledTimes(1);
});
});
@@ -40,6 +40,7 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl';
import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
import { getModelPricing } from '../../utils/getModelPricing';
import { handleOpenAIError } from '../../utils/handleOpenAIError';
import { detectModelProvider } from '../../utils/modelParse';
import { postProcessModelList } from '../../utils/postProcessModelList';
import {
assertContextWithinWindow,
@@ -62,6 +63,22 @@ export type { PollVideoStatusResult };
export * from './createVideo';
export * from './nonStreamToStream';
/**
 * Tell whether a provider error is a rejection of
 * `response_format: { type: 'json_schema' }`.
 *
 * Both the top-level `message` and the nested `error.message` are inspected
 * (non-string values are ignored), and the decision is made purely from the
 * message text. Known variants from the DeepSeek family (official API and
 * gateways proxying it):
 * - `This response_format type is unavailable now`
 * - `response_format.type \`json_schema\` is unavailable now`
 *
 * @param error - Any thrown value; non-object values simply yield `false`.
 * @returns `true` when the message mentions `response_format` / `json_schema`
 * followed later by "unavailable" or "not support…", otherwise `false`.
 */
export const isResponseFormatUnsupportedError = (error: unknown): boolean => {
  const candidate = error as { error?: { message?: unknown }; message?: unknown };

  // Collect every textual message the error exposes, outer one first.
  const parts: string[] = [];
  for (const value of [candidate?.message, candidate?.error?.message]) {
    if (typeof value === 'string') parts.push(value);
  }

  const combined = parts.join('\n');
  if (combined === '') return false;

  return /(?:response_format|json_schema)[^]*?(?:unavailable|not +support)/i.test(combined);
};
// Models whose id contains any of the following keywords are not chat models, so they are filtered out
export const CHAT_MODELS_BLOCK_LIST = [
'embedding',
@@ -826,6 +843,73 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
);
}
/**
 * Simulate schema-based structured output through a forced tool call, for
 * providers that do not support `response_format: { type: 'json_schema' }`.
 *
 * The schema is wrapped in a single function tool and `tool_choice` pins the
 * request to that tool, so the tool-call arguments carry the structured object.
 *
 * @param payload - generateObject payload. `schema` is read through non-null
 * assertions, so callers must have verified it is present before delegating.
 * @param options - Optional per-request settings (headers, abort signal, user,
 * usage callback).
 * @param usagePayload - Forwarded to `convertOpenAIUsage` when the response
 * reports usage.
 * @returns The parsed schema object — the same shape as the json_schema path —
 * or `undefined` when the response carries no usable tool call or its
 * arguments are not valid JSON.
 */
private async generateObjectViaToolCalling(
  payload: GenerateObjectPayload,
  options: GenerateObjectOptions | undefined,
  usagePayload: Parameters<typeof convertOpenAIUsage>[1],
) {
  const log = debug(`${this.logPrefix}:generateObject`);
  const { messages, schema, model } = payload;

  // Apply schema transformation if configured
  const processedSchema = generateObjectConfig?.handleSchema
    ? { ...schema!, schema: generateObjectConfig.handleSchema(schema!.schema) }
    : schema!;

  const tool: ChatCompletionTool = {
    function: {
      description:
        processedSchema.description ||
        'Generate structured output according to the provided schema',
      name: processedSchema.name || 'structured_output',
      parameters: processedSchema.schema,
    },
    type: 'function',
  };

  const res = await this.client.chat.completions.create(
    this.handleGenerateObjectPayload(payload, {
      ...getGenerateObjectReasoningParams(payload),
      messages,
      model,
      ...this.resolvePromptCacheKeyParams(model, options?.user),
      tool_choice: { function: { name: tool.function.name }, type: 'function' },
      tools: [tool],
      user: options?.user,
    }) as OpenAI.ChatCompletionCreateParamsNonStreaming,
    { headers: options?.headers, signal: options?.signal },
  );

  if (res.usage) {
    await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload));
  }

  // Structural type keeps this compatible across openai SDK majors (v6
  // widened tool_calls to a function/custom union).
  // Optional chaining: a response with an empty or missing `choices` array
  // must take the "no tool call" path below rather than throw a TypeError.
  const toolCalls = res.choices?.[0]?.message?.tool_calls as
    | { function?: { arguments: string; name: string } }[]
    | undefined;

  // Prefer the call that targets our schema tool; otherwise fall back to the
  // first tool call returned.
  const toolCall =
    toolCalls?.find((item) => item.function?.name === tool.function.name) ?? toolCalls?.[0];

  if (!toolCall?.function) {
    log('no tool call found in structured output response');
    return undefined;
  }

  try {
    return JSON.parse(toolCall.function.arguments);
  } catch {
    console.error('parse tool call arguments error:', toolCall);
    return undefined;
  }
}
async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) {
try {
const { messages, schema, model, responseApi, tools } = payload;
@@ -848,54 +932,15 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
if (!schema) throw new Error('tools or schema is required');
// Use tool calling fallback if configured
if (generateObjectConfig?.useToolsCalling) {
// Use tool calling fallback if configured, or when the model is known to
// reject `response_format: { type: 'json_schema' }`. The DeepSeek API only
// supports `json_object` and replies `400 This response_format type is
// unavailable now` to json_schema requests, so DeepSeek-family models served
// through generic OpenAI-compatible providers (aggregator gateways, custom
// endpoints) must simulate structured output via forced tool calling.
if (generateObjectConfig?.useToolsCalling || detectModelProvider(model) === 'deepseek') {
log('using tool calling fallback for structured output');
// Apply schema transformation if configured
const processedSchema = generateObjectConfig.handleSchema
? { ...schema, schema: generateObjectConfig.handleSchema(schema.schema) }
: schema;
const tool: ChatCompletionTool = {
function: {
description:
processedSchema.description ||
'Generate structured output according to the provided schema',
name: processedSchema.name || 'structured_output',
parameters: processedSchema.schema,
},
type: 'function',
};
const res = await this.client.chat.completions.create(
this.handleGenerateObjectPayload(payload, {
...getGenerateObjectReasoningParams(payload),
messages,
model,
...this.resolvePromptCacheKeyParams(model, options?.user),
tool_choice: { function: { name: tool.function.name }, type: 'function' },
tools: [tool],
user: options?.user,
}) as OpenAI.ChatCompletionCreateParamsNonStreaming,
{ headers: options?.headers, signal: options?.signal },
);
if (res.usage) {
await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload));
}
const toolCalls = res.choices[0].message.tool_calls!;
try {
return toolCalls.map((item) => ({
arguments: JSON.parse(item.function.arguments),
name: item.function.name,
}));
} catch {
console.error('parse tool call arguments error:', toolCalls);
return undefined;
}
return await this.generateObjectViaToolCalling(payload, options, usagePayload);
}
// Factory-level Responses API routing control (supports instance override)
@@ -955,17 +1000,29 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
}
log('calling chat.completions.create for structured output');
const res = await this.client.chat.completions.create(
this.handleGenerateObjectPayload(payload, {
...getGenerateObjectReasoningParams(payload),
messages,
model,
response_format: { json_schema: processedSchema, type: 'json_schema' },
...this.resolvePromptCacheKeyParams(model, options?.user),
user: options?.user,
}) as OpenAI.ChatCompletionCreateParamsNonStreaming,
{ headers: options?.headers, signal: options?.signal },
);
let res: OpenAI.ChatCompletion;
try {
res = await this.client.chat.completions.create(
this.handleGenerateObjectPayload(payload, {
...getGenerateObjectReasoningParams(payload),
messages,
model,
response_format: { json_schema: processedSchema, type: 'json_schema' },
...this.resolvePromptCacheKeyParams(model, options?.user),
user: options?.user,
}) as OpenAI.ChatCompletionCreateParamsNonStreaming,
{ headers: options?.headers, signal: options?.signal },
);
} catch (error) {
// Gateways can serve json_schema-incapable upstreams under arbitrary
// model ids (e.g. OpenCode Zen's `big-pickle` proxies DeepSeek), which
// the model-id detection above cannot catch. Retry once via forced
// tool calling when the upstream rejects the response_format type.
if (!isResponseFormatUnsupportedError(error)) throw error;
log('provider rejected json_schema response_format, retrying via tool calling');
return await this.generateObjectViaToolCalling(payload, options, usagePayload);
}
if (res.usage) {
await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload));
}
@@ -2,11 +2,21 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import * as modelParse from '../../utils/modelParse';
import { LobeAiHubMixAI } from './index';
import { LobeAiHubMixAI, params } from './index';
const mockFetch = vi.fn();
global.fetch = mockFetch;
// Minimal view of a router entry: only the fields these tests assert on.
type RouterForTest = {
  apiType: string;
  models?: string[];
};

/**
 * Resolve the provider's router table for an optional requested model.
 * `params.routers` may be a static list or a factory; the factory form is
 * called with a dummy API key and the model as runtime context.
 */
const resolveRouters = (model?: string) => {
  const routers =
    typeof params.routers === 'function'
      ? params.routers({ apiKey: 'test' }, { model })
      : params.routers;

  return routers as RouterForTest[];
};
describe('LobeAiHubMixAI', () => {
let instance: InstanceType<typeof LobeAiHubMixAI>;
@@ -32,6 +42,32 @@ describe('LobeAiHubMixAI', () => {
});
});
describe('routers', () => {
it('should route the whole DeepSeek family to the deepseek runtime', () => {
// The generic openai fallback sends response_format json_schema for
// structured output, which DeepSeek upstreams reject — the deepseek
// runtime simulates it via tool calling instead.
const routers = resolveRouters();
const deepseekRouter = routers.find((router) => router.apiType === 'deepseek');
expect(deepseekRouter?.models).toEqual(
expect.arrayContaining([
'deepseek-chat',
'deepseek-reasoner',
'deepseek-v4-flash',
'deepseek-v4-pro',
]),
);
});
it('should match gateway-specific DeepSeek ids missing from the static model list', () => {
const routers = resolveRouters('deepseek-v4-flash-free');
const deepseekRouter = routers.find((router) => router.apiType === 'deepseek');
expect(deepseekRouter?.models).toContain('deepseek-v4-flash-free');
});
});
describe('chat', () => {
it('should support chat method', async () => {
vi.spyOn(instance as any, 'runWithFallback').mockResolvedValue(new Response());
@@ -5,6 +5,7 @@ import { responsesAPIModels } from '../../const/models';
import { createRouterRuntime } from '../../core/RouterRuntime';
import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';
/**
* Response schema for GET https://aihubmix.com/api/v1/models
@@ -190,7 +191,7 @@ export const params: CreateRouterRuntimeOptions = {
clearTimeout(timeoutId);
}
},
routers: [
routers: (_options, runtimeContext) => [
{
apiType: 'anthropic',
models: LOBE_DEFAULT_MODEL_LIST.map((m) => m.id).filter(
@@ -214,7 +215,15 @@ export const params: CreateRouterRuntimeOptions = {
},
{
apiType: 'deepseek',
models: ['deepseek-chat', 'deepseek-reasoner'],
// Match the whole DeepSeek family (deepseek-v4*, deepseek-chat, ...), not
// just the two legacy ids — the deepseek runtime simulates structured
// output via tool calling, while the generic openai fallback sends
// response_format json_schema which DeepSeek upstreams reject.
models: resolveProviderRouteModels(
'deepseek',
LOBE_DEFAULT_MODEL_LIST,
runtimeContext?.model,
),
options: { baseURL: urlJoin(baseURL, '/v1') },
},
{
@@ -50,7 +50,10 @@ describe('LobeDeepSeekAnthropicAI generateObject', () => {
vi.clearAllMocks();
});
it('should use any tool choice by default to keep DeepSeek thinking mode enabled', async () => {
it('should use any tool choice by default for server-side thinking', async () => {
// DeepSeek's Anthropic-compatible endpoint rejects named tool_choice while
// thinking is active, but accepts `any`; V4 models can default to thinking
// enabled server-side.
const result = await instance.generateObject(generateObjectPayload);
const payload = getLastRequestPayload();
@@ -82,11 +85,36 @@ describe('LobeDeepSeekAnthropicAI generateObject', () => {
expect(payload.tool_choice).toEqual({ name: 'task_topic_handoff', type: 'tool' });
});
it('should map reasoning_effort to output_config.effort', async () => {
it('should use any tool choice when thinking is explicitly enabled', async () => {
await instance.generateObject({
...generateObjectPayload,
thinking: { budget_tokens: 1024, type: 'enabled' },
} as any);
const payload = getLastRequestPayload();
expect(payload.thinking).toBeUndefined();
expect(payload.tool_choice).toEqual({ type: 'any' });
});
it('should use any tool choice for thinking-only deepseek-reasoner', async () => {
await instance.generateObject({
...generateObjectPayload,
model: 'deepseek-reasoner',
});
const payload = getLastRequestPayload();
expect(payload.thinking).toBeUndefined();
expect(payload.tool_choice).toEqual({ type: 'any' });
});
it('should map reasoning_effort to output_config.effort when thinking is enabled', async () => {
await instance.generateObject({
...generateObjectPayload,
reasoning_effort: 'high',
});
thinking: { budget_tokens: 1024, type: 'enabled' },
} as any);
const payload = getLastRequestPayload();
@@ -131,6 +159,66 @@ describe('DeepSeek OpenAI-compatible generateObject configuration', () => {
expect(openAIParams.generateObject?.useToolsCalling).toBe(true);
});
it('should disable thinking by default for V4 generateObject requests', () => {
// V4 defaults to thinking enabled server-side, which rejects the forced
// tool_choice used for structured output.
const requestPayload = {
messages: [{ role: 'user' as const, content: 'Hello' }],
model: 'deepseek-v4-flash',
reasoning_effort: 'high' as const,
};
const result = openAIParams.generateObject!.handlePayload!(
{
messages: [{ role: 'user', content: 'Hello' }],
model: 'deepseek-v4-flash',
},
requestPayload,
{},
);
expect(result).toEqual(expect.objectContaining({ thinking: { type: 'disabled' } }));
expect(result).not.toHaveProperty('reasoning_effort');
});
it('should disable thinking for provider-prefixed V4 generateObject requests', () => {
const requestPayload = {
messages: [{ role: 'user' as const, content: 'Hello' }],
model: 'Deepseek/deepseek-v4-pro',
reasoning_effort: 'high' as const,
};
const result = openAIParams.generateObject!.handlePayload!(
{
messages: [{ role: 'user', content: 'Hello' }],
model: 'Deepseek/deepseek-v4-pro',
},
requestPayload,
{},
);
expect(result).toEqual(expect.objectContaining({ thinking: { type: 'disabled' } }));
expect(result).not.toHaveProperty('reasoning_effort');
});
it('should not inject thinking parameter for thinking-only deepseek-reasoner', () => {
const requestPayload = {
messages: [{ role: 'user' as const, content: 'Hello' }],
model: 'deepseek-reasoner',
};
const result = openAIParams.generateObject!.handlePayload!(
{
messages: [{ role: 'user', content: 'Hello' }],
model: 'deepseek-reasoner',
},
requestPayload,
{},
);
expect(result).not.toHaveProperty('thinking');
});
it('should forward disabled thinking for generateObject DeepSeek requests', () => {
const requestPayload = {
messages: [{ role: 'user' as const, content: 'Hello' }],
@@ -5,9 +5,10 @@ import type OpenAI from 'openai';
import { buildDefaultAnthropicPayload } from '../../core/anthropicCompatibleFactory';
import type { ChatStreamPayload } from '../../types';
import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
import { isDeepSeekV4FamilyModel } from '../../utils/modelParse';
import { sanitizeDeepSeekJsonPayload } from './sanitizePayload';
const isDeepSeekV4Model = (model: string) => model.startsWith('deepseek-v4');
export const isDeepSeekV4Model = (model: string | undefined) => isDeepSeekV4FamilyModel(model);
const isEmptyContent = (content: unknown) =>
content === '' || content === null || content === undefined;
const hasReasoningContent = (reasoning: any) => typeof reasoning?.content === 'string';
@@ -5,6 +5,7 @@ import type { AnthropicGenerateObjectConfig } from '../../core/anthropicCompatib
import { createAnthropicGenerateObject } from '../../core/anthropicCompatibleFactory/generateObject';
import type { OpenAICompatibleFactoryOptions } from '../../core/openaiCompatibleFactory';
import type { ChatStreamPayload, GenerateObjectOptions, GenerateObjectPayload } from '../../types';
import { isDeepSeekV4Model } from './chatPayload';
import { sanitizeDeepSeekJsonPayload } from './sanitizePayload';
type GenerateObjectHandlePayload = NonNullable<
@@ -21,10 +22,11 @@ export const createDeepSeekAnthropicGenerateObject = async (
options?: GenerateObjectOptions,
pricing?: Pricing,
) => {
// DeepSeek V4 thinking mode rejects Anthropic's named schema tool choice,
// e.g. `{ type: "tool", name: "task_topic_handoff" }`, but accepts
// `{ type: "any" }`. If thinking is already disabled, keep the stricter
// named tool choice; otherwise use `any` without changing the thinking mode.
// DeepSeek's Anthropic-compatible endpoint rejects named schema tool_choice
// while thinking is active, but accepts `{ type: "any" }`. V4 models may
// default to thinking enabled server-side, so keep `any` unless the caller
// explicitly disabled thinking; with a single schema tool it still forces
// structured output.
const thinkingDisabled = isGenerateObjectThinkingDisabled(payload);
const requestParams: AnthropicGenerateObjectConfig['requestParams'] = {
...(!thinkingDisabled && payload.reasoning_effort
@@ -59,14 +61,29 @@ export const buildDeepSeekGenerateObjectPayload: GenerateObjectHandlePayload = (
requestPayload,
) => {
const { thinking } = payload;
const thinkingExplicitlyDisabled = thinking?.type === 'disabled';
const thinkingEnabled = thinking?.type === 'enabled';
const payloadWithoutReasoningEffort = { ...requestPayload };
delete (payloadWithoutReasoningEffort as { reasoning_effort?: unknown }).reasoning_effort;
// V4 models default to thinking enabled server-side, and thinking mode
// rejects the forced tool_choice used for structured output (mirrors the
// Anthropic-compatible endpoint behavior). Explicitly disable thinking
// unless the caller turned it on. deepseek-reasoner is thinking-only, so
// leave its thinking parameter untouched.
if (isDeepSeekV4Model(payload.model)) {
return sanitizeDeepSeekJsonPayload(
thinkingEnabled
? { ...requestPayload, thinking: { type: 'enabled' } }
: { ...payloadWithoutReasoningEffort, thinking: { type: 'disabled' } },
);
}
const thinkingExplicitlyDisabled = thinking?.type === 'disabled';
return sanitizeDeepSeekJsonPayload({
...(thinkingExplicitlyDisabled ? payloadWithoutReasoningEffort : requestPayload),
...(thinking?.type === 'enabled' || thinkingExplicitlyDisabled
? { thinking: { type: thinking.type } }
...(thinkingEnabled || thinkingExplicitlyDisabled
? { thinking: { type: thinking!.type } }
: {}),
});
};
@@ -3,7 +3,6 @@ import { ModelProvider } from 'model-bank';
import type { Mock } from 'vitest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { responsesAPIModels } from '../../const/models';
import type { ChatStreamPayload } from '../../types/chat';
import * as modelParseModule from '../../utils/modelParse';
import type { NewAPIModelCard, NewAPIPricing } from './index';
@@ -25,7 +24,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
let mockFetch: Mock;
let mockProcessMultiProviderModelList: Mock;
let mockDetectModelProvider: Mock;
let mockResponsesAPIModels: typeof responsesAPIModels;
beforeEach(() => {
// Setup fetch mock
@@ -35,7 +33,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
// Setup utility function mocks
mockProcessMultiProviderModelList = vi.mocked(modelParseModule.processMultiProviderModelList);
mockDetectModelProvider = vi.mocked(modelParseModule.detectModelProvider);
mockResponsesAPIModels = responsesAPIModels;
// Clear environment variables
delete process.env.DEBUG_NEWAPI_CHAT_COMPLETION;
@@ -708,11 +705,20 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
const options = { apiKey: 'test', baseURL: 'https://api.newapi.com/v1' };
const routers = params.routers(options);
expect(routers).toHaveLength(4);
expect(routers).toHaveLength(5);
expect(routers[0].apiType).toBe('anthropic');
expect(routers[1].apiType).toBe('google');
expect(routers[2].apiType).toBe('xai');
expect(routers[3].apiType).toBe('openai');
expect(routers[3].apiType).toBe('deepseek');
expect(routers[4].apiType).toBe('openai');
});
it('should configure deepseek router with /v1 path and openai sdkType', () => {
const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
const routers = params.routers(options);
expect(routers[3].options.baseURL).toBe('https://custom.com/v1');
expect((routers[3].options as any).sdkType).toBe('openai');
});
it('should process baseURL by removing version paths', () => {
@@ -750,14 +756,14 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
const routers = params.routers(options);
expect(routers[3].options.baseURL).toBe('https://custom.com/v1');
expect(routers[4].options.baseURL).toBe('https://custom.com/v1');
});
it('should configure openai router with useResponseModels', () => {
const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' };
const routers = params.routers(options);
expect((routers[3].options as any).chatCompletion?.useResponseModels).toBeDefined();
expect((routers[4].options as any).chatCompletion?.useResponseModels).toBeDefined();
});
it('should filter anthropic models for anthropic router', () => {
@@ -803,9 +809,9 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => {
const options = { apiKey: 'test' }; // No baseURL
const routers = params.routers(options);
expect(routers).toHaveLength(4);
expect(routers).toHaveLength(5);
expect(routers[0].options.baseURL).toBe('');
expect(routers[3].options.baseURL).toBe('v1'); // urlJoin('', '/v1') returns 'v1'
expect(routers[4].options.baseURL).toBe('v1'); // urlJoin('', '/v1') returns 'v1'
});
});
@@ -5,6 +5,7 @@ import { responsesAPIModels } from '../../const/models';
import { createRouterRuntime } from '../../core/RouterRuntime';
import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';
export interface NewAPIModelCard {
created: number;
@@ -158,7 +159,7 @@ export const params = {
return processMultiProviderModelList([...enrichedModelList, ...additionalModels], 'newapi');
},
routers: (options) => {
routers: (options, runtimeContext?: { model?: string }) => {
const userBaseURL = options.baseURL?.replace(/\/v\d+[a-z]*\/?$/, '') || '';
return [
@@ -192,6 +193,19 @@ export const params = {
baseURL: urlJoin(userBaseURL, '/v1'),
},
},
{
apiType: 'deepseek',
models: resolveProviderRouteModels(
'deepseek',
LOBE_DEFAULT_MODEL_LIST,
runtimeContext?.model,
),
options: {
...options,
baseURL: urlJoin(userBaseURL, '/v1'),
sdkType: 'openai',
},
},
{
apiType: 'openai',
options: {
@@ -1,4 +1,4 @@
import { ModelProvider } from 'model-bank';
import { LOBE_DEFAULT_MODEL_LIST, ModelProvider } from 'model-bank';
import type OpenAI from 'openai';
import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory';
@@ -6,6 +6,7 @@ import { createRouterRuntime } from '../../core/RouterRuntime';
import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
import type { ChatStreamPayload } from '../../types';
import { processMultiProviderModelList } from '../../utils/modelParse';
import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';
// ============================================================================
// Constants
@@ -19,24 +20,24 @@ const MODELS_DEV_URL = 'https://models.dev/api.json';
// ============================================================================
/**
 * One model entry as this module reads it (presumably from the models.dev
 * catalogue referenced by `MODELS_DEV_URL` — confirm against the fetch code).
 *
 * Only `id` is required. Every other known field is optional, and the index
 * signature keeps any additional, unlisted keys accessible.
 * Members are declared exactly once, in alphabetical order.
 */
interface ModelsDevModel {
  [key: string]: any;
  attachment?: boolean;
  cost?: {
    input?: number;
    output?: number;
    cache_read?: number;
    cache_write?: number;
  };
  family?: string;
  id: string;
  limit?: { context?: number; output?: number };
  modalities?: { input?: string[]; output?: string[] };
  name?: string;
  provider?: { npm?: string };
  reasoning?: boolean;
  release_date?: string;
  structured_output?: boolean;
  tool_call?: boolean;
}
interface ModelsDevData {
@@ -272,14 +273,20 @@ export const sanitizeJsonSchema = (schema: any): any => {
nested[k] = sanitizeJsonSchema(v);
}
result[key] = nested;
} else if (
['allOf', 'anyOf', 'oneOf', 'prefixItems'].includes(key) &&
Array.isArray(value)
) {
} else if (['allOf', 'anyOf', 'oneOf', 'prefixItems'].includes(key) && Array.isArray(value)) {
result[key] = value.map(sanitizeJsonSchema);
} else if (
['items', 'additionalProperties', 'not', 'contains', 'if', 'then', 'else',
'unevaluatedItems', 'unevaluatedProperties'].includes(key)
[
'items',
'additionalProperties',
'not',
'contains',
'if',
'then',
'else',
'unevaluatedItems',
'unevaluatedProperties',
].includes(key)
) {
result[key] = sanitizeJsonSchema(value);
} else {
@@ -431,7 +438,7 @@ export const params = {
);
}
},
routers: async (options) => {
routers: async (options, runtimeContext?: { model?: string }) => {
const baseURL = options.baseURL || GO_BASE_URL;
const anthropicModels = await getAnthropicModels();
@@ -443,6 +450,16 @@ export const params = {
models: anthropicModels,
options: { ...options, baseURL: stripV1(baseURL) },
},
// DeepSeek models via the deepseek runtime (OpenAI-compatible endpoint)
{
apiType: 'deepseek',
models: resolveProviderRouteModels(
'deepseek',
LOBE_DEFAULT_MODEL_LIST,
runtimeContext?.model,
),
options: { ...options, baseURL, sdkType: 'openai' },
},
// OpenAI-compatible fallback for all other models
{
apiType: 'openai',
@@ -0,0 +1,38 @@
// @vitest-environment node
import { describe, expect, it } from 'vitest';
import { params } from './index';
/**
 * Resolves the provider's router list for an optional requested model.
 *
 * Calls `params.routers` with a dummy API key and a runtime context carrying
 * `model`, then asserts the result to the minimal router shape the
 * assertions in this file inspect.
 */
const resolveRouters = (model?: string) => {
  const resolved = params.routers({ apiKey: 'test' }, { model });

  return resolved as Array<{
    apiType: string;
    models?: string[];
    options: { baseURL?: string; sdkType?: string };
  }>;
};
describe('OpenCodeZen routers', () => {
  // Resolves the routers for `model` and picks the entry typed as 'deepseek'.
  const findDeepSeekRouter = (model: string) =>
    resolveRouters(model).find(({ apiType }) => apiType === 'deepseek');

  it('should route DeepSeek-family models to the deepseek runtime', () => {
    // The generic openai fallback sends response_format json_schema for
    // structured output, which DeepSeek upstreams reject — the deepseek
    // runtime simulates it via tool calling instead.
    const router = findDeepSeekRouter('deepseek-v4-flash');

    expect(router?.models).toContain('deepseek-v4-flash');
    expect(router?.options.sdkType).toBe('openai');
  });

  it('should match gateway-specific DeepSeek ids missing from the static model list', () => {
    const router = findDeepSeekRouter('deepseek-v4-flash-free');

    expect(router?.models).toContain('deepseek-v4-flash-free');
  });

  it('should keep the openai catch-all as the last router', () => {
    // The last entry must be the openai router and must not restrict models.
    const lastRouter = resolveRouters('some-unknown-model').at(-1);

    expect(lastRouter?.apiType).toBe('openai');
    expect(lastRouter?.models).toBeUndefined();
  });
});
@@ -4,6 +4,7 @@ import { responsesAPIModels } from '../../const/models';
import { createRouterRuntime } from '../../core/RouterRuntime';
import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime';
import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse';
import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels';
const ZEN_BASE_URL = 'https://opencode.ai/zen/v1';
@@ -30,7 +31,7 @@ export const params = {
const modelList = modelsPage.data || [];
return processMultiProviderModelList(modelList, 'opencodezen');
},
routers: (options) => {
routers: (options, runtimeContext?: { model?: string }) => {
const baseURL = options.baseURL || ZEN_BASE_URL;
return [
// Anthropic router for Claude models
@@ -54,6 +55,20 @@ export const params = {
},
},
},
// DeepSeek models via the deepseek runtime (OpenAI-compatible endpoint)
{
apiType: 'deepseek',
models: resolveProviderRouteModels(
'deepseek',
LOBE_DEFAULT_MODEL_LIST,
runtimeContext?.model,
),
options: {
...options,
baseURL,
sdkType: 'openai',
},
},
// OpenAI-compatible fallback for all other models (Gemini, GLM, Kimi, MiniMax, Qwen, etc.)
{
apiType: 'openai',
@@ -0,0 +1,17 @@
import { detectModelProvider } from '../../utils/modelParse';
// Provider identifier type, derived from the return type of `detectModelProvider`
// so it stays in sync with whatever that helper can produce.
type ProviderKey = ReturnType<typeof detectModelProvider>;
/**
 * Builds the list of model ids that a router dedicated to `provider` should match.
 *
 * @param provider - Provider key the ids are compared against.
 * @param modelList - Known models; only their `id` is read.
 * @param requestedModel - Optional id of the model being requested.
 * @returns Every id from `modelList` that `detectModelProvider` attributes to
 * `provider`, in list order. When `requestedModel` is also attributed to
 * `provider`, it is included as well and the result is de-duplicated, so an
 * id absent from `modelList` can still be matched.
 */
export const resolveProviderRouteModels = (
  provider: ProviderKey,
  modelList: ReadonlyArray<{ id: string }>,
  requestedModel?: string,
): string[] => {
  const belongsToProvider = (id: string): boolean => detectModelProvider(id) === provider;

  const matchedIds: string[] = [];
  for (const { id } of modelList) {
    if (belongsToProvider(id)) matchedIds.push(id);
  }

  // No requested model, or one owned by a different provider: static list only.
  if (!requestedModel || !belongsToProvider(requestedModel)) return matchedIds;

  return Array.from(new Set([...matchedIds, requestedModel]));
};
@@ -182,6 +182,13 @@ export const MODEL_OWNER_DETECTION_CONFIG = {
zhipu: ['glm'],
} as const;
/**
 * Tells whether `model` names a DeepSeek V4 family model.
 *
 * @param model - Model id to inspect; may be `undefined`.
 * @returns `true` when `model` is a string containing `deepseek-v4`
 * (case-insensitive, anywhere in the id); `false` otherwise.
 */
export const isDeepSeekV4FamilyModel = (model: string | undefined): boolean => {
  if (typeof model !== 'string') return false;

  return model.toLowerCase().includes('deepseek-v4');
};
/**
 * Tells whether `model` counts as thinking-eligible for DeepSeek handling.
 *
 * @param model - Model id to inspect; may be `undefined`.
 * @returns `true` when `model` is a string that either belongs to the
 * DeepSeek V4 family (per `isDeepSeekV4FamilyModel`) or contains
 * `deepseek-reasoner` (case-insensitive); `false` otherwise.
 */
export const isDeepSeekThinkingEligibleModel = (model: string | undefined): boolean => {
  if (typeof model !== 'string') return false;
  if (isDeepSeekV4FamilyModel(model)) return true;

  return model.toLowerCase().includes('deepseek-reasoner');
};
// Image model keyword configuration
export const IMAGE_MODEL_KEYWORDS = [
'dall-e',