diff --git a/apps/server/src/routers/lambda/__tests__/aiChat.test.ts b/apps/server/src/routers/lambda/__tests__/aiChat.test.ts index a4f2371767..2917227a97 100644 --- a/apps/server/src/routers/lambda/__tests__/aiChat.test.ts +++ b/apps/server/src/routers/lambda/__tests__/aiChat.test.ts @@ -1032,6 +1032,43 @@ describe('aiChatRouter', () => { } }); + it('maps raw provider 4xx errors to BAD_REQUEST instead of internal errors', async () => { + const { initModelRuntimeFromDB } = await import('@/server/modules/ModelRuntime'); + + // Raw SDK APIError shape: carries an HTTP status but no errorType — the + // generateObject path rethrows upstream errors verbatim (e.g. a BYOK + // gateway rejecting response_format json_schema). + const providerError = Object.assign( + new Error( + '400 Error from provider (DeepSeek): This response_format type is unavailable now', + ), + { status: 400 }, + ); + const mockGenerateObject = vi.fn().mockRejectedValue(providerError); + + vi.mocked(initModelRuntimeFromDB).mockResolvedValue({ + generateObject: mockGenerateObject, + } as any); + + const caller = aiChatRouter.createCaller({ ...mockCtx, serverDB: {} } as any); + + try { + await caller.outputJSON({ + messages: [{ content: 'test', role: 'user' }], + model: 'deepseek-v4-flash-free', + provider: 'opencodezen', + }); + throw new Error('Expected outputJSON to throw'); + } catch (error) { + expect(error).toBeInstanceOf(TRPCError); + expect(error).toMatchObject({ + cause: providerError, + code: 'BAD_REQUEST', + message: providerError.message, + }); + } + }); + it('should handle tools parameter when provided', async () => { const { initModelRuntimeFromDB } = await import('@/server/modules/ModelRuntime'); diff --git a/apps/server/src/routers/lambda/aiChat.ts b/apps/server/src/routers/lambda/aiChat.ts index f610e03fb1..ce0f8fdb88 100644 --- a/apps/server/src/routers/lambda/aiChat.ts +++ b/apps/server/src/routers/lambda/aiChat.ts @@ -52,13 +52,29 @@ const getTRPCErrorCodeFromStatus = (status: 
number): TRPCErrorCode => { const createRuntimeTRPCError = (error: unknown): TRPCError | undefined => { const errorType = getRuntimeErrorType(error); const spec = getErrorCodeSpec(errorType); - if (!errorType || !spec) return; + if (errorType && spec) { + return new TRPCError({ + cause: error, + code: getTRPCErrorCodeFromStatus(spec.httpStatus), + message: errorType, + }); + } - return new TRPCError({ - cause: error, - code: getTRPCErrorCodeFromStatus(spec.httpStatus), - message: errorType, - }); + // Raw provider SDK errors (OpenAI/Anthropic APIError) carry an HTTP status + // but no errorType — the generateObject path rethrows upstream errors + // verbatim. Without this mapping, tRPC classifies them as + // INTERNAL_SERVER_ERROR, so a user-channel 4xx (e.g. a BYOK provider + // rejecting the request) pollutes server 500 monitoring. + const status = (error as { status?: unknown } | undefined)?.status; + if (typeof status === 'number' && status >= 400 && status < 500) { + return new TRPCError({ + cause: error, + code: getTRPCErrorCodeFromStatus(status), + message: error instanceof Error ? 
error.message : `Provider error (${status})`, + }); + } + + return undefined; }; const aiChatProcedure = wsCompatProcedure.use(serverDatabase).use(async (opts) => { diff --git a/packages/model-runtime/src/core/contextBuilders/openai.ts b/packages/model-runtime/src/core/contextBuilders/openai.ts index 93fb8a95a0..dffd428039 100644 --- a/packages/model-runtime/src/core/contextBuilders/openai.ts +++ b/packages/model-runtime/src/core/contextBuilders/openai.ts @@ -5,6 +5,7 @@ import { toFile } from 'openai'; import { disableStreamModels, systemToUserModels } from '../../const/models'; import type { ChatStreamPayload, OpenAIChatMessage, UserMessageContentPart } from '../../types'; +import { isDeepSeekThinkingEligibleModel } from '../../utils/modelParse'; import { parseDataUri } from '../../utils/uriParser'; export type ExtendedChatCompletionContentPart = { @@ -24,16 +25,6 @@ type ConvertMessageContentOptions = { const isDeepSeekModel = (model: string | undefined) => typeof model === 'string' && model.toLowerCase().includes('deepseek'); -// DeepSeek thinking-mode eligible models require reasoning_content on every -// assistant history message — otherwise the API rejects follow-up turns with -// "The reasoning_content in the thinking mode must be passed back to the API." 
-// See https://api-docs.deepseek.com/guides/thinking_mode#tool-calls -const isDeepSeekThinkingEligibleModel = (model: string | undefined) => { - if (!model) return false; - const lower = model.toLowerCase(); - return lower.includes('deepseek-reasoner') || lower.includes('deepseek-v4'); -}; - type OpenAICompatibleContentPart = | ExtendedChatCompletionContentPart | OpenAI.ChatCompletionContentPart diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/generateObject.test.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/generateObject.test.ts new file mode 100644 index 0000000000..7adc4480b2 --- /dev/null +++ b/packages/model-runtime/src/core/openaiCompatibleFactory/generateObject.test.ts @@ -0,0 +1,146 @@ +// @vitest-environment node +import type { Mock } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { createOpenAICompatibleRuntime, isResponseFormatUnsupportedError } from './index'; + +// Pricing lookup reaches into model-bank's async provider loading, which is +// irrelevant to these tests (and unavailable in some monorepo test setups). 
+vi.mock('../../utils/getModelPricing', () => ({ + getModelPricing: vi.fn().mockResolvedValue(undefined), +})); + +const TestRuntime = createOpenAICompatibleRuntime({ + baseURL: 'https://api.test.com/v1', + provider: 'testprovider', +}); + +const generateObjectPayload = { + messages: [{ content: 'Generate a handoff', role: 'user' as const }], + model: 'gpt-anything', + schema: { + name: 'task_topic_handoff', + schema: { + additionalProperties: false, + properties: { summary: { type: 'string' }, title: { type: 'string' } }, + required: ['title', 'summary'], + type: 'object' as const, + }, + }, +}; + +const toolCallResponse = { + choices: [ + { + message: { + tool_calls: [ + { + function: { + arguments: '{"summary":"Task completed","title":"Done"}', + name: 'task_topic_handoff', + }, + id: 'call_1', + type: 'function', + }, + ], + }, + }, + ], +}; + +const responseFormatUnsupportedError = Object.assign( + new Error('400 Error from provider (DeepSeek): This response_format type is unavailable now'), + { status: 400 }, +); + +describe('isResponseFormatUnsupportedError', () => { + it('should match DeepSeek json_schema rejection variants', () => { + expect( + isResponseFormatUnsupportedError(new Error('This response_format type is unavailable now')), + ).toBe(true); + expect( + isResponseFormatUnsupportedError({ + error: { + message: + 'Failed to deserialize the JSON body into the target type: response_format: response_format.type `json_schema` is unavailable now at line 1 column 1193', + }, + }), + ).toBe(true); + }); + + it('should not match unrelated errors', () => { + expect(isResponseFormatUnsupportedError(new Error('Insufficient Balance'))).toBe(false); + expect(isResponseFormatUnsupportedError(undefined)).toBe(false); + expect(isResponseFormatUnsupportedError('response_format')).toBe(false); + }); +}); + +describe('generateObject tool-calling fallback', () => { + afterEach(() => { + vi.clearAllMocks(); + }); + + const createInstance = () => new TestRuntime({ 
apiKey: 'test' }); + + const getCreateMock = (instance: any) => instance.client.chat.completions.create as Mock; + + it('should proactively use tool calling for DeepSeek-family models', async () => { + const instance = createInstance(); + vi.spyOn((instance as any).client.chat.completions, 'create').mockResolvedValue( + toolCallResponse as any, + ); + + const result = await instance.generateObject({ + ...generateObjectPayload, + model: 'deepseek-v4-flash', + }); + + const createMock = getCreateMock(instance); + expect(createMock).toHaveBeenCalledTimes(1); + + const requestPayload = createMock.mock.calls[0][0]; + expect(requestPayload.response_format).toBeUndefined(); + expect(requestPayload.tool_choice).toEqual({ + function: { name: 'task_topic_handoff' }, + type: 'function', + }); + expect(result).toEqual({ summary: 'Task completed', title: 'Done' }); + }); + + it('should retry via tool calling when the provider rejects json_schema', async () => { + const instance = createInstance(); + vi.spyOn((instance as any).client.chat.completions, 'create') + .mockRejectedValueOnce(responseFormatUnsupportedError) + .mockResolvedValueOnce(toolCallResponse as any); + + // model id gives no hint that the upstream is DeepSeek (e.g. 
gateway alias) + const result = await instance.generateObject({ ...generateObjectPayload, model: 'big-pickle' }); + + const createMock = getCreateMock(instance); + expect(createMock).toHaveBeenCalledTimes(2); + + const firstPayload = createMock.mock.calls[0][0]; + expect(firstPayload.response_format).toEqual(expect.objectContaining({ type: 'json_schema' })); + + const retryPayload = createMock.mock.calls[1][0]; + expect(retryPayload.response_format).toBeUndefined(); + expect(retryPayload.tool_choice).toEqual({ + function: { name: 'task_topic_handoff' }, + type: 'function', + }); + expect(result).toEqual({ summary: 'Task completed', title: 'Done' }); + }); + + it('should not retry on unrelated provider errors', async () => { + const instance = createInstance(); + vi.spyOn((instance as any).client.chat.completions, 'create').mockRejectedValue( + Object.assign(new Error('Insufficient Balance'), { status: 402 }), + ); + + await expect( + instance.generateObject({ ...generateObjectPayload, model: 'big-pickle' }), + ).rejects.toMatchObject({ message: expect.stringContaining('Insufficient Balance') }); + + expect(getCreateMock(instance)).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts index eea4a0cbab..31d768bef2 100644 --- a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts +++ b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts @@ -40,6 +40,7 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl'; import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty'; import { getModelPricing } from '../../utils/getModelPricing'; import { handleOpenAIError } from '../../utils/handleOpenAIError'; +import { detectModelProvider } from '../../utils/modelParse'; import { postProcessModelList } from '../../utils/postProcessModelList'; import { assertContextWithinWindow, @@ -62,6 +63,22 @@ 
export type { PollVideoStatusResult }; export * from './createVideo'; export * from './nonStreamToStream'; +/** + * Detect provider 400/422 errors that reject `response_format: { type: 'json_schema' }`. + * Known message variants from the DeepSeek family (official API and gateways proxying it): + * - `This response_format type is unavailable now` + * - `response_format.type \`json_schema\` is unavailable now` + */ +export const isResponseFormatUnsupportedError = (error: unknown): boolean => { + const err = error as { error?: { message?: unknown }; message?: unknown }; + const message = [err?.message, err?.error?.message] + .filter((value): value is string => typeof value === 'string') + .join('\n'); + if (!message) return false; + + return /(?:response_format|json_schema)[^]*?(?:unavailable|not +support)/i.test(message); +}; + // the model contains the following keywords is not a chat model, so we should filter them out export const CHAT_MODELS_BLOCK_LIST = [ 'embedding', @@ -826,6 +843,73 @@ export const createOpenAICompatibleRuntime = = an ); } + /** + * Simulate schema-based structured output through a forced tool call, for + * providers that do not support `response_format: { type: 'json_schema' }`. + * Returns the parsed schema object — the same shape as the json_schema path. + */ + private async generateObjectViaToolCalling( + payload: GenerateObjectPayload, + options: GenerateObjectOptions | undefined, + usagePayload: Parameters[1], + ) { + const log = debug(`${this.logPrefix}:generateObject`); + const { messages, schema, model } = payload; + + // Apply schema transformation if configured + const processedSchema = generateObjectConfig?.handleSchema + ? 
{ ...schema!, schema: generateObjectConfig.handleSchema(schema!.schema) } + : schema!; + + const tool: ChatCompletionTool = { + function: { + description: + processedSchema.description || + 'Generate structured output according to the provided schema', + name: processedSchema.name || 'structured_output', + parameters: processedSchema.schema, + }, + type: 'function', + }; + + const res = await this.client.chat.completions.create( + this.handleGenerateObjectPayload(payload, { + ...getGenerateObjectReasoningParams(payload), + messages, + model, + ...this.resolvePromptCacheKeyParams(model, options?.user), + tool_choice: { function: { name: tool.function.name }, type: 'function' }, + tools: [tool], + user: options?.user, + }) as OpenAI.ChatCompletionCreateParamsNonStreaming, + { headers: options?.headers, signal: options?.signal }, + ); + + if (res.usage) { + await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload)); + } + + // Structural type keeps this compatible across openai SDK majors (v6 + // widened tool_calls to a function/custom union). + const toolCalls = res.choices[0].message.tool_calls as + | { function?: { arguments: string; name: string } }[] + | undefined; + const toolCall = + toolCalls?.find((item) => item.function?.name === tool.function.name) ?? 
toolCalls?.[0]; + + if (!toolCall?.function) { + log('no tool call found in structured output response'); + return undefined; + } + + try { + return JSON.parse(toolCall.function.arguments); + } catch { + console.error('parse tool call arguments error:', toolCall); + return undefined; + } + } + async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) { try { const { messages, schema, model, responseApi, tools } = payload; @@ -848,54 +932,15 @@ export const createOpenAICompatibleRuntime = = an if (!schema) throw new Error('tools or schema is required'); - // Use tool calling fallback if configured - if (generateObjectConfig?.useToolsCalling) { + // Use tool calling fallback if configured, or when the model is known to + // reject `response_format: { type: 'json_schema' }`. The DeepSeek API only + // supports `json_object` and replies `400 This response_format type is + // unavailable now` to json_schema requests, so DeepSeek-family models served + // through generic OpenAI-compatible providers (aggregator gateways, custom + // endpoints) must simulate structured output via forced tool calling. + if (generateObjectConfig?.useToolsCalling || detectModelProvider(model) === 'deepseek') { log('using tool calling fallback for structured output'); - - // Apply schema transformation if configured - const processedSchema = generateObjectConfig.handleSchema - ? 
{ ...schema, schema: generateObjectConfig.handleSchema(schema.schema) } - : schema; - - const tool: ChatCompletionTool = { - function: { - description: - processedSchema.description || - 'Generate structured output according to the provided schema', - name: processedSchema.name || 'structured_output', - parameters: processedSchema.schema, - }, - type: 'function', - }; - - const res = await this.client.chat.completions.create( - this.handleGenerateObjectPayload(payload, { - ...getGenerateObjectReasoningParams(payload), - messages, - model, - ...this.resolvePromptCacheKeyParams(model, options?.user), - tool_choice: { function: { name: tool.function.name }, type: 'function' }, - tools: [tool], - user: options?.user, - }) as OpenAI.ChatCompletionCreateParamsNonStreaming, - { headers: options?.headers, signal: options?.signal }, - ); - - if (res.usage) { - await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload)); - } - - const toolCalls = res.choices[0].message.tool_calls!; - - try { - return toolCalls.map((item) => ({ - arguments: JSON.parse(item.function.arguments), - name: item.function.name, - })); - } catch { - console.error('parse tool call arguments error:', toolCalls); - return undefined; - } + return await this.generateObjectViaToolCalling(payload, options, usagePayload); } // Factory-level Responses API routing control (supports instance override) @@ -955,17 +1000,29 @@ export const createOpenAICompatibleRuntime = = an } log('calling chat.completions.create for structured output'); - const res = await this.client.chat.completions.create( - this.handleGenerateObjectPayload(payload, { - ...getGenerateObjectReasoningParams(payload), - messages, - model, - response_format: { json_schema: processedSchema, type: 'json_schema' }, - ...this.resolvePromptCacheKeyParams(model, options?.user), - user: options?.user, - }) as OpenAI.ChatCompletionCreateParamsNonStreaming, - { headers: options?.headers, signal: options?.signal }, - ); + let res: 
OpenAI.ChatCompletion; + try { + res = await this.client.chat.completions.create( + this.handleGenerateObjectPayload(payload, { + ...getGenerateObjectReasoningParams(payload), + messages, + model, + response_format: { json_schema: processedSchema, type: 'json_schema' }, + ...this.resolvePromptCacheKeyParams(model, options?.user), + user: options?.user, + }) as OpenAI.ChatCompletionCreateParamsNonStreaming, + { headers: options?.headers, signal: options?.signal }, + ); + } catch (error) { + // Gateways can serve json_schema-incapable upstreams under arbitrary + // model ids (e.g. OpenCode Zen's `big-pickle` proxies DeepSeek), which + // the model-id detection above cannot catch. Retry once via forced + // tool calling when the upstream rejects the response_format type. + if (!isResponseFormatUnsupportedError(error)) throw error; + + log('provider rejected json_schema response_format, retrying via tool calling'); + return await this.generateObjectViaToolCalling(payload, options, usagePayload); + } if (res.usage) { await options?.onUsage?.(convertOpenAIUsage(res.usage, usagePayload)); } diff --git a/packages/model-runtime/src/providers/aihubmix/index.test.ts b/packages/model-runtime/src/providers/aihubmix/index.test.ts index a8d613624b..b7ddb5abb0 100644 --- a/packages/model-runtime/src/providers/aihubmix/index.test.ts +++ b/packages/model-runtime/src/providers/aihubmix/index.test.ts @@ -2,11 +2,21 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import * as modelParse from '../../utils/modelParse'; -import { LobeAiHubMixAI } from './index'; +import { LobeAiHubMixAI, params } from './index'; const mockFetch = vi.fn(); global.fetch = mockFetch; +type RouterForTest = { + apiType: string; + models?: string[]; +}; + +const resolveRouters = (model?: string) => + (typeof params.routers === 'function' + ? 
params.routers({ apiKey: 'test' }, { model }) + : params.routers) as RouterForTest[]; + describe('LobeAiHubMixAI', () => { let instance: InstanceType; @@ -32,6 +42,32 @@ describe('LobeAiHubMixAI', () => { }); }); + describe('routers', () => { + it('should route the whole DeepSeek family to the deepseek runtime', () => { + // The generic openai fallback sends response_format json_schema for + // structured output, which DeepSeek upstreams reject — the deepseek + // runtime simulates it via tool calling instead. + const routers = resolveRouters(); + const deepseekRouter = routers.find((router) => router.apiType === 'deepseek'); + + expect(deepseekRouter?.models).toEqual( + expect.arrayContaining([ + 'deepseek-chat', + 'deepseek-reasoner', + 'deepseek-v4-flash', + 'deepseek-v4-pro', + ]), + ); + }); + + it('should match gateway-specific DeepSeek ids missing from the static model list', () => { + const routers = resolveRouters('deepseek-v4-flash-free'); + const deepseekRouter = routers.find((router) => router.apiType === 'deepseek'); + + expect(deepseekRouter?.models).toContain('deepseek-v4-flash-free'); + }); + }); + describe('chat', () => { it('should support chat method', async () => { vi.spyOn(instance as any, 'runWithFallback').mockResolvedValue(new Response()); diff --git a/packages/model-runtime/src/providers/aihubmix/index.ts b/packages/model-runtime/src/providers/aihubmix/index.ts index ae67cf785b..b7cf9b4f49 100644 --- a/packages/model-runtime/src/providers/aihubmix/index.ts +++ b/packages/model-runtime/src/providers/aihubmix/index.ts @@ -5,6 +5,7 @@ import { responsesAPIModels } from '../../const/models'; import { createRouterRuntime } from '../../core/RouterRuntime'; import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime'; import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse'; +import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels'; /** * Response schema for GET 
https://aihubmix.com/api/v1/models @@ -190,7 +191,7 @@ export const params: CreateRouterRuntimeOptions = { clearTimeout(timeoutId); } }, - routers: [ + routers: (_options, runtimeContext) => [ { apiType: 'anthropic', models: LOBE_DEFAULT_MODEL_LIST.map((m) => m.id).filter( @@ -214,7 +215,15 @@ export const params: CreateRouterRuntimeOptions = { }, { apiType: 'deepseek', - models: ['deepseek-chat', 'deepseek-reasoner'], + // Match the whole DeepSeek family (deepseek-v4*, deepseek-chat, ...), not + // just the two legacy ids — the deepseek runtime simulates structured + // output via tool calling, while the generic openai fallback sends + // response_format json_schema which DeepSeek upstreams reject. + models: resolveProviderRouteModels( + 'deepseek', + LOBE_DEFAULT_MODEL_LIST, + runtimeContext?.model, + ), options: { baseURL: urlJoin(baseURL, '/v1') }, }, { diff --git a/packages/model-runtime/src/providers/deepseek/__tests__/generateObject.test.ts b/packages/model-runtime/src/providers/deepseek/__tests__/generateObject.test.ts index 9d12dc01b7..ef26b6b05c 100644 --- a/packages/model-runtime/src/providers/deepseek/__tests__/generateObject.test.ts +++ b/packages/model-runtime/src/providers/deepseek/__tests__/generateObject.test.ts @@ -50,7 +50,10 @@ describe('LobeDeepSeekAnthropicAI generateObject', () => { vi.clearAllMocks(); }); - it('should use any tool choice by default to keep DeepSeek thinking mode enabled', async () => { + it('should use any tool choice by default for server-side thinking', async () => { + // DeepSeek's Anthropic-compatible endpoint rejects named tool_choice while + // thinking is active, but accepts `any`; V4 models can default to thinking + // enabled server-side. 
const result = await instance.generateObject(generateObjectPayload); const payload = getLastRequestPayload(); @@ -82,11 +85,36 @@ describe('LobeDeepSeekAnthropicAI generateObject', () => { expect(payload.tool_choice).toEqual({ name: 'task_topic_handoff', type: 'tool' }); }); - it('should map reasoning_effort to output_config.effort', async () => { + it('should use any tool choice when thinking is explicitly enabled', async () => { + await instance.generateObject({ + ...generateObjectPayload, + thinking: { budget_tokens: 1024, type: 'enabled' }, + } as any); + + const payload = getLastRequestPayload(); + + expect(payload.thinking).toBeUndefined(); + expect(payload.tool_choice).toEqual({ type: 'any' }); + }); + + it('should use any tool choice for thinking-only deepseek-reasoner', async () => { + await instance.generateObject({ + ...generateObjectPayload, + model: 'deepseek-reasoner', + }); + + const payload = getLastRequestPayload(); + + expect(payload.thinking).toBeUndefined(); + expect(payload.tool_choice).toEqual({ type: 'any' }); + }); + + it('should map reasoning_effort to output_config.effort when thinking is enabled', async () => { await instance.generateObject({ ...generateObjectPayload, reasoning_effort: 'high', - }); + thinking: { budget_tokens: 1024, type: 'enabled' }, + } as any); const payload = getLastRequestPayload(); @@ -131,6 +159,66 @@ describe('DeepSeek OpenAI-compatible generateObject configuration', () => { expect(openAIParams.generateObject?.useToolsCalling).toBe(true); }); + it('should disable thinking by default for V4 generateObject requests', () => { + // V4 defaults to thinking enabled server-side, which rejects the forced + // tool_choice used for structured output. 
+ const requestPayload = { + messages: [{ role: 'user' as const, content: 'Hello' }], + model: 'deepseek-v4-flash', + reasoning_effort: 'high' as const, + }; + + const result = openAIParams.generateObject!.handlePayload!( + { + messages: [{ role: 'user', content: 'Hello' }], + model: 'deepseek-v4-flash', + }, + requestPayload, + {}, + ); + + expect(result).toEqual(expect.objectContaining({ thinking: { type: 'disabled' } })); + expect(result).not.toHaveProperty('reasoning_effort'); + }); + + it('should disable thinking for provider-prefixed V4 generateObject requests', () => { + const requestPayload = { + messages: [{ role: 'user' as const, content: 'Hello' }], + model: 'Deepseek/deepseek-v4-pro', + reasoning_effort: 'high' as const, + }; + + const result = openAIParams.generateObject!.handlePayload!( + { + messages: [{ role: 'user', content: 'Hello' }], + model: 'Deepseek/deepseek-v4-pro', + }, + requestPayload, + {}, + ); + + expect(result).toEqual(expect.objectContaining({ thinking: { type: 'disabled' } })); + expect(result).not.toHaveProperty('reasoning_effort'); + }); + + it('should not inject thinking parameter for thinking-only deepseek-reasoner', () => { + const requestPayload = { + messages: [{ role: 'user' as const, content: 'Hello' }], + model: 'deepseek-reasoner', + }; + + const result = openAIParams.generateObject!.handlePayload!( + { + messages: [{ role: 'user', content: 'Hello' }], + model: 'deepseek-reasoner', + }, + requestPayload, + {}, + ); + + expect(result).not.toHaveProperty('thinking'); + }); + it('should forward disabled thinking for generateObject DeepSeek requests', () => { const requestPayload = { messages: [{ role: 'user' as const, content: 'Hello' }], diff --git a/packages/model-runtime/src/providers/deepseek/chatPayload.ts b/packages/model-runtime/src/providers/deepseek/chatPayload.ts index b5d3216ab6..946b94b873 100644 --- a/packages/model-runtime/src/providers/deepseek/chatPayload.ts +++ 
b/packages/model-runtime/src/providers/deepseek/chatPayload.ts @@ -5,9 +5,10 @@ import type OpenAI from 'openai'; import { buildDefaultAnthropicPayload } from '../../core/anthropicCompatibleFactory'; import type { ChatStreamPayload } from '../../types'; import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty'; +import { isDeepSeekV4FamilyModel } from '../../utils/modelParse'; import { sanitizeDeepSeekJsonPayload } from './sanitizePayload'; -const isDeepSeekV4Model = (model: string) => model.startsWith('deepseek-v4'); +export const isDeepSeekV4Model = (model: string | undefined) => isDeepSeekV4FamilyModel(model); const isEmptyContent = (content: unknown) => content === '' || content === null || content === undefined; const hasReasoningContent = (reasoning: any) => typeof reasoning?.content === 'string'; diff --git a/packages/model-runtime/src/providers/deepseek/generateObject.ts b/packages/model-runtime/src/providers/deepseek/generateObject.ts index 321e2fe757..b58fb554e5 100644 --- a/packages/model-runtime/src/providers/deepseek/generateObject.ts +++ b/packages/model-runtime/src/providers/deepseek/generateObject.ts @@ -5,6 +5,7 @@ import type { AnthropicGenerateObjectConfig } from '../../core/anthropicCompatib import { createAnthropicGenerateObject } from '../../core/anthropicCompatibleFactory/generateObject'; import type { OpenAICompatibleFactoryOptions } from '../../core/openaiCompatibleFactory'; import type { ChatStreamPayload, GenerateObjectOptions, GenerateObjectPayload } from '../../types'; +import { isDeepSeekV4Model } from './chatPayload'; import { sanitizeDeepSeekJsonPayload } from './sanitizePayload'; type GenerateObjectHandlePayload = NonNullable< @@ -21,10 +22,11 @@ export const createDeepSeekAnthropicGenerateObject = async ( options?: GenerateObjectOptions, pricing?: Pricing, ) => { - // DeepSeek V4 thinking mode rejects Anthropic's named schema tool choice, - // e.g. 
`{ type: "tool", name: "task_topic_handoff" }`, but accepts - // `{ type: "any" }`. If thinking is already disabled, keep the stricter - // named tool choice; otherwise use `any` without changing the thinking mode. + // DeepSeek's Anthropic-compatible endpoint rejects named schema tool_choice + // while thinking is active, but accepts `{ type: "any" }`. V4 models may + // default to thinking enabled server-side, so keep `any` unless the caller + // explicitly disabled thinking; with a single schema tool it still forces + // structured output. const thinkingDisabled = isGenerateObjectThinkingDisabled(payload); const requestParams: AnthropicGenerateObjectConfig['requestParams'] = { ...(!thinkingDisabled && payload.reasoning_effort @@ -59,14 +61,29 @@ export const buildDeepSeekGenerateObjectPayload: GenerateObjectHandlePayload = ( requestPayload, ) => { const { thinking } = payload; - const thinkingExplicitlyDisabled = thinking?.type === 'disabled'; + const thinkingEnabled = thinking?.type === 'enabled'; const payloadWithoutReasoningEffort = { ...requestPayload }; delete (payloadWithoutReasoningEffort as { reasoning_effort?: unknown }).reasoning_effort; + // V4 models default to thinking enabled server-side, and thinking mode + // rejects the forced tool_choice used for structured output (mirrors the + // Anthropic-compatible endpoint behavior). Explicitly disable thinking + // unless the caller turned it on. deepseek-reasoner is thinking-only, so + // leave its thinking parameter untouched. + if (isDeepSeekV4Model(payload.model)) { + return sanitizeDeepSeekJsonPayload( + thinkingEnabled + ? { ...requestPayload, thinking: { type: 'enabled' } } + : { ...payloadWithoutReasoningEffort, thinking: { type: 'disabled' } }, + ); + } + + const thinkingExplicitlyDisabled = thinking?.type === 'disabled'; + return sanitizeDeepSeekJsonPayload({ ...(thinkingExplicitlyDisabled ? 
payloadWithoutReasoningEffort : requestPayload), - ...(thinking?.type === 'enabled' || thinkingExplicitlyDisabled - ? { thinking: { type: thinking.type } } + ...(thinkingEnabled || thinkingExplicitlyDisabled + ? { thinking: { type: thinking!.type } } : {}), }); }; diff --git a/packages/model-runtime/src/providers/newapi/index.test.ts b/packages/model-runtime/src/providers/newapi/index.test.ts index 2872cf2f1d..f54b1be5bd 100644 --- a/packages/model-runtime/src/providers/newapi/index.test.ts +++ b/packages/model-runtime/src/providers/newapi/index.test.ts @@ -3,7 +3,6 @@ import { ModelProvider } from 'model-bank'; import type { Mock } from 'vitest'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { responsesAPIModels } from '../../const/models'; import type { ChatStreamPayload } from '../../types/chat'; import * as modelParseModule from '../../utils/modelParse'; import type { NewAPIModelCard, NewAPIPricing } from './index'; @@ -25,7 +24,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => { let mockFetch: Mock; let mockProcessMultiProviderModelList: Mock; let mockDetectModelProvider: Mock; - let mockResponsesAPIModels: typeof responsesAPIModels; beforeEach(() => { // Setup fetch mock @@ -35,7 +33,6 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => { // Setup utility function mocks mockProcessMultiProviderModelList = vi.mocked(modelParseModule.processMultiProviderModelList); mockDetectModelProvider = vi.mocked(modelParseModule.detectModelProvider); - mockResponsesAPIModels = responsesAPIModels; // Clear environment variables delete process.env.DEBUG_NEWAPI_CHAT_COMPLETION; @@ -708,11 +705,20 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => { const options = { apiKey: 'test', baseURL: 'https://api.newapi.com/v1' }; const routers = params.routers(options); - expect(routers).toHaveLength(4); + expect(routers).toHaveLength(5); expect(routers[0].apiType).toBe('anthropic'); 
expect(routers[1].apiType).toBe('google'); expect(routers[2].apiType).toBe('xai'); - expect(routers[3].apiType).toBe('openai'); + expect(routers[3].apiType).toBe('deepseek'); + expect(routers[4].apiType).toBe('openai'); + }); + + it('should configure deepseek router with /v1 path and openai sdkType', () => { + const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' }; + const routers = params.routers(options); + + expect(routers[3].options.baseURL).toBe('https://custom.com/v1'); + expect((routers[3].options as any).sdkType).toBe('openai'); }); it('should process baseURL by removing version paths', () => { @@ -750,14 +756,14 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => { const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' }; const routers = params.routers(options); - expect(routers[3].options.baseURL).toBe('https://custom.com/v1'); + expect(routers[4].options.baseURL).toBe('https://custom.com/v1'); }); it('should configure openai router with useResponseModels', () => { const options = { apiKey: 'test', baseURL: 'https://custom.com/v1' }; const routers = params.routers(options); - expect((routers[3].options as any).chatCompletion?.useResponseModels).toBeDefined(); + expect((routers[4].options as any).chatCompletion?.useResponseModels).toBeDefined(); }); it('should filter anthropic models for anthropic router', () => { @@ -803,9 +809,9 @@ describe('NewAPI Runtime - 100% Branch Coverage', () => { const options = { apiKey: 'test' }; // No baseURL const routers = params.routers(options); - expect(routers).toHaveLength(4); + expect(routers).toHaveLength(5); expect(routers[0].options.baseURL).toBe(''); - expect(routers[3].options.baseURL).toBe('v1'); // urlJoin('', '/v1') returns 'v1' + expect(routers[4].options.baseURL).toBe('v1'); // urlJoin('', '/v1') returns 'v1' }); }); diff --git a/packages/model-runtime/src/providers/newapi/index.ts b/packages/model-runtime/src/providers/newapi/index.ts index 627463e7bf..c5f9c6505d 100644 --- 
a/packages/model-runtime/src/providers/newapi/index.ts +++ b/packages/model-runtime/src/providers/newapi/index.ts @@ -5,6 +5,7 @@ import { responsesAPIModels } from '../../const/models'; import { createRouterRuntime } from '../../core/RouterRuntime'; import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime'; import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse'; +import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels'; export interface NewAPIModelCard { created: number; @@ -158,7 +159,7 @@ export const params = { return processMultiProviderModelList([...enrichedModelList, ...additionalModels], 'newapi'); }, - routers: (options) => { + routers: (options, runtimeContext?: { model?: string }) => { const userBaseURL = options.baseURL?.replace(/\/v\d+[a-z]*\/?$/, '') || ''; return [ @@ -192,6 +193,19 @@ export const params = { baseURL: urlJoin(userBaseURL, '/v1'), }, }, + { + apiType: 'deepseek', + models: resolveProviderRouteModels( + 'deepseek', + LOBE_DEFAULT_MODEL_LIST, + runtimeContext?.model, + ), + options: { + ...options, + baseURL: urlJoin(userBaseURL, '/v1'), + sdkType: 'openai', + }, + }, { apiType: 'openai', options: { diff --git a/packages/model-runtime/src/providers/opencodeCodingPlan/index.ts b/packages/model-runtime/src/providers/opencodeCodingPlan/index.ts index 95b41b07a1..d43210b6de 100644 --- a/packages/model-runtime/src/providers/opencodeCodingPlan/index.ts +++ b/packages/model-runtime/src/providers/opencodeCodingPlan/index.ts @@ -1,4 +1,4 @@ -import { ModelProvider } from 'model-bank'; +import { LOBE_DEFAULT_MODEL_LIST, ModelProvider } from 'model-bank'; import type OpenAI from 'openai'; import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory'; @@ -6,6 +6,7 @@ import { createRouterRuntime } from '../../core/RouterRuntime'; import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime'; import type { 
ChatStreamPayload } from '../../types'; import { processMultiProviderModelList } from '../../utils/modelParse'; +import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels'; // ============================================================================ // Constants @@ -19,24 +20,24 @@ const MODELS_DEV_URL = 'https://models.dev/api.json'; // ============================================================================ interface ModelsDevModel { - id: string; - name?: string; - family?: string; - provider?: { npm?: string }; - release_date?: string; + [key: string]: any; attachment?: boolean; - reasoning?: boolean; - tool_call?: boolean; - structured_output?: boolean; - modalities?: { input?: string[]; output?: string[] }; - limit?: { context?: number; output?: number }; cost?: { input?: number; output?: number; cache_read?: number; cache_write?: number; }; - [key: string]: any; + family?: string; + id: string; + limit?: { context?: number; output?: number }; + modalities?: { input?: string[]; output?: string[] }; + name?: string; + provider?: { npm?: string }; + reasoning?: boolean; + release_date?: string; + structured_output?: boolean; + tool_call?: boolean; } interface ModelsDevData { @@ -272,14 +273,20 @@ export const sanitizeJsonSchema = (schema: any): any => { nested[k] = sanitizeJsonSchema(v); } result[key] = nested; - } else if ( - ['allOf', 'anyOf', 'oneOf', 'prefixItems'].includes(key) && - Array.isArray(value) - ) { + } else if (['allOf', 'anyOf', 'oneOf', 'prefixItems'].includes(key) && Array.isArray(value)) { result[key] = value.map(sanitizeJsonSchema); } else if ( - ['items', 'additionalProperties', 'not', 'contains', 'if', 'then', 'else', - 'unevaluatedItems', 'unevaluatedProperties'].includes(key) + [ + 'items', + 'additionalProperties', + 'not', + 'contains', + 'if', + 'then', + 'else', + 'unevaluatedItems', + 'unevaluatedProperties', + ].includes(key) ) { result[key] = sanitizeJsonSchema(value); } else { @@ -431,7 +438,7 @@ 
export const params = { ); } }, - routers: async (options) => { + routers: async (options, runtimeContext?: { model?: string }) => { const baseURL = options.baseURL || GO_BASE_URL; const anthropicModels = await getAnthropicModels(); @@ -443,6 +450,16 @@ export const params = { models: anthropicModels, options: { ...options, baseURL: stripV1(baseURL) }, }, + // DeepSeek models via the deepseek runtime (OpenAI-compatible endpoint) + { + apiType: 'deepseek', + models: resolveProviderRouteModels( + 'deepseek', + LOBE_DEFAULT_MODEL_LIST, + runtimeContext?.model, + ), + options: { ...options, baseURL, sdkType: 'openai' }, + }, // OpenAI-compatible fallback for all other models { apiType: 'openai', diff --git a/packages/model-runtime/src/providers/opencodeZen/index.test.ts b/packages/model-runtime/src/providers/opencodeZen/index.test.ts new file mode 100644 index 0000000000..fa1953fde6 --- /dev/null +++ b/packages/model-runtime/src/providers/opencodeZen/index.test.ts @@ -0,0 +1,38 @@ +// @vitest-environment node +import { describe, expect, it } from 'vitest'; + +import { params } from './index'; + +const resolveRouters = (model?: string) => + params.routers({ apiKey: 'test' }, { model }) as Array<{ + apiType: string; + models?: string[]; + options: { baseURL?: string; sdkType?: string }; + }>; + +describe('OpenCodeZen routers', () => { + it('should route DeepSeek-family models to the deepseek runtime', () => { + // The generic openai fallback sends response_format json_schema for + // structured output, which DeepSeek upstreams reject — the deepseek + // runtime simulates it via tool calling instead. 
+ const routers = resolveRouters('deepseek-v4-flash'); + const deepseekRouter = routers.find((router) => router.apiType === 'deepseek'); + + expect(deepseekRouter?.models).toContain('deepseek-v4-flash'); + expect(deepseekRouter?.options.sdkType).toBe('openai'); + }); + + it('should match gateway-specific DeepSeek ids missing from the static model list', () => { + const routers = resolveRouters('deepseek-v4-flash-free'); + const deepseekRouter = routers.find((router) => router.apiType === 'deepseek'); + + expect(deepseekRouter?.models).toContain('deepseek-v4-flash-free'); + }); + + it('should keep the openai catch-all as the last router', () => { + const routers = resolveRouters('some-unknown-model'); + + expect(routers.at(-1)?.apiType).toBe('openai'); + expect(routers.at(-1)?.models).toBeUndefined(); + }); +}); diff --git a/packages/model-runtime/src/providers/opencodeZen/index.ts b/packages/model-runtime/src/providers/opencodeZen/index.ts index 6165f5b31a..eb644482e4 100644 --- a/packages/model-runtime/src/providers/opencodeZen/index.ts +++ b/packages/model-runtime/src/providers/opencodeZen/index.ts @@ -4,6 +4,7 @@ import { responsesAPIModels } from '../../const/models'; import { createRouterRuntime } from '../../core/RouterRuntime'; import type { CreateRouterRuntimeOptions } from '../../core/RouterRuntime/createRuntime'; import { detectModelProvider, processMultiProviderModelList } from '../../utils/modelParse'; +import { resolveProviderRouteModels } from '../utils/resolveProviderRouteModels'; const ZEN_BASE_URL = 'https://opencode.ai/zen/v1'; @@ -30,7 +31,7 @@ export const params = { const modelList = modelsPage.data || []; return processMultiProviderModelList(modelList, 'opencodezen'); }, - routers: (options) => { + routers: (options, runtimeContext?: { model?: string }) => { const baseURL = options.baseURL || ZEN_BASE_URL; return [ // Anthropic router for Claude models @@ -54,6 +55,20 @@ export const params = { }, }, }, + // DeepSeek models via the deepseek 
runtime (OpenAI-compatible endpoint) + { + apiType: 'deepseek', + models: resolveProviderRouteModels( + 'deepseek', + LOBE_DEFAULT_MODEL_LIST, + runtimeContext?.model, + ), + options: { + ...options, + baseURL, + sdkType: 'openai', + }, + }, // OpenAI-compatible fallback for all other models (Gemini, GLM, Kimi, MiniMax, Qwen, etc.) { apiType: 'openai', diff --git a/packages/model-runtime/src/providers/utils/resolveProviderRouteModels.ts b/packages/model-runtime/src/providers/utils/resolveProviderRouteModels.ts new file mode 100644 index 0000000000..2582f4d025 --- /dev/null +++ b/packages/model-runtime/src/providers/utils/resolveProviderRouteModels.ts @@ -0,0 +1,17 @@ +import { detectModelProvider } from '../../utils/modelParse'; + +type ProviderKey = ReturnType<typeof detectModelProvider>; + +export const resolveProviderRouteModels = ( + provider: ProviderKey, + modelList: ReadonlyArray<{ id: string }>, + requestedModel?: string, +): string[] => { + const providerModels = modelList + .map((model) => model.id) + .filter((id) => detectModelProvider(id) === provider); + + if (!requestedModel || detectModelProvider(requestedModel) !== provider) return providerModels; + + return [...new Set([...providerModels, requestedModel])]; +}; diff --git a/packages/model-runtime/src/utils/modelParse.ts b/packages/model-runtime/src/utils/modelParse.ts index c20fdada41..c9923d24f2 100644 --- a/packages/model-runtime/src/utils/modelParse.ts +++ b/packages/model-runtime/src/utils/modelParse.ts @@ -182,6 +182,13 @@ export const MODEL_OWNER_DETECTION_CONFIG = { zhipu: ['glm'], } as const; +export const isDeepSeekV4FamilyModel = (model: string | undefined): boolean => + typeof model === 'string' && model.toLowerCase().includes('deepseek-v4'); + +export const isDeepSeekThinkingEligibleModel = (model: string | undefined): boolean => + typeof model === 'string' && + (model.toLowerCase().includes('deepseek-reasoner') || isDeepSeekV4FamilyModel(model)); + // Image model keyword configuration export const
IMAGE_MODEL_KEYWORDS = [ 'dall-e',