diff --git a/eslint.config.mjs b/eslint.config.mjs index 15feeacc5a..55ea9a87d6 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -105,6 +105,19 @@ export default eslint( 'perfectionist/sort-objects': 0, }, }, + // model-bank aiModels - enforce English-only descriptions + { + files: ['packages/model-bank/src/aiModels/**/*'], + rules: { + 'no-restricted-syntax': [ + 'error', + { + message: 'Chinese characters are not allowed in aiModels files. Use English instead.', + selector: 'Literal[value=/[\\u4e00-\\u9fff]/]', + }, + ], + }, + }, // CLI scripts { files: ['scripts/**/*'], diff --git a/packages/model-bank/src/aiModels/ai360.ts b/packages/model-bank/src/aiModels/ai360.ts index f64e479d65..5ac4e7de1b 100644 --- a/packages/model-bank/src/aiModels/ai360.ts +++ b/packages/model-bank/src/aiModels/ai360.ts @@ -292,7 +292,8 @@ const ai360ChatModels: AIChatModelCard[] = [ vision: true, }, contextWindowTokens: 32_000, - description: '兼顾生成质量与响应速度,适合作为通用生产级模型', + description: + 'Balances generation quality and response speed, suitable as a general-purpose production-grade model', displayName: 'Doubao Seed 2.0 Lite', id: 'volcengine/doubao-seed-2-0-lite', pricing: { @@ -311,7 +312,7 @@ const ai360ChatModels: AIChatModelCard[] = [ vision: true, }, contextWindowTokens: 32_000, - description: '指向 doubao-seed-2-0-mini 最新版', + description: 'Points to the latest version of doubao-seed-2-0-mini', displayName: 'Doubao Seed 2.0 Mini', id: 'volcengine/doubao-seed-2-0-mini', pricing: { @@ -330,7 +331,7 @@ const ai360ChatModels: AIChatModelCard[] = [ vision: true, }, contextWindowTokens: 32_000, - description: '指向 doubao-seed-2-0-pro 最新版', + description: 'Points to the latest version of doubao-seed-2-0-pro', displayName: 'Doubao Seed 2.0 Pro', id: 'volcengine/doubao-seed-2-0-pro', pricing: { @@ -350,7 +351,7 @@ const ai360ChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 32_000, description: - 'Doubao-Seed-2.0-Code 面向企业级编程需求优化,在 Seed 2.0 优秀的 Agent、VLM 能力基础上,特别增强了代码能力,不仅前端能力表现出众,也对企业常见的多语言编码需求做了特别优化,适合接入各种 AI 编程工具使用。', + 'Doubao-Seed-2.0-Code is optimized for enterprise-level programming needs. Built on the excellent Agent and VLM capabilities of Seed 2.0, it specially enhances coding abilities with outstanding frontend performance and targeted optimization for common enterprise multi-language coding requirements, making it ideal for integration with various AI programming tools.', displayName: 'Doubao Seed 2.0 Code', id: 'volcengine/doubao-seed-2-0-code', pricing: { diff --git a/packages/model-bank/src/aiModels/qwen.ts b/packages/model-bank/src/aiModels/qwen.ts index 66dea5b20e..7701c51ae4 100644 --- a/packages/model-bank/src/aiModels/qwen.ts +++ b/packages/model-bank/src/aiModels/qwen.ts @@ -15,7 +15,6 @@ const qwenChatModels: AIChatModelCard[] = [ displayName: 'Kimi K2.5', id: 'kimi-k2.5', maxOutput: 32_768, - organization: 'Qwen', pricing: { currency: 'CNY', units: [ @@ -2573,7 +2572,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Editing Model supports multi-image input and multi-image output, enabling precise in-image text editing, object addition, removal, or relocation, subject action modification, image style transfer, and enhanced visual detail.', displayName: 'Qwen Image Edit Max', - enabled: true, id: 'qwen-image-edit-max', organization: 'Qwen', parameters: { @@ -2598,7 +2596,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Editing Model supports multi-image input and multi-image output, enabling precise in-image text editing, object addition, removal, or relocation, subject action modification, image style transfer, and enhanced visual detail.', displayName: 'Qwen Image Edit Plus', - enabled: true, id: 'qwen-image-edit-plus', organization: 'Qwen', parameters: { @@ -2623,7 +2620,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Edit is an image-to-image model that edits images based on input images and text prompts, enabling precise adjustments and creative transformations.', displayName: 'Qwen Image Edit', - enabled: true, id: 'qwen-image-edit', organization: 'Qwen', parameters: { @@ -2646,7 +2642,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Generation Model (Max series) delivers enhanced realism and visual naturalness compared with the Plus series, effectively reducing AI-generated artifacts, and demonstrating outstanding performance in human appearance, texture details, and text rendering.', displayName: 'Qwen Image Max', - enabled: true, id: 'qwen-image-max', organization: 'Qwen', parameters: { @@ -2670,7 +2665,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'It supports a wide range of artistic styles and is particularly proficient at rendering complex text within images, enabling integrated image–text layout design.', displayName: 'Qwen Image Plus', - enabled: true, id: 'qwen-image-plus', organization: 'Qwen', parameters: { @@ -2694,7 +2688,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen-Image is a general image generation model supporting multiple art styles and strong complex text rendering, especially Chinese and English. It supports multi-line layouts, paragraph-level text, and fine detail for complex text-image layouts.', displayName: 'Qwen Image', - enabled: true, id: 'qwen-image', organization: 'Qwen', parameters: { @@ -2717,7 +2710,6 @@ const qwenImageModels: AIImageModelCard[] = [ { description: 'Wanxiang 2.6 Image supports image editing and mixed image–text layout output.', displayName: 'Wanxiang2.6 Image', - enabled: true, id: 'wan2.6-image', organization: 'Qwen', parameters: { @@ -2742,7 +2734,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Wanxiang 2.6 T2I supports flexible selection of image dimensions within total pixel area and aspect ratio constraints (same as Wanxiang 2.5).', displayName: 'Wanxiang2.6 T2I', - enabled: true, id: 'wan2.6-t2i', organization: 'Qwen', parameters: { @@ -2829,7 +2820,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Wanxiang 2.2 Plus is the latest model with upgrades in creativity, stability, and realism, producing richer details.', displayName: 'Wanxiang2.2 T2I Plus', - enabled: true, id: 'wan2.2-t2i-plus', organization: 'Qwen', parameters: { @@ -2934,7 +2924,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'FLUX.1 [schnell] is the most advanced open-source few-step model, surpassing similar competitors and even strong non-distilled models like Midjourney v6.0 and DALL-E 3 (HD). It is finely tuned to preserve pretraining diversity, significantly improving visual quality, instruction following, size/aspect variation, font handling, and output diversity.', displayName: 'FLUX.1 [schnell]', - enabled: true, id: 'flux-schnell', organization: 'Qwen', parameters: { @@ -2959,7 +2948,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'FLUX.1 [dev] is an open-weights distilled model for non-commercial use. It keeps near-pro image quality and instruction following while running more efficiently, using resources better than same-size standard models.', displayName: 'FLUX.1 [dev]', - enabled: true, id: 'flux-dev', organization: 'Qwen', parameters: { @@ -2982,9 +2970,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, { description: - 'FLUX.1-merged combines the deep features explored in "DEV" with the high-speed advantages of "Schnell", extending performance limits and broadening applications.', - displayName: 'FLUX.1-merged', - enabled: true, + 'FLUX.1 [merged] combines the deep features explored in "DEV" with the high-speed advantages of "Schnell", extending performance limits and broadening applications.', + displayName: 'FLUX.1 [merged]', id: 'flux-merged', organization: 'Qwen', parameters: { diff --git a/packages/model-bank/src/aiModels/siliconcloud.ts b/packages/model-bank/src/aiModels/siliconcloud.ts index 94cb393dec..b44cde1007 100644 --- a/packages/model-bank/src/aiModels/siliconcloud.ts +++ b/packages/model-bank/src/aiModels/siliconcloud.ts @@ -90,7 +90,7 @@ const siliconcloudChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 131_072, description: - 'GLM-4.6V 在视觉理解精度上达到同参数规模 SOTA,并首次在模型架构中将 Function Call(工具调用)能力原生融入视觉模型,打通从「视觉感知」到「可执行行动(Action)」的链路,为真实业务场景中的多模态 Agent 提供统一的技术底座。', + 'GLM-4.6V achieves state-of-the-art visual understanding accuracy at its parameter scale, and for the first time natively integrates Function Call capability into the visual model architecture, bridging the path from "visual perception" to "executable actions", providing a unified technical foundation for multimodal Agents in real-world business scenarios.', displayName: 'GLM-4.6V', id: 'zai-org/GLM-4.6V', pricing: { @@ -1977,7 +1977,6 @@ const siliconcloudImageModels: AIImageModelCard[] = [ description: 'Kolors is a large-scale latent-diffusion text-to-image model by the Kuaishou Kolors team. Trained on billions of text-image pairs, it excels in visual quality, complex semantic accuracy, and Chinese/English text rendering, with strong Chinese content understanding and generation.', displayName: 'Kolors', - enabled: true, id: 'Kwai-Kolors/Kolors', parameters: { prompt: { @@ -1996,7 +1995,6 @@ const siliconcloudImageModels: AIImageModelCard[] = [ description: 'Qwen-Image is a 20B-parameter image generation foundation model from the Qwen team. It makes major gains in complex text rendering and precise image editing, especially for high-fidelity Chinese/English text. It supports multi-line and paragraph layouts while keeping typography coherent. Beyond text rendering, it supports a wide range of styles from photorealistic to anime, and advanced editing like style transfer, object add/remove, detail enhancement, text editing, and pose control, aiming to be a comprehensive visual creation foundation.', displayName: 'Qwen-Image', - enabled: true, id: 'Qwen/Qwen-Image', parameters: { prompt: { @@ -2019,7 +2017,6 @@ const siliconcloudImageModels: AIImageModelCard[] = [ description: 'Qwen-Image-Edit-2509 is the latest editing version of Qwen-Image from the Qwen team. Built on the 20B Qwen-Image model, it extends strong text rendering into image editing for precise text edits. It uses a dual-control architecture, sending inputs to Qwen2.5-VL for semantic control and a VAE encoder for appearance control, enabling both semantic- and appearance-level editing. It supports local edits (add/remove/modify) and higher-level semantic edits like IP creation and style transfer while preserving semantics. It achieves SOTA results on multiple benchmarks.', displayName: 'Qwen-Image-Edit (2509)', - enabled: true, id: 'Qwen/Qwen-Image-Edit-2509', parameters: { imageUrls: { diff --git a/packages/model-bank/src/aiModels/vercelaigateway.ts b/packages/model-bank/src/aiModels/vercelaigateway.ts index d24d72d2f2..3df12069d8 100644 --- a/packages/model-bank/src/aiModels/vercelaigateway.ts +++ b/packages/model-bank/src/aiModels/vercelaigateway.ts @@ -565,40 +565,6 @@ const vercelAIGatewayChatModels: AIChatModelCard[] = [ }, type: 'chat', }, - { - abilities: { - functionCall: true, - }, - contextWindowTokens: 128_000, - description: - 'Command R is optimized for chat and long-context tasks, positioned as a “scalable” model that balances high performance and accuracy so companies can move beyond prototypes into production.', - displayName: 'Command R', - id: 'cohere/command-r', - pricing: { - units: [ - { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' }, - { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, - ], - }, - type: 'chat', - }, - { - abilities: { - functionCall: true, - }, - contextWindowTokens: 128_000, - description: - 'Command R+ is Cohere’s latest LLM optimized for chat and long context, aiming for exceptional performance so companies can move past prototypes into production.', - displayName: 'Command R+', - id: 'cohere/command-r-plus', - pricing: { - units: [ - { name: 'textInput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' }, - { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' }, - ], - }, - type: 'chat', - }, { abilities: { functionCall: true, @@ -633,19 +599,6 @@ const vercelAIGatewayChatModels: AIChatModelCard[] = [ }, type: 'chat', }, - { - contextWindowTokens: 128_000, - description: 'DeepSeek V3.1 Base is an improved version of the DeepSeek V3 model.', - displayName: 'DeepSeek V3.1 Base', - id: 'deepseek/deepseek-v3.1-base', - pricing: { - units: [ - { name: 'textInput', rate: 0.1999, strategy: 'fixed', unit: 'millionTokens' }, - { name: 'textOutput', rate: 0.8001, strategy: 'fixed', unit: 'millionTokens' }, - ], - }, - type: 'chat', - }, { abilities: { functionCall: true, @@ -720,23 +673,6 @@ const vercelAIGatewayChatModels: AIChatModelCard[] = [ }, type: 'chat', }, - { - abilities: { - functionCall: true, - }, - contextWindowTokens: 8192, - description: - 'A 9B open-source model fine-tuned by Google for chat, served by Groq on LPU hardware for fast, efficient inference.', - displayName: 'Gemma 2 9B IT', - id: 'google/gemma-2-9b', - pricing: { - units: [ - { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, - { name: 'textOutput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, - ], - }, - type: 'chat', - }, { abilities: { functionCall: true, @@ -863,40 +799,6 @@ const vercelAIGatewayChatModels: AIChatModelCard[] = [ }, type: 'chat', }, - { - abilities: { - functionCall: true, - }, - contextWindowTokens: 8192, - description: - 'A 70B open-source model fine-tuned by Meta for instruction following, served by Groq on LPU hardware for fast, efficient inference.', - displayName: 'Llama 3 70B Instruct', - id: 'meta/llama-3-70b', - pricing: { - units: [ - { name: 'textInput', rate: 0.59, strategy: 'fixed', unit: 'millionTokens' }, - { name: 'textOutput', rate: 0.79, strategy: 'fixed', unit: 'millionTokens' }, - ], - }, - type: 'chat', - }, - { - abilities: { - functionCall: true, - }, - contextWindowTokens: 8192, - description: - 'An 8B open-source model fine-tuned by Meta for instruction following, served by Groq on LPU hardware for fast, efficient inference.', - displayName: 'Llama 3 8B Instruct', - id: 'meta/llama-3-8b', - pricing: { - units: [ - { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' }, - { name: 'textOutput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' }, - ], - }, - type: 'chat', - }, { abilities: { functionCall: true, diff --git a/packages/model-bank/src/aiModels/zhipu.ts b/packages/model-bank/src/aiModels/zhipu.ts index 231d7e5c0f..12e366e6f2 100644 --- a/packages/model-bank/src/aiModels/zhipu.ts +++ b/packages/model-bank/src/aiModels/zhipu.ts @@ -75,7 +75,6 @@ const zhipuChatModels: AIChatModelCard[] = [ description: "GLM-4.7 is Zhipu's latest flagship model, enhanced for Agentic Coding scenarios with improved coding capabilities, long-term task planning, and tool collaboration. It achieves leading performance among open-source models on multiple public benchmarks. General capabilities are improved with more concise and natural responses and more immersive writing. For complex agent tasks, instruction following during tool calls is stronger, and the frontend aesthetics and long-term task completion efficiency of Artifacts and Agentic Coding are further enhanced.", displayName: 'GLM-4.7', - enabled: true, id: 'glm-4.7', maxOutput: 131_072, pricing: { @@ -167,7 +166,6 @@ const zhipuChatModels: AIChatModelCard[] = [ description: 'GLM-4.7-Flash, as a 30B-level SOTA model, offers a new choice that balances performance and efficiency. It enhances coding capabilities, long-term task planning, and tool collaboration for Agentic Coding scenarios, achieving leading performance among open-source models of the same size in multiple current benchmark leaderboards. In executing complex intelligent agent tasks, it has stronger instruction compliance during tool calls, and further improves the aesthetics of front-end and the efficiency of long-term task completion for Artifacts and Agentic Coding.', displayName: 'GLM-4.7-FlashX', - enabled: true, id: 'glm-4.7-flashx', maxOutput: 131_072, pricing: { @@ -258,7 +256,6 @@ const zhipuChatModels: AIChatModelCard[] = [ description: 'The GLM-4.6V series represents a major iteration of the GLM family in the multimodal direction, comprising GLM-4.6V (flagship), GLM-4.6V-FlashX (lightweight and high-speed), and GLM-4.6V-Flash (fully free). It extends the training-time context window to 128k tokens, achieves state-of-the-art visual understanding accuracy at comparable parameter scales, and, for the first time, natively integrates Function Call (tool invocation) capabilities into the visual model architecture. This unifies the pipeline from “visual perception” to “executable actions,” providing a consistent technical foundation for multimodal agents in real-world production scenarios.', displayName: 'GLM-4.6V-FlashX', - enabled: true, id: 'glm-4.6v-flashx', maxOutput: 32_768, pricing: { @@ -320,7 +317,6 @@ const zhipuChatModels: AIChatModelCard[] = [ description: 'The GLM-4.6V series represents a major iteration of the GLM family in the multimodal direction, comprising GLM-4.6V (flagship), GLM-4.6V-FlashX (lightweight and high-speed), and GLM-4.6V-Flash (fully free). It extends the training-time context window to 128k tokens, achieves state-of-the-art visual understanding accuracy at comparable parameter scales, and, for the first time, natively integrates Function Call (tool invocation) capabilities into the visual model architecture. This unifies the pipeline from “visual perception” to “executable actions,” providing a consistent technical foundation for multimodal agents in real-world production scenarios.', displayName: 'GLM-4.6V-Flash', - enabled: true, id: 'glm-4.6v-flash', maxOutput: 32_768, pricing: { @@ -1122,7 +1118,6 @@ const zhipuImageModels: AIImageModelCard[] = [ description: 'GLM-Image is Zhipu’s new flagship image generation model. The model was trained end-to-end on domestically produced chips and adopts an original hybrid architecture that combines autoregressive modeling with a diffusion decoder. This design enables strong global instruction understanding alongside fine-grained local detail rendering, overcoming long-standing challenges in generating knowledge-dense content such as posters, presentations, and educational diagrams. It represents an important exploration toward a new generation of “cognitive generative” technology paradigms, exemplified by Nano Banana Pro.', displayName: 'GLM-Image', - enabled: true, id: 'glm-image', parameters: { prompt: { @@ -1152,7 +1147,6 @@ const zhipuImageModels: AIImageModelCard[] = [ description: 'CogView-4 is Zhipu’s first open-source text-to-image model that can generate Chinese characters. It improves semantic understanding, image quality, and Chinese/English text rendering, supports arbitrary-length bilingual prompts, and can generate images at any resolution within specified ranges.', displayName: 'CogView-4', - enabled: true, id: 'cogview-4', parameters: { prompt: { @@ -1174,7 +1168,6 @@ const zhipuImageModels: AIImageModelCard[] = [ description: 'CogView-3-Flash is a free image generation model launched by Zhipu. It generates images that align with user instructions while achieving higher aesthetic quality scores. CogView-3-Flash is primarily used in fields such as artistic creation, design reference, game development, and virtual reality, helping users rapidly convert text descriptions into images.', displayName: 'CogView-3-Flash', - enabled: true, id: 'cogview-3-flash', parameters: { prompt: { diff --git a/packages/model-runtime/src/providers/qwen/index.ts b/packages/model-runtime/src/providers/qwen/index.ts index 1162b7ab1e..0cae27c271 100644 --- a/packages/model-runtime/src/providers/qwen/index.ts +++ b/packages/model-runtime/src/providers/qwen/index.ts @@ -47,25 +47,18 @@ export const LobeQwenAI = createOpenAICompatibleRuntime({ ...rest, ...(model.includes('-thinking') ? { - enable_thinking: true, - thinking_budget: - thinking?.budget_tokens === 0 ? 0 : thinking?.budget_tokens || undefined, - } - : [ - 'qwen3', - 'qwen-turbo', - 'qwen-plus', - 'qwen-flash', - 'deepseek-v3.1', - 'deepseek-v3.2', - 'glm', - 'kimi-k2.5', - ].some((keyword) => model.toLowerCase().includes(keyword)) - ? { - enable_thinking: thinking !== undefined ? thinking.type === 'enabled' : false, + enable_thinking: true, thinking_budget: thinking?.budget_tokens === 0 ? 0 : thinking?.budget_tokens || undefined, } + : thinking + ? { + ...(thinking.type !== undefined && { + enable_thinking: thinking.type === 'enabled', + }), + thinking_budget: + thinking?.budget_tokens === 0 ? 0 : thinking?.budget_tokens || undefined, + } : {}), frequency_penalty: undefined, model, diff --git a/packages/model-runtime/src/providers/siliconcloud/index.test.ts b/packages/model-runtime/src/providers/siliconcloud/index.test.ts index 5877103f8e..30a9ccc098 100644 --- a/packages/model-runtime/src/providers/siliconcloud/index.test.ts +++ b/packages/model-runtime/src/providers/siliconcloud/index.test.ts @@ -1,5 +1,6 @@ // @vitest-environment node import { ModelProvider } from 'model-bank'; +import OpenAI from 'openai'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import { testProvider } from '../../providerTestUtils'; @@ -113,7 +114,7 @@ describe('LobeSiliconCloudAI - custom features', () => { expect(calledPayload.thinking_budget).toBe(32768); }); - it('should set thinking_budget to 1 when budget_tokens is 0', async () => { + it('should set thinking_budget to 128 (minimum) when budget_tokens is 0', async () => { await instance.chat({ messages: [{ content: 'Hello', role: 'user' }], model: 'THUDM/GLM-4.5', @@ -124,10 +125,10 @@ describe('LobeSiliconCloudAI - custom features', () => { }); const calledPayload = (instance['client'].chat.completions.create as any).mock.calls[0][0]; - expect(calledPayload.thinking_budget).toBe(1); + expect(calledPayload.thinking_budget).toBe(128); }); - it('should not add enable_thinking for non-hybrid models', async () => { + it('should set enable_thinking when type is provided', async () => { await instance.chat({ messages: [{ content: 'Hello', role: 'user' }], model: 'Qwen/Qwen2.5-7B-Instruct', @@ -138,9 +139,23 @@ describe('LobeSiliconCloudAI - custom features', () => { }); const calledPayload = (instance['client'].chat.completions.create as any).mock.calls[0][0]; - expect(calledPayload.enable_thinking).toBeUndefined(); + expect(calledPayload.enable_thinking).toBe(true); expect(calledPayload.thinking_budget).toBe(1000); }); + + it('should only set thinking_budget when type is not provided', async () => { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'Qwen/Qwen3-8B', + thinking: { + budget_tokens: 1500, + }, + }); + + const calledPayload = (instance['client'].chat.completions.create as any).mock.calls[0][0]; + expect(calledPayload.enable_thinking).toBeUndefined(); + expect(calledPayload.thinking_budget).toBe(1500); + }); }); describe('handleError', () => { @@ -171,7 +186,58 @@ describe('LobeSiliconCloudAI - custom features', () => { }); } catch (e: any) { expect(e.errorType).toBe(AgentRuntimeErrorType.ProviderBizError); - expect(e.message).toContain('Please check if the API Key balance is sufficient'); + expect(e.message).toBeTruthy(); + } + }); + + it('should extract error code and message from SiliconCloud API error response', async () => { + const error = { + error: { + code: 20015, + message: 'Value error, current model does not support parameter `enable_thinking`.', + data: null, + }, + status: 400, + } as any; + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(error); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'Qwen/Qwen2.5-7B-Instruct', + }); + } catch (e: any) { + expect(e.error?.code).toBe(20015); + expect(e.error?.message).toContain('does not support parameter `enable_thinking`'); + } + }); + + it('should handle APIError with error body containing code and message', async () => { + // Create an APIError with the error structure that OpenAI library creates + const errorInfo = { + error: { + code: 20015, + message: 'Value error, current model does not support parameter `enable_thinking`.', + }, + }; + const apiError = new OpenAI.APIError(400, errorInfo, 'Request failed', { + status: 400, + } as any); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'Qwen/Qwen2.5-7B-Instruct', + }); + } catch (e: any) { + // The error should have the code and message extracted + expect(e.error?.code || e.error?.error?.code).toBe(20015); + expect(e.message || e.error?.message).toContain( + 'does not support parameter `enable_thinking`', + ); } }); }); diff --git a/packages/model-runtime/src/providers/siliconcloud/index.ts b/packages/model-runtime/src/providers/siliconcloud/index.ts index 38f3013f67..b5eb9302d6 100644 --- a/packages/model-runtime/src/providers/siliconcloud/index.ts +++ b/packages/model-runtime/src/providers/siliconcloud/index.ts @@ -11,36 +11,86 @@ export interface SiliconCloudModelCard { id: string; } +const getByteLength = (value: string): number => { + return new TextEncoder().encode(value).length; +}; + +const defaultFetch = globalThis.fetch?.bind(globalThis); + +const siliconFetch: typeof fetch = async (input, init) => { + if (!defaultFetch) return fetch(input, init); + + const response = await defaultFetch(input, init); + + if (!response || response.status < 400) return response; + + const contentType = response.headers.get('content-type') || ''; + if (!contentType.includes('application/json')) return response; + + try { + const cloned = response.clone(); + const data = await cloned.json(); + + if (data && typeof data === 'object' && !('error' in data)) { + const headers = new Headers(response.headers); + headers.delete('content-length'); + + const body = JSON.stringify({ error: data }); + headers.set('content-length', getByteLength(body).toString()); + + return new Response(body, { + headers, + status: response.status, + statusText: response.statusText, + }); + } + } catch { + // ignore JSON parse errors and fall back to original response + } + + return response; +}; + export const params = { baseURL: 'https://api.siliconflow.cn/v1', chatCompletion: { handleError: (error: any): Omit | undefined => { - let errorResponse: Response | undefined; - if (error instanceof Response) { - errorResponse = error; - } else if ('status' in (error as any)) { - errorResponse = error as Response; - } - if (errorResponse) { - if (errorResponse.status === 401) { - return { - error: errorResponse.status, - errorType: AgentRuntimeErrorType.InvalidProviderAPIKey, - }; - } + const status = error?.status || (error instanceof Response && error.status); - if (errorResponse.status === 403) { + if (status === 401) { + return { + error: status, + errorType: AgentRuntimeErrorType.InvalidProviderAPIKey, + }; + } + + if (status === 403) { + return { + error: status, + errorType: AgentRuntimeErrorType.ProviderBizError, + message: + '请检查 API Key 余额是否充足,或者是否在用未实名的 API Key 访问需要实名的模型。', + }; + } + + if (error?.error || error?.code || error?.message) { + // Prioritize nested error structure, then fall back to top-level fields + const errorData = error?.error?.error || error?.error || error; + const { code, message, data } = errorData; + + if (code || message || data) { return { - error: errorResponse.status, + error: { + ...(code !== undefined ? { code } : {}), + ...(typeof data !== 'undefined' ? { data } : {}), + ...(message !== undefined ? { message } : {}), + }, errorType: AgentRuntimeErrorType.ProviderBizError, - message: - 'Please check if the API Key balance is sufficient, or if you are using an unverified API Key to access models that require verification.', }; } } - return { - error, - }; + + return { error }; }, handlePayload: (payload) => { const { max_tokens, model, thinking, ...rest } = payload; @@ -55,23 +105,20 @@ export const params = { }; if (thinking) { - // Only some models support specifying enable_thinking, while other slow-thinking models only support adjusting thinking budget - const hybridThinkingModels = [ - /GLM-4\.5(?!.*Air$)/, // GLM-4.5 and GLM-4.5V (excluding GLM-4.5 Air) - /Qwen3-(?:\d+B|\d+B-A\d+B)$/, // Qwen3-8B, Qwen3-14B, Qwen3-32B, Qwen3-30B-A3B, Qwen3-235B-A22B - /DeepSeek-V3\.1/, - /Hunyuan-A13B-Instruct/, - ]; - if (hybridThinkingModels.some((regexp) => regexp.test(model))) { + // Only set enable_thinking if type is explicitly provided + if (typeof thinking.type !== 'undefined') { result.enable_thinking = thinking.type === 'enabled'; } if (typeof thinkingBudget !== 'undefined') { - result.thinking_budget = Math.min(Math.max(thinkingBudget, 1), 32_768); + result.thinking_budget = Math.min(Math.max(thinkingBudget, 128), 32_768); } } return result; }, }, + constructorOptions: { + fetch: siliconFetch, + }, createImage: createSiliconCloudImage, debug: { chatCompletion: () => process.env.DEBUG_SILICONCLOUD_CHAT_COMPLETION === '1', diff --git a/packages/model-runtime/src/providers/wenxin/index.ts b/packages/model-runtime/src/providers/wenxin/index.ts index 274e6d7805..81c8ae86e8 100644 --- a/packages/model-runtime/src/providers/wenxin/index.ts +++ b/packages/model-runtime/src/providers/wenxin/index.ts @@ -26,7 +26,9 @@ export const params = { }, }), ...(thinking && { - enable_thinking: thinking.type !== 'disabled', + enable_thinking: thinking.type + ? thinking.type !== 'disabled' + : undefined, ...(thinking?.budget_tokens !== 0 && { thinking_budget: Math.min(Math.max(thinking?.budget_tokens, 100), 16_384), }), diff --git a/packages/model-runtime/src/types/chat.ts b/packages/model-runtime/src/types/chat.ts index 0a89e76edf..7dc9862f79 100644 --- a/packages/model-runtime/src/types/chat.ts +++ b/packages/model-runtime/src/types/chat.ts @@ -7,13 +7,13 @@ export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; export type ChatResponseFormat = | { type: 'json_object' } | { - json_schema: { - name: string; - schema: Record; - strict?: boolean; - }; - type: 'json_schema'; + json_schema: { + name: string; + schema: Record; + strict?: boolean; }; + type: 'json_schema'; + }; interface UserMessageContentPartThinking { signature: string; @@ -144,7 +144,7 @@ export interface ChatStreamPayload { */ thinking?: { budget_tokens: number; - type: 'enabled' | 'disabled' | 'adaptive'; + type?: 'enabled' | 'disabled' | 'adaptive'; }; thinkingBudget?: number; /**