🐛 fix: Add ExtendParamsTypeSchema for enhanced model settings (#11437)

* 🐛 fix: Update reasoning handling in OpenRouter and VercelAIGateway to include thinkingLevel and adjust gpt-5 reasoning parameters

* 🐛 fix: Add ExtendParamsTypeSchema and AiModelSettingsSchema for enhanced model settings

* 🐛 fix: Add ModelSearchImplementTypeSchema and update AiModelSettingsSchema for enhanced model configuration

* delete gemini-2.5-flash-image-preview model

* Add GLM-4.7 model to volcengine and remove deprecated GLM-4 32B 0414 model from wenxin

*  feat: 添加 MiniMax-M2.1 和 GLM-4.7-Flash 模型到模型库

*  feat: 更新 Zhipu 模型库,添加 GLM-4.7-FlashX 模型并移除 GLM-4.5-Flash 模型

* test: add extendParams mapping for gpt-5.x reasoning models in VercelAIGatewayAI

* remove deprecated DeepSeek R1 model from nvidiaChatModels

* i18n: 更新 MiniMax-M2.1 模型描述为英文
This commit is contained in:
sxjeru
2026-01-29 11:41:38 +08:00
committed by GitHub
parent 8d00af4905
commit f58c980f3a
14 changed files with 421 additions and 170 deletions
+2 -22
View File
@@ -1,27 +1,6 @@
import { AIChatModelCard } from '../types/aiModel';
const cerebrasModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
reasoning: true,
structuredOutput: true,
},
contextWindowTokens: 131_072,
description:
'Performs well on coding and reasoning tasks, supports streaming and tool calls, and fits agentic coding and complex reasoning.',
displayName: 'GLM-4.6',
enabled: true,
id: 'zai-glm-4.6',
maxOutput: 40_000,
pricing: {
units: [
{ name: 'textInput', rate: 2.25, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.75, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -96,7 +75,8 @@ const cerebrasModels: AIChatModelCard[] = [
functionCall: true,
},
contextWindowTokens: 32_768,
description: 'Llama 3.1 8B: a small, low-latency Llama variant for lightweight online inference and chat.',
description:
'Llama 3.1 8B: a small, low-latency Llama variant for lightweight online inference and chat.',
displayName: 'Llama 3.1 8B',
id: 'llama3.1-8b',
pricing: {
+1 -44
View File
@@ -1,4 +1,4 @@
import { CHAT_MODEL_IMAGE_GENERATION_PARAMS, ModelParamsSchema } from '../standard-parameters';
import { ModelParamsSchema } from '../standard-parameters';
import { AIChatModelCard, AIImageModelCard } from '../types';
/**
@@ -485,32 +485,6 @@ const googleChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
imageOutput: true,
vision: true,
},
contextWindowTokens: 32_768 + 8192,
description:
'Nano Banana is Googles newest, fastest, and most efficient native multimodal model, enabling conversational image generation and editing.',
displayName: 'Nano Banana (Preview)',
id: 'gemini-2.5-flash-image-preview',
maxOutput: 8192,
pricing: {
approximatePricePerImage: 0.039,
units: [
{ name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'imageInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'imageOutput', rate: 30, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-08-26',
settings: {
extendParams: ['imageAspectRatio'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -967,23 +941,6 @@ const googleImageModels: AIImageModelCard[] = [
],
},
},
{
displayName: 'Nano Banana (Preview)',
id: 'gemini-2.5-flash-image-preview:image',
type: 'image',
description:
'Nano Banana is Googles newest, fastest, and most efficient native multimodal model, enabling conversational image generation and editing.',
releasedAt: '2025-08-26',
parameters: CHAT_MODEL_IMAGE_GENERATION_PARAMS,
pricing: {
approximatePricePerImage: 0.039,
units: [
{ name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'imageOutput', rate: 30, strategy: 'fixed', unit: 'millionTokens' },
],
},
},
{
displayName: 'Imagen 4',
id: 'imagen-4.0-generate-001',
+12 -16
View File
@@ -79,7 +79,8 @@ const nvidiaChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 128_000,
description: 'A frontier vision-language model that excels at high-quality reasoning from images.',
description:
'A frontier vision-language model that excels at high-quality reasoning from images.',
displayName: 'Llama 3.2 11B Vision Instruct',
id: 'meta/llama-3.2-11b-vision-instruct',
type: 'chat',
@@ -89,7 +90,8 @@ const nvidiaChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 128_000,
description: 'A frontier vision-language model that excels at high-quality reasoning from images.',
description:
'A frontier vision-language model that excels at high-quality reasoning from images.',
displayName: 'Llama 3.2 90B Vision Instruct',
id: 'meta/llama-3.2-90b-vision-instruct',
type: 'chat',
@@ -151,41 +153,35 @@ const nvidiaChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 8192,
description: 'A frontier text generation model strong in understanding, transformation, and code generation.',
description:
'A frontier text generation model strong in understanding, transformation, and code generation.',
displayName: 'Gemma 2 9B Instruct',
id: 'google/gemma-2-9b-it',
type: 'chat',
},
{
contextWindowTokens: 8192,
description: 'A frontier text generation model strong in understanding, transformation, and code generation.',
description:
'A frontier text generation model strong in understanding, transformation, and code generation.',
displayName: 'Gemma 2 27B Instruct',
id: 'google/gemma-2-27b-it',
type: 'chat',
},
{
abilities: {
reasoning: true,
},
contextWindowTokens: 128_000,
description: 'A state-of-the-art efficient LLM strong in reasoning, math, and programming.',
displayName: 'DeepSeek R1',
id: 'deepseek-ai/deepseek-r1',
type: 'chat',
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 32_768,
description: 'A bilingual LLM for Chinese and English across language, coding, math, and reasoning.',
description:
'A bilingual LLM for Chinese and English across language, coding, math, and reasoning.',
displayName: 'Qwen2.5 7B Instruct',
id: 'qwen/qwen2.5-7b-instruct',
type: 'chat',
},
{
contextWindowTokens: 32_768,
description: 'A strong mid-sized code model with 32K context, excelling at multilingual programming.',
description:
'A strong mid-sized code model with 32K context, excelling at multilingual programming.',
displayName: 'Qwen2.5 Coder 7B Instruct',
id: 'qwen/qwen2.5-coder-7b-instruct',
type: 'chat',
@@ -2,6 +2,26 @@ import { AIChatModelCard, AIImageModelCard } from '../types/aiModel';
// https://siliconflow.cn/zh-cn/models
const siliconcloudChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 192_000,
description:
'MiniMax-M2.1 is an open-source large language model optimized for agent capabilities, excelling in programming, tool usage, instruction following, and long-term planning. The model supports multilingual software development and complex multi-step workflow execution, achieving a score of 74.0 on SWE-bench Verified and surpassing Claude Sonnet 4.5 in multilingual scenarios.',
displayName: 'MiniMax-M2.1 (Pro)',
id: 'Pro/MiniMaxAI/MiniMax-M2.1',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-12-23',
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -117,6 +117,75 @@ const doubaoChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
},
config: {
deploymentName: 'glm-4-7-251222',
},
contextWindowTokens: 200_000,
description:
'GLM-4.7 is the latest flagship model from Zhipu AI. GLM-4.7 enhances coding capabilities, long-term task planning, and tool collaboration for Agentic Coding scenarios, achieving leading performance among open-source models in multiple public benchmarks. General capabilities are improved, with more concise and natural responses, and more immersive writing. In complex agent tasks, instruction following is stronger during tool calls, and the aesthetics of Artifacts and Agentic Coding frontend, as well as long-term task completion efficiency, are further enhanced. • Stronger programming capabilities: Significantly improved multi-language coding and terminal agent performance; GLM-4.7 can now implement "think first, then act" mechanisms in programming frameworks like Claude Code, Kilo Code, TRAE, Cline, and Roo Code, with more stable performance on complex tasks. • Frontend aesthetics improvement: GLM-4.7 shows significant progress in frontend generation quality, capable of generating websites, PPTs, and posters with better visual appeal. • Stronger tool calling capabilities: GLM-4.7 enhances tool calling abilities, scoring 67 in BrowseComp web task evaluation; achieving 84.7 in τ²-Bench interactive tool calling evaluation, surpassing Claude Sonnet 4.5 as the open-source SOTA. • Reasoning capability improvement: Significantly enhanced math and reasoning abilities, scoring 42.8% in the HLE ("Humanity\'s Last Exam") benchmark, a 41% improvement over GLM-4.6, surpassing GPT-5.1. • General capability enhancement: GLM-4.7 conversations are more concise, intelligent, and humane; writing and role-playing are more literary and immersive.',
displayName: 'GLM-4.7',
id: 'glm-4-7',
maxOutput: 128_000,
pricing: {
currency: 'CNY',
units: [
{
lookup: {
prices: {
'[0, 0.032]_[0, 0.0002]': 2,
'[0, 0.032]_[0.0002, infinity]': 3,
'[0.032, 0.2]_[0, infinity]': 4,
},
pricingParams: ['textInputRange', 'textOutputRange'],
},
name: 'textInput',
strategy: 'lookup',
unit: 'millionTokens',
},
{
lookup: {
prices: {
'[0, 0.032]_[0, 0.0002]': 8,
'[0, 0.032]_[0.0002, infinity]': 14,
'[0.032, 0.2]_[0, infinity]': 16,
},
pricingParams: ['textInputRange', 'textOutputRange'],
},
name: 'textOutput',
strategy: 'lookup',
unit: 'millionTokens',
},
{
lookup: {
prices: {
'[0, 0.032]_[0, 0.0002]': 0.4,
'[0, 0.032]_[0.0002, infinity]': 0.6,
'[0.032, 0.2]_[0, infinity]': 0.8,
},
pricingParams: ['textInputRange', 'textOutputRange'],
},
name: 'textInput_cacheRead',
strategy: 'lookup',
unit: 'millionTokens',
},
{
lookup: { prices: { '1h': 0.017 }, pricingParams: ['ttl'] },
name: 'textInput_cacheWrite',
strategy: 'lookup',
unit: 'millionTokens',
},
],
},
settings: {
extendParams: ['enableReasoning'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
+41 -38
View File
@@ -193,7 +193,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 131_072,
description: 'ERNIE Speed 128K is a no-I/O-fee model for long-text understanding and large-scale trials.',
description:
'ERNIE Speed 128K is a no-I/O-fee model for long-text understanding and large-scale trials.',
displayName: 'ERNIE Speed 128K',
id: 'ernie-speed-128k',
maxOutput: 4096,
@@ -274,7 +275,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 8192,
description: 'ERNIE Tiny 8K is ultra-lightweight for simple QA, classification, and low-cost inference.',
description:
'ERNIE Tiny 8K is ultra-lightweight for simple QA, classification, and low-cost inference.',
displayName: 'ERNIE Tiny 8K',
id: 'ernie-tiny-8k',
maxOutput: 2048,
@@ -337,7 +339,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 8192,
description: 'ERNIE Novel 8K is built for long-form novels and IP plots with multi-character narratives.',
description:
'ERNIE Novel 8K is built for long-form novels and IP plots with multi-character narratives.',
displayName: 'ERNIE Novel 8K',
id: 'ernie-novel-8k',
maxOutput: 2048,
@@ -352,7 +355,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 131_072,
description: 'ERNIE 4.5 0.3B is an open-source lightweight model for local and customized deployment.',
description:
'ERNIE 4.5 0.3B is an open-source lightweight model for local and customized deployment.',
displayName: 'ERNIE 4.5 0.3B',
id: 'ernie-4.5-0.3b',
maxOutput: 8192,
@@ -443,7 +447,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 32_768,
description: 'Qianfan 70B is a large Chinese model for high-quality generation and complex reasoning.',
description:
'Qianfan 70B is a large Chinese model for high-quality generation and complex reasoning.',
displayName: 'Qianfan 70B',
id: 'qianfan-70b',
maxOutput: 16_384,
@@ -628,7 +633,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 32_768,
description: 'Qianfan Composition is a multimodal creation model for mixed image-text understanding and generation.',
description:
'Qianfan Composition is a multimodal creation model for mixed image-text understanding and generation.',
displayName: 'Qianfan Composition',
id: 'qianfan-composition',
maxOutput: 8192,
@@ -758,7 +764,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 4096,
description: 'Qianfan EngCard VL is a multimodal recognition model focused on English scenarios.',
description:
'Qianfan EngCard VL is a multimodal recognition model focused on English scenarios.',
displayName: 'Qianfan EngCard VL',
id: 'qianfan-engcard-vl',
maxOutput: 4000,
@@ -776,7 +783,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 4096,
description: 'Qianfan SinglePicOCR is a single-image OCR model with high-accuracy character recognition.',
description:
'Qianfan SinglePicOCR is a single-image OCR model with high-accuracy character recognition.',
displayName: 'Qianfan SinglePicOCR',
id: 'qianfan-singlepicocr',
maxOutput: 4096,
@@ -794,7 +802,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 32_768,
description: 'InternVL3 38B is a large open-source multimodal model for high-accuracy image-text understanding.',
description:
'InternVL3 38B is a large open-source multimodal model for high-accuracy image-text understanding.',
displayName: 'InternVL3 38B',
id: 'internvl3-38b',
maxOutput: 8192,
@@ -830,7 +839,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 32_768,
description: 'InternVL3 1B is a lightweight multimodal model for resource-constrained deployment.',
description:
'InternVL3 1B is a lightweight multimodal model for resource-constrained deployment.',
displayName: 'InternVL3 1B',
id: 'internvl3-1b',
maxOutput: 8192,
@@ -848,7 +858,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 32_768,
description: 'InternVL2.5 38B MPO is a multimodal pretrained model for complex image-text reasoning.',
description:
'InternVL2.5 38B MPO is a multimodal pretrained model for complex image-text reasoning.',
displayName: 'InternVL2.5 38B MPO',
id: 'internvl2.5-38b-mpo',
maxOutput: 4096,
@@ -1056,7 +1067,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 65_536,
description: 'GLM-4.5V is a multimodal vision-language model for general image understanding and QA.',
description:
'GLM-4.5V is a multimodal vision-language model for general image understanding and QA.',
displayName: 'GLM-4.5V',
id: 'glm-4.5v',
maxOutput: 16_384,
@@ -1096,7 +1108,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 4096,
description: 'DeepSeek VL2 is a multimodal model for image-text understanding and fine-grained visual QA.',
description:
'DeepSeek VL2 is a multimodal model for image-text understanding and fine-grained visual QA.',
displayName: 'DeepSeek VL2',
id: 'deepseek-vl2',
maxOutput: 2048,
@@ -1114,7 +1127,8 @@ const wenxinChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 4096,
description: 'DeepSeek VL2 Small is a lightweight multimodal version for resource-constrained and high-concurrency use.',
description:
'DeepSeek VL2 Small is a lightweight multimodal version for resource-constrained and high-concurrency use.',
displayName: 'DeepSeek VL2 Small',
id: 'deepseek-vl2-small',
maxOutput: 2048,
@@ -1181,7 +1195,8 @@ const wenxinChatModels: AIChatModelCard[] = [
search: true,
},
contextWindowTokens: 144_000,
description: 'DeepSeek V3.2 Think is a full deep-thinking model with stronger long-chain reasoning.',
description:
'DeepSeek V3.2 Think is a full deep-thinking model with stronger long-chain reasoning.',
displayName: 'DeepSeek V3.2 Think',
enabled: true,
id: 'deepseek-v3.2-think',
@@ -1334,8 +1349,7 @@ const wenxinChatModels: AIChatModelCard[] = [
reasoning: true,
},
contextWindowTokens: 32_768,
description:
'DeepSeek R1 Distill Llama 70B combines R1 reasoning with the Llama ecosystem.',
description: 'DeepSeek R1 Distill Llama 70B combines R1 reasoning with the Llama ecosystem.',
displayName: 'DeepSeek R1 Distill Llama 70B',
id: 'deepseek-r1-distill-llama-70b',
maxOutput: 8192,
@@ -1440,7 +1454,8 @@ const wenxinChatModels: AIChatModelCard[] = [
reasoning: true,
},
contextWindowTokens: 131_072,
description: 'Qwen3 235B A22B Thinking 2507 is an ultra-large thinking model for hard reasoning.',
description:
'Qwen3 235B A22B Thinking 2507 is an ultra-large thinking model for hard reasoning.',
displayName: 'Qwen3 235B A22B Thinking 2507',
id: 'qwen3-235b-a22b-thinking-2507',
maxOutput: 32_768,
@@ -1675,7 +1690,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 32_768,
description: 'Qwen3 8B is a lightweight model with flexible deployment for high-concurrency workloads.',
description:
'Qwen3 8B is a lightweight model with flexible deployment for high-concurrency workloads.',
displayName: 'Qwen3 8B',
id: 'qwen3-8b',
maxOutput: 8192,
@@ -1729,7 +1745,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 32_768,
description: 'Qwen3 0.6B is an entry-level model for simple reasoning and very constrained environments.',
description:
'Qwen3 0.6B is an entry-level model for simple reasoning and very constrained environments.',
displayName: 'Qwen3 0.6B',
id: 'qwen3-0.6b',
maxOutput: 8192,
@@ -1747,7 +1764,8 @@ const wenxinChatModels: AIChatModelCard[] = [
},
{
contextWindowTokens: 32_768,
description: 'Qwen2.5 7B Instruct is a mature open-source instruct model for multi-scenario chat and generation.',
description:
'Qwen2.5 7B Instruct is a mature open-source instruct model for multi-scenario chat and generation.',
displayName: 'Qwen2.5 7B Instruct',
id: 'qwen2.5-7b-instruct',
maxOutput: 8192,
@@ -1760,22 +1778,6 @@ const wenxinChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'GLM-4 32B 0414 is a general GLM model supporting multi-task text generation and understanding.',
displayName: 'GLM-4 32B 0414',
id: 'glm-4-32b-0414',
maxOutput: 8192,
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
},
];
const wenxinImageModels: AIImageModelCard[] = [
@@ -1851,7 +1853,8 @@ const wenxinImageModels: AIImageModelCard[] = [
type: 'image',
},
{
description: 'FLUX.1-schnell is a high-performance image generation model for fast multi-style outputs.',
description:
'FLUX.1-schnell is a high-performance image generation model for fast multi-style outputs.',
displayName: 'FLUX.1-schnell',
enabled: true,
id: 'flux.1-schnell',
+58 -28
View File
@@ -67,6 +67,60 @@ const zhipuChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
},
contextWindowTokens: 200_000,
description:
'GLM-4.7-Flash, as a 30B-level SOTA model, offers a new choice that balances performance and efficiency. It enhances coding capabilities, long-term task planning, and tool collaboration for Agentic Coding scenarios, achieving leading performance among open-source models of the same size in multiple current benchmark leaderboards. In executing complex intelligent agent tasks, it has stronger instruction compliance during tool calls, and further improves the aesthetics of front-end and the efficiency of long-term task completion for Artifacts and Agentic Coding.',
displayName: 'GLM-4.7-Flash',
enabled: true,
id: 'glm-4.7-flash',
maxOutput: 131_072,
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput_cacheRead', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
],
},
settings: {
extendParams: ['enableReasoning'],
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
},
contextWindowTokens: 200_000,
description:
'GLM-4.7-Flash, as a 30B-level SOTA model, offers a new choice that balances performance and efficiency. It enhances coding capabilities, long-term task planning, and tool collaboration for Agentic Coding scenarios, achieving leading performance among open-source models of the same size in multiple current benchmark leaderboards. In executing complex intelligent agent tasks, it has stronger instruction compliance during tool calls, and further improves the aesthetics of front-end and the efficiency of long-term task completion for Artifacts and Agentic Coding.',
displayName: 'GLM-4.7-FlashX',
enabled: true,
id: 'glm-4.7-flashx',
maxOutput: 131_072,
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
],
},
settings: {
extendParams: ['enableReasoning'],
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -433,32 +487,6 @@ const zhipuChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
},
contextWindowTokens: 131_072,
description: 'Free GLM-4.5 tier with strong performance in reasoning, coding, and agent tasks.',
displayName: 'GLM-4.5-Flash',
enabled: true,
id: 'glm-4.5-flash',
maxOutput: 98_304,
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput_cacheRead', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
],
},
settings: {
extendParams: ['enableReasoning'],
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
reasoning: true,
@@ -567,7 +595,8 @@ const zhipuChatModels: AIChatModelCard[] = [
search: true,
},
contextWindowTokens: 131_072,
description: 'Fast and low-cost: Flash-enhanced with ultra-fast reasoning and higher concurrency.',
description:
'Fast and low-cost: Flash-enhanced with ultra-fast reasoning and higher concurrency.',
displayName: 'GLM-Z1-FlashX',
id: 'glm-z1-flashx',
maxOutput: 32_768,
@@ -789,7 +818,8 @@ const zhipuChatModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 16_000,
description: 'GLM-4V-Plus understands video and multiple images, suitable for multimodal tasks.',
description:
'GLM-4V-Plus understands video and multiple images, suitable for multimodal tasks.',
displayName: 'GLM-4V-Plus-0111',
id: 'glm-4v-plus-0111',
pricing: {
+29
View File
@@ -259,6 +259,33 @@ export interface AiModelSettings {
searchProvider?: string;
}
/**
 * Zod enum listing every identifier that is accepted as an entry of a model's
 * `settings.extendParams` array (see `AiModelSettingsSchema.extendParams`).
 *
 * NOTE(review): presumably mirrors a TypeScript union of extend-param names
 * declared elsewhere in this file — confirm both are updated together when an
 * entry is added or removed.
 */
export const ExtendParamsTypeSchema = z.enum([
'reasoningBudgetToken',
'enableReasoning',
'disableContextCaching',
'reasoningEffort',
'gpt5ReasoningEffort',
'gpt5_1ReasoningEffort',
'gpt5_2ReasoningEffort',
'gpt5_2ProReasoningEffort',
'textVerbosity',
'thinking',
'thinkingBudget',
'thinkingLevel',
'thinkingLevel2',
'imageAspectRatio',
'imageResolution',
'urlContext',
]);
/** Zod enum of the values allowed for a model's `searchImpl` setting: `'tool'`, `'params'` or `'internal'`. */
export const ModelSearchImplementTypeSchema = z.enum(['tool', 'params', 'internal']);
/**
 * Zod schema for a model's `settings` object; every field is optional.
 *
 * Used as the (optional) `settings` field of `CreateAiModelSchema` and
 * `UpdateAiModelSchema`.
 *
 * NOTE(review): intended to match the `AiModelSettings` interface declared
 * above — only part of that interface is visible here, so confirm the field
 * lists agree.
 */
export const AiModelSettingsSchema = z.object({
// Each entry must be one of the identifiers in ExtendParamsTypeSchema.
extendParams: z.array(ExtendParamsTypeSchema).optional(),
// One of 'tool' | 'params' | 'internal' (ModelSearchImplementTypeSchema).
searchImpl: ModelSearchImplementTypeSchema.optional(),
// Free-form string; no further validation is applied by this schema.
searchProvider: z.string().optional(),
});
export interface AIChatModelCard extends AIBaseModelCard {
abilities?: ModelAbilities;
config?: AiModelConfig;
@@ -344,6 +371,7 @@ export const CreateAiModelSchema = z.object({
id: z.string(),
providerId: z.string(),
releasedAt: z.string().optional(),
settings: AiModelSettingsSchema.optional(),
type: AiModelTypeSchema.optional(),
// checkModel: z.string().optional(),
@@ -380,6 +408,7 @@ export const UpdateAiModelSchema = z.object({
.optional(),
contextWindowTokens: z.number().nullable().optional(),
displayName: z.string().nullable().optional(),
settings: AiModelSettingsSchema.optional(),
type: AiModelTypeSchema.optional(),
});
@@ -179,7 +179,7 @@ describe('computeChatPricing', () => {
it('supports multi-modal fixed units for Gemini 2.5 Flash Image Preview', () => {
const pricing = googleChatModels.find(
(model: { id: string }) => model.id === 'gemini-2.5-flash-image-preview',
(model: { id: string }) => model.id === 'gemini-2.5-flash-image',
)?.pricing;
expect(pricing).toBeDefined();
@@ -207,7 +207,7 @@ describe('computeChatPricing', () => {
it('handles multi-modal image generation for Nano Banana', () => {
const pricing = googleChatModels.find(
(model: { id: string }) => model.id === 'gemini-2.5-flash-image-preview',
(model: { id: string }) => model.id === 'gemini-2.5-flash-image',
)?.pricing;
expect(pricing).toBeDefined();
@@ -12,7 +12,7 @@ const noImageErrorType = 'ProviderNoImageGenerated';
const invalidErrorType = 'InvalidProviderAPIKey';
// Mock the console.error to avoid polluting test output
vi.spyOn(console, 'error').mockImplementation(() => {});
vi.spyOn(console, 'error').mockImplementation(() => { });
let mockClient: GoogleGenAI;
@@ -361,7 +361,7 @@ describe('createGoogleImage', () => {
vi.spyOn(mockClient.models, 'generateContent').mockResolvedValue(mockContentResponse as any);
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Create a beautiful sunset landscape',
},
@@ -378,7 +378,7 @@ describe('createGoogleImage', () => {
parts: [{ text: 'Create a beautiful sunset landscape' }],
},
],
model: 'gemini-2.5-flash-image-preview',
model: 'gemini-2.5-flash-image',
config: {
responseModalities: ['Image'],
},
@@ -414,7 +414,7 @@ describe('createGoogleImage', () => {
vi.spyOn(mockClient.models, 'generateContent').mockResolvedValue(mockContentResponse as any);
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Add a red rose to this image',
imageUrl: `data:image/png;base64,${inputImageBase64}`,
@@ -440,7 +440,7 @@ describe('createGoogleImage', () => {
],
},
],
model: 'gemini-2.5-flash-image-preview',
model: 'gemini-2.5-flash-image',
config: {
responseModalities: ['Image'],
},
@@ -482,7 +482,7 @@ describe('createGoogleImage', () => {
vi.spyOn(mockClient.models, 'generateContent').mockResolvedValue(mockContentResponse as any);
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Change the background to blue sky',
imageUrl: 'https://example.com/image.jpg',
@@ -511,7 +511,7 @@ describe('createGoogleImage', () => {
],
},
],
model: 'gemini-2.5-flash-image-preview',
model: 'gemini-2.5-flash-image',
config: {
responseModalities: ['Image'],
},
@@ -545,7 +545,7 @@ describe('createGoogleImage', () => {
vi.spyOn(mockClient.models, 'generateContent').mockResolvedValue(mockContentResponse as any);
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Generate a colorful abstract pattern',
imageUrl: null,
@@ -563,7 +563,7 @@ describe('createGoogleImage', () => {
parts: [{ text: 'Generate a colorful abstract pattern' }],
},
],
model: 'gemini-2.5-flash-image-preview',
model: 'gemini-2.5-flash-image',
config: {
responseModalities: ['Image'],
},
@@ -594,7 +594,7 @@ describe('createGoogleImage', () => {
);
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Create inappropriate content',
},
@@ -619,7 +619,7 @@ describe('createGoogleImage', () => {
);
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Generate an image',
},
@@ -637,7 +637,7 @@ describe('createGoogleImage', () => {
it('should throw error for unsupported image URL format', async () => {
// Arrange
const payload: CreateImagePayload = {
model: 'gemini-2.5-flash-image-preview:image',
model: 'gemini-2.5-flash-image:image',
params: {
prompt: 'Edit this image',
imageUrl: 'ftp://example.com/image.jpg',
@@ -34,6 +34,7 @@ beforeEach(() => {
});
afterEach(() => {
vi.unstubAllGlobals();
vi.clearAllMocks();
});
@@ -333,6 +334,107 @@ describe('LobeOpenRouterAI - custom features', () => {
expect.anything(),
);
});
it('should map thinkingLevel to reasoning effort', async () => {
  // `thinkingLevel` is passed through an `any` cast, exactly as a caller with a loosely typed payload would.
  const chatPayload = {
    messages: [{ content: 'Think level', role: 'user' }],
    model: 'openai/gpt-4',
    thinkingLevel: 'medium',
  } as any;

  await instance.chat(chatPayload);

  // The request handed to the underlying client must carry the level as `reasoning.effort`.
  const expectedRequest = expect.objectContaining({ reasoning: { effort: 'medium' } });
  expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
    expectedRequest,
    expect.anything(),
  );
});
});
describe('models mapping', () => {
  // Builds a minimal model-list entry that advertises the `reasoning` parameter.
  const makeReasoningModel = (id: string) => ({
    architecture: { input_modalities: ['text'] },
    created: 1_700_000_000,
    description: 'Test model',
    id,
    name: id,
    pricing: { completion: '0.00001', prompt: '0.00001' },
    supported_parameters: ['reasoning'],
    top_provider: { context_length: 8192, max_completion_tokens: 1024 },
  });

  // Stubs global `fetch` so it resolves with an OK response whose JSON body is `{ data }`.
  const stubFetchWith = (data: unknown[]) => {
    const fakeResponse = {
      ok: true,
      json: async () => ({ data }),
    } as any;

    vi.stubGlobal('fetch', vi.fn().mockResolvedValue(fakeResponse));
  };

  it('should map extendParams for gpt-5.x reasoning and verbosity', async () => {
    stubFetchWith([
      makeReasoningModel('openai/gpt-5.2-mini'),
      makeReasoningModel('openai/gpt-5.1-mini'),
    ]);

    const mapped = await params.models();
    const gpt52 = mapped.find((m) => m.id === 'openai/gpt-5.2-mini');
    const gpt51 = mapped.find((m) => m.id === 'openai/gpt-5.1-mini');

    expect(gpt52?.settings?.extendParams).toEqual(
      expect.arrayContaining(['gpt5_2ReasoningEffort', 'textVerbosity']),
    );
    expect(gpt51?.settings?.extendParams).toEqual(
      expect.arrayContaining(['gpt5_1ReasoningEffort', 'textVerbosity']),
    );
  });

  it('should map thinkingLevel for gemini-3 flash/pro reasoning', async () => {
    stubFetchWith([
      makeReasoningModel('google/gemini-3-pro'),
      makeReasoningModel('google/gemini-3-flash'),
    ]);

    const mapped = await params.models();
    const geminiPro = mapped.find((m) => m.id === 'google/gemini-3-pro');
    const geminiFlash = mapped.find((m) => m.id === 'google/gemini-3-flash');

    // Pro and Flash are expected to receive different thinking-level params.
    expect(geminiPro?.settings?.extendParams).toEqual(
      expect.arrayContaining(['thinkingLevel2']),
    );
    expect(geminiFlash?.settings?.extendParams).toEqual(
      expect.arrayContaining(['thinkingLevel']),
    );
  });
});
describe('models', () => {
@@ -17,11 +17,11 @@ export const params = {
chatCompletion: {
handlePayload: (payload) => {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { reasoning_effort, thinking, reasoning: _reasoning, ...rest } = payload;
const { reasoning_effort, thinking, reasoning: _reasoning, thinkingLevel, ...rest } = payload;
let reasoning: OpenRouterReasoning | undefined;
if (thinking?.type || thinking?.budget_tokens !== undefined || reasoning_effort) {
if (thinking?.type || thinking?.budget_tokens !== undefined || reasoning_effort || thinkingLevel) {
if (thinking?.type === 'disabled') {
reasoning = { enabled: false };
} else if (thinking?.budget_tokens !== undefined) {
@@ -31,6 +31,9 @@ export const params = {
} else if (reasoning_effort) {
reasoning = { effort: reasoning_effort };
}
else if (thinkingLevel) {
reasoning = { effort: thinkingLevel };
}
}
return {
@@ -126,11 +129,14 @@ export const params = {
if (model.description && model.description.includes('`reasoning` `enabled`')) {
extendParams.push('enableReasoning');
}
if (hasReasoning && model.id.includes('gpt-5')) {
extendParams.push('gpt5ReasoningEffort');
}
if (hasReasoning && model.id.includes('openai') && !model.id.includes('gpt-5')) {
extendParams.push('reasoningEffort');
if (hasReasoning && model.id.includes('gpt-5.2')) {
extendParams.push('gpt5_2ReasoningEffort', 'textVerbosity');
} else if (hasReasoning && model.id.includes('gpt-5.1')) {
extendParams.push('gpt5_1ReasoningEffort', 'textVerbosity');
} else if (hasReasoning && model.id.includes('gpt-5')) {
extendParams.push('gpt5ReasoningEffort', 'textVerbosity');
} else if (hasReasoning && model.id.includes('openai')) {
extendParams.push('reasoningEffort', 'textVerbosity');
}
if (hasReasoning && model.id.includes('claude')) {
extendParams.push('enableReasoning', 'reasoningBudgetToken');
@@ -141,6 +147,12 @@ export const params = {
if (hasReasoning && model.id.includes('gemini-2.5')) {
extendParams.push('reasoningBudgetToken');
}
if (hasReasoning && model.id.includes('gemini-3-pro')) {
extendParams.push('thinkingLevel2');
}
if (hasReasoning && model.id.includes('gemini-3-flash')) {
extendParams.push('thinkingLevel');
}
return extendParams.length > 0 ? { settings: { extendParams } } : {};
})(),
};
@@ -245,6 +245,53 @@ describe('LobeVercelAIGatewayAI - custom features', () => {
expect(Array.isArray(model?.pricing?.units)).toBe(true);
});
// Verifies that each gpt-5 generation tagged with 'reasoning' gets its own
// reasoning-effort extend param, always accompanied by 'textVerbosity'.
it('should map extendParams for gpt-5.x reasoning models', async () => {
  // All fixture cards share pricing, tags and type; only id and display name vary.
  const makeReasoningCard = (id: string, name: string): VercelAIGatewayModelCard => ({
    id,
    name,
    pricing: { input: 0.000_003, output: 0.000_015 },
    tags: ['reasoning'],
    type: 'chat',
  });

  const mockModelData: VercelAIGatewayModelCard[] = [
    makeReasoningCard('openai/gpt-5.2-mini', 'GPT-5.2 Mini'),
    makeReasoningCard('openai/gpt-5.1-mini', 'GPT-5.1 Mini'),
    makeReasoningCard('openai/gpt-5-mini', 'GPT-5 Mini'),
  ];

  // Minimal client stub: only `models.list` is provided, resolving to the fixtures.
  const listModels = vi.fn().mockResolvedValue({ data: mockModelData });
  const mockClient = { models: { list: listModels } };

  const models = await params.models({ client: mockClient as any });

  // [model id, expected reasoning-effort param], checked in the same order as the fixtures.
  const expectedEffortParams: [string, string][] = [
    ['openai/gpt-5.2-mini', 'gpt5_2ReasoningEffort'],
    ['openai/gpt-5.1-mini', 'gpt5_1ReasoningEffort'],
    ['openai/gpt-5-mini', 'gpt5ReasoningEffort'],
  ];

  for (const [modelId, effortParam] of expectedEffortParams) {
    const card = models.find((m) => m.id === modelId);
    expect(card?.settings?.extendParams).toEqual(
      expect.arrayContaining([effortParam, 'textVerbosity']),
    );
  }
});
it('should handle models with missing pricing', async () => {
const mockModelData: VercelAIGatewayModelCard[] = [
{
@@ -125,9 +125,15 @@ export const params = {
// Merge all applicable extendParams for settings
...(() => {
const extendParams: string[] = [];
if (tags.includes('reasoning') && m.id.includes('gpt-5')) {
if (tags.includes('reasoning') && m.id.includes('gpt-5') && !m.id.includes('gpt-5.1') && !m.id.includes('gpt-5.2')) {
extendParams.push('gpt5ReasoningEffort', 'textVerbosity');
}
if (tags.includes('reasoning') && m.id.includes('gpt-5.1') && !m.id.includes('gpt-5.2')) {
extendParams.push('gpt5_1ReasoningEffort', 'textVerbosity');
}
if (tags.includes('reasoning') && m.id.includes('gpt-5.2')) {
extendParams.push('gpt5_2ReasoningEffort', 'textVerbosity');
}
if (tags.includes('reasoning') && m.id.includes('openai') && !m.id.includes('gpt-5')) {
extendParams.push('reasoningEffort', 'textVerbosity');
}