From 1ecf7d2be8a7b848dab6340473efd8aa581d7379 Mon Sep 17 00:00:00 2001 From: Zhijie He Date: Fri, 10 Apr 2026 11:50:22 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=84=20style(image,video):=20=20extend?= =?UTF-8?q?=20more=20AIGC=20params=20support=20(#13597)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛 fix(image,video): preserve prompt and image when switching model * ✨ feat(image): smart imageUrl ↔ imageUrls conversion on model switch - When switching from multi-image to single-image model: use imageUrls[0] as imageUrl - When switching from single-image to multi-image model: wrap imageUrl into [imageUrl] as imageUrls - Preserves prompt and other compatible parameters - Add test cases for bidirectional conversion ♻️ refactor(image): simplify preserveImageInputParams logic - Remove intermediate variables for cleaner code readability - Condense 9 intermediate variables to 3 core ones - Inline condition checks for simpler if statements - Improve code clarity without changing functionality * 🐛 fix(image): preserve imageUrl when target imageUrls default is empty array * chore: format imageUrl & imageUrls * feat: support imageUrls for videoGen fix: fix ci error fix: fix ci error fix: fix + button fix: fix batch images display fix: fix multi images upload display fix: fix ci error style: add Seedance 2.0 support style: add Seedance 2.0 support fix: fix veo imageUrls logic * style: add watermark & prompt_extend & web_search support style: update minimax & seedream price style: fix fix ui error style: update z-image style: fix video ui style: fix seedance & seedream params style: fix seedance & seedream params style: fix seedance & seedream params fix ci error Update createImage.ts fix ci error fix ci error fix ci error fix ci error fix ci error fix ci error fix: fix optimize_prompt_options * fix rebase issue * fix: seedance 2.0 price missing * fix: apply some suggestions --- locales/en-US/image.json | 3 + 
locales/en-US/video.json | 3 + locales/zh-CN/image.json | 3 + locales/zh-CN/video.json | 3 + packages/model-bank/src/aiModels/google.ts | 10 +- packages/model-bank/src/aiModels/hunyuan.ts | 2 + .../model-bank/src/aiModels/lobehub/video.ts | 4 +- packages/model-bank/src/aiModels/minimax.ts | 30 ++ packages/model-bank/src/aiModels/qwen.ts | 298 ++++++-------- .../model-bank/src/aiModels/volcengine.ts | 97 +++-- packages/model-bank/src/aiModels/wenxin.ts | 18 + packages/model-bank/src/aiModels/zhipu.ts | 38 +- .../src/standard-parameters/index.test.ts | 15 + .../src/standard-parameters/index.ts | 25 ++ .../src/standard-parameters/video.test.ts | 24 ++ .../src/standard-parameters/video.ts | 46 ++- .../src/providers/google/createVideo.ts | 13 + .../src/providers/hunyuan/createImage.test.ts | 1 + .../src/providers/hunyuan/createImage.ts | 11 +- .../src/providers/minimax/createImage.test.ts | 12 + .../src/providers/minimax/createImage.ts | 3 +- .../src/providers/minimax/createVideo.ts | 5 +- .../src/providers/qwen/createImage.ts | 6 + .../src/providers/qwen/createVideo.ts | 140 ++++--- .../providers/volcengine/createImage.test.ts | 122 ++++++ .../src/providers/volcengine/createImage.ts | 11 +- .../volcengine/video/createVideo.test.ts | 30 ++ .../providers/volcengine/video/createVideo.ts | 16 +- .../src/providers/wenxin/createImage.ts | 6 +- .../src/providers/wenxin/createVideo.ts | 5 +- .../src/providers/zhipu/createImage.test.ts | 367 ++++++++++++++++++ .../src/providers/zhipu/createImage.ts | 164 ++++++++ .../src/providers/zhipu/createVideo.ts | 28 +- .../src/providers/zhipu/index.ts | 2 + src/locales/default/image.ts | 3 + src/locales/default/video.ts | 3 + .../GenerationInput/InlineVideoFrames.tsx | 144 ++++++- .../image/features/PromptInput/index.tsx | 70 +++- .../features/GenerationFeed/BatchItem.tsx | 6 +- .../GenerationFeed/VideoReferenceFrames.tsx | 68 ++-- .../video/features/PromptInput/index.tsx | 132 ++++++- .../slices/generationConfig/action.test.ts | 119 
+++++- .../image/slices/generationConfig/action.ts | 27 +- src/store/utils/preserveSupportedParams.ts | 63 +++ .../slices/generationConfig/action.test.ts | 109 ++++++ .../video/slices/generationConfig/action.ts | 33 +- .../video/slices/generationConfig/hooks.ts | 3 +- 47 files changed, 1996 insertions(+), 345 deletions(-) create mode 100644 packages/model-runtime/src/providers/zhipu/createImage.test.ts create mode 100644 packages/model-runtime/src/providers/zhipu/createImage.ts create mode 100644 src/store/utils/preserveSupportedParams.ts create mode 100644 src/store/video/slices/generationConfig/action.test.ts diff --git a/locales/en-US/image.json b/locales/en-US/image.json index 662a2c9300..e0f0a83594 100644 --- a/locales/en-US/image.json +++ b/locales/en-US/image.json @@ -12,6 +12,7 @@ "config.model.label": "Model", "config.prompt.placeholder": "Describe what you want to generate", "config.prompt.placeholderWithRef": "Describe how you want to adjust the image", + "config.promptExtend.label": "Extended Prompt", "config.quality.label": "Image Quality", "config.quality.options.hd": "High Definition", "config.quality.options.standard": "Standard", @@ -24,6 +25,8 @@ "config.size.label": "Size", "config.steps.label": "Steps", "config.title": "Configuration", + "config.watermark.label": "Watermark", + "config.webSearch.label": "Web Search", "config.width.label": "Width", "generation.actions.applySeed": "Apply Seed", "generation.actions.copyError": "Copy Error Message", diff --git a/locales/en-US/video.json b/locales/en-US/video.json index d05c2823ad..933e2ba2d0 100644 --- a/locales/en-US/video.json +++ b/locales/en-US/video.json @@ -8,11 +8,14 @@ "config.imageUrl.label": "Start Frame", "config.prompt.placeholder": "Describe the video you want to generate", "config.prompt.placeholderWithRef": "Describe the scene you want to generate with the image", + "config.promptExtend.label": "Prompt Extend", "config.referenceImage.label": "Reference Image", 
"config.resolution.label": "Resolution", "config.seed.label": "Seed", "config.seed.random": "Random", "config.size.label": "Size", + "config.watermark.label": "Watermark", + "config.webSearch.label": "Web Search", "generation.actions.copyError": "Copy Error Message", "generation.actions.errorCopied": "Error Message Copied to Clipboard", "generation.actions.errorCopyFailed": "Failed to Copy Error Message", diff --git a/locales/zh-CN/image.json b/locales/zh-CN/image.json index 99075cf240..ff7afd3659 100644 --- a/locales/zh-CN/image.json +++ b/locales/zh-CN/image.json @@ -12,6 +12,7 @@ "config.model.label": "模型", "config.prompt.placeholder": "描述你想要生成的内容", "config.prompt.placeholderWithRef": "描述你想如何调整图片", + "config.promptExtend.label": "提示词扩展", "config.quality.label": "图片质量", "config.quality.options.hd": "高清", "config.quality.options.standard": "标准", @@ -24,6 +25,8 @@ "config.size.label": "尺寸", "config.steps.label": "步数", "config.title": "配置", + "config.watermark.label": "水印", + "config.webSearch.label": "联网搜索", "config.width.label": "宽度", "generation.actions.applySeed": "应用种子", "generation.actions.copyError": "复制错误信息", diff --git a/locales/zh-CN/video.json b/locales/zh-CN/video.json index a5af8201f6..eafcc866c2 100644 --- a/locales/zh-CN/video.json +++ b/locales/zh-CN/video.json @@ -8,11 +8,14 @@ "config.imageUrl.label": "起始画面", "config.prompt.placeholder": "描述你想生成的视频内容", "config.prompt.placeholderWithRef": "结合图片,描述你想生成的画面", + "config.promptExtend.label": "提示词扩展", "config.referenceImage.label": "参考图像", "config.resolution.label": "分辨率", "config.seed.label": "种子", "config.seed.random": "随机", "config.size.label": "尺寸", + "config.watermark.label": "水印", + "config.webSearch.label": "联网搜索", "generation.actions.copyError": "复制错误信息", "generation.actions.errorCopied": "错误信息已复制到剪贴板", "generation.actions.errorCopyFailed": "复制错误信息失败", diff --git a/packages/model-bank/src/aiModels/google.ts b/packages/model-bank/src/aiModels/google.ts index 922810aec2..d5ddf028b9 100644 --- 
a/packages/model-bank/src/aiModels/google.ts +++ b/packages/model-bank/src/aiModels/google.ts @@ -886,8 +886,9 @@ const googleVideoModels: AIVideoModelCard[] = [ endImageUrl: { default: null, }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 3, }, prompt: { default: '' }, resolution: { @@ -917,8 +918,9 @@ const googleVideoModels: AIVideoModelCard[] = [ endImageUrl: { default: null, }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 3, }, prompt: { default: '' }, resolution: { diff --git a/packages/model-bank/src/aiModels/hunyuan.ts b/packages/model-bank/src/aiModels/hunyuan.ts index aa3fec1e52..46f36323f2 100644 --- a/packages/model-bank/src/aiModels/hunyuan.ts +++ b/packages/model-bank/src/aiModels/hunyuan.ts @@ -528,6 +528,8 @@ const hunyuanImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', diff --git a/packages/model-bank/src/aiModels/lobehub/video.ts b/packages/model-bank/src/aiModels/lobehub/video.ts index 13a6d79b52..46724d3cbd 100644 --- a/packages/model-bank/src/aiModels/lobehub/video.ts +++ b/packages/model-bank/src/aiModels/lobehub/video.ts @@ -20,9 +20,9 @@ export const seedance20Params: VideoModelParamsSchema = { width: { max: 6000, min: 300 }, }, generateAudio: { default: true }, - imageUrl: { + imageUrls: { aspectRatio: { max: 2.5, min: 0.4 }, - default: null, + default: [], height: { max: 6000, min: 300 }, maxFileSize: 30 * 1024 * 1024, width: { max: 6000, min: 300 }, diff --git a/packages/model-bank/src/aiModels/minimax.ts b/packages/model-bank/src/aiModels/minimax.ts index c7e8ff4466..d41ee9d351 100644 --- a/packages/model-bank/src/aiModels/minimax.ts +++ b/packages/model-bank/src/aiModels/minimax.ts @@ -265,6 +265,12 @@ const minimaxImageModels: AIImageModelCard[] = [ default: '', }, seed: { default: null }, + promptExtend: { 
default: false }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'imageGeneration', rate: 0.025, strategy: 'fixed', unit: 'image' }], }, releasedAt: '2025-02-28', type: 'image', @@ -285,6 +291,12 @@ const minimaxImageModels: AIImageModelCard[] = [ default: '', }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'imageGeneration', rate: 0.025, strategy: 'fixed', unit: 'image' }], }, releasedAt: '2025-02-28', type: 'image', @@ -308,6 +320,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '768P', enum: ['768P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -332,6 +346,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '768P', enum: ['768P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -359,6 +375,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '768P', enum: ['512P', '768P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -382,6 +400,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, releasedAt: '2025-02-11', type: 'video', @@ -398,6 +418,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, releasedAt: '2025-02-11', type: 'video', @@ -416,6 +438,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, releasedAt: '2025-03-03', type: 'video', @@ -434,6 +458,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { 
default: false }, + watermark: { default: false }, }, releasedAt: '2025-03-03', type: 'video', @@ -447,6 +473,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + promptExtend: { default: false }, + watermark: { default: false }, }, releasedAt: '2025-01-10', type: 'video', @@ -462,6 +490,8 @@ const minimaxVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, releasedAt: '2025-03-03', type: 'video', diff --git a/packages/model-bank/src/aiModels/qwen.ts b/packages/model-bank/src/aiModels/qwen.ts index dc26f78212..be26f811a2 100644 --- a/packages/model-bank/src/aiModels/qwen.ts +++ b/packages/model-bank/src/aiModels/qwen.ts @@ -2909,6 +2909,7 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 4096, min: 256, step: 1 }, + promptExtend: { default: false }, }, pricing: { currency: 'CNY', @@ -2934,6 +2935,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 4096, min: 256, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -2959,6 +2962,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 4096, min: 256, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -2972,7 +2977,6 @@ const qwenImageModels: AIImageModelCard[] = [ 'Qwen Image Editing Model supports multi-image input and multi-image output, enabling precise in-image text editing, object addition, removal, or relocation, subject action modification, image style transfer, and enhanced visual detail.', displayName: 'Qwen Image Edit Max', id: 'qwen-image-edit-max', - enabled: true, organization: 'Qwen', parameters: { height: { default: 1536, max: 2048, min: 512, step: 1 }, @@ -2984,6 +2988,8 @@ const 
qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -2996,7 +3002,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Editing Model supports multi-image input and multi-image output, enabling precise in-image text editing, object addition, removal, or relocation, subject action modification, image style transfer, and enhanced visual detail.', displayName: 'Qwen Image Edit Plus', - enabled: true, id: 'qwen-image-edit-plus', organization: 'Qwen', parameters: { @@ -3009,6 +3014,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3021,7 +3028,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Edit is an image-to-image model that edits images based on input images and text prompts, enabling precise adjustments and creative transformations.', displayName: 'Qwen Image Edit', - enabled: true, id: 'qwen-image-edit', organization: 'Qwen', parameters: { @@ -3032,6 +3038,8 @@ const qwenImageModels: AIImageModelCard[] = [ default: '', }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3044,7 +3052,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen Image Generation Model (Max series) delivers enhanced realism and visual naturalness compared with the Plus series, effectively reducing AI-generated artifacts, and demonstrating outstanding performance in human appearance, texture details, and text rendering.', displayName: 'Qwen Image Max', - enabled: true, id: 'qwen-image-max', organization: 'Qwen', parameters: { @@ -3056,6 +3063,8 @@ const qwenImageModels: AIImageModelCard[] = [ 
default: '1664x928', enum: ['1664x928', '1472x1140', '1328x1328', '1140x1472', '928x1664'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3068,7 +3077,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'It supports a wide range of artistic styles and is particularly proficient at rendering complex text within images, enabling integrated image–text layout design.', displayName: 'Qwen Image Plus', - enabled: true, id: 'qwen-image-plus', organization: 'Qwen', parameters: { @@ -3080,6 +3088,8 @@ const qwenImageModels: AIImageModelCard[] = [ default: '1664x928', enum: ['1664x928', '1472x1140', '1328x1328', '1140x1472', '928x1664'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3092,7 +3102,6 @@ const qwenImageModels: AIImageModelCard[] = [ description: 'Qwen-Image is a general image generation model supporting multiple art styles and strong complex text rendering, especially Chinese and English. 
It supports multi-line layouts, paragraph-level text, and fine detail for complex text-image layouts.', displayName: 'Qwen Image', - enabled: true, id: 'qwen-image', organization: 'Qwen', parameters: { @@ -3104,6 +3113,8 @@ const qwenImageModels: AIImageModelCard[] = [ default: '1328x1328', enum: ['1664x928', '1472x1140', '1328x1328', '1140x1472', '928x1664'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3128,6 +3139,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 2048, max: 11_585, min: 271, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3152,6 +3165,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 2048, max: 5792, min: 271, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3176,6 +3191,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1280, max: 2880, min: 640, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3198,6 +3215,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1280, max: 2880, min: 640, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3221,6 +3240,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1280, max: 2560, min: 384, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3242,6 +3263,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1280, max: 2880, min: 640, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3263,6 +3286,8 
@@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 1440, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3284,6 +3309,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 1440, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3305,6 +3332,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 1440, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3326,6 +3355,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 1440, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3347,6 +3378,8 @@ const qwenImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 1440, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3397,6 +3430,7 @@ const qwenImageModels: AIImageModelCard[] = [ default: '1k', enum: ['1k', '2k'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3427,6 +3461,7 @@ const qwenImageModels: AIImageModelCard[] = [ default: '1k', enum: ['1k', '2k', '4k'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3435,162 +3470,6 @@ const qwenImageModels: AIImageModelCard[] = [ releasedAt: '2026-03-26', type: 'image', }, - { - description: - 'FLUX.1 [schnell] is the most advanced open-source few-step model, surpassing similar competitors and even strong non-distilled models like Midjourney v6.0 and DALL-E 3 (HD). 
It is finely tuned to preserve pretraining diversity, significantly improving visual quality, instruction following, size/aspect variation, font handling, and output diversity.', - displayName: 'FLUX.1 [schnell]', - id: 'flux-schnell', - organization: 'Qwen', - parameters: { - prompt: { - default: '', - }, - seed: { default: null }, - size: { - default: '1024x1024', - enum: ['512x1024', '768x512', '768x1024', '1024x576', '576x1024', '1024x1024'], - }, - steps: { default: 4, max: 12, min: 1 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], - }, - releasedAt: '2024-08-07', - type: 'image', - }, - { - description: - 'FLUX.1 [dev] is an open-weights distilled model for non-commercial use. It keeps near-pro image quality and instruction following while running more efficiently, using resources better than same-size standard models.', - displayName: 'FLUX.1 [dev]', - id: 'flux-dev', - organization: 'Qwen', - parameters: { - prompt: { - default: '', - }, - seed: { default: null }, - size: { - default: '1024x1024', - enum: ['512x1024', '768x512', '768x1024', '1024x576', '576x1024', '1024x1024'], - }, - steps: { default: 50, max: 50, min: 1 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], - }, - releasedAt: '2024-08-07', - type: 'image', - }, - { - description: - 'FLUX.1 [merged] combines the deep features explored in "DEV" with the high-speed advantages of "Schnell", extending performance limits and broadening applications.', - displayName: 'FLUX.1 [merged]', - id: 'flux-merged', - organization: 'Qwen', - parameters: { - prompt: { - default: '', - }, - seed: { default: null }, - size: { - default: '1024x1024', - enum: ['512x1024', '768x512', '768x1024', '1024x576', '576x1024', '1024x1024'], - }, - steps: { default: 30, max: 30, min: 1 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 
'fixed', unit: 'image' }], - }, - releasedAt: '2024-08-22', - type: 'image', - }, - { - description: - 'stable-diffusion-3.5-large is an 800M-parameter MMDiT text-to-image model with excellent quality and prompt alignment, supporting 1-megapixel images and efficient runs on consumer hardware.', - displayName: 'StableDiffusion 3.5 Large', - id: 'stable-diffusion-3.5-large', - organization: 'Qwen', - parameters: { - height: { default: 1024, max: 1024, min: 512, step: 128 }, - prompt: { - default: '', - }, - steps: { default: 40, max: 500, min: 1 }, - width: { default: 1024, max: 1024, min: 512, step: 128 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], - }, - releasedAt: '2024-10-25', - type: 'image', - }, - { - description: - 'stable-diffusion-3.5-large-turbo applies adversarial diffusion distillation (ADD) to stable-diffusion-3.5-large for faster speed.', - displayName: 'StableDiffusion 3.5 Large Turbo', - id: 'stable-diffusion-3.5-large-turbo', - organization: 'Qwen', - parameters: { - height: { default: 1024, max: 1024, min: 512, step: 128 }, - prompt: { - default: '', - }, - steps: { default: 40, max: 500, min: 1 }, - width: { default: 1024, max: 1024, min: 512, step: 128 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], - }, - releasedAt: '2024-10-25', - type: 'image', - }, - { - description: - 'stable-diffusion-xl brings major improvements over v1.5 and matches top open text-to-image results. 
Improvements include a 3x larger UNet backbone, a refinement module for better image quality, and more efficient training techniques.', - displayName: 'StableDiffusion xl', - id: 'stable-diffusion-xl', - organization: 'Qwen', - parameters: { - height: { default: 1024, max: 1024, min: 512, step: 128 }, - prompt: { - default: '', - }, - steps: { default: 50, max: 500, min: 1 }, - width: { default: 1024, max: 1024, min: 512, step: 128 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], - }, - releasedAt: '2024-04-09', - type: 'image', - }, - { - description: - 'stable-diffusion-v1.5 is initialized from the v1.2 checkpoint and fine-tuned for 595k steps on "laion-aesthetics v2 5+" at 512x512 resolution, reducing text conditioning by 10% to improve classifier-free guidance sampling.', - displayName: 'StableDiffusion v1.5', - id: 'stable-diffusion-v1.5', - organization: 'Qwen', - parameters: { - height: { default: 512, max: 1024, min: 512, step: 128 }, - prompt: { - default: '', - }, - steps: { default: 50, max: 500, min: 1 }, - width: { default: 512, max: 1024, min: 512, step: 128 }, - }, - pricing: { - currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], - }, - releasedAt: '2024-04-09', - type: 'image', - }, ]; const qwenVideoModels: AIVideoModelCard[] = [ @@ -3614,6 +3493,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3634,8 +3515,9 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['16:9', '9:16', '1:1', '4:3', '3:4'], }, duration: { default: 5, max: 10, min: 2 }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 5, }, prompt: { default: '' }, resolution: { @@ -3643,6 +3525,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { 
default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3669,6 +3553,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3695,6 +3581,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3719,6 +3607,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3736,8 +3626,9 @@ const qwenVideoModels: AIVideoModelCard[] = [ parameters: { duration: { default: 5, max: 10, min: 2 }, generateAudio: { default: true }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 5, }, prompt: { default: '' }, size: { @@ -3756,6 +3647,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ ], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3771,8 +3664,9 @@ const qwenVideoModels: AIVideoModelCard[] = [ id: 'wan2.6-r2v', parameters: { duration: { default: 5, max: 10, min: 2 }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 5, }, prompt: { default: '' }, size: { @@ -3791,6 +3685,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ ], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3824,6 +3720,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ ], }, seed: { default: null }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3847,6 +3745,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '1080P', enum: 
['480P', '720P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3881,6 +3781,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1248x1632', ], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3906,6 +3808,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['480P', '720P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3931,6 +3835,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3954,6 +3860,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['480P', '720P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -3977,6 +3885,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '1080P', enum: ['480P', '1080P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4006,6 +3916,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1248x1632', ], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4028,6 +3940,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['480P', '720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4051,6 +3965,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '720P', enum: ['720P'], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4079,6 +3995,8 @@ const qwenVideoModels: AIVideoModelCard[] = [ '832x1088', ], }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4095,10 +4013,12 @@ const 
qwenVideoModels: AIVideoModelCard[] = [ parameters: { duration: { default: 5, enum: [5] }, prompt: { default: '' }, + promptExtend: { default: false }, size: { default: '1280x720', enum: ['1280x720', '720x1280', '960x960', '1088x832', '832x1088'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4131,6 +4051,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: '1080p', enum: ['720p', '1080p'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4154,14 +4075,16 @@ const qwenVideoModels: AIVideoModelCard[] = [ default: null, }, generateAudio: { default: true }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 7, }, prompt: { default: '' }, resolution: { default: '1080p', enum: ['720p', '1080p'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4204,6 +4127,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1238x1674', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4245,6 +4169,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1238x1674', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4285,6 +4210,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1238x1674', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4310,6 +4236,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4334,6 +4261,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4357,6 +4285,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4380,6 +4309,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['720P', '1080P'], }, seed: { default: null }, + 
watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4408,6 +4338,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4435,6 +4366,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4461,6 +4393,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4487,6 +4420,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4501,8 +4435,9 @@ const qwenVideoModels: AIVideoModelCard[] = [ id: 'vidu/viduq2-pro_reference2video', parameters: { duration: { default: 5, max: 10, min: 1 }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 7, }, prompt: { default: '' }, resolution: { @@ -4530,6 +4465,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1080x1920', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4544,8 +4480,9 @@ const qwenVideoModels: AIVideoModelCard[] = [ id: 'vidu/viduq2_reference2video', parameters: { duration: { default: 5, max: 10, min: 1 }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 7, }, prompt: { default: '' }, resolution: { @@ -4573,6 +4510,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1080x1920', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4628,6 +4566,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1920x832', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4670,6 +4609,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1080x1920', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ 
-4695,6 +4635,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['360P', '540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4719,6 +4660,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['360P', '540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4747,6 +4689,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['360P', '540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4774,6 +4717,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ enum: ['360P', '540P', '720P', '1080P'], }, seed: { default: null }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -4789,8 +4733,9 @@ const qwenVideoModels: AIVideoModelCard[] = [ parameters: { duration: { default: 5, enum: [5, 8, 10] }, generateAudio: { default: true }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 7, }, prompt: { default: '' }, seed: { default: null }, @@ -4819,6 +4764,7 @@ const qwenVideoModels: AIVideoModelCard[] = [ '1080x1920', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', diff --git a/packages/model-bank/src/aiModels/volcengine.ts b/packages/model-bank/src/aiModels/volcengine.ts index ca32aa4727..c1d28d922b 100644 --- a/packages/model-bank/src/aiModels/volcengine.ts +++ b/packages/model-bank/src/aiModels/volcengine.ts @@ -1165,11 +1165,14 @@ const volcengineImageModels: AIImageModelCard[] = [ prompt: { default: '', }, + promptExtend: { default: 'off', enum: ['off', 'standard'] }, + watermark: { default: false }, + webSearch: { default: false }, width: { default: 2048, max: 16_384, min: 480, step: 1 }, }, pricing: { currency: 'CNY', - units: [{ name: 'imageGeneration', rate: 0, strategy: 'fixed', unit: 'image' }], + units: [{ name: 'imageGeneration', rate: 0.22, strategy: 'fixed', unit: 'image' }], }, releasedAt: 
'2026-01-28', type: 'image', @@ -1186,6 +1189,8 @@ const volcengineImageModels: AIImageModelCard[] = [ prompt: { default: '', }, + promptExtend: { default: 'off', enum: ['off', 'standard'] }, + watermark: { default: false }, width: { default: 2048, max: 16_384, min: 480, step: 1 }, }, pricing: { @@ -1196,12 +1201,6 @@ const volcengineImageModels: AIImageModelCard[] = [ type: 'image', }, { - /* - // TODO: AIImageModelCard does not support config.deploymentName - config: { - deploymentName: 'doubao-seedream-3-0-t2i-250415', - }, - */ description: 'Seedream 4.0 is an image generation model from ByteDance Seed, supporting text and image inputs with highly controllable, high-quality image generation. It generates images from text prompts.', displayName: 'Seedream 4.0', @@ -1213,6 +1212,8 @@ const volcengineImageModels: AIImageModelCard[] = [ prompt: { default: '', }, + promptExtend: { default: 'off', enum: ['off', 'standard', 'fast'] }, + watermark: { default: false }, width: { default: 2048, max: 16_384, min: 240, step: 1 }, }, pricing: { @@ -1223,16 +1224,9 @@ const volcengineImageModels: AIImageModelCard[] = [ type: 'image', }, { - /* - // TODO: AIImageModelCard does not support config.deploymentName - config: { - deploymentName: 'doubao-seedream-3-0-t2i-250415', - }, - */ description: 'Seedream 3.0 is an image generation model from ByteDance Seed, supporting text and image inputs with highly controllable, high-quality image generation. 
It generates images from text prompts.', displayName: 'Seedream 3.0 Text-to-Image', - enabled: true, id: 'doubao-seedream-3-0-t2i-250415', parameters: { cfg: { default: 2.5, max: 10, min: 1, step: 0.1 }, @@ -1241,6 +1235,7 @@ const volcengineImageModels: AIImageModelCard[] = [ default: '', }, seed: { default: null }, + watermark: { default: false }, width: { default: 1024, max: 3549, min: 296, step: 1 }, }, pricing: { @@ -1250,15 +1245,10 @@ const volcengineImageModels: AIImageModelCard[] = [ releasedAt: '2025-04-15', type: 'image', }, - // Note: Doubao image-to-image and text-to-image models share the same Endpoint, currently switches to edit endpoint if imageUrl exists { - // config: { - // deploymentName: 'doubao-seededit-3-0-i2i-250628', - // }, description: 'The Doubao image model from ByteDance Seed supports text and image inputs with highly controllable, high-quality image generation. It supports text-guided image editing, with output sizes between 512 and 1536 on the long side.', displayName: 'SeedEdit 3.0 Image-to-Image', - enabled: true, id: 'doubao-seededit-3-0-i2i-250628', parameters: { cfg: { default: 5.5, max: 10, min: 1, step: 0.1 }, @@ -1267,6 +1257,11 @@ const volcengineImageModels: AIImageModelCard[] = [ default: '', }, seed: { default: null }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'imageGeneration', rate: 0.259, strategy: 'fixed', unit: 'image' }], }, releasedAt: '2025-06-28', type: 'image', @@ -1281,7 +1276,15 @@ const volcengineVideoModels: AIVideoModelCard[] = [ enabled: true, id: 'doubao-seedance-2-0-260128', organization: 'ByteDance', - parameters: seedance20Params, + parameters: { + ...seedance20Params, + watermark: { default: false }, + webSearch: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'videoGeneration', rate: 37, strategy: 'fixed', unit: 'millionTokens' }], + }, releasedAt: '2026-01-28', type: 'video', }, @@ -1292,7 +1295,15 @@ const 
volcengineVideoModels: AIVideoModelCard[] = [ enabled: true, id: 'doubao-seedance-2-0-fast-260128', organization: 'ByteDance', - parameters: seedance20Params, + parameters: { + ...seedance20Params, + watermark: { default: false }, + webSearch: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'videoGeneration', rate: 46, strategy: 'fixed', unit: 'millionTokens' }], + }, releasedAt: '2026-01-28', type: 'video', }, @@ -1303,7 +1314,24 @@ const volcengineVideoModels: AIVideoModelCard[] = [ enabled: true, id: 'doubao-seedance-1-5-pro-251215', organization: 'ByteDance', - parameters: seedance15ProParams, + parameters: { + ...seedance15ProParams, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [ + { + lookup: { + pricingParams: ['generateAudio'], + prices: { false: 8, true: 16 }, + }, + name: 'videoGeneration', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, releasedAt: '2025-12-15', type: 'video', }, @@ -1333,6 +1361,11 @@ const volcengineVideoModels: AIVideoModelCard[] = [ enum: ['480p', '720p', '1080p'], }, seed: { default: null }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'videoGeneration', rate: 4.2, strategy: 'fixed', unit: 'millionTokens' }], }, releasedAt: '2025-10-15', type: 'video', @@ -1371,6 +1404,11 @@ const volcengineVideoModels: AIVideoModelCard[] = [ enum: ['480p', '720p', '1080p'], }, seed: { default: null }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'videoGeneration', rate: 15, strategy: 'fixed', unit: 'millionTokens' }], }, releasedAt: '2025-05-28', type: 'video', @@ -1395,11 +1433,12 @@ const volcengineVideoModels: AIVideoModelCard[] = [ requiresImageUrl: true, width: { max: 6000, min: 300 }, }, - imageUrl: { + imageUrls: { aspectRatio: { max: 2.5, min: 0.4 }, - default: null, + default: [], height: { max: 6000, min: 300 }, maxFileSize: 30 * 1024 * 1024, + maxCount: 4, width: { max: 6000, 
min: 300 }, }, duration: { default: 5, max: 12, min: 2 }, @@ -1409,6 +1448,11 @@ const volcengineVideoModels: AIVideoModelCard[] = [ enum: ['480p', '720p', '1080p'], }, seed: { default: null }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'videoGeneration', rate: 10, strategy: 'fixed', unit: 'millionTokens' }], }, releasedAt: '2025-04-28', type: 'video', @@ -1432,6 +1476,11 @@ const volcengineVideoModels: AIVideoModelCard[] = [ enum: ['480p', '720p', '1080p'], }, seed: { default: null }, + watermark: { default: false }, + }, + pricing: { + currency: 'CNY', + units: [{ name: 'videoGeneration', rate: 10, strategy: 'fixed', unit: 'millionTokens' }], }, releasedAt: '2025-04-28', type: 'video', diff --git a/packages/model-bank/src/aiModels/wenxin.ts b/packages/model-bank/src/aiModels/wenxin.ts index afb30f4480..6f6054c921 100644 --- a/packages/model-bank/src/aiModels/wenxin.ts +++ b/packages/model-bank/src/aiModels/wenxin.ts @@ -1772,6 +1772,7 @@ const wenxinImageModels: AIImageModelCard[] = [ '1104x1472', ], }, + promptExtend: { default: false }, }, pricing: { currency: 'CNY', @@ -1791,6 +1792,7 @@ const wenxinImageModels: AIImageModelCard[] = [ default: '', }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1812,6 +1814,7 @@ const wenxinImageModels: AIImageModelCard[] = [ default: '', }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1834,6 +1837,8 @@ const wenxinImageModels: AIImageModelCard[] = [ seed: { default: null }, steps: { default: 25, max: 50, min: 1 }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1855,6 +1860,8 @@ const wenxinImageModels: AIImageModelCard[] = [ }, seed: { default: null }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + 
promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1876,6 +1883,7 @@ const wenxinImageModels: AIImageModelCard[] = [ seed: { default: null }, steps: { default: 25, max: 50, min: 1 }, width: { default: 1024, max: 2048, min: 512, step: 1 }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1899,6 +1907,8 @@ const wenxinVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1918,6 +1928,8 @@ const wenxinVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1937,6 +1949,8 @@ const wenxinVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1956,6 +1970,8 @@ const wenxinVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1974,6 +1990,8 @@ const wenxinVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + promptExtend: { default: false }, + watermark: { default: false }, }, pricing: { currency: 'CNY', diff --git a/packages/model-bank/src/aiModels/zhipu.ts b/packages/model-bank/src/aiModels/zhipu.ts index d25ef200e3..bc7eacadfd 100644 --- a/packages/model-bank/src/aiModels/zhipu.ts +++ b/packages/model-bank/src/aiModels/zhipu.ts @@ -1256,6 +1256,10 @@ const zhipuImageModels: AIImageModelCard[] = [ prompt: { default: '', }, + resolution: { + default: 'hd', + enum: ['hd'], + }, size: { default: '1280x1280', enum: [ @@ -1268,6 +1272,7 @@ const zhipuImageModels: AIImageModelCard[] = [ '960x1728', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ 
-1286,10 +1291,15 @@ const zhipuImageModels: AIImageModelCard[] = [ prompt: { default: '', }, + resolution: { + default: 'standard', + enum: ['hd', 'standard'], + }, size: { default: '1024x1024', enum: ['1024x1024', '768x1344', '864x1152', '1344x768', '1152x864', '1440x720', '720x1440'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1308,10 +1318,15 @@ const zhipuImageModels: AIImageModelCard[] = [ prompt: { default: '', }, + resolution: { + default: 'standard', + enum: ['hd', 'standard'], + }, size: { default: '1024x1024', enum: ['1024x1024', '768x1344', '864x1152', '1344x768', '1152x864', '1440x720', '720x1440'], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1334,12 +1349,10 @@ const zhipuVideoModels: AIVideoModelCard[] = [ enum: ['16:9', '9:16', '1:1'], }, duration: { default: 4, enum: [4] }, - endImageUrl: { - default: null, - }, generateAudio: { default: true }, - imageUrl: { - default: null, + imageUrls: { + default: [], + maxCount: 3, }, prompt: { default: '' }, size: { @@ -1507,6 +1520,10 @@ const zhipuVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + resolution: { + default: 'speed', + enum: ['speed', 'quality'], + }, size: { default: '1920x1080', enum: [ @@ -1519,6 +1536,7 @@ const zhipuVideoModels: AIVideoModelCard[] = [ '3840x2160', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1538,6 +1556,10 @@ const zhipuVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + resolution: { + default: 'speed', + enum: ['speed', 'quality'], + }, size: { default: '1920x1080', enum: [ @@ -1551,6 +1573,7 @@ const zhipuVideoModels: AIVideoModelCard[] = [ '3840x2160', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', @@ -1570,6 +1593,10 @@ const zhipuVideoModels: AIVideoModelCard[] = [ default: null, }, prompt: { default: '' }, + resolution: { + default: 'speed', + enum: ['speed', 'quality'], + }, size: { default: 
'1920x1080', enum: [ @@ -1583,6 +1610,7 @@ const zhipuVideoModels: AIVideoModelCard[] = [ '3840x2160', ], }, + watermark: { default: false }, }, pricing: { currency: 'CNY', diff --git a/packages/model-bank/src/standard-parameters/index.test.ts b/packages/model-bank/src/standard-parameters/index.test.ts index f511af8dbc..c70828a76c 100644 --- a/packages/model-bank/src/standard-parameters/index.test.ts +++ b/packages/model-bank/src/standard-parameters/index.test.ts @@ -11,7 +11,10 @@ describe('meta-schema', () => { width: { default: 1024, min: 512, max: 2048, step: 64 }, height: { default: 1024, min: 512, max: 2048, step: 64 }, steps: { default: 20, min: 1, max: 50 }, + promptExtend: { default: false }, + watermark: { default: false }, seed: { default: null, min: 0 }, + webSearch: { default: true }, cfg: { default: 7.5, min: 1, max: 20, step: 0.5 }, aspectRatio: { default: '1:1', enum: ['1:1', '16:9', '4:3'] }, size: { default: '1024x1024', enum: ['512x512', '1024x1024', '1536x1536'] }, @@ -34,15 +37,21 @@ describe('meta-schema', () => { const schema: ModelParamsSchema = { prompt: {}, width: { default: 1024, min: 512, max: 2048 }, + promptExtend: { default: 'standard', enum: ['standard', 'fast'] }, + watermark: {}, seed: {}, + webSearch: {}, }; const result = ModelParamsMetaSchema.parse(schema); expect(result.prompt.default).toBe(''); expect(result.width?.step).toBe(1); + expect(result.promptExtend?.default).toBe('standard'); + expect(result.watermark?.default).toBe(false); expect(result.seed?.default).toBeNull(); expect(result.seed?.min).toBe(0); + expect(result.webSearch?.default).toBe(true); }); it('should reject invalid parameter schemas', () => { @@ -148,6 +157,9 @@ describe('meta-schema', () => { prompt: { default: 'test' }, width: { default: 1024, min: 512, max: 2048 }, seed: { default: 12345 }, + promptExtend: { default: 'fast', enum: ['standard', 'fast'] }, + watermark: { default: true }, + webSearch: { default: false }, cfg: { default: 7.5, min: 1, max: 20, 
step: 0.5 }, aspectRatio: { default: '16:9', enum: ['1:1', '16:9', '4:3'] }, imageUrls: { default: ['test.jpg'] }, @@ -159,6 +171,9 @@ describe('meta-schema', () => { expect(typeof result.prompt).toBe('string'); expect(typeof result.width).toBe('number'); expect(typeof result.seed).toBe('number'); + expect(typeof result.promptExtend).toBe('string'); + expect(typeof result.watermark).toBe('boolean'); + expect(typeof result.webSearch).toBe('boolean'); expect(typeof result.cfg).toBe('number'); expect(typeof result.aspectRatio).toBe('string'); expect(Array.isArray(result.imageUrls)).toBe(true); diff --git a/packages/model-bank/src/standard-parameters/index.ts b/packages/model-bank/src/standard-parameters/index.ts index 3a8bf3d88c..967fcc33f7 100644 --- a/packages/model-bank/src/standard-parameters/index.ts +++ b/packages/model-bank/src/standard-parameters/index.ts @@ -202,6 +202,31 @@ export const ModelParamsMetaSchema = z.object({ }) .optional(), + promptExtend: z + .object({ + default: z.union([z.boolean(), z.string()]), + description: z.string().optional(), + enum: z.array(z.string()).optional(), + type: z.union([z.literal('boolean'), z.literal('string')]).optional(), + }) + .optional(), + + watermark: z + .object({ + default: z.boolean().default(false), + description: z.string().optional(), + type: z.literal('boolean').optional(), + }) + .optional(), + + webSearch: z + .object({ + default: z.boolean().default(true), + description: z.string().optional(), + type: z.literal('boolean').optional(), + }) + .optional(), + seed: z .object({ default: z.number().nullable().default(null), diff --git a/packages/model-bank/src/standard-parameters/video.test.ts b/packages/model-bank/src/standard-parameters/video.test.ts index cc7cd73889..169093bb92 100644 --- a/packages/model-bank/src/standard-parameters/video.test.ts +++ b/packages/model-bank/src/standard-parameters/video.test.ts @@ -20,6 +20,9 @@ describe('video standard-parameters', () => { duration: { default: 5, max: 10, 
min: 1, step: 1 }, endImageUrl: { default: null }, generateAudio: { default: true }, + promptExtend: { default: 'standard', enum: ['standard', 'fast'] }, + watermark: { default: false }, + webSearch: { default: true }, imageUrl: { default: null }, prompt: { default: '' }, resolution: { default: '720p', enum: ['480p', '720p', '1080p'] }, @@ -41,6 +44,9 @@ describe('video standard-parameters', () => { const schema: VideoModelParamsSchema = { cameraFixed: {}, generateAudio: {}, + promptExtend: { default: true }, + watermark: {}, + webSearch: {}, prompt: {}, seed: {}, }; @@ -50,6 +56,9 @@ describe('video standard-parameters', () => { expect(result.prompt.default).toBe(''); expect(result.cameraFixed?.default).toBe(false); expect(result.generateAudio?.default).toBe(true); + expect(result.promptExtend?.default).toBe(true); + expect(result.watermark?.default).toBe(false); + expect(result.webSearch?.default).toBe(true); expect(result.seed?.default).toBeNull(); expect(result.seed?.max).toBe(MAX_VIDEO_SEED); expect(result.seed?.min).toBe(-1); @@ -89,6 +98,9 @@ describe('video standard-parameters', () => { cameraFixed: { default: true }, duration: { default: 5, max: 10, min: 1 }, generateAudio: { default: false }, + promptExtend: { default: 'fast', enum: ['standard', 'fast'] }, + watermark: { default: true }, + webSearch: { default: false }, prompt: { default: 'test prompt' }, resolution: { default: '1080p', enum: ['720p', '1080p'] }, seed: { default: 42 }, @@ -101,6 +113,9 @@ describe('video standard-parameters', () => { expect(result.cameraFixed).toBe(true); expect(result.duration).toBe(5); expect(result.generateAudio).toBe(false); + expect(result.promptExtend).toBe('fast'); + expect(result.watermark).toBe(true); + expect(result.webSearch).toBe(false); expect(result.resolution).toBe('1080p'); expect(result.seed).toBe(42); }); @@ -132,6 +147,9 @@ describe('video standard-parameters', () => { const schema: VideoModelParamsSchema = { cameraFixed: { default: false }, 
generateAudio: { default: true }, + promptExtend: { default: 'standard', enum: ['standard', 'fast'] }, + watermark: { default: false }, + webSearch: { default: true }, prompt: { default: 'hello' }, seed: { default: null }, }; @@ -141,6 +159,9 @@ describe('video standard-parameters', () => { expect(typeof result.prompt).toBe('string'); expect(typeof result.cameraFixed).toBe('boolean'); expect(typeof result.generateAudio).toBe('boolean'); + expect(typeof result.promptExtend).toBe('string'); + expect(typeof result.watermark).toBe('boolean'); + expect(typeof result.webSearch).toBe('boolean'); expect(result.seed).toBeNull(); }); }); @@ -172,6 +193,9 @@ describe('video standard-parameters', () => { expect(params.prompt).toBe('required prompt'); expect(params.cameraFixed).toBeUndefined(); expect(params.generateAudio).toBeUndefined(); + expect(params.promptExtend).toBeUndefined(); + expect(params.watermark).toBeUndefined(); + expect(params.webSearch).toBeUndefined(); expect(params.seed).toBeUndefined(); }); }); diff --git a/packages/model-bank/src/standard-parameters/video.ts b/packages/model-bank/src/standard-parameters/video.ts index c6c4c7002b..73dfe351f4 100644 --- a/packages/model-bank/src/standard-parameters/video.ts +++ b/packages/model-bank/src/standard-parameters/video.ts @@ -41,6 +41,20 @@ export const VideoModelParamsMetaSchema = z.object({ }) .optional(), + imageUrls: z + .object({ + /** Aspect ratio (width/height) constraints */ + aspectRatio: z.object({ max: z.number().optional(), min: z.number().optional() }).optional(), + default: z.array(z.string()), + description: z.string().optional(), + height: z.object({ max: z.number().optional(), min: z.number().optional() }).optional(), + maxCount: z.number().optional(), + maxFileSize: z.number().optional(), + type: z.literal('array').optional(), + width: z.object({ max: z.number().optional(), min: z.number().optional() }).optional(), + }) + .optional(), + endImageUrl: z .object({ /** Aspect ratio (width/height) 
constraints */ @@ -110,6 +124,31 @@ export const VideoModelParamsMetaSchema = z.object({ }) .optional(), + promptExtend: z + .object({ + default: z.union([z.boolean(), z.string()]), + description: z.string().optional(), + enum: z.array(z.string()).optional(), + type: z.union([z.literal('boolean'), z.literal('string')]).optional(), + }) + .optional(), + + watermark: z + .object({ + default: z.boolean().default(false), + description: z.string().optional(), + type: z.literal('boolean').optional(), + }) + .optional(), + + webSearch: z + .object({ + default: z.boolean().default(true), + description: z.string().optional(), + type: z.literal('boolean').optional(), + }) + .optional(), + seed: z .object({ default: z.number().nullable().default(null), @@ -140,8 +179,13 @@ type VideoTypeMapping = T extends 'string' type VideoTypeType = NonNullable< VideoModelParamsOutputSchema[K] >['type']; +type VideoDefaultType = NonNullable< + VideoModelParamsOutputSchema[K] +>['default']; type _StandardVideoGenerationParameters

= { - [key in P]: VideoTypeMapping>; + [key in P]: NonNullable> extends 'array' + ? VideoDefaultType + : VideoTypeMapping>; }; export type RuntimeVideoGenParams = Pick<_StandardVideoGenerationParameters, 'prompt'> & diff --git a/packages/model-runtime/src/providers/google/createVideo.ts b/packages/model-runtime/src/providers/google/createVideo.ts index f169fae1dd..bd470172e5 100644 --- a/packages/model-runtime/src/providers/google/createVideo.ts +++ b/packages/model-runtime/src/providers/google/createVideo.ts @@ -49,6 +49,7 @@ export async function createGoogleVideo( const { prompt, imageUrl, + imageUrls, endImageUrl, aspectRatio, duration, @@ -76,6 +77,18 @@ export async function createGoogleVideo( ...(config && { config }), }; + if (imageUrls && imageUrls.length > 0) { + if (imageUrls.length === 1) { + requestParams.image = await imageToGoogleImageFormat(imageUrls[0]); + } else { + requestParams.config.referenceImages = await Promise.all( + imageUrls.map(async (url) => ({ + image: await imageToGoogleImageFormat(url), + })), + ); + } + } + log('Google video generation request params: %O', requestParams); const operation = await client.models.generateVideos(requestParams); diff --git a/packages/model-runtime/src/providers/hunyuan/createImage.test.ts b/packages/model-runtime/src/providers/hunyuan/createImage.test.ts index 688a9ef165..d671694dfd 100644 --- a/packages/model-runtime/src/providers/hunyuan/createImage.test.ts +++ b/packages/model-runtime/src/providers/hunyuan/createImage.test.ts @@ -67,6 +67,7 @@ describe('createHunyuanImage', () => { size: '1024:1024', extra_body: { logo_add: 0, + revise: 0, }, }); diff --git a/packages/model-runtime/src/providers/hunyuan/createImage.ts b/packages/model-runtime/src/providers/hunyuan/createImage.ts index bea7d6f0f2..f73f1e13f1 100644 --- a/packages/model-runtime/src/providers/hunyuan/createImage.ts +++ b/packages/model-runtime/src/providers/hunyuan/createImage.ts @@ -71,7 +71,8 @@ export async function 
createHunyuanImage( ? { images: [params.imageUrl] } : {}), extra_body: { - logo_add: 0, // Add Watermark: 0 disabled, 1 enabled + revise: params.promptExtend === true ? 1 : 0, // Prompt optimization switch, default is 0 (no optimization) + logo_add: params.watermark === true ? 1 : 0, // Watermark switch, default is 0 (no watermark) ...(typeof params.seed === 'number' ? { seed: params.seed } : {}), }, }; @@ -92,7 +93,9 @@ export async function createHunyuanImage( let errorData; try { errorData = await submitResponse.json(); - } catch {} + } catch (error) { + void error; + } const errorMessage = typeof errorData?.error?.message === 'string' @@ -202,7 +205,9 @@ export async function createHunyuanImage( let errorData; try { errorData = await queryResponse.json(); - } catch {} + } catch (error) { + void error; + } const errorMessage = typeof errorData?.message === 'string' diff --git a/packages/model-runtime/src/providers/minimax/createImage.test.ts b/packages/model-runtime/src/providers/minimax/createImage.test.ts index 8c39fb8433..b01ccb822a 100644 --- a/packages/model-runtime/src/providers/minimax/createImage.test.ts +++ b/packages/model-runtime/src/providers/minimax/createImage.test.ts @@ -66,6 +66,8 @@ describe('createMiniMaxImage', () => { model: 'image-01', n: 1, prompt: 'A beautiful sunset over the mountains', + aigc_watermark: false, + prompt_optimizer: false, }), }); @@ -113,6 +115,8 @@ describe('createMiniMaxImage', () => { model: 'image-01', n: 1, prompt: 'Abstract digital art', + aigc_watermark: false, + prompt_optimizer: false, }), }), ); @@ -161,6 +165,8 @@ describe('createMiniMaxImage', () => { model: 'image-01', n: 1, prompt: 'Reproducible image with seed', + aigc_watermark: false, + prompt_optimizer: false, seed: 42, }), }), @@ -211,6 +217,8 @@ describe('createMiniMaxImage', () => { model: 'image-01', n: 1, prompt: 'Image with seed 0', + aigc_watermark: false, + prompt_optimizer: false, seed: 0, }), }), @@ -331,6 +339,8 @@ 
describe('createMiniMaxImage', () => { model: 'image-01', n: 1, prompt: 'A girl looking into the distance from a library window', + aigc_watermark: false, + prompt_optimizer: false, subject_reference: [ { type: 'character', @@ -389,6 +399,8 @@ describe('createMiniMaxImage', () => { model: 'image-01', n: 1, prompt: 'A girl looking into the distance from a library window', + aigc_watermark: false, + prompt_optimizer: false, subject_reference: referenceImageUrls.map((url) => ({ type: 'character', image_file: url, diff --git a/packages/model-runtime/src/providers/minimax/createImage.ts b/packages/model-runtime/src/providers/minimax/createImage.ts index 9d9fcfbba8..f2f3e10704 100644 --- a/packages/model-runtime/src/providers/minimax/createImage.ts +++ b/packages/model-runtime/src/providers/minimax/createImage.ts @@ -39,7 +39,8 @@ export async function createMiniMaxImage( model, n: 1, prompt: params.prompt, - //prompt_optimizer: true, // Enable automatic prompt optimization + aigc_watermark: params.watermark ?? false, + prompt_optimizer: params.promptExtend ?? false, ...(typeof params.seed === 'number' ? { seed: params.seed } : {}), }; diff --git a/packages/model-runtime/src/providers/minimax/createVideo.ts b/packages/model-runtime/src/providers/minimax/createVideo.ts index 0bbf9d2779..bdb69f311d 100644 --- a/packages/model-runtime/src/providers/minimax/createVideo.ts +++ b/packages/model-runtime/src/providers/minimax/createVideo.ts @@ -143,7 +143,8 @@ export async function createMiniMaxVideo( const body: Record = { model, prompt, - aigc_watermark: false, // Disable watermark for better user experience + aigc_watermark: params.watermark ?? false, + prompt_optimizer: params.promptExtend ?? false, ...(typeof duration === 'number' ? { duration } : {}), ...(typeof resolution === 'string' ? 
{ resolution } : {}), }; @@ -166,6 +167,8 @@ export async function createMiniMaxVideo( body.last_frame_image = endImageUrl; } + log('Creating video with MiniMax API - model: %s, params: %O', model, params); + const response = await fetch(`${baseURL}/video_generation`, { body: JSON.stringify(body), headers: { diff --git a/packages/model-runtime/src/providers/qwen/createImage.ts b/packages/model-runtime/src/providers/qwen/createImage.ts index b5b43222bb..b6a95cd77d 100644 --- a/packages/model-runtime/src/providers/qwen/createImage.ts +++ b/packages/model-runtime/src/providers/qwen/createImage.ts @@ -88,6 +88,8 @@ async function createLegacySynthesisTask( : params.size ? { size: params.size.replaceAll('x', '*') } : { size: '1024*1024' }), + ...(params.promptExtend && { prompt_extend: params.promptExtend }), + ...(params.watermark && { watermark: params.watermark }), }; if (endpoint === 'image2image') { @@ -178,6 +180,8 @@ async function createHTTPAsyncGenerationTask( : params.size ? { size: params.size.replaceAll('x', '*') } : { size: '1024*1024' }), + ...(params.promptExtend && { prompt_extend: params.promptExtend }), + ...(params.watermark && { watermark: params.watermark }), }; const response = await fetch(endpoint, { @@ -266,6 +270,8 @@ async function createHTTPSyncGeneration( parameters: { n: 1, ...(typeof params.seed === 'number' ? 
{ seed: params.seed } : {}), + ...(params.promptExtend && { prompt_extend: params.promptExtend }), + ...(params.watermark && { watermark: params.watermark }), }, }), headers: { diff --git a/packages/model-runtime/src/providers/qwen/createVideo.ts b/packages/model-runtime/src/providers/qwen/createVideo.ts index 73900bf4f5..b6ef0fd0f3 100644 --- a/packages/model-runtime/src/providers/qwen/createVideo.ts +++ b/packages/model-runtime/src/providers/qwen/createVideo.ts @@ -117,7 +117,7 @@ async function createVideoTask( baseUrl: string, ): Promise { const { model, params } = payload; - const { prompt, imageUrl, endImageUrl } = params; + const { prompt, imageUrl, imageUrls, endImageUrl } = params; // Determine the endpoint based on task type const url = `${baseUrl}/api/v1/services/aigc/${taskType}/video-synthesis`; @@ -158,6 +158,14 @@ async function createVideoTask( url: imageUrl, }); } + if (imageUrls && imageUrls.length > 0) { + imageUrls.forEach((url) => + media.push({ + type: 'image', + url, + }), + ); + } if (endImageUrl) { media.push({ type: 'image', @@ -175,49 +183,19 @@ async function createVideoTask( url: imageUrl, }); } - if (endImageUrl) { - media.push({ - type: 'last_frame', - url: endImageUrl, - }); - } - if (media.length > 0) { - input.media = media; - } - } else if (model.startsWith('pixverse/')) { - if (imageUrl && !endImageUrl) { - input.media = [ - { - type: 'image_url', - url: imageUrl, - }, - ]; - } else if (imageUrl && endImageUrl) { - input.media = [ - { - type: 'first_frame', - url: imageUrl, - }, - { - type: 'last_frame', - url: endImageUrl, - }, - ]; - } - } else if (model.startsWith('wan2.7')) { - const media = []; - if (imageUrl) { - if (model.includes('r2v')) { - // For Wan2.7 R2V models, treat reference images as "reference_image" type to provide stronger referencing capability + if (imageUrls && imageUrls.length > 0) { + if (imageUrls.length === 1 && endImageUrl) { media.push({ - type: 'reference_image', - url: imageUrl, + type: 
'first_frame', + url: imageUrls[0], }); } else { - media.push({ - type: 'first_frame', - url: imageUrl, - }); + imageUrls.forEach((url) => + media.push({ + type: 'refer', + url, + }), + ); } } if (endImageUrl) { @@ -229,12 +207,76 @@ async function createVideoTask( if (media.length > 0) { input.media = media; } + } else if (model.startsWith('pixverse/')) { + const media = []; + if (imageUrls && imageUrls.length > 0) { + imageUrls.forEach((url) => + media.push({ + type: 'image_url', + url, + }), + ); + } + if (imageUrl && !endImageUrl) { + media.push({ + type: 'image_url', + url: imageUrl, + }); + } else if (imageUrl && endImageUrl) { + media.push( + { + type: 'first_frame', + url: imageUrl, + }, + { + type: 'last_frame', + url: endImageUrl, + }, + ); + } + if (media.length > 0) { + input.media = media; + } + } else if (model.startsWith('wan2.7')) { + const media = []; + if (imageUrl) { + media.push({ + type: 'first_frame', + url: imageUrl, + }); + } + if (imageUrls && imageUrls.length > 0) { + imageUrls.forEach((url) => + media.push({ + type: 'reference_image', + url, + }), + ); + } + if (endImageUrl) { + media.push({ + type: 'last_frame', + url: endImageUrl, + }); + } + if (media.length > 0) { + input.media = media; + } } else if (matchesModelPattern(model, reference2VideoModels)) { - input.reference_urls = [imageUrl]; + if (imageUrl) { + input.reference_urls = [imageUrl]; + } + if (imageUrls && imageUrls.length > 0) { + input.reference_urls = imageUrls; + } } else if (matchesModelPattern(model, keyframe2VideoModels)) { - input.first_frame_url = imageUrl; - input.last_frame_url = endImageUrl; - } else if (matchesModelPattern(model, image2VideoModels)) { + if (imageUrl) { + input.first_frame_url = imageUrl; + } + if (endImageUrl) { + input.last_frame_url = endImageUrl; + } + } else if (matchesModelPattern(model, image2VideoModels) && imageUrl) { input.img_url = imageUrl; } @@ -273,6 +315,14 @@ async function createVideoTask( } } + if (params.promptExtend) { + 
parameters.prompt_extend = params.promptExtend; + } + + if (params.watermark) { + parameters.watermark = params.watermark; + } + const response = await fetch(url, { body: JSON.stringify({ input, diff --git a/packages/model-runtime/src/providers/volcengine/createImage.test.ts b/packages/model-runtime/src/providers/volcengine/createImage.test.ts index a9db7ce7ef..b9aa47fae8 100644 --- a/packages/model-runtime/src/providers/volcengine/createImage.test.ts +++ b/packages/model-runtime/src/providers/volcengine/createImage.test.ts @@ -425,6 +425,128 @@ describe('createVolcengineImage', () => { }), ); }); + + it('should allow overriding watermark when watermark is provided', async () => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + payload.params = { + prompt: 'test prompt', + watermark: true, + }; + + await createVolcengineImage(payload, options); + + expect(mockGenerate).toHaveBeenCalledWith( + expect.objectContaining({ + watermark: true, + }), + ); + }); + + it('should enable web search tool when webSearch is true', async () => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + payload.model = 'doubao-seedream-5-0-260128'; + payload.params.webSearch = true; + + await createVolcengineImage(payload, options); + + expect(mockGenerate).toHaveBeenCalledWith( + expect.objectContaining({ + tools: [{ type: 'web_search' }], + }), + ); + }); + + it('should disable web search tool by default when webSearch is undefined', async () => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + await createVolcengineImage(payload, options); + + expect(mockGenerate).toHaveBeenCalledWith( + expect.not.objectContaining({ + tools: [{ type: 'web_search' }], + }), + ); + }); + + it('should disable web search tool when webSearch is false', async 
() => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + payload.params = { + prompt: 'test prompt', + webSearch: false, + }; + + await createVolcengineImage(payload, options); + + const requestOptions = mockGenerate.mock.calls[0]?.[0] as Record; + expect(requestOptions.tools).toBeUndefined(); + expect(requestOptions.webSearch).toBeUndefined(); + }); + + it('should add optimize_prompt_options if promptExtend is provided and not "off"', async () => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + payload.params = { + prompt: 'test prompt', + promptExtend: 'fast', + }; + + await createVolcengineImage(payload, options); + + const requestOptions = mockGenerate.mock.calls[0]?.[0] as Record; + expect(requestOptions.optimize_prompt_options).toEqual({ mode: 'fast' }); + expect(requestOptions.promptExtend).toBeUndefined(); + }); + + it('should not add optimize_prompt_options if promptExtend is "off"', async () => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + payload.params = { + prompt: 'test prompt', + promptExtend: 'off', + }; + + await createVolcengineImage(payload, options); + + const requestOptions = mockGenerate.mock.calls[0]?.[0] as Record; + expect(requestOptions.optimize_prompt_options).toBeUndefined(); + }); + + it('should not add optimize_prompt_options if promptExtend is undefined', async () => { + const mockResponse = { + data: [{ url: 'https://example.com/test.jpg' }], + }; + mockGenerate.mockResolvedValue(mockResponse); + + payload.params = { + prompt: 'test prompt', + }; + + await createVolcengineImage(payload, options); + + const requestOptions = mockGenerate.mock.calls[0]?.[0] as Record; + expect(requestOptions.optimize_prompt_options).toBeUndefined(); + }); }); describe('size extraction', () => { diff 
--git a/packages/model-runtime/src/providers/volcengine/createImage.ts b/packages/model-runtime/src/providers/volcengine/createImage.ts index 3a5386207f..992441319e 100644 --- a/packages/model-runtime/src/providers/volcengine/createImage.ts +++ b/packages/model-runtime/src/providers/volcengine/createImage.ts @@ -68,10 +68,19 @@ export async function createVolcengineImage( delete userInput.image; } + // Remove promptExtend and webSearch parameters that are not supported by Volcengine API + delete userInput.promptExtend; + delete userInput.webSearch; + // Build request options const requestOptions = { model, - watermark: false, // Default to no watermark + watermark: params.watermark ?? false, // Default to no watermark + ...(params.webSearch && { tools: [{ type: 'web_search' }] }), + ...(params.promptExtend && + params.promptExtend !== 'off' && { + optimize_prompt_options: { mode: params.promptExtend }, + }), ...userInput, }; diff --git a/packages/model-runtime/src/providers/volcengine/video/createVideo.test.ts b/packages/model-runtime/src/providers/volcengine/video/createVideo.test.ts index 80be91141f..59bb9ef9b0 100644 --- a/packages/model-runtime/src/providers/volcengine/video/createVideo.test.ts +++ b/packages/model-runtime/src/providers/volcengine/video/createVideo.test.ts @@ -162,6 +162,29 @@ describe('createVolcengineVideo', () => { expect(body.generate_audio).toBe(true); }); + it('should disable web search tool by default when webSearch is undefined', async () => { + payload.model = 'doubao-seedance-2-0-fast-260128'; + await createVolcengineVideo(payload, options); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.tools).toBeUndefined(); + }); + + it('should enable web search tool when webSearch is true', async () => { + payload.model = 'doubao-seedance-2-0-fast-260128'; + payload.params.webSearch = true; + await createVolcengineVideo(payload, options); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + 
expect(body.tools).toEqual([{ type: 'web_search' }]); + }); + + it('should disable web search tool when webSearch is false', async () => { + payload.model = 'doubao-seedance-2-0-fast-260128'; + payload.params.webSearch = false; + await createVolcengineVideo(payload, options); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.tools).toBeUndefined(); + }); + it('should map seed to body.seed', async () => { payload.params.seed = 42; await createVolcengineVideo(payload, options); @@ -196,6 +219,13 @@ describe('createVolcengineVideo', () => { const body = JSON.parse(mockFetch.mock.calls[0][1].body); expect(body.callback_url).toBe('https://example.com/webhook'); }); + + it('should allow overriding watermark when watermark is provided', async () => { + payload.params.watermark = true; + await createVolcengineVideo(payload, options); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.watermark).toBe(true); + }); }); describe('client config', () => { diff --git a/packages/model-runtime/src/providers/volcengine/video/createVideo.ts b/packages/model-runtime/src/providers/volcengine/video/createVideo.ts index 02d9165caf..338a6788b6 100644 --- a/packages/model-runtime/src/providers/volcengine/video/createVideo.ts +++ b/packages/model-runtime/src/providers/volcengine/video/createVideo.ts @@ -17,10 +17,13 @@ export async function createVolcengineVideo( const { prompt, imageUrl, + imageUrls, endImageUrl, aspectRatio, duration, generateAudio, + webSearch, + watermark, seed, resolution, cameraFixed, @@ -37,6 +40,16 @@ export async function createVolcengineVideo( content.push({ image_url: { url: imageUrl }, role: 'first_frame', type: 'image_url' }); } + if (imageUrls && imageUrls.length > 0) { + if (imageUrls.length === 1 && endImageUrl) { + content.push({ image_url: { url: imageUrls[0] }, role: 'first_frame', type: 'image_url' }); + } else { + imageUrls.forEach((url) => + content.push({ image_url: { url }, role: 'reference_image', type: 
'image_url' }), + ); + } + } + if (endImageUrl) { content.push({ image_url: { url: endImageUrl }, role: 'last_frame', type: 'image_url' }); } @@ -45,7 +58,8 @@ export async function createVolcengineVideo( const body: Record = { content, model, - watermark: false, + watermark: watermark ?? false, + ...(webSearch && { tools: [{ type: 'web_search' }] }), }; if (aspectRatio !== undefined) body.ratio = aspectRatio; diff --git a/packages/model-runtime/src/providers/wenxin/createImage.ts b/packages/model-runtime/src/providers/wenxin/createImage.ts index 87c2cd36b5..5ac025789e 100644 --- a/packages/model-runtime/src/providers/wenxin/createImage.ts +++ b/packages/model-runtime/src/providers/wenxin/createImage.ts @@ -56,6 +56,8 @@ export async function createWenxinImage( : {}), ...(params.steps !== undefined && { steps: params.steps }), ...(model === 'ernie-irag-edit' && { feature: 'variation' }), + ...(params.promptExtend && { prompt_extend: params.promptExtend }), + ...(params.watermark && { watermark: params.watermark }), }; const response = await fetch(endpoint, { @@ -71,7 +73,9 @@ export async function createWenxinImage( let errorData; try { errorData = await response.json(); - } catch {} + } catch (error) { + void error; + } const errorMessage = typeof errorData?.error === 'string' diff --git a/packages/model-runtime/src/providers/wenxin/createVideo.ts b/packages/model-runtime/src/providers/wenxin/createVideo.ts index e3274f6358..cc5e23f7d5 100644 --- a/packages/model-runtime/src/providers/wenxin/createVideo.ts +++ b/packages/model-runtime/src/providers/wenxin/createVideo.ts @@ -91,7 +91,8 @@ export async function createWenxinVideo( options: CreateVideoOptions, ): Promise { const { model, params } = payload; - const { prompt, imageUrl, aspectRatio, duration, generateAudio } = params; + const { prompt, imageUrl, aspectRatio, duration, generateAudio, promptExtend, watermark } = + params; log('Creating video with Wenxin API - model: %s, params: %O', model, params); @@ 
-125,6 +126,8 @@ export async function createWenxinVideo( if (aspectRatio) body.aspect_ratio = aspectRatio; if (duration) body.duration = duration; if (generateAudio !== undefined) body.generate_audio = generateAudio; + if (promptExtend) body.prompt_extend = promptExtend; + if (watermark) body.watermark = watermark; log('Wenxin video API request body: %O', body); diff --git a/packages/model-runtime/src/providers/zhipu/createImage.test.ts b/packages/model-runtime/src/providers/zhipu/createImage.test.ts new file mode 100644 index 0000000000..be4e33cb7b --- /dev/null +++ b/packages/model-runtime/src/providers/zhipu/createImage.test.ts @@ -0,0 +1,367 @@ +// @vitest-environment node +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { CreateImageOptions } from '../../core/openaiCompatibleFactory'; +import type { CreateImagePayload } from '../../types/image'; +import { createZhipuImage, pollZhipuImageStatus, queryZhipuImageStatus } from './createImage'; + +vi.mock('debug', () => ({ + default: vi.fn(() => vi.fn()), +})); + +describe('createZhipuImage', () => { + const mockOptions: CreateImageOptions = { + apiKey: 'test-api-key', + baseURL: 'https://open.bigmodel.cn/api/paas/v4', + provider: 'zhipu', + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it('should create image with basic prompt', async () => { + const mockTaskId = 'zhipu-task-123'; + const mockImageUrl = 'https://cdn.zhipu.ai/image.png'; + + global.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ id: mockTaskId }), + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'SUCCESS', + image_result: [{ url: mockImageUrl }], + }), + }); + + const payload: CreateImagePayload = { + model: 'glm-image', + params: { + prompt: 'A cute cat', + }, + }; + + const result = await createZhipuImage(payload, mockOptions); + + expect(fetch).toHaveBeenNthCalledWith( + 1, + 
'https://open.bigmodel.cn/api/paas/v4/async/images/generations', + expect.objectContaining({ + method: 'POST', + headers: { + 'Authorization': 'Bearer test-api-key', + 'Content-Type': 'application/json', + }, + }), + ); + + const submitBody = JSON.parse((fetch as any).mock.calls[0][1].body); + expect(submitBody).toEqual({ + model: 'glm-image', + prompt: 'A cute cat', + watermark_enabled: false, + }); + + expect(fetch).toHaveBeenNthCalledWith( + 2, + 'https://open.bigmodel.cn/api/paas/v4/async-result/zhipu-task-123', + expect.objectContaining({ + method: 'GET', + headers: { + 'Authorization': 'Bearer test-api-key', + 'Content-Type': 'application/json', + }, + }), + ); + + expect(result).toEqual({ + imageUrl: mockImageUrl, + }); + }); + + it('should create image via sync endpoint for cogview-4 model', async () => { + const mockImageUrl = 'https://cdn.zhipu.ai/sync-image.png'; + + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => ({ + created: 123, + data: [{ url: mockImageUrl }], + }), + }); + + const payload: CreateImagePayload = { + model: 'cogview-4', + params: { + prompt: 'A cute cat on window', + resolution: 'standard', + size: '1280x1280', + }, + }; + + const result = await createZhipuImage(payload, mockOptions); + + expect(fetch).toHaveBeenCalledTimes(1); + expect(fetch).toHaveBeenCalledWith( + 'https://open.bigmodel.cn/api/paas/v4/images/generations', + expect.objectContaining({ + method: 'POST', + headers: { + 'Authorization': 'Bearer test-api-key', + 'Content-Type': 'application/json', + }, + }), + ); + + const submitBody = JSON.parse((fetch as any).mock.calls[0][1].body); + expect(submitBody).toEqual({ + model: 'cogview-4', + prompt: 'A cute cat on window', + quality: 'standard', + size: '1280x1280', + watermark_enabled: false, + }); + + expect(result).toEqual({ imageUrl: mockImageUrl }); + }); + + it('should convert width and height to size parameter', async () => { + global.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: 
true, + json: async () => ({ id: 'zhipu-task-456' }), + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'SUCCESS', + image_result: [{ url: 'https://cdn.zhipu.ai/size.png' }], + }), + }); + + const payload: CreateImagePayload = { + model: 'glm-image', + params: { + prompt: 'Landscape', + height: 768, + width: 1024, + }, + }; + + await createZhipuImage(payload, mockOptions); + + const submitBody = JSON.parse((fetch as any).mock.calls[0][1].body); + expect(submitBody).toEqual({ + model: 'glm-image', + prompt: 'Landscape', + size: '1024x768', + watermark_enabled: false, + }); + }); + + it('should respect explicit size and watermark parameter', async () => { + global.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ id: 'zhipu-task-789' }), + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'SUCCESS', + image_result: [{ url: 'https://cdn.zhipu.ai/watermark.png' }], + }), + }); + + const payload: CreateImagePayload = { + model: 'glm-image', + params: { + prompt: 'Poster', + size: '1024x1024', + watermark: true, + width: 512, + height: 512, + }, + }; + + await createZhipuImage(payload, mockOptions); + + const submitBody = JSON.parse((fetch as any).mock.calls[0][1].body); + expect(submitBody).toEqual({ + model: 'glm-image', + prompt: 'Poster', + size: '1024x1024', + watermark_enabled: true, + }); + }); + + it('should throw on HTTP error', async () => { + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: false, + status: 402, + text: async () => 'Insufficient credits', + }); + + const payload: CreateImagePayload = { + model: 'glm-image', + params: { prompt: 'Test' }, + }; + + await expect(createZhipuImage(payload, mockOptions)).rejects.toThrow( + 'Zhipu image API error: 402 Insufficient credits', + ); + }); + + it('should throw when response is missing id', async () => { + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => ({}), + }); + + const 
payload: CreateImagePayload = { + model: 'glm-image', + params: { prompt: 'Test' }, + }; + + await expect(createZhipuImage(payload, mockOptions)).rejects.toThrow( + 'Invalid response: missing task id', + ); + }); + + it('should throw when sync response is missing image url', async () => { + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => ({ + created: 123, + data: [], + }), + }); + + const payload: CreateImagePayload = { + model: 'cogview-4', + params: { prompt: 'Test sync missing url' }, + }; + + await expect(createZhipuImage(payload, mockOptions)).rejects.toThrow( + 'Invalid sync response: missing image URL', + ); + }); +}); + +describe('pollZhipuImageStatus', () => { + const options = { + apiKey: 'test-api-key', + baseURL: 'https://open.bigmodel.cn/api/paas/v4', + }; + + it('should return success when task succeeded', async () => { + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'SUCCESS', + image_result: [{ url: 'https://cdn.zhipu.ai/success.png' }], + }), + }); + + const result = await pollZhipuImageStatus('task-123', options); + + expect(result).toEqual({ + imageUrl: 'https://cdn.zhipu.ai/success.png', + }); + }); + + it('should throw when task failed', async () => { + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'FAIL', + error: { message: 'Content moderation failed' }, + }), + }); + + await expect(pollZhipuImageStatus('task-123', options)).rejects.toThrow( + 'Content moderation failed', + ); + }); + + it('should keep polling when task is pending then succeed', async () => { + vi.useFakeTimers(); + + global.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ task_status: 'RUNNING' }), + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'SUCCESS', + image_result: [{ url: 'https://cdn.zhipu.ai/polled-success.png' }], + }), + }); + + const resultPromise = 
pollZhipuImageStatus('task-123', options); + await vi.advanceTimersByTimeAsync(1000); + const result = await resultPromise; + + expect(fetch).toHaveBeenCalledTimes(2); + expect(result).toEqual({ imageUrl: 'https://cdn.zhipu.ai/polled-success.png' }); + }); + + it('should throw when task succeeded but no image url', async () => { + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => ({ + task_status: 'SUCCESS', + image_result: [], + }), + }); + + await expect(pollZhipuImageStatus('task-123', options)).rejects.toThrow( + 'Task succeeded but no image URL found', + ); + }); +}); + +describe('queryZhipuImageStatus', () => { + it('should query status endpoint correctly', async () => { + const mockResponse = { + task_status: 'SUCCESS', + id: 'task-123', + request_id: 'req-456', + }; + + global.fetch = vi.fn().mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse, + }); + + const result = await queryZhipuImageStatus('task-123', { + apiKey: 'test-api-key', + baseURL: 'https://open.bigmodel.cn/api/paas/v4', + }); + + expect(fetch).toHaveBeenCalledWith( + 'https://open.bigmodel.cn/api/paas/v4/async-result/task-123', + expect.objectContaining({ + method: 'GET', + headers: { + 'Authorization': 'Bearer test-api-key', + 'Content-Type': 'application/json', + }, + }), + ); + expect(result).toEqual(mockResponse); + }); +}); diff --git a/packages/model-runtime/src/providers/zhipu/createImage.ts b/packages/model-runtime/src/providers/zhipu/createImage.ts new file mode 100644 index 0000000000..cad55a7b5a --- /dev/null +++ b/packages/model-runtime/src/providers/zhipu/createImage.ts @@ -0,0 +1,164 @@ +import createDebug from 'debug'; + +import type { CreateImageOptions } from '../../core/openaiCompatibleFactory'; +import type { CreateImagePayload, CreateImageResponse } from '../../types/image'; +import type { TaskResult } from '../../utils/asyncifyPolling'; +import { asyncifyPolling } from '../../utils/asyncifyPolling'; + +const log = 
createDebug('lobe-image:zhipu'); + +interface ZhipuImageStatusResponse { + created?: number; + data?: Array<{ + url?: string; + }>; + error?: { + code?: string; + message?: string; + }; + id?: string; + image_result?: Array<{ + b64_json?: string; + url?: string; + }>; + request_id?: string; + task_status?: string; +} + +export async function queryZhipuImageStatus( + inferenceId: string, + options: { apiKey: string; baseURL: string }, +): Promise { + const statusUrl = `${options.baseURL}/async-result/${inferenceId}`; + + log('Querying image status for: %s', inferenceId); + + const response = await fetch(statusUrl, { + headers: { + 'Authorization': `Bearer ${options.apiKey}`, + 'Content-Type': 'application/json', + }, + method: 'GET', + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Zhipu image status API error: ${response.status} ${errorText}`); + } + + const data = (await response.json()) as ZhipuImageStatusResponse; + log('Image status response: %O', data); + + return data; +} + +export async function pollZhipuImageStatus( + inferenceId: string, + options: { apiKey: string; baseURL: string }, +): Promise { + return await asyncifyPolling({ + backoffMultiplier: 1, + checkStatus: (taskStatus): TaskResult => { + if (taskStatus.task_status === 'SUCCESS') { + const imageUrl = taskStatus.image_result?.[0]?.url; + + if (!imageUrl) { + return { + error: new Error('Task succeeded but no image URL found'), + status: 'failed', + }; + } + + return { + data: { imageUrl }, + status: 'success', + }; + } + + if (taskStatus.task_status === 'FAIL') { + return { + error: new Error(taskStatus.error?.message || 'Image generation failed'), + status: 'failed', + }; + } + + return { status: 'pending' }; + }, + logger: { + debug: (message: any, ...args: any[]) => log(message, ...args), + error: (message: any, ...args: any[]) => log(message, ...args), + }, + pollingQuery: () => queryZhipuImageStatus(inferenceId, options), + }); +} + +/** + * Zhipu 
image generation implementation + * API docs: https://open.bigmodel.cn + */ +export async function createZhipuImage( + payload: CreateImagePayload, + options: CreateImageOptions, +): Promise { + const { model, params } = payload; + const { prompt, resolution, size, watermark, width, height } = params; + + log('Creating image with Zhipu API - model: %s, params: %O', model, params); + + const baseURL = options.baseURL || 'https://open.bigmodel.cn/api/paas/v4'; + + const body: Record = { + model, + prompt, + ...(resolution && { quality: resolution }), + }; + + if (size) { + body.size = size; + } else if (width !== undefined && height !== undefined) { + body.size = `${width}x${height}`; + } + + body.watermark_enabled = watermark ?? false; + + const isSyncModel = model.startsWith('cogview'); + const endpoint = isSyncModel + ? `${baseURL}/images/generations` + : `${baseURL}/async/images/generations`; + + const response = await fetch(endpoint, { + body: JSON.stringify(body), + headers: { + 'Authorization': `Bearer ${options.apiKey}`, + 'Content-Type': 'application/json', + }, + method: 'POST', + }); + + if (!response.ok) { + const errorText = await response.text(); + log('Zhipu image API error: %s %s', response.status, errorText); + throw new Error(`Zhipu image API error: ${response.status} ${errorText}`); + } + + const data = (await response.json()) as ZhipuImageStatusResponse; + log('Zhipu image API response: %O', data); + + const imageUrl = data.data?.[0]?.url; + if (imageUrl) { + return { imageUrl }; + } + + if (isSyncModel) { + throw new Error('Invalid sync response: missing image URL'); + } + + if (!data.id) { + throw new Error('Invalid response: missing task id'); + } + + return await pollZhipuImageStatus(data.id, { + apiKey: options.apiKey, + baseURL, + }); +} diff --git a/packages/model-runtime/src/providers/zhipu/createVideo.ts b/packages/model-runtime/src/providers/zhipu/createVideo.ts index 9d96d60d4e..3fe27309a2 100644 --- 
a/packages/model-runtime/src/providers/zhipu/createVideo.ts +++ b/packages/model-runtime/src/providers/zhipu/createVideo.ts @@ -90,7 +90,18 @@ export async function createZhipuVideo( options: CreateVideoOptions, ): Promise { const { model, params } = payload; - const { prompt, imageUrl, endImageUrl, aspectRatio, duration, generateAudio, size } = params; + const { + prompt, + imageUrl, + imageUrls, + endImageUrl, + aspectRatio, + duration, + generateAudio, + resolution, + size, + watermark, + } = params; log('Creating video with Zhipu API - model: %s, params: %O', model, params); @@ -104,15 +115,18 @@ export async function createZhipuVideo( // Zhipu requires image_url as an array: [first_frame, last_frame?] // https://docs.bigmodel.cn/cn/guide/paid-recommendation/cogvideox - const imageUrls: string[] = []; + const content: string[] = []; if (imageUrl) { - imageUrls.push(imageUrl); + content.push(imageUrl); + } + if (imageUrls && imageUrls.length > 0) { + imageUrls.forEach((url) => content.push(url)); } if (endImageUrl) { - imageUrls.push(endImageUrl); + content.push(endImageUrl); } - if (imageUrls.length > 0) { - body.image_url = imageUrls; + if (content.length > 0) { + body.image_url = content; } // Add other optional parameters @@ -120,6 +134,8 @@ export async function createZhipuVideo( if (duration) body.duration = duration; if (generateAudio !== undefined) body.with_audio = generateAudio; if (size) body.size = size; + if (resolution) body.quality = resolution; + if (watermark !== undefined) body.watermark_enabled = watermark; log('Zhipu video API request body: %O', body); diff --git a/packages/model-runtime/src/providers/zhipu/index.ts b/packages/model-runtime/src/providers/zhipu/index.ts index ae0f579fdf..ad48851498 100644 --- a/packages/model-runtime/src/providers/zhipu/index.ts +++ b/packages/model-runtime/src/providers/zhipu/index.ts @@ -9,6 +9,7 @@ import { OpenAIStream } from '../../core/streams/openai'; import { convertIterableToStream } from 
'../../core/streams/protocol'; import { getModelMaxOutputs } from '../../utils/getModelMaxOutputs'; import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse'; +import { createZhipuImage } from './createImage'; import { createZhipuVideo } from './createVideo'; export interface ZhipuModelCard { @@ -142,6 +143,7 @@ export const params = { }); }, }, + createImage: createZhipuImage, createVideo: createZhipuVideo, handlePollVideoStatus: async (inferenceId, options) => { const { pollZhipuVideoStatus } = await import('./createVideo'); diff --git a/src/locales/default/image.ts b/src/locales/default/image.ts index 355bac9676..bf6c0c4b2a 100644 --- a/src/locales/default/image.ts +++ b/src/locales/default/image.ts @@ -10,6 +10,7 @@ export default { 'config.imageUrl.label': 'Reference Image', 'config.imageUrls.label': 'Reference Images', 'config.model.label': 'Model', + 'config.promptExtend.label': 'Prompt Extend', 'config.prompt.placeholder': 'Describe what you want to generate', 'config.prompt.placeholderWithRef': 'Describe how you want to adjust the image', 'config.quality.label': 'Image Quality', @@ -24,7 +25,9 @@ export default { 'config.size.label': 'Size', 'config.steps.label': 'Steps', 'config.title': 'Configuration', + 'config.watermark.label': 'Watermark', 'config.width.label': 'Width', + 'config.webSearch.label': 'Web Search', 'generation.actions.applySeed': 'Apply Seed', 'generation.actions.copyError': 'Copy Error Message', 'generation.actions.copyPrompt': 'Copy Prompt', diff --git a/src/locales/default/video.ts b/src/locales/default/video.ts index 5a99dedcf4..b262832d26 100644 --- a/src/locales/default/video.ts +++ b/src/locales/default/video.ts @@ -6,6 +6,7 @@ export default { 'config.generateAudio.label': 'Generate Audio', 'config.header.title': 'Video', 'config.imageUrl.label': 'Start Frame', + 'config.promptExtend.label': 'Prompt Extend', 'config.prompt.placeholder': 'Describe the video you want to generate', 
'config.prompt.placeholderWithRef': 'Describe the scene you want to generate with the image', 'config.referenceImage.label': 'Reference Image', @@ -13,6 +14,8 @@ export default { 'config.seed.label': 'Seed', 'config.seed.random': 'Random', 'config.size.label': 'Size', + 'config.watermark.label': 'Watermark', + 'config.webSearch.label': 'Web Search', 'generation.actions.copyError': 'Copy Error Message', 'generation.actions.errorCopied': 'Error Message Copied to Clipboard', 'generation.actions.errorCopyFailed': 'Failed to Copy Error Message', diff --git a/src/routes/(main)/(create)/features/GenerationInput/InlineVideoFrames.tsx b/src/routes/(main)/(create)/features/GenerationInput/InlineVideoFrames.tsx index de4dda508a..8ac89faae5 100644 --- a/src/routes/(main)/(create)/features/GenerationInput/InlineVideoFrames.tsx +++ b/src/routes/(main)/(create)/features/GenerationInput/InlineVideoFrames.tsx @@ -3,16 +3,34 @@ import { Flexbox } from '@lobehub/ui'; import { createStaticStyles, cssVar } from 'antd-style'; import { ArrowLeftRight } from 'lucide-react'; -import { memo } from 'react'; +import { memo, useMemo, useState } from 'react'; import { useTranslation } from 'react-i18next'; import UploadCard, { UPLOAD_CARD_SIZE, type UploadData } from './UploadCard'; +const STACK_OFFSET = -(UPLOAD_CARD_SIZE - 8); +const EXPAND_OFFSET = 4; + const styles = createStaticStyles(({ css }) => ({ + addCirclePos: css` + position: absolute; + z-index: 100; + inset-block-end: -2px; + inset-inline-end: -2px; + `, + refGroup: css` + position: relative; + `, stack: css` position: relative; padding-block: 4px; padding-inline: 0; + + &:hover { + .inline-ref-close { + opacity: 1; + } + } `, swapIcon: css` flex-shrink: 0; @@ -23,28 +41,134 @@ const styles = createStaticStyles(({ css }) => ({ interface InlineVideoFramesProps { endImageUrl?: string | null; imageUrl?: string | null; + imageUrls?: string[] | null; isSupportEndImage?: boolean; + maxCount?: number; maxFileSize?: number; 
onEndImageChange: (data: UploadData | null) => void; onImageChange: (data: UploadData | null) => void; + onImageUrlsChange?: (data: UploadData) => void; + onRemoveImageUrl?: (url: string) => void; } const InlineVideoFrames = memo( - ({ imageUrl, endImageUrl, onImageChange, onEndImageChange, isSupportEndImage = true }) => { + ({ + imageUrl, + imageUrls, + endImageUrl, + onImageChange, + onEndImageChange, + onImageUrlsChange, + onRemoveImageUrl, + isSupportEndImage = true, + maxCount = 5, + maxFileSize, + }) => { const { t } = useTranslation('video'); + const [isHovered, setIsHovered] = useState(false); - const hasStartFrame = Boolean(imageUrl); - const showEndFrame = isSupportEndImage && hasStartFrame; + // Combine imageUrl and imageUrls for display + const refFrameUrls = useMemo(() => { + const urls: string[] = []; + if (imageUrl) urls.push(imageUrl); + if (Array.isArray(imageUrls)) { + urls.push(...imageUrls); + } + return urls; + }, [imageUrl, imageUrls]); + + const hasRefFrames = refFrameUrls.length > 0; + const canAddMore = refFrameUrls.length < maxCount; + const shouldCollapse = hasRefFrames && !isHovered; + const showEndFrame = isSupportEndImage && hasRefFrames; return ( - onImageChange(null)} - onUpload={(data) => onImageChange(data)} - /> + setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + > + {/* Render ref frames (from imageUrl and imageUrls) */} + {refFrameUrls.map((url, index) => { + const isFromImageUrl = url === imageUrl; + const label = + index === 0 && isFromImageUrl + ? t('config.imageUrl.label') + : t('config.referenceImage.label'); + return ( + 0 ? (shouldCollapse ? STACK_OFFSET : EXPAND_OFFSET) : 0, + zIndex: index + 1, + }} + onRemove={() => { + if (isFromImageUrl && imageUrl === url) { + onImageChange(null); + } else if (onRemoveImageUrl) { + onRemoveImageUrl(url); + } + }} + onUpload={ + isFromImageUrl + ? 
(data) => onImageChange(data) + : (data) => onImageUrlsChange?.(data) + } + /> + ); + })} + + {/* Add new frame button */} + {canAddMore && + (shouldCollapse ? ( + {}} + onUpload={(data) => { + if (onImageUrlsChange) { + onImageUrlsChange(data); + } else { + onImageChange(data); + } + }} + /> + ) : ( + {}} + onUpload={(data) => { + if (hasRefFrames) { + if (onImageUrlsChange) { + onImageUrlsChange(data); + } else { + onImageChange(data); + } + } else { + onImageChange(data); + } + }} + /> + ))} + + + {/* End frame separator and upload */} {showEndFrame && ( <> (({ label, paramName }) => { + const { value, setValue } = useGenerationConfigParam(paramName); + + return ( + + {label} + setValue(checked as any)} /> + + ); +}); + +const PromptExtendItem = memo(() => { + const { t } = useTranslation('image'); + const { value, setValue, enumValues } = useGenerationConfigParam('promptExtend'); + + if (enumValues && enumValues.length > 0) { + const options = enumValues.map((item) => ({ label: item, value: item })); + + return ( + + {t('config.promptExtend.label')} + setValue(String(next) as any)} + /> + + ); + } + + return ( + + {t('config.promptExtend.label')} + setValue(checked as any)} /> + + ); +}); + const PromptInput = ({ showTitle = false }: PromptInputProps) => { const isDarkMode = useIsDark(); const { t } = useTranslation('image'); @@ -75,6 +122,9 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { const isSupportSeed = useImageStore(isSupportedParamSelector('seed')); const isSupportSteps = useImageStore(isSupportedParamSelector('steps')); const isSupportCfg = useImageStore(isSupportedParamSelector('cfg')); + const isSupportPromptExtend = useImageStore(isSupportedParamSelector('promptExtend')); + const isSupportWatermark = useImageStore(isSupportedParamSelector('watermark')); + const isSupportWebSearch = useImageStore(isSupportedParamSelector('webSearch')); const isLogin = useUserStore(authSelectors.isLogin); const enabledImageModelList = 
useAiInfraStore(aiProviderSelectors.enabledImageModelList); const { showDimensionControl } = useDimensionControl(); @@ -120,9 +170,13 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { hasProcessedPrompt.current = true; setPromptParam(null); - setTimeout(async () => { + const timeoutId = window.setTimeout(async () => { await createImage(); }, 100); + + return () => { + window.clearTimeout(timeoutId); + }; } }, [promptParam, isLogin, setValue, setPromptParam, createImage]); @@ -266,6 +320,16 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { )} + {(isSupportWatermark || isSupportPromptExtend || isSupportWebSearch) && ( + + )} + {isSupportWatermark && ( + + )} + {isSupportPromptExtend && } + {isSupportWebSearch && ( + + )} } /> diff --git a/src/routes/(main)/(create)/video/features/GenerationFeed/BatchItem.tsx b/src/routes/(main)/(create)/video/features/GenerationFeed/BatchItem.tsx index 97636bb070..5e342d805a 100644 --- a/src/routes/(main)/(create)/video/features/GenerationFeed/BatchItem.tsx +++ b/src/routes/(main)/(create)/video/features/GenerationFeed/BatchItem.tsx @@ -214,7 +214,10 @@ export const VideoGenerationBatchItem = memo(({ b ); }; - const hasReferenceFrames = batch.config?.imageUrl || batch.config?.endImageUrl; + const hasReferenceFrames = + batch.config?.imageUrl || + (batch.config?.imageUrls && batch.config.imageUrls.length > 0) || + batch.config?.endImageUrl; return ( @@ -223,6 +226,7 @@ export const VideoGenerationBatchItem = memo(({ b )} {batch.prompt} diff --git a/src/routes/(main)/(create)/video/features/GenerationFeed/VideoReferenceFrames.tsx b/src/routes/(main)/(create)/video/features/GenerationFeed/VideoReferenceFrames.tsx index 7b26c846e6..7b10d2d2ad 100644 --- a/src/routes/(main)/(create)/video/features/GenerationFeed/VideoReferenceFrames.tsx +++ b/src/routes/(main)/(create)/video/features/GenerationFeed/VideoReferenceFrames.tsx @@ -37,42 +37,46 @@ const styles = createStaticStyles(({ css, cssVar }) => ({ 
interface VideoReferenceFramesProps { endImageUrl?: string | null; imageUrl?: string | null; + imageUrls?: string[]; } -const VideoReferenceFrames = memo(({ imageUrl, endImageUrl }) => { - const allImages: string[] = []; - if (imageUrl) allImages.push(imageUrl); - if (endImageUrl) allImages.push(endImageUrl); +const VideoReferenceFrames = memo( + ({ imageUrl, imageUrls, endImageUrl }) => { + const allImages: string[] = []; + if (imageUrl) allImages.push(imageUrl); + if (imageUrls && imageUrls.length > 0) allImages.push(...imageUrls); + if (endImageUrl) allImages.push(endImageUrl); - if (allImages.length === 0) return null; + if (allImages.length === 0) return null; - return ( - - - - {allImages.map((url, index) => ( -

- {index -
- ))} - - - ); -}); + return ( + + + + {allImages.map((url, index) => ( +
+ {index +
+ ))} +
+
+ ); + }, +); VideoReferenceFrames.displayName = 'VideoReferenceFrames'; diff --git a/src/routes/(main)/(create)/video/features/PromptInput/index.tsx b/src/routes/(main)/(create)/video/features/PromptInput/index.tsx index e12534cf81..69b40c402a 100644 --- a/src/routes/(main)/(create)/video/features/PromptInput/index.tsx +++ b/src/routes/(main)/(create)/video/features/PromptInput/index.tsx @@ -153,7 +153,7 @@ const SeedItem = memo(() => { interface SwitchItemProps { label: string; - paramName: 'cameraFixed' | 'generateAudio'; + paramName: 'cameraFixed' | 'generateAudio' | 'watermark' | 'webSearch'; } const SwitchItem = memo(({ label, paramName }) => { @@ -167,11 +167,48 @@ const SwitchItem = memo(({ label, paramName }) => { ); }); +const PromptExtendItem = memo(() => { + const { t } = useTranslation('video'); + const { value, setValue, enumValues } = useVideoGenerationConfigParam('promptExtend'); + + const options = enumValues?.map((item) => ({ label: item, value: item })) ?? []; + + if (options.length > 0) { + return ( + + {t('config.promptExtend.label')} + setValue(String(next) as any)} + /> + + ); + } + + return ( + + {t('config.promptExtend.label')} + setValue(checked as any)} /> + + ); +}); + const PromptInput = ({ showTitle = false }: PromptInputProps) => { const isDarkMode = useIsDark(); const { t } = useTranslation('video'); const { value, setValue } = useVideoGenerationConfigParam('prompt'); const { value: imageUrl, setValue: setImageUrl } = useVideoGenerationConfigParam('imageUrl'); + const { + value: imageUrls, + setValue: setImageUrls, + maxCount: imageUrlsMaxCount, + maxFileSize: imageUrlsMaxFileSize, + } = useVideoGenerationConfigParam('imageUrls'); + const { maxFileSize: imageUrlMaxFileSize } = useVideoGenerationConfigParam('imageUrl'); const { value: endImageUrl, setValue: setEndImageUrl } = useVideoGenerationConfigParam('endImageUrl'); const isCreating = useVideoStore(createVideoSelectors.isCreating); @@ -182,6 +219,7 @@ const PromptInput = ({ 
showTitle = false }: PromptInputProps) => { const enabledVideoModelList = useAiInfraStore(aiProviderSelectors.enabledVideoModelList); const isInit = useVideoStore((s) => s.isInit); const isSupportImageUrl = useVideoStore(isSupportedParamSelector('imageUrl')); + const isSupportImageUrls = useVideoStore(isSupportedParamSelector('imageUrls')); const isSupportEndImageUrl = useVideoStore(isSupportedParamSelector('endImageUrl')); const isSupportAspectRatio = useVideoStore(isSupportedParamSelector('aspectRatio')); const isSupportResolution = useVideoStore(isSupportedParamSelector('resolution')); @@ -189,7 +227,10 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { const isSupportDuration = useVideoStore(isSupportedParamSelector('duration')); const isSupportSeed = useVideoStore(isSupportedParamSelector('seed')); const isSupportGenerateAudio = useVideoStore(isSupportedParamSelector('generateAudio')); + const isSupportPromptExtend = useVideoStore(isSupportedParamSelector('promptExtend')); + const isSupportWatermark = useVideoStore(isSupportedParamSelector('watermark')); const isSupportCameraFixed = useVideoStore(isSupportedParamSelector('cameraFixed')); + const isSupportWebSearch = useVideoStore(isSupportedParamSelector('webSearch')); const isLogin = useUserStore(authSelectors.isLogin); const { value: duration } = useVideoGenerationConfigParam('duration'); useFetchAiVideoConfig(); @@ -237,25 +278,64 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { setPromptParam(null); - setTimeout(async () => { + const timeoutId = window.setTimeout(async () => { await createVideo(); }, 100); + + return () => { + window.clearTimeout(timeoutId); + }; } }, [promptParam, isLogin, setValue, setPromptParam, createVideo]); - const showInlineFrames = isSupportImageUrl || isSupportEndImageUrl; - const hasRefImages = Boolean(imageUrl || endImageUrl); + const showInlineFrames = isSupportImageUrl || isSupportImageUrls || isSupportEndImageUrl; + const 
framePreviewUrls = useMemo( + () => [imageUrl, ...(imageUrls ?? [])].filter(Boolean) as string[], + [imageUrl, imageUrls], + ); + const hasRefImages = framePreviewUrls.length > 0 || Boolean(endImageUrl); + const maxCount = useMemo(() => { + let count = 0; + if (isSupportImageUrl) count += 1; + if (isSupportImageUrls) count += imageUrlsMaxCount ?? 4; + return count; + }, [isSupportImageUrl, isSupportImageUrls, imageUrlsMaxCount]); - const handleImageChange = useCallback( - (data: string | { dimensions?: { height: number; width: number }; url: string } | null) => { - if (data === null) { - setImageUrl(null as any); - return; - } + const handleAddImage = useCallback( + (data: string | { dimensions?: { height: number; width: number }; url: string }) => { const url = typeof data === 'string' ? data : data?.url; - setImageUrl((url ?? null) as any); + if (!url) return; + if (framePreviewUrls.length >= maxCount) return; + + if (isSupportImageUrl && !imageUrl) { + setImageUrl(url); + } else if (isSupportImageUrls) { + setImageUrls([...(imageUrls ?? []), url] as any); + } else if (isSupportImageUrl) { + setImageUrl(url); + } }, - [setImageUrl], + [ + isSupportImageUrl, + isSupportImageUrls, + imageUrl, + imageUrls, + setImageUrl, + setImageUrls, + framePreviewUrls.length, + maxCount, + ], + ); + + const handleRemoveImage = useCallback( + (url: string) => { + if (url === imageUrl) { + setImageUrl(null); + } else { + setImageUrls((imageUrls ?? 
[]).filter((item) => item !== url) as any); + } + }, + [imageUrl, imageUrls, setImageUrl, setImageUrls], ); const handleEndImageChange = useCallback( @@ -286,9 +366,20 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { { + if (data === null) { + handleRemoveImage(imageUrl || ''); + return; + } + handleAddImage(data); + }} /> ) : undefined } @@ -343,9 +434,11 @@ const PromptInput = ({ showTitle = false }: PromptInputProps) => { )} - {(isSupportGenerateAudio || isSupportCameraFixed) && ( - - )} + {(isSupportGenerateAudio || + isSupportCameraFixed || + isSupportWatermark || + isSupportPromptExtend || + isSupportWebSearch) && } {isSupportGenerateAudio && ( { {isSupportCameraFixed && ( )} + {isSupportWatermark && ( + + )} + {isSupportPromptExtend && } + {isSupportWebSearch && ( + + )} } /> diff --git a/src/store/image/slices/generationConfig/action.test.ts b/src/store/image/slices/generationConfig/action.test.ts index 9420af570e..c97c325c6c 100644 --- a/src/store/image/slices/generationConfig/action.test.ts +++ b/src/store/image/slices/generationConfig/action.test.ts @@ -24,6 +24,7 @@ vi.mock('@/store/user/slices/settings/selectors', () => ({ // Test fixtures const customModelSchema: ModelParamsSchema = { prompt: { default: '' }, + imageUrls: { default: [] }, width: { default: 1024, min: 256, max: 2048, step: 64 }, height: { default: 1024, min: 256, max: 2048, step: 64 }, steps: { default: 20, min: 1, max: 50 }, @@ -44,6 +45,17 @@ const testImageModels: AIImageModelCard[] = [ parameters: customModelSchema, releasedAt: '2024-01-01', }, + { + id: 'single-image-model', + displayName: 'Single Image Model', + type: 'image', + parameters: { + prompt: { default: '' }, + imageUrl: { default: '' }, + steps: { default: 20, min: 1, max: 50 }, + } as ModelParamsSchema, + releasedAt: '2024-01-01', + }, ]; const mockProviders = [ @@ -57,6 +69,11 @@ const mockProviders = [ name: 'Custom Provider', children: [testImageModels[1]], }, + { + id: 'single-image-provider', 
+ name: 'Single Image Provider', + children: [testImageModels[2]], + }, ]; // Mock external dependencies @@ -178,7 +195,10 @@ describe('GenerationConfigAction', () => { expect(result.current.model).toBe('flux/schnell'); expect(result.current.provider).toBe('fal'); - expect(result.current.parameters).toEqual(fluxSchnellDefaultValues); + expect(result.current.parameters).toEqual({ + ...fluxSchnellDefaultValues, + prompt: 'initial prompt', + }); expect(result.current.parametersSchema).toEqual(fluxSchnellParamsSchema); }); @@ -191,26 +211,113 @@ describe('GenerationConfigAction', () => { expect(result.current.model).toBe('custom-model'); expect(result.current.provider).toBe('custom-provider'); - expect(result.current.parameters).toEqual(customModelDefaultValues); + expect(result.current.parameters).toEqual({ + ...customModelDefaultValues, + prompt: 'initial prompt', + }); expect(result.current.parametersSchema).toEqual(customModelSchema); }); - it('should completely replace parameters when switching models', () => { + it('should preserve prompt and image inputs when switching models', () => { const { result } = renderHook(() => useImageStore()); // Set some custom parameters act(() => { result.current.setParamOnInput('prompt', 'custom prompt'); + result.current.setParamOnInput('imageUrls', ['custom-image-1.png']); result.current.setParamOnInput('steps', 50); }); // Switch model act(() => { - result.current.setModelAndProviderOnSelect('flux/schnell', 'fal'); + result.current.setModelAndProviderOnSelect('custom-model', 'custom-provider'); }); - expect(result.current.parameters).toEqual(fluxSchnellDefaultValues); - expect(result.current.parameters?.prompt).toBe(''); + expect(result.current.parameters).toEqual({ + ...customModelDefaultValues, + prompt: 'custom prompt', + imageUrls: ['custom-image-1.png'], + }); + expect(result.current.parameters?.steps).toBe(customModelDefaultValues.steps); + }); + + it('should convert imageUrls[0] to imageUrl when switching to 
single-image model', () => { + const { result } = renderHook(() => useImageStore()); + + // Set up multi-image state with imageUrls + act(() => { + result.current.setParamOnInput('prompt', 'test prompt'); + result.current.setParamOnInput('imageUrls', ['image1.png', 'image2.png', 'image3.png']); + }); + + // Switch to single-image model - should convert imageUrls[0] to imageUrl + act(() => { + result.current.setModelAndProviderOnSelect('single-image-model', 'single-image-provider'); + }); + + expect(result.current.parameters?.imageUrl).toBe('image1.png'); + expect(result.current.parameters?.prompt).toBe('test prompt'); + expect(result.current.parameters?.imageUrls).toBeUndefined(); + }); + + it('should convert imageUrl to imageUrls array when switching to multi-image model', () => { + const { result } = renderHook(() => useImageStore()); + const singleImageSchema: ModelParamsSchema = { + prompt: { default: '' }, + imageUrl: { default: '' }, + steps: { default: 20, min: 1, max: 50 }, + }; + + // Initialize with single-image model state + useImageStore.setState({ + model: 'single-image-model', + provider: 'single-image-provider', + parameters: { + prompt: 'test prompt', + imageUrl: 'reference-image.png', + steps: 20, + }, + parametersSchema: singleImageSchema, + }); + + // Get fresh hook after state update + const { result: storeResult } = renderHook(() => useImageStore()); + + // Switch to multi-image model - should convert imageUrl to imageUrls array + act(() => { + storeResult.current.setModelAndProviderOnSelect('custom-model', 'custom-provider'); + }); + + expect(storeResult.current.parameters?.imageUrls).toEqual(['reference-image.png']); + expect(storeResult.current.parameters?.prompt).toBe('test prompt'); + expect(storeResult.current.parameters?.imageUrl).toBeUndefined(); + }); + + it('should migrate imageUrl when target model has empty imageUrls default', () => { + const singleImageSchema: ModelParamsSchema = { + prompt: { default: '' }, + imageUrl: { default: 
'' }, + }; + + useImageStore.setState({ + model: 'single-image-model', + provider: 'single-image-provider', + parameters: { + prompt: 'keep this prompt', + imageUrl: 'from-single-model.png', + }, + parametersSchema: singleImageSchema, + }); + + const { result } = renderHook(() => useImageStore()); + + // custom-model schema defines imageUrls default as [] + act(() => { + result.current.setModelAndProviderOnSelect('custom-model', 'custom-provider'); + }); + + expect(result.current.parameters?.imageUrls).toEqual(['from-single-model.png']); + expect(result.current.parameters?.prompt).toBe('keep this prompt'); }); }); diff --git a/src/store/image/slices/generationConfig/action.ts b/src/store/image/slices/generationConfig/action.ts index 543dc966f5..96a394ee26 100644 --- a/src/store/image/slices/generationConfig/action.ts +++ b/src/store/image/slices/generationConfig/action.ts @@ -14,6 +14,10 @@ import { useUserStore } from '@/store/user'; import { authSelectors } from '@/store/user/selectors'; import { settingsSelectors } from '@/store/user/slices/settings/selectors'; +import { + normalizeImageInputOnSchemaSwitch, + preserveSupportedParams, +} from '../../../utils/preserveSupportedParams'; import { type ImageStore } from '../../store'; import { calculateInitialAspectRatio } from '../../utils/aspectRatio'; import { adaptSizeToRatio, parseRatio } from '../../utils/size'; @@ -64,6 +68,20 @@ function prepareModelConfigState(model: string, provider: string) { }; } +function preserveImageInputParams( + previousParameters: RuntimeImageGenParams, + nextDefaultValues: RuntimeImageGenParams, + nextSchema: ModelParamsSchema, +) { + const result = preserveSupportedParams(previousParameters, nextDefaultValues, nextSchema, [ + 'prompt', + 'imageUrl', + 'imageUrls', + ]); + + return normalizeImageInputOnSchemaSwitch(previousParameters, nextSchema, result); +} + type Setter = StoreSetter; export const createGenerationConfigSlice = (set: Setter, get: () => ImageStore, _api?: unknown) 
=> new GenerationConfigActionImpl(set, get, _api); @@ -244,16 +262,23 @@ export class GenerationConfigActionImpl { }; setModelAndProviderOnSelect = (model: string, provider: string): void => { + const previousParameters = this.#get().parameters; const { defaultValues, parametersSchema, initialActiveRatio } = prepareModelConfigState( model, provider, ); + const parameters = preserveImageInputParams( + previousParameters, + defaultValues, + parametersSchema, + ); + this.#set( { model, provider, - parameters: defaultValues, + parameters, parametersSchema, isAspectRatioLocked: false, activeAspectRatio: initialActiveRatio, diff --git a/src/store/utils/preserveSupportedParams.ts b/src/store/utils/preserveSupportedParams.ts new file mode 100644 index 0000000000..8535785a7a --- /dev/null +++ b/src/store/utils/preserveSupportedParams.ts @@ -0,0 +1,63 @@ +export function preserveSupportedParams< + TParams extends Record, + TSchema extends Record, + TKey extends keyof TParams & string, +>( + previousParameters: TParams, + nextDefaultValues: TParams, + nextSchema: TSchema, + keys: readonly TKey[], +): TParams { + const supportedPreservedEntries = keys.flatMap((key) => { + if (!(key in nextSchema)) return []; + + const value = previousParameters[key]; + if (typeof value === 'undefined') return []; + + return [[key, value] as const]; + }); + + return { + ...nextDefaultValues, + ...Object.fromEntries(supportedPreservedEntries), + }; +} + +export function normalizeImageInputOnSchemaSwitch< + TParams extends Record & { + imageUrl?: unknown; + imageUrls?: unknown; + }, + TSchema extends Record, +>(previousParameters: TParams, nextSchema: TSchema, preservedResult: TParams): TParams { + const result = { ...preservedResult }; + + const imageUrl = previousParameters.imageUrl; + const imageUrls = previousParameters.imageUrls; + const supportsImageUrl = 'imageUrl' in nextSchema; + const supportsImageUrls = 'imageUrls' in nextSchema; + + // Multi-image -> Single-image + if ( + 
Array.isArray(imageUrls) && + imageUrls.length > 0 && + !supportsImageUrls && + supportsImageUrl && + !result.imageUrl + ) { + result.imageUrl = imageUrls[0]; + } + + // Single-image -> Multi-image + if ( + typeof imageUrl === 'string' && + imageUrl && + supportsImageUrls && + !supportsImageUrl && + !(Array.isArray(result.imageUrls) && result.imageUrls.length > 0) + ) { + result.imageUrls = [imageUrl]; + } + + return result; +} diff --git a/src/store/video/slices/generationConfig/action.test.ts b/src/store/video/slices/generationConfig/action.test.ts new file mode 100644 index 0000000000..cb241bdae5 --- /dev/null +++ b/src/store/video/slices/generationConfig/action.test.ts @@ -0,0 +1,109 @@ +import { act, renderHook } from '@testing-library/react'; +import { + type AIVideoModelCard, + extractVideoDefaultValues, + type RuntimeVideoGenParams, + type VideoModelParamsSchema, +} from 'model-bank'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { useVideoStore } from '@/store/video'; + +const modelASchema: VideoModelParamsSchema = { + prompt: { default: '' }, + imageUrl: { default: '' }, + endImageUrl: { default: '' }, + duration: { default: 5, min: 1, max: 10 }, +}; + +const modelBSchema: VideoModelParamsSchema = { + prompt: { default: '' }, + imageUrl: { default: '' }, + endImageUrl: { default: '' }, + duration: { default: 3, min: 1, max: 10 }, +}; + +const testVideoModels: AIVideoModelCard[] = [ + { + id: 'video-model-a', + displayName: 'Video Model A', + type: 'video', + parameters: modelASchema, + releasedAt: '2025-01-01', + }, + { + id: 'video-model-b', + displayName: 'Video Model B', + type: 'video', + parameters: modelBSchema, + releasedAt: '2025-01-02', + }, +]; + +const mockProviders = [ + { + id: 'provider-a', + name: 'Provider A', + children: [testVideoModels[0]], + }, + { + id: 'provider-b', + name: 'Provider B', + children: [testVideoModels[1]], + }, +]; + +vi.mock('@/store/aiInfra', () => ({ + aiProviderSelectors: { + 
enabledVideoModelList: vi.fn(() => mockProviders), + }, + getAiInfraStoreState: vi.fn(() => ({})), +})); + +const modelBDefaultValues = extractVideoDefaultValues(modelBSchema); + +beforeEach(() => { + vi.clearAllMocks(); + + useVideoStore.setState({ + isInit: true, + model: 'video-model-a', + provider: 'provider-a', + parametersSchema: modelASchema, + parameters: { + prompt: 'initial prompt', + imageUrl: 'start-frame.png', + endImageUrl: 'end-frame.png', + duration: 6, + } as RuntimeVideoGenParams, + }); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('video generationConfig actions', () => { + it('should preserve prompt and frame images when switching model', () => { + const { result } = renderHook(() => useVideoStore()); + + act(() => { + result.current.setParamOnInput('prompt', 'cinematic sunset'); + result.current.setParamOnInput('imageUrl', 'start-custom.png'); + result.current.setParamOnInput('endImageUrl', 'end-custom.png'); + result.current.setParamOnInput('duration', 8); + }); + + act(() => { + result.current.setModelAndProviderOnSelect('video-model-b', 'provider-b'); + }); + + expect(result.current.parameters).toEqual({ + ...modelBDefaultValues, + prompt: 'cinematic sunset', + imageUrl: 'start-custom.png', + endImageUrl: 'end-custom.png', + }); + expect(result.current.parameters?.duration).toBe(modelBDefaultValues.duration); + }); +}); diff --git a/src/store/video/slices/generationConfig/action.ts b/src/store/video/slices/generationConfig/action.ts index ae1b1f796e..d5e273da43 100644 --- a/src/store/video/slices/generationConfig/action.ts +++ b/src/store/video/slices/generationConfig/action.ts @@ -1,6 +1,7 @@ import { type AIVideoModelCard, extractVideoDefaultValues, + type RuntimeVideoGenParams, type RuntimeVideoGenParamsKeys, type RuntimeVideoGenParamsValue, type VideoModelParamsSchema, @@ -12,6 +13,10 @@ import { type StoreSetter } from '@/store/types'; import { useUserStore } from '@/store/user'; import { authSelectors } from 
'@/store/user/selectors'; +import { + normalizeImageInputOnSchemaSwitch, + preserveSupportedParams, +} from '../../../utils/preserveSupportedParams'; import type { VideoStore } from '../../store'; export function getVideoModelAndDefaults(model: string, provider: string) { @@ -39,17 +44,33 @@ export function getVideoModelAndDefaults(model: string, provider: string) { return { activeModel, defaultValues, parametersSchema }; } +function preserveVideoInputParams( + previousParameters: RuntimeVideoGenParams, + nextDefaultValues: RuntimeVideoGenParams, + nextSchema: VideoModelParamsSchema, +) { + const result = preserveSupportedParams(previousParameters, nextDefaultValues, nextSchema, [ + 'prompt', + 'imageUrl', + 'imageUrls', + 'endImageUrl', + ]); + + return normalizeImageInputOnSchemaSwitch(previousParameters, nextSchema, result); +} + type Setter = StoreSetter; export const createGenerationConfigSlice = (set: Setter, get: () => VideoStore, _api?: unknown) => new GenerationConfigActionImpl(set, get, _api); export class GenerationConfigActionImpl { + readonly #get: () => VideoStore; readonly #set: Setter; - constructor(set: Setter, _get: () => VideoStore, _api?: unknown) { - void _get; + constructor(set: Setter, get: () => VideoStore, _api?: unknown) { void _api; + this.#get = get; this.#set = set; } @@ -85,12 +106,18 @@ export class GenerationConfigActionImpl { }; setModelAndProviderOnSelect = (model: string, provider: string): void => { + const previousParameters = this.#get().parameters; const { defaultValues, parametersSchema } = getVideoModelAndDefaults(model, provider); + const parameters = preserveVideoInputParams( + previousParameters, + defaultValues, + parametersSchema, + ); this.#set( { model, - parameters: defaultValues, + parameters, parametersSchema, provider, }, diff --git a/src/store/video/slices/generationConfig/hooks.ts b/src/store/video/slices/generationConfig/hooks.ts index ddafe9dd41..33b6f08243 100644 --- 
a/src/store/video/slices/generationConfig/hooks.ts +++ b/src/store/video/slices/generationConfig/hooks.ts @@ -40,9 +40,10 @@ export function useVideoGenerationConfigParam< const enumValues = 'enum' in paramConfig ? (paramConfig.enum as string[]) : undefined; const min = 'min' in paramConfig ? (paramConfig.min as number) : undefined; const max = 'max' in paramConfig ? (paramConfig.max as number) : undefined; + const maxCount = 'maxCount' in paramConfig ? (paramConfig.maxCount as number) : undefined; const step = 'step' in paramConfig ? (paramConfig.step as number) : undefined; - return { enumValues, imageConstraints, max, maxFileSize, min, step }; + return { enumValues, imageConstraints, max, maxCount, maxFileSize, min, step }; }, [paramConfig]); return {