🐛 fix: fix Google Gemini output-related issues (#6970)

* fix google streaming * add ability * fix instruction * fix tests * update i18n
2026-06-14 11:40:07 +00:00 · 2025-03-15 11:52:20 +08:00
parent 59cafa0bc3
commit fdcaaf34fa
45 changed files with 225 additions and 10 deletions
@@ -76,6 +76,7 @@
      "custom": "نموذج مخصص، الإعداد الافتراضي يدعم الاستدعاء الوظيفي والتعرف البصري، يرجى التحقق من قدرة النموذج على القيام بذلك بناءً على الحالة الفعلية",
      "file": "يدعم هذا النموذج قراءة وتعرف الملفات المرفوعة",
      "functionCall": "يدعم هذا النموذج استدعاء الوظائف",
+      "imageOutput": "يدعم هذا النموذج إنشاء الصور",
      "reasoning": "يدعم هذا النموذج التفكير العميق",
      "search": "يدعم هذا النموذج البحث عبر الإنترنت",
      "tokens": "يدعم هذا النموذج حتى {{tokens}} رمزًا في جلسة واحدة",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "نموذج Gemini 2.0 Flash، تم تحسينه لتحقيق أهداف مثل الكفاءة من حيث التكلفة وانخفاض الكمون."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "نموذج تجريبي Gemini 2.0 Flash، يدعم توليد الصور"
+  },
  "gemini-2.0-flash-lite": {
    "description": "نموذج Gemini 2.0 Flash هو نسخة معدلة، تم تحسينها لتحقيق الكفاءة من حيث التكلفة والحد من التأخير."
  },
@@ -76,6 +76,7 @@
      "custom": "Потребителски модел, по подразбиране поддържа функционалност за функционални обаждания и визуално разпознаване, моля, потвърдете наличието на тези възможности спрямо реалните условия",
      "file": "Този модел поддържа качване на файлове и разпознаване",
      "functionCall": "Този модел поддържа функционални обаждания (Function Call)",
+      "imageOutput": "Този модел поддържа генериране на изображения",
      "reasoning": "Този модел поддържа дълбочинно мислене",
      "search": "Този модел поддържа търсене в мрежата",
      "tokens": "Този модел поддържа до {{tokens}} токена за една сесия",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash моделна вариация, оптимизирана за икономичност и ниска латентност."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash експериментален модел, който поддържа генериране на изображения"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash е вариант на модела, оптимизиран за икономичност и ниска латентност."
  },
@@ -76,6 +76,7 @@
      "custom": "Benutzerdefiniertes Modell, standardmäßig unterstützt es sowohl Funktionsaufrufe als auch visuelle Erkennung. Bitte überprüfen Sie die Verfügbarkeit dieser Fähigkeiten basierend auf den tatsächlichen Gegebenheiten.",
      "file": "Dieses Modell unterstützt das Hochladen von Dateien und deren Erkennung.",
      "functionCall": "Dieses Modell unterstützt Funktionsaufrufe.",
+      "imageOutput": "Dieses Modell unterstützt die Generierung von Bildern",
      "reasoning": "Dieses Modell unterstützt tiefes Denken",
      "search": "Dieses Modell unterstützt die Online-Suche",
      "tokens": "Dieses Modell unterstützt maximal {{tokens}} Tokens pro Sitzung.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash-Modellvariante, die auf Kosteneffizienz und niedrige Latenz optimiert ist."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash Experimentmodell, das die Bildgenerierung unterstützt"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash ist eine Modellvariante, die auf Kosteneffizienz und niedrige Latenz optimiert ist."
  },
@@ -76,6 +76,7 @@
      "custom": "Custom model, by default, supports both function call and visual recognition. Please verify the availability of the above capabilities based on actual situations.",
      "file": "This model supports file upload for reading and recognition.",
      "functionCall": "This model supports function call.",
+      "imageOutput": "This model supports image generation",
      "reasoning": "This model supports deep thinking",
      "search": "This model supports online search",
      "tokens": "This model supports up to {{tokens}} tokens in a single session.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash model variant optimized for cost-effectiveness and low latency."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash experimental model, supports image generation"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash is a variant of the model optimized for cost-effectiveness and low latency."
  },
@@ -76,6 +76,7 @@
      "custom": "Modelo personalizado: admite llamadas de función y reconocimiento visual. Verifique la disponibilidad de estas capacidades según sea necesario.",
      "file": "Este modelo admite la carga y reconocimiento de archivos.",
      "functionCall": "Este modelo admite llamadas de función.",
+      "imageOutput": "Este modelo admite la generación de imágenes",
      "reasoning": "Este modelo admite un pensamiento profundo",
      "search": "Este modelo admite búsqueda en línea",
      "tokens": "Este modelo admite un máximo de {{tokens}} tokens por sesión.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Variante del modelo Gemini 2.0 Flash, optimizada para objetivos como la rentabilidad y la baja latencia."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Modelo experimental Gemini 2.0 Flash, que admite la generación de imágenes"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Variante del modelo Gemini 2.0 Flash, optimizada para objetivos como la rentabilidad y la baja latencia."
  },
@@ -76,6 +76,7 @@
      "custom": "مدل سفارشی، تنظیمات پیش‌فرض از فراخوانی توابع و تشخیص بصری پشتیبانی می‌کند، لطفاً قابلیت‌های فوق را بر اساس شرایط واقعی بررسی کنید",
      "file": "این مدل از بارگذاری و شناسایی فایل‌ها پشتیبانی می‌کند",
      "functionCall": "این مدل از فراخوانی توابع (Function Call) پشتیبانی می‌کند",
+      "imageOutput": "این مدل از تولید تصویر پشتیبانی می‌کند",
      "reasoning": "این مدل از تفکر عمیق پشتیبانی می‌کند",
      "search": "این مدل از جستجوی آنلاین پشتیبانی می‌کند",
      "tokens": "این مدل در هر جلسه حداکثر از {{tokens}} توکن پشتیبانی می‌کند",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "مدل متغیر Gemini 2.0 Flash که برای بهینه‌سازی هزینه و تأخیر کم طراحی شده است."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "مدل آزمایشی Gemini 2.0 Flash، از تولید تصویر پشتیبانی می‌کند"
+  },
  "gemini-2.0-flash-lite": {
    "description": "مدل متغیر Gemini 2.0 Flash برای بهینه‌سازی هزینه و تأخیر کم طراحی شده است."
  },
@@ -76,6 +76,7 @@
      "custom": "Modèle personnalisé par défaut prenant en charge à la fois les appels de fonction et la reconnaissance visuelle. Veuillez vérifier la disponibilité de ces capacités en fonction de vos besoins réels.",
      "file": "Ce modèle prend en charge la lecture et la reconnaissance de fichiers téléchargés.",
      "functionCall": "Ce modèle prend en charge les appels de fonction.",
+      "imageOutput": "Ce modèle prend en charge la génération d'images",
      "reasoning": "Ce modèle prend en charge une réflexion approfondie",
      "search": "Ce modèle prend en charge la recherche en ligne",
      "tokens": "Ce modèle prend en charge jusqu'à {{tokens}} jetons par session.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Modèle variant Gemini 2.0 Flash, optimisé pour des objectifs tels que le rapport coût-efficacité et la faible latence."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Modèle expérimental Gemini 2.0 Flash, prenant en charge la génération d'images"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Une variante du modèle Gemini 2.0 Flash, optimisée pour des objectifs tels que le rapport coût-efficacité et la faible latence."
  },
@@ -76,6 +76,7 @@
      "custom": "Modello personalizzato: di default supporta sia la chiamata di funzioni che il riconoscimento visivo. Verifica l'effettiva disponibilità di tali funzionalità.",
      "file": "Questo modello supporta il caricamento e il riconoscimento di file.",
      "functionCall": "Questo modello supporta la chiamata di funzioni.",
+      "imageOutput": "Questo modello supporta la generazione di immagini",
      "reasoning": "Questo modello supporta un pensiero profondo",
      "search": "Questo modello supporta la ricerca online",
      "tokens": "Questo modello supporta un massimo di {{tokens}} token per sessione.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash è una variante del modello ottimizzata per obiettivi come il rapporto costo-efficacia e la bassa latenza."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Modello sperimentale Gemini 2.0 Flash, supporta la generazione di immagini"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash è una variante del modello Flash, ottimizzata per obiettivi come il rapporto costo-efficacia e la bassa latenza."
  },
@@ -76,6 +76,7 @@
      "custom": "カスタムモデル、デフォルトでは関数呼び出しとビジョン認識の両方をサポートしています。上記機能の有効性を確認してください。",
      "file": "このモデルはファイルのアップロードと認識をサポートしています。",
      "functionCall": "このモデルは関数呼び出し（Function Call）をサポートしています。",
+      "imageOutput": "このモデルは画像生成をサポートしています",
      "reasoning": "このモデルは深い思考をサポートしています",
      "search": "このモデルはオンライン検索をサポートしています",
      "tokens": "このモデルは1つのセッションあたり最大{{tokens}}トークンをサポートしています。",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash モデルのバリアントで、コスト効率と低遅延などの目標に最適化されています。"
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash 実験モデル、画像生成をサポート"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flashモデルのバリアントで、コスト効率と低遅延などの目標に最適化されています。"
  },
@@ -76,6 +76,7 @@
      "custom": "사용자 정의 모델, 기본적으로 함수 호출 및 시각 인식을 모두 지원하며, 실제 기능을 확인하세요",
      "file": "이 모델은 파일 업로드 및 인식을 지원합니다",
      "functionCall": "이 모델은 함수 호출을 지원합니다",
+      "imageOutput": "이 모델은 이미지를 생성하는 기능을 지원합니다.",
      "reasoning": "이 모델은 깊이 있는 사고를 지원합니다.",
      "search": "이 모델은 온라인 검색을 지원합니다.",
      "tokens": "이 모델은 단일 세션당 최대 {{tokens}} 토큰을 지원합니다",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash 모델 변형으로, 비용 효율성과 저지연 등의 목표를 위해 최적화되었습니다."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash 실험 모델, 이미지 생성을 지원합니다."
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 플래시 모델 변형으로, 비용 효율성과 낮은 지연 시간 등의 목표를 위해 최적화되었습니다."
  },
@@ -76,6 +76,7 @@
      "custom": "Aangepast model, ondersteunt standaard zowel functieaanroepen als visuele herkenning. Controleer de beschikbaarheid van bovenstaande mogelijkheden op basis van de werkelijke situatie.",
      "file": "Dit model ondersteunt het uploaden van bestanden voor lezen en herkenning.",
      "functionCall": "Dit model ondersteunt functieaanroepen.",
+      "imageOutput": "Dit model ondersteunt het genereren van afbeeldingen",
      "reasoning": "Dit model ondersteunt diepgaand denken",
      "search": "Dit model ondersteunt online zoeken",
      "tokens": "Dit model ondersteunt maximaal {{tokens}} tokens per sessie.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash modelvariant, geoptimaliseerd voor kosteneffectiviteit en lage latentie."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash experimenteel model, ondersteunt afbeeldingsgeneratie"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash is een modelvariant die is geoptimaliseerd voor kosteneffectiviteit en lage latentie."
  },
@@ -76,6 +76,7 @@
      "custom": "Niestandardowy model, domyślnie obsługujący zarówno wywołania funkcji, jak i rozpoznawanie wizualne. Proszę zweryfikować możliwość użycia tych funkcji w praktyce.",
      "file": "Ten model obsługuje wczytywanie plików i rozpoznawanie",
      "functionCall": "Ten model obsługuje wywołania funkcji (Function Call).",
+      "imageOutput": "Ten model wspiera generowanie obrazów",
      "reasoning": "Ten model wspiera głębokie myślenie",
      "search": "Ten model wspiera wyszukiwanie w sieci",
      "tokens": "Ten model obsługuje maksymalnie {{tokens}} tokenów w pojedynczej sesji.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash to wariant modelu, zoptymalizowany pod kątem efektywności kosztowej i niskiego opóźnienia."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Model eksperymentalny Gemini 2.0 Flash, wspierający generowanie obrazów"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash to wariant modelu, zoptymalizowany pod kątem efektywności kosztowej i niskiego opóźnienia."
  },
@@ -76,6 +76,7 @@
      "custom": "Modelo personalizado, por padrão, suporta chamadas de função e reconhecimento visual. Por favor, verifique a disponibilidade dessas capacidades de acordo com a situação real.",
      "file": "Este modelo suporta leitura e reconhecimento de arquivos enviados.",
      "functionCall": "Este modelo suporta chamadas de função.",
+      "imageOutput": "Este modelo suporta a geração de imagens",
      "reasoning": "Este modelo suporta pensamento profundo",
      "search": "Este modelo suporta pesquisa online",
      "tokens": "Este modelo suporta no máximo {{tokens}} tokens por sessão.",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Variante do modelo Gemini 2.0 Flash, otimizada para custo-benefício e baixa latência."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Modelo experimental Gemini 2.0 Flash, suporta geração de imagens"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Variante do modelo Gemini 2.0 Flash, otimizada para custo-benefício e baixa latência."
  },
@@ -76,6 +76,7 @@
      "custom": "Пользовательская модель по умолчанию поддерживает как вызов функций, так и распознавание изображений. Пожалуйста, проверьте доступность указанных возможностей в вашем случае",
      "file": "Эта модель поддерживает загрузку и распознавание файлов",
      "functionCall": "Эта модель поддерживает вызов функций",
+      "imageOutput": "Эта модель поддерживает генерацию изображений",
      "reasoning": "Эта модель поддерживает глубокое мышление",
      "search": "Эта модель поддерживает поиск в интернете",
      "tokens": "Эта модель поддерживает до {{tokens}} токенов в одной сессии",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Модельный вариант Gemini 2.0 Flash, оптимизированный для достижения таких целей, как экономическая эффективность и низкая задержка."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Экспериментальная модель Gemini 2.0 Flash, поддерживающая генерацию изображений"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Модельный вариант Gemini 2.0 Flash, оптимизированный для достижения таких целей, как экономическая эффективность и низкая задержка."
  },
@@ -76,6 +76,7 @@
      "custom": "Özel model, varsayılan olarak hem fonksiyon çağrısını hem de görüntü tanımayı destekler, yukarıdaki yeteneklerin kullanılabilirliğini doğrulamak için lütfen gerçek durumu kontrol edin",
      "file": "Bu model dosya yükleme ve tanımayı destekler",
      "functionCall": "Bu model fonksiyon çağrısını destekler",
+      "imageOutput": "Bu model resim oluşturmayı destekler",
      "reasoning": "Bu model derin düşünmeyi destekler",
      "search": "Bu model çevrimiçi aramayı destekler",
      "tokens": "Bu model tek bir oturumda en fazla {{tokens}} Token destekler",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash modeli varyantı, maliyet etkinliği ve düşük gecikme gibi hedefler için optimize edilmiştir."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash deneysel modeli, görüntü oluşturmayı destekler"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash model varyantı, maliyet etkinliği ve düşük gecikme gibi hedefler için optimize edilmiştir."
  },
@@ -76,6 +76,7 @@
      "custom": "Mô hình tùy chỉnh, mặc định hỗ trợ cả cuộc gọi hàm và nhận diện hình ảnh, vui lòng xác minh khả năng sử dụng của chúng theo tình hình cụ thể",
      "file": "Mô hình này hỗ trợ tải lên và nhận diện tệp",
      "functionCall": "Mô hình này hỗ trợ cuộc gọi hàm (Function Call)",
+      "imageOutput": "Mô hình này hỗ trợ tạo ra hình ảnh",
      "reasoning": "Mô hình này hỗ trợ tư duy sâu sắc",
      "search": "Mô hình này hỗ trợ tìm kiếm trực tuyến",
      "tokens": "Mỗi phiên của mô hình này hỗ trợ tối đa {{tokens}} Tokens",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Biến thể mô hình Gemini 2.0 Flash, được tối ưu hóa cho hiệu quả chi phí và độ trễ thấp."
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Mô hình thử nghiệm Gemini 2.0 Flash, hỗ trợ tạo hình ảnh"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Biến thể mô hình Gemini 2.0 Flash được tối ưu hóa cho hiệu quả chi phí và độ trễ thấp."
  },
@@ -76,6 +76,7 @@
      "custom": "自定义模型，默认设定同时支持函数调用与视觉识别，请根据实际情况验证上述能力的可用性",
      "file": "该模型支持上传文件读取与识别",
      "functionCall": "该模型支持函数调用（Function Call）",
+      "imageOutput": "该模型支持生成图片",
      "reasoning": "该模型支持深度思考",
      "search": "该模型支持联网搜索",
      "tokens": "该模型单个会话最多支持 {{tokens}} Tokens",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash 模型变体，针对成本效益和低延迟等目标进行了优化。"
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash 实验模型，支持图像生成"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash 模型变体，针对成本效益和低延迟等目标进行了优化。"
  },
@@ -76,6 +76,7 @@
      "custom": "自訂模型，預設支援函式呼叫與視覺辨識，請根據實際情況驗證上述能力的可用性",
      "file": "該模型支援上傳檔案讀取與辨識",
      "functionCall": "該模型支援函式呼叫（Function Call）",
+      "imageOutput": "該模型支援生成圖片",
      "reasoning": "該模型支援深度思考",
      "search": "該模型支援聯網搜尋",
      "tokens": "該模型單一會話最多支援 {{tokens}} Tokens",
@@ -836,6 +836,9 @@
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash 模型變體，針對成本效益和低延遲等目標進行了優化。"
  },
+  "gemini-2.0-flash-exp-image-generation": {
+    "description": "Gemini 2.0 Flash 實驗模型，支援圖像生成"
+  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash 模型變體，針對成本效益和低延遲等目標進行了優化。"
  },
@@ -7,6 +7,7 @@ import {
  AtomIcon,
  LucideEye,
  LucideGlobe,
+  LucideImage,
  LucidePaperclip,
  ToyBrick,
 } from 'lucide-react';
@@ -56,6 +57,10 @@ const useStyles = createStyles(({ css, token, isDarkMode }) => ({
    color: ${token.green};
    background: ${token.green1};
  `,
+  tagGreenDeep: css`
+    color: ${token.green9};
+    background: ${token.green3};
+  `,
  tagPurple: css`
    color: ${token.purple};
    background: ${token.purple1};
@@ -98,6 +103,17 @@ export const ModelInfoTags = memo<ModelInfoTagsProps>(
            </div>
          </Tooltip>
        )}
+        {model.imageOutput && (
+          <Tooltip
+            placement={placement}
+            styles={{ root: { pointerEvents: 'none' } }}
+            title={t('ModelSelect.featureTag.imageOutput')}
+          >
+            <div className={cx(styles.tag, styles.tagGreen)} style={{ cursor: 'pointer' }} title="">
+              <Icon icon={LucideImage} />
+            </div>
+          </Tooltip>
+        )}
        {model.vision && (
          <Tooltip
            placement={placement}
@@ -94,6 +94,7 @@ const googleChatModels: AIChatModelCard[] = [
  },
  {
    abilities: {
+      imageOutput: true,
      vision: true,
    },
    contextWindowTokens: 32_768,
@@ -109,6 +110,24 @@ const googleChatModels: AIChatModelCard[] = [
    releasedAt: '2025-02-05',
    type: 'chat',
  },
+  {
+    abilities: {
+      imageOutput: true,
+      vision: true,
+    },
+    contextWindowTokens: 32_768,
+    description: 'Gemini 2.0 Flash 实验模型，支持图像生成',
+    displayName: 'Gemini 2.0 Flash (Image Generation) Experimental',
+    enabled: true,
+    id: 'gemini-2.0-flash-exp-image-generation',
+    maxOutput: 8192,
+    pricing: {
+      input: 0,
+      output: 0,
+    },
+    releasedAt: '2025-03-14',
+    type: 'chat',
+  },
  {
    abilities: {
      vision: true,
@@ -11,8 +11,8 @@ const VertexAI: ModelProviderCard = {
  name: 'VertexAI',
  settings: {
    disableBrowserRequest: true,
+    showModelFetcher: false,
    smoothing: {
-      speed: 2,
      text: true,
    },
  },
@@ -10,7 +10,6 @@ import {
  SchemaType,
 } from '@google/generative-ai';

-import { VertexAIStream } from '@/libs/agent-runtime/utils/streams/vertex-ai';
 import type { ChatModelCard } from '@/types/llm';
 import { imageUrlToBase64 } from '@/utils/imageToBase64';
 import { safeParseJSON } from '@/utils/safeParseJSON';
@@ -28,11 +27,24 @@ import { ModelProvider } from '../types/type';
 import { AgentRuntimeError } from '../utils/createError';
 import { debugStream } from '../utils/debugStream';
 import { StreamingResponse } from '../utils/response';
-import { GoogleGenerativeAIStream, convertIterableToStream } from '../utils/streams';
+import {
+  GoogleGenerativeAIStream,
+  VertexAIStream,
+  convertIterableToStream,
+} from '../utils/streams';
 import { parseDataUri } from '../utils/uriParser';

 const modelsOffSafetySettings = new Set(['gemini-2.0-flash-exp']);
-const modelsWithModalities = new Set(['gemini-2.0-flash-exp']);
+
+const modelsWithModalities = new Set([
+  'gemini-2.0-flash-exp',
+  'gemini-2.0-flash-exp-image-generation',
+]);
+
+const modelsDisableInstuction = new Set([
+  'gemini-2.0-flash-exp',
+  'gemini-2.0-flash-exp-image-generation',
+]);

 export interface GoogleModelCard {
  displayName: string;
@@ -97,9 +109,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
            generationConfig: {
              maxOutputTokens: payload.max_tokens,
              // @ts-expect-error - Google SDK 0.24.0 doesn't have this property for now
-              response_modalities: modelsWithModalities.has(model)
-                ? ['Text', 'Image']
-                : undefined,
+              response_modalities: modelsWithModalities.has(model) ? ['Text', 'Image'] : undefined,
              temperature: payload.temperature,
              topP: payload.top_p,
            },
@@ -129,7 +139,9 @@ export class LobeGoogleAI implements LobeRuntimeAI {
        )
        .generateContentStream({
          contents,
-          systemInstruction: payload.system as string,
+          systemInstruction: modelsDisableInstuction.has(model)
+            ? undefined
+            : (payload.system as string),
          tools: this.buildGoogleTools(payload.tools, payload),
        });

@@ -193,4 +193,94 @@ describe('GoogleGenerativeAIStream', () => {
      `data: {"inputImageTokens":258,"inputTextTokens":8,"totalInputTokens":266,"totalTokens":266}\n\n`,
    ]);
  });
+
+  it('should handle stop with content', async () => {
+    vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+    const data = [
+      {
+        candidates: [
+          {
+            content: { parts: [{ text: '234' }], role: 'model' },
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        text: () => '234',
+        usageMetadata: {
+          promptTokenCount: 20,
+          totalTokenCount: 20,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 20 }],
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+      {
+        text: () => '567890\n',
+        candidates: [
+          {
+            content: { parts: [{ text: '567890\n' }], role: 'model' },
+            finishReason: 'STOP',
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        usageMetadata: {
+          promptTokenCount: 19,
+          candidatesTokenCount: 11,
+          totalTokenCount: 30,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 19 }],
+          candidatesTokensDetails: [{ modality: 'TEXT', tokenCount: 11 }],
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+    ];
+
+    const mockGoogleStream = new ReadableStream({
+      start(controller) {
+        data.forEach((item) => {
+          controller.enqueue(item);
+        });
+
+        controller.close();
+      },
+    });
+
+    const protocolStream = GoogleGenerativeAIStream(mockGoogleStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual(
+      [
+        'id: chat_1',
+        'event: text',
+        'data: "234"\n',
+
+        'id: chat_1',
+        'event: text',
+        `data: "567890\\n"\n`,
+        // stop
+        'id: chat_1',
+        'event: stop',
+        `data: "STOP"\n`,
+        // usage
+        'id: chat_1',
+        'event: usage',
+        `data: {"inputTextTokens":19,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
+      ].map((i) => i + '\n'),
+    );
+  });
 });
@@ -71,6 +71,7 @@ const transformGoogleGenerativeAIStream = (
      if (chunk.usageMetadata) {
        const usage = chunk.usageMetadata;
        return [
+          !!text ? { data: text, id: context?.id, type: 'text' } : undefined,
          { data: candidate.finishReason, id: context?.id, type: 'stop' },
          {
            data: {
@@ -88,7 +89,7 @@ const transformGoogleGenerativeAIStream = (
            id: context?.id,
            type: 'usage',
          },
-        ];
+        ].filter(Boolean) as StreamProtocolChunk[];
      }
      return { data: candidate.finishReason, id: context?.id, type: 'stop' };
    }
@@ -6,3 +6,4 @@ export * from './openai';
 export * from './protocol';
 export * from './qwen';
 export * from './spark';
+export * from './vertex-ai';
@@ -78,6 +78,7 @@ export default {
      custom: '自定义模型，默认设定同时支持函数调用与视觉识别，请根据实际情况验证上述能力的可用性',
      file: '该模型支持上传文件读取与识别',
      functionCall: '该模型支持函数调用（Function Call）',
+      imageOutput: '该模型支持生成图片',
      reasoning: '该模型支持深度思考',
      search: '该模型支持联网搜索',
      tokens: '该模型单个会话最多支持 {{tokens}} Tokens',
@@ -28,6 +28,10 @@ export interface ModelAbilities {
   * whether model supports function call
   */
  functionCall?: boolean;
+  /**
+   * whether model supports image output
+   */
+  imageOutput?: boolean;
  /**
   * whether model supports reasoning
   */
@@ -36,7 +40,6 @@ export interface ModelAbilities {
   * whether model supports search web
   */
  search?: boolean;
-
  /**
   *  whether model supports vision
   */