From e529108ff6afd32efb56d9d36c534aed3cc9ee3b Mon Sep 17 00:00:00 2001
From: Arvin Xu <arvinx@foxmail.com>
Date: Sat, 8 Feb 2025 10:25:28 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20refactor=20the=20auth=20con?=
 =?UTF-8?q?dition=20in=20Next=20Auth=20(#5866)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This upgrade includes two changes:

For users deploying on Vercel with next-auth, the environment variable `NEXT_PUBLIC_ENABLE_NEXT_AUTH=1` must be added to ensure Next Auth is enabled; other users are not affected.
For users using Clerk in self-built images, `NEXT_PUBLIC_ENABLE_NEXT_AUTH=0` must additionally be configured to disable Next Auth.

Other standard deployment scenarios (using Clerk on Vercel and using next-auth in Docker) are not affected.

For more details, refer to https://github.com/lobehub/lobe-chat/issues/5804

本次升级存在两个变更：

- 针对使用 Vercel 部署中使用 next-auth 的用户，需要额外添加 `NEXT_PUBLIC_ENABLE_NEXT_AUTH=1` 环境变量来确保开启 Next Auth
- 针对使用自构建镜像中使用 clerk 的用户，需要额外配置 `NEXT_PUBLIC_ENABLE_NEXT_AUTH=0` 环境变量来关闭 Next Auth

其他标准部署场景（Vercel 中使用 Clerk 与 Docker 中使用 next-auth ）不受影响

变更详情原因查看 https://github.com/lobehub/lobe-chat/issues/5804
---
 .env.example                                  |    1 +
 Dockerfile.database                           |    3 +-
 next.config.ts                                |    1 +
 .../me/(home)/__tests__/UserBanner.test.tsx   |    5 +-
 .../me/(home)/__tests__/useCategory.test.tsx  |   10 +-
 .../me/(home)/features/UserBanner.tsx         |    9 +-
 .../me/(home)/features/useCategory.tsx        |    4 +-
 .../(mobile)/me/profile/features/Category.tsx |    4 +-
 .../(main)/profile/(home)/Client.tsx          |    4 +-
 .../(main)/profile/hooks/useCategory.tsx      |    6 +-
 .../(main)/settings/_layout/Mobile/Header.tsx |    4 +-
 src/app/[variants]/page.tsx                   |    1 -
 src/config/auth.ts                            |    3 +-
 src/const/auth.ts                             |    3 +-
 src/features/User/UserPanel/PanelContent.tsx  |   10 +-
 src/features/User/UserPanel/useMenu.tsx       |    4 +-
 .../User/__tests__/PanelContent.test.tsx      |   13 +-
 .../GlobalProvider/StoreInitialization.tsx    |    3 +-
 .../__snapshots__/index.test.ts.snap          | 2190 -----------------
 src/middleware.ts                             |   17 +-
 src/services/chat.ts                          |    3 +-
 src/store/user/slices/auth/action.test.ts     |    6 +-
 src/store/user/slices/auth/action.ts          |    6 +-
 src/store/user/slices/auth/initialState.ts    |    1 -
 src/store/user/slices/auth/selectors.ts       |   12 +-
 src/store/user/slices/common/action.ts        |    1 -
 26 files changed, 58 insertions(+), 2266 deletions(-)
 delete mode 100644 src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap

diff --git a/.env.example b/.env.example
index c7829523d7..c0bd7fbb3c 100644
--- a/.env.example
+++ b/.env.example
@@ -190,6 +190,7 @@ OPENAI_API_KEY=sk-xxxxxxxxx
 
 
 # NextAuth related configurations
+# NEXT_PUBLIC_ENABLE_NEXT_AUTH=1
 # NEXT_AUTH_SECRET=
 
 # Auth0 configurations
diff --git a/Dockerfile.database b/Dockerfile.database
index feeefa0837..fc265363dd 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -38,6 +38,7 @@ FROM base AS builder
 ARG USE_CN_MIRROR
 ARG NEXT_PUBLIC_BASE_PATH
 ARG NEXT_PUBLIC_SERVICE_MODE
+ARG NEXT_PUBLIC_ENABLE_NEXT_AUTH
 ARG NEXT_PUBLIC_SENTRY_DSN
 ARG NEXT_PUBLIC_ANALYTICS_POSTHOG
 ARG NEXT_PUBLIC_POSTHOG_HOST
@@ -49,7 +50,7 @@ ARG NEXT_PUBLIC_UMAMI_WEBSITE_ID
 ENV NEXT_PUBLIC_BASE_PATH="${NEXT_PUBLIC_BASE_PATH}"
 
 ENV NEXT_PUBLIC_SERVICE_MODE="${NEXT_PUBLIC_SERVICE_MODE:-server}" \
-    NEXT_PUBLIC_ENABLE_NEXT_AUTH="1" \
+    NEXT_PUBLIC_ENABLE_NEXT_AUTH="${NEXT_PUBLIC_ENABLE_NEXT_AUTH:-1}" \
     APP_URL="http://app.com" \
     DATABASE_DRIVER="node" \
     DATABASE_URL="postgres://postgres:password@localhost:5432/postgres" \
diff --git a/next.config.ts b/next.config.ts
index 11f75c6dbe..59d058c4be 100644
--- a/next.config.ts
+++ b/next.config.ts
@@ -179,6 +179,7 @@ const nextConfig: NextConfig = {
   ],
   // when external packages in dev mode with turbopack, this config will lead to bundle error
   serverExternalPackages: isProd ? ['@electric-sql/pglite'] : undefined,
+
   transpilePackages: ['pdfjs-dist', 'mermaid'],
 
   webpack(config) {
diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx
index b3b0fc826f..1da776414a 100644
--- a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx
+++ b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx
@@ -49,8 +49,9 @@ afterEach(() => {
 describe('UserBanner', () => {
   it('should render UserInfo and DataStatistics when auth is disabled', () => {
     act(() => {
-      useUserStore.setState({ isSignedIn: false, enableAuth: () => false });
+      useUserStore.setState({ isSignedIn: false });
     });
+    enableAuth = false;
 
     render(<UserBanner />);
 
@@ -75,7 +76,7 @@ describe('UserBanner', () => {
 
   it('should render UserLoginOrSignup when user is not logged in with auth enabled', () => {
     act(() => {
-      useUserStore.setState({ isSignedIn: false, enableAuth: () => true });
+      useUserStore.setState({ isSignedIn: false });
     });
     enableClerk = true;
 
diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx
index 5c697ce4d9..d04ebbd07c 100644
--- a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx
+++ b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx
@@ -45,12 +45,10 @@ afterEach(() => {
   enableClerk = true;
 });
 
-// 目前对 enableAuth 的判定是在 useUserStore 中，所以需要 mock useUserStore
-// 类型定义： enableAuth: () => boolean
 describe('useCategory', () => {
   it('should return correct items when the user is logged in with authentication', () => {
     act(() => {
-      useUserStore.setState({ isSignedIn: true, enableAuth: () => true });
+      useUserStore.setState({ isSignedIn: true });
     });
     enableAuth = true;
     enableClerk = false;
@@ -70,8 +68,9 @@ describe('useCategory', () => {
 
   it('should return correct items when the user is not logged in', () => {
     act(() => {
-      useUserStore.setState({ isSignedIn: false, enableAuth: () => true });
+      useUserStore.setState({ isSignedIn: false });
     });
+    enableAuth = true;
 
     const { result } = renderHook(() => useCategory(), { wrapper });
 
@@ -88,9 +87,10 @@ describe('useCategory', () => {
 
   it('should handle settings for non-authenticated users', () => {
     act(() => {
-      useUserStore.setState({ isSignedIn: false, enableAuth: () => false });
+      useUserStore.setState({ isSignedIn: false });
     });
     enableClerk = false;
+    enableAuth = false;
 
     const { result } = renderHook(() => useCategory(), { wrapper });
 
diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx
index b71d1358a6..c46796af67 100644
--- a/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx
+++ b/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx
@@ -5,6 +5,7 @@ import { useRouter } from 'next/navigation';
 import { memo } from 'react';
 import { Flexbox } from 'react-layout-kit';
 
+import { enableAuth, enableNextAuth } from '@/const/auth';
 import { isDeprecatedEdition } from '@/const/version';
 import DataStatistics from '@/features/User/DataStatistics';
 import UserInfo from '@/features/User/UserInfo';
@@ -15,11 +16,7 @@ import { authSelectors } from '@/store/user/selectors';
 const UserBanner = memo(() => {
   const router = useRouter();
   const isLoginWithAuth = useUserStore(authSelectors.isLoginWithAuth);
-  const [enableAuth, signIn, enabledNextAuth] = useUserStore((s) => [
-    authSelectors.enabledAuth(s),
-    s.openLogin,
-    authSelectors.enabledNextAuth(s),
-  ]);
+  const [signIn] = useUserStore((s) => [s.openLogin]);
 
   return (
     <Flexbox gap={12} paddingBlock={8}>
@@ -38,7 +35,7 @@ const UserBanner = memo(() => {
         <UserLoginOrSignup
           onClick={() => {
             // If use NextAuth, call openLogin method directly
-            if (enabledNextAuth) {
+            if (enableNextAuth) {
               signIn();
               return;
             }
diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx
index 2dafbea2ec..93a4bf903c 100644
--- a/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx
+++ b/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx
@@ -12,6 +12,7 @@ import { useRouter } from 'next/navigation';
 import { useTranslation } from 'react-i18next';
 
 import { CellProps } from '@/components/Cell';
+import { enableAuth } from '@/const/auth';
 import { LOBE_CHAT_CLOUD } from '@/const/branding';
 import { DOCUMENTS, FEEDBACK, OFFICIAL_URL, UTM_SOURCE } from '@/const/url';
 import { isServerMode } from '@/const/version';
@@ -27,10 +28,9 @@ export const useCategory = () => {
   const { canInstall, install } = usePWAInstall();
   const { t } = useTranslation(['common', 'setting', 'auth']);
   const { showCloudPromotion, hideDocs } = useServerConfigStore(featureFlagsSelectors);
-  const [isLogin, isLoginWithAuth, enableAuth] = useUserStore((s) => [
+  const [isLogin, isLoginWithAuth] = useUserStore((s) => [
     authSelectors.isLogin(s),
     authSelectors.isLoginWithAuth(s),
-    authSelectors.enabledAuth(s),
   ]);
 
   const profile: CellProps[] = [
diff --git a/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx b/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx
index 54c61a3a23..04567d7046 100644
--- a/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx
+++ b/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx
@@ -6,15 +6,15 @@ import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 
 import Cell, { CellProps } from '@/components/Cell';
+import { enableAuth } from '@/const/auth';
 import { isDeprecatedEdition } from '@/const/version';
 import { ProfileTabs } from '@/store/global/initialState';
 import { useUserStore } from '@/store/user';
 import { authSelectors } from '@/store/user/selectors';
 
 const Category = memo(() => {
-  const [isLogin, enableAuth, isLoginWithClerk, signOut] = useUserStore((s) => [
+  const [isLogin, isLoginWithClerk, signOut] = useUserStore((s) => [
     authSelectors.isLogin(s),
-    authSelectors.enabledAuth(s),
     authSelectors.isLoginWithClerk(s),
     s.logout,
   ]);
diff --git a/src/app/[variants]/(main)/profile/(home)/Client.tsx b/src/app/[variants]/(main)/profile/(home)/Client.tsx
index 13ef127c9d..c54895119f 100644
--- a/src/app/[variants]/(main)/profile/(home)/Client.tsx
+++ b/src/app/[variants]/(main)/profile/(home)/Client.tsx
@@ -4,6 +4,7 @@ import { Form, type ItemGroup } from '@lobehub/ui';
 import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 
+import { enableAuth } from '@/const/auth';
 import { FORM_STYLE } from '@/const/layoutTokens';
 import AvatarWithUpload from '@/features/AvatarWithUpload';
 import UserAvatar from '@/features/User/UserAvatar';
@@ -14,8 +15,7 @@ type SettingItemGroup = ItemGroup;
 
 const Client = memo<{ mobile?: boolean }>(() => {
   const [isLoginWithNextAuth] = useUserStore((s) => [authSelectors.isLoginWithNextAuth(s)]);
-  const [enableAuth, nickname, username, userProfile] = useUserStore((s) => [
-    s.enableAuth(),
+  const [nickname, username, userProfile] = useUserStore((s) => [
     userProfileSelectors.nickName(s),
     userProfileSelectors.username(s),
     userProfileSelectors.userProfile(s),
diff --git a/src/app/[variants]/(main)/profile/hooks/useCategory.tsx b/src/app/[variants]/(main)/profile/hooks/useCategory.tsx
index 91aecee480..3fe30f6b0b 100644
--- a/src/app/[variants]/(main)/profile/hooks/useCategory.tsx
+++ b/src/app/[variants]/(main)/profile/hooks/useCategory.tsx
@@ -4,6 +4,7 @@ import Link from 'next/link';
 import { useTranslation } from 'react-i18next';
 
 import type { MenuProps } from '@/components/Menu';
+import { enableAuth } from '@/const/auth';
 import { isDeprecatedEdition } from '@/const/version';
 import { ProfileTabs } from '@/store/global/initialState';
 import { useUserStore } from '@/store/user';
@@ -11,10 +12,7 @@ import { authSelectors } from '@/store/user/slices/auth/selectors';
 
 export const useCategory = () => {
   const { t } = useTranslation('auth');
-  const [enableAuth, isLoginWithClerk] = useUserStore((s) => [
-    authSelectors.enabledAuth(s),
-    authSelectors.isLoginWithClerk(s),
-  ]);
+  const [isLoginWithClerk] = useUserStore((s) => [authSelectors.isLoginWithClerk(s)]);
 
   const cateItems: MenuProps['items'] = [
     {
diff --git a/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx b/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx
index c96e5844ea..94451f03cc 100644
--- a/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx
+++ b/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx
@@ -6,13 +6,12 @@ import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 import { Flexbox } from 'react-layout-kit';
 
+import { enableAuth } from '@/const/auth';
 import { useActiveSettingsKey } from '@/hooks/useActiveTabKey';
 import { useQueryRoute } from '@/hooks/useQueryRoute';
 import { useShowMobileWorkspace } from '@/hooks/useShowMobileWorkspace';
 import { SettingsTabs } from '@/store/global/initialState';
 import { useSessionStore } from '@/store/session';
-import { useUserStore } from '@/store/user';
-import { authSelectors } from '@/store/user/selectors';
 import { mobileHeaderSticky } from '@/styles/mobileHeader';
 
 const Header = memo(() => {
@@ -22,7 +21,6 @@ const Header = memo(() => {
   const showMobileWorkspace = useShowMobileWorkspace();
   const activeSettingsKey = useActiveSettingsKey();
   const isSessionActive = useSessionStore((s) => !!s.activeId);
-  const enableAuth = useUserStore(authSelectors.enabledAuth);
 
   const handleBackClick = () => {
     if (isSessionActive && showMobileWorkspace) {
diff --git a/src/app/[variants]/page.tsx b/src/app/[variants]/page.tsx
index c8723029ec..83d62474ac 100644
--- a/src/app/[variants]/page.tsx
+++ b/src/app/[variants]/page.tsx
@@ -7,4 +7,3 @@ export const metadata: Metadata = {
 };
 
 export { default } from './loading';
-
diff --git a/src/config/auth.ts b/src/config/auth.ts
index d922a711e7..adee4abfb0 100644
--- a/src/config/auth.ts
+++ b/src/config/auth.ts
@@ -217,8 +217,7 @@ export const getAuthConfig = () => {
       CLERK_WEBHOOK_SECRET: process.env.CLERK_WEBHOOK_SECRET,
 
       // Next Auth
-      NEXT_PUBLIC_ENABLE_NEXT_AUTH:
-        !!process.env.NEXT_AUTH_SECRET || process.env.NEXT_PUBLIC_ENABLE_NEXT_AUTH === '1',
+      NEXT_PUBLIC_ENABLE_NEXT_AUTH: process.env.NEXT_PUBLIC_ENABLE_NEXT_AUTH === '1',
       NEXT_AUTH_SSO_PROVIDERS: process.env.NEXT_AUTH_SSO_PROVIDERS,
       NEXT_AUTH_SECRET: process.env.NEXT_AUTH_SECRET,
       NEXT_AUTH_DEBUG: !!process.env.NEXT_AUTH_DEBUG,
diff --git a/src/const/auth.ts b/src/const/auth.ts
index 9a633da545..43f462463e 100644
--- a/src/const/auth.ts
+++ b/src/const/auth.ts
@@ -2,8 +2,7 @@ import { authEnv } from '@/config/auth';
 
 export const enableClerk = authEnv.NEXT_PUBLIC_ENABLE_CLERK_AUTH;
 export const enableNextAuth = authEnv.NEXT_PUBLIC_ENABLE_NEXT_AUTH;
-export const enableAuth =
-  authEnv.NEXT_PUBLIC_ENABLE_CLERK_AUTH || authEnv.NEXT_PUBLIC_ENABLE_NEXT_AUTH;
+export const enableAuth = enableClerk || enableNextAuth || false;
 
 export const LOBE_CHAT_AUTH_HEADER = 'X-lobe-chat-auth';
 
diff --git a/src/features/User/UserPanel/PanelContent.tsx b/src/features/User/UserPanel/PanelContent.tsx
index a1ddccc00e..31606b230f 100644
--- a/src/features/User/UserPanel/PanelContent.tsx
+++ b/src/features/User/UserPanel/PanelContent.tsx
@@ -5,6 +5,7 @@ import { Flexbox } from 'react-layout-kit';
 
 import BrandWatermark from '@/components/BrandWatermark';
 import Menu from '@/components/Menu';
+import { enableAuth, enableNextAuth } from '@/const/auth';
 import { isDeprecatedEdition } from '@/const/version';
 import { useUserStore } from '@/store/user';
 import { authSelectors } from '@/store/user/selectors';
@@ -19,12 +20,7 @@ import { useMenu } from './useMenu';
 const PanelContent = memo<{ closePopover: () => void }>(({ closePopover }) => {
   const router = useRouter();
   const isLoginWithAuth = useUserStore(authSelectors.isLoginWithAuth);
-  const [openSignIn, signOut, enableAuth, enabledNextAuth] = useUserStore((s) => [
-    s.openLogin,
-    s.logout,
-    s.enableAuth(),
-    s.enabledNextAuth,
-  ]);
+  const [openSignIn, signOut] = useUserStore((s) => [s.openLogin, s.logout]);
   const { mainItems, logoutItems } = useMenu();
 
   const handleSignIn = () => {
@@ -36,7 +32,7 @@ const PanelContent = memo<{ closePopover: () => void }>(({ closePopover }) => {
     signOut();
     closePopover();
     // NextAuth doesn't need to redirect to login page
-    if (enabledNextAuth) return;
+    if (enableNextAuth) return;
     router.push('/login');
   };
 
diff --git a/src/features/User/UserPanel/useMenu.tsx b/src/features/User/UserPanel/useMenu.tsx
index dadf82d970..0d3b374caa 100644
--- a/src/features/User/UserPanel/useMenu.tsx
+++ b/src/features/User/UserPanel/useMenu.tsx
@@ -21,6 +21,7 @@ import { useTranslation } from 'react-i18next';
 import { Flexbox } from 'react-layout-kit';
 
 import type { MenuProps } from '@/components/Menu';
+import { enableAuth } from '@/const/auth';
 import { LOBE_CHAT_CLOUD } from '@/const/branding';
 import {
   DISCORD,
@@ -68,8 +69,7 @@ export const useMenu = () => {
   const hasNewVersion = useNewVersion();
   const { t } = useTranslation(['common', 'setting', 'auth']);
   const { showCloudPromotion, hideDocs } = useServerConfigStore(featureFlagsSelectors);
-  const [enableAuth, isLogin, isLoginWithAuth] = useUserStore((s) => [
-    authSelectors.enabledAuth(s),
+  const [isLogin, isLoginWithAuth] = useUserStore((s) => [
     authSelectors.isLogin(s),
     authSelectors.isLoginWithAuth(s),
   ]);
diff --git a/src/features/User/__tests__/PanelContent.test.tsx b/src/features/User/__tests__/PanelContent.test.tsx
index 35be35b8e2..dc8b2b164f 100644
--- a/src/features/User/__tests__/PanelContent.test.tsx
+++ b/src/features/User/__tests__/PanelContent.test.tsx
@@ -68,13 +68,12 @@ vi.mock('@/const/version', () => ({
 // 定义一个变量来存储 enableAuth 的值
 let enableAuth = true;
 
-beforeEach(() => {
-  useUserStore.setState({ enableAuth: () => true });
-});
-
-afterEach(() => {
-  enableAuth = true;
-});
+// 模拟 @/const/auth 模块
+vi.mock('@/const/auth', () => ({
+  get enableAuth() {
+    return enableAuth;
+  },
+}));
 
 describe('PanelContent', () => {
   const closePopover = vi.fn();
diff --git a/src/layout/GlobalProvider/StoreInitialization.tsx b/src/layout/GlobalProvider/StoreInitialization.tsx
index a0baa68f4a..4627b28399 100644
--- a/src/layout/GlobalProvider/StoreInitialization.tsx
+++ b/src/layout/GlobalProvider/StoreInitialization.tsx
@@ -5,6 +5,7 @@ import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 import { createStoreUpdater } from 'zustand-utils';
 
+import { enableNextAuth } from '@/const/auth';
 import { useIsMobile } from '@/hooks/useIsMobile';
 import { useEnabledDataSync } from '@/hooks/useSyncData';
 import { useAgentStore } from '@/store/agent';
@@ -39,8 +40,6 @@ const StoreInitialization = memo(() => {
 
   // Update NextAuth status
   const useUserStoreUpdater = createStoreUpdater(useUserStore);
-  const enableNextAuth = useServerConfigStore(serverConfigSelectors.enabledOAuthSSO);
-  useUserStoreUpdater('enabledNextAuth', enableNextAuth);
   const oAuthSSOProviders = useServerConfigStore(serverConfigSelectors.oAuthSSOProviders);
   useUserStoreUpdater('oAuthSSOProviders', oAuthSSOProviders);
 
diff --git a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
deleted file mode 100644
index 98223d74d7..0000000000
--- a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
+++ /dev/null
@@ -1,2190 +0,0 @@
-// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
-
-exports[`LobeTogetherAI > models > should get models 1`] = `
-[
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a narrative.",
-    "displayName": "Chronos Hermes (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Austism/chronos-hermes-13b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding",
-    "displayName": "BAAI-Bge-Base-1p5",
-    "enabled": false,
-    "functionCall": false,
-    "id": "BAAI/bge-base-en-v1.5",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding",
-    "displayName": "BAAI-Bge-Large-1p5",
-    "enabled": false,
-    "functionCall": false,
-    "id": "BAAI/bge-large-en-v1.5",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 4096,
-    "description": "MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique. The main difference with MythoMix is that I allowed more of Huginn to intermingle with the single tensors located at the front and end of a model",
-    "displayName": "MythoMax-L2 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Gryphe/MythoMax-L2-13b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama Guard: LLM-based Input-Output Safeguard for Human-AI Conversations",
-    "displayName": "Llama Guard (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Meta-Llama/Llama-Guard-7b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "NexusRaven is an open-source and commercially viable function calling LLM that surpasses the state-of-the-art in function calling capabilities.",
-    "displayName": "NexusRaven (13B)",
-    "enabled": false,
-    "functionCall": true,
-    "id": "Nexusflow/NexusRaven-V2-13B",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "first Nous collection of dataset and models made by fine-tuning mostly on data created by Nous in-house",
-    "displayName": "Nous Capybara v1.9 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Capybara-7B-V1p9",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Nous Hermes 2 on Mistral 7B DPO is the new flagship 7B Hermes! This model was DPO'd from Teknium/OpenHermes-2.5-Mistral-7B and has improved across the board on all benchmarks tested - AGIEval, BigBench Reasoning, GPT4All, and TruthfulQA.",
-    "displayName": "Nous Hermes 2 - Mistral DPO (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 32768,
-    "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.",
-    "displayName": "Nous Hermes 2 - Mixtral 8x7B-DPO ",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.",
-    "displayName": "Nous Hermes 2 - Mixtral 8x7B-SFT",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune",
-    "displayName": "Nous Hermes-2 Yi (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-2-Yi-34B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions.",
-    "displayName": "Nous Hermes Llama-2 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-Llama2-13b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Nous-Hermes-Llama2-7b is a state-of-the-art language model fine-tuned on over 300,000 instructions.",
-    "displayName": "Nous Hermes LLaMA-2 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-llama-2-7b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An OpenOrca dataset fine-tune on top of Mistral 7B by the OpenOrca team.",
-    "displayName": "OpenOrca Mistral (7B) 8K",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Open-Orca/Mistral-7B-OpenOrca",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Phind-CodeLlama-34B-v1 trained on additional 1.5B tokens high-quality programming-related data proficient in Python, C/C++, TypeScript, Java, and more.",
-    "displayName": "Phind Code LLaMA v2 (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Phind/Phind-CodeLlama-34B-v2",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (0.5B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-0.5B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (0.5B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-0.5B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (1.8B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-1.8B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (1.8B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-1.8B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (110B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-110B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (14B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-14B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (14B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-14B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (32B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-32B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (32B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-32B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (4B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-4B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (4B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-4B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (72B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-72B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (72B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-72B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 Chat (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-7B-Chat",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
-    "displayName": "Qwen 1.5 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Qwen/Qwen1.5-7B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Fine-tune version of Stable Diffusion focused on photorealism.",
-    "displayName": "Realistic Vision 3.0",
-    "enabled": false,
-    "functionCall": false,
-    "id": "SG161222/Realistic_Vision_V3.0_VAE",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": true,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Arctic is a dense-MoE Hybrid transformer architecture pre-trained from scratch by the Snowflake AI Research Team.",
-    "displayName": "Snowflake Arctic Instruct",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Snowflake/snowflake-arctic-instruct",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Re:MythoMax (ReMM) is a recreation trial of the original MythoMax-L2-B13 with updated models. This merge use SLERP [TESTING] to merge ReML and Huginn v1.2.",
-    "displayName": "ReMM SLERP L2 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Undi95/ReMM-SLERP-L2-13B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A merge of models built by Undi95 with the new task_arithmetic merge method from mergekit.",
-    "displayName": "Toppy M (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Undi95/Toppy-M-7B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A universal English sentence embedding WhereIsAI/UAE-Large-V1 achieves SOTA on the MTEB Leaderboard with an average score of 64.64!",
-    "displayName": "UAE-Large-V1",
-    "enabled": false,
-    "functionCall": false,
-    "id": "WhereIsAI/UAE-Large-V1",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.",
-    "displayName": "WizardCoder v1.0 (15B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "WizardLM/WizardCoder-15B-V1.0",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.",
-    "displayName": "WizardCoder Python v1.0 (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "WizardLM/WizardCoder-Python-34B-V1.0",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities",
-    "displayName": "WizardLM v1.2 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "WizardLM/WizardLM-13B-V1.2",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The OLMo models are trained on the Dolma dataset",
-    "displayName": "OLMo Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "allenai/OLMo-7B-Instruct",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The OLMo models are trained on the Dolma dataset",
-    "displayName": "OLMo Twin-2T (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "allenai/OLMo-7B-Twin-2T",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The OLMo models are trained on the Dolma dataset",
-    "displayName": "OLMo (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "allenai/OLMo-7B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "original BERT model",
-    "displayName": "Bert Base Uncased",
-    "enabled": false,
-    "functionCall": false,
-    "id": "bert-base-uncased",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-13b-Instruct-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-13b-Python-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 16384,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-34b-Instruct-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-34b-Python-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-70b-Instruct-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-70b-Python-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-70b-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-7b-Instruct-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-7b-Python-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This Dolphin is really good at coding, I trained with a lot of coding data. It is very obedient but it is not DPO tuned - so you still might need to encourage it in the system prompt as I show in the below examples.",
-    "displayName": "Dolphin 2.5 Mixtral 8x7b",
-    "enabled": false,
-    "functionCall": false,
-    "id": "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 32768,
-    "description": "DBRX Instruct is a mixture-of-experts (MoE) large language model trained from scratch by Databricks. DBRX Instruct specializes in few-turn interactions.",
-    "displayName": "DBRX Instruct",
-    "enabled": false,
-    "functionCall": false,
-    "id": "databricks/dbrx-instruct",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.",
-    "displayName": "Deepseek Coder Instruct (33B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "deepseek-ai/deepseek-coder-33b-instruct",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 4096,
-    "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese",
-    "displayName": "DeepSeek LLM Chat (67B)",
-    "enabled": true,
-    "functionCall": false,
-    "id": "deepseek-ai/deepseek-llm-67b-chat",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An instruction fine-tuned LLaMA-2 (70B) model by merging Platypus2 (70B) by garage-bAInd and LLaMA-2 Instruct v2 (70B) by upstage.",
-    "displayName": "Platypus2 Instruct (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "garage-bAInd/Platypus2-70B-instruct",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 8192,
-    "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "displayName": "Gemma Instruct (2B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "google/gemma-2b-it",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "displayName": "Gemma (2B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "google/gemma-2b",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "displayName": "Gemma Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "google/gemma-7b-it",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "displayName": "Gemma (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "google/gemma-7b",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.",
-    "displayName": "Vicuna v1.5 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "lmsys/vicuna-13b-v1.5",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.",
-    "displayName": "Vicuna v1.5 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "lmsys/vicuna-7b-v1.5",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 4096,
-    "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 Chat (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-2-13b-chat-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-2-13b-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 Chat (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-2-70b-chat-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 4096,
-    "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-2-70b-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 Chat (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-2-7b-chat-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-2-7b-hf",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 8192,
-    "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
-    "displayName": "Meta Llama 3 70B Instruct",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-3-70b-chat-hf",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 8192,
-    "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
-    "displayName": "Meta Llama 3 8B Instruct",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-3-8b-chat-hf",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
-    "displayName": "Meta Llama 3 8B",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Llama-3-8b-hf",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": null,
-    "displayName": "Meta Llama Guard 2 8B",
-    "enabled": false,
-    "functionCall": undefined,
-    "id": "meta-llama/LlamaGuard-2-8b",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
-    "displayName": "Meta Llama 3 70B",
-    "enabled": false,
-    "functionCall": false,
-    "id": "meta-llama/Meta-Llama-3-70B",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 65536,
-    "description": "WizardLM-2 8x22B is Wizard's most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models.",
-    "displayName": "WizardLM-2 (8x22B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "microsoft/WizardLM-2-8x22B",
-    "maxOutput": 65536,
-    "reasoning": false,
-    "tokens": 65536,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)",
-    "displayName": "Microsoft Phi-2",
-    "enabled": false,
-    "functionCall": false,
-    "id": "microsoft/phi-2",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 8192,
-    "description": "instruct fine-tuned version of Mistral-7B-v0.1",
-    "displayName": "Mistral (7B) Instruct",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mistralai/Mistral-7B-Instruct-v0.1",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 32768,
-    "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.",
-    "displayName": "Mistral (7B) Instruct v0.2",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mistralai/Mistral-7B-Instruct-v0.2",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 8192,
-    "description": "7.3B parameter model that outperforms Llama 2 13B on all benchmarks, approaches CodeLlama 7B performance on code, Uses Grouped-query attention (GQA) for faster inference and Sliding Window Attention (SWA) to handle longer sequences at smaller cost",
-    "displayName": "Mistral (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mistralai/Mistral-7B-v0.1",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 65536,
-    "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.",
-    "displayName": "Mixtral-8x22B Instruct v0.1",
-    "enabled": true,
-    "functionCall": false,
-    "id": "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "maxOutput": 65536,
-    "reasoning": false,
-    "tokens": 65536,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Mixtral-8x22B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.",
-    "displayName": "Mixtral-8x22B",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mistralai/Mixtral-8x22B",
-    "maxOutput": 65536,
-    "reasoning": false,
-    "tokens": 65536,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 32768,
-    "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.",
-    "displayName": "Mixtral-8x7B Instruct v0.1",
-    "enabled": true,
-    "functionCall": false,
-    "id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 32768,
-    "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.",
-    "displayName": "Mixtral-8x7B v0.1",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mistralai/Mixtral-8x7B-v0.1",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A merge of OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-quality instruction data, with a custom processing pipeline.",
-    "displayName": "OpenChat 3.5",
-    "enabled": false,
-    "functionCall": false,
-    "id": "openchat/openchat-3.5-1210",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An open source Stable Diffusion model fine tuned model on Midjourney images. ",
-    "displayName": "Openjourney v4",
-    "enabled": false,
-    "functionCall": false,
-    "id": "prompthero/openjourney",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
-    "displayName": "Stable Diffusion 1.5",
-    "enabled": false,
-    "functionCall": false,
-    "id": "runwayml/stable-diffusion-v1-5",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A sentence-transformers model: it maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search.",
-    "displayName": "Sentence-BERT",
-    "enabled": false,
-    "functionCall": false,
-    "id": "sentence-transformers/msmarco-bert-base-dot-v5",
-    "maxOutput": 512,
-    "reasoning": false,
-    "tokens": 512,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A state-of-the-art model by Snorkel AI, DPO fine-tuned on Mistral-7B",
-    "displayName": "Snorkel Mistral PairRM DPO (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "snorkelai/Snorkel-Mistral-PairRM-DPO",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
-    "displayName": "Stable Diffusion 2.1",
-    "enabled": false,
-    "functionCall": false,
-    "id": "stabilityai/stable-diffusion-2-1",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A text-to-image generative AI model that excels at creating 1024x1024 images.",
-    "displayName": "Stable Diffusion XL 1.0",
-    "enabled": false,
-    "functionCall": false,
-    "id": "stabilityai/stable-diffusion-xl-base-1.0",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "State of the art Mistral Fine-tuned on extensive public datasets",
-    "displayName": "OpenHermes-2-Mistral (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "teknium/OpenHermes-2-Mistral-7B",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Continuation of OpenHermes 2 Mistral model trained on additional code datasets",
-    "displayName": "OpenHermes-2.5-Mistral (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "teknium/OpenHermes-2p5-Mistral-7B",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model can be used to moderate other chatbot models. Built using GPT-JT model fine-tuned on Ontocord.ai's OIG-moderation dataset v0.1.",
-    "displayName": "GPT-JT-Moderation (6B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/GPT-JT-Moderation-6B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations.",
-    "displayName": "LLaMA-2-32K (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/LLaMA-2-7B-32K",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations, instruction tuned by Together",
-    "displayName": "LLaMA-2-7B-32K-Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/Llama-2-7B-32K-Instruct",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).",
-    "displayName": "RedPajama-INCITE (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/RedPajama-INCITE-7B-Base",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-7B-v1 base model.",
-    "displayName": "RedPajama-INCITE Chat (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/RedPajama-INCITE-7B-Chat",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-7B-v1 base model.",
-    "displayName": "RedPajama-INCITE Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/RedPajama-INCITE-7B-Instruct",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).",
-    "displayName": "RedPajama-INCITE (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.",
-    "displayName": "RedPajama-INCITE Chat (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-3B-v1 base model.",
-    "displayName": "RedPajama-INCITE Instruct (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers",
-    "displayName": "StripedHyena Hessian (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/StripedHyena-Hessian-7B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 32768,
-    "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers",
-    "displayName": "StripedHyena Nous (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/StripedHyena-Nous-7B",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ",
-    "displayName": "Alpaca (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/alpaca-7b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.",
-    "displayName": "Evo-1 Base (131K)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/evo-1-131k-base",
-    "maxOutput": 131073,
-    "reasoning": false,
-    "tokens": 131073,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.",
-    "displayName": "Evo-1 Base (8K)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/evo-1-8k-base",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "M2-BERT from the Monarch Mixer paper fine-tuned for retrieval",
-    "displayName": "M2-BERT-Retrieval-2K",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/m2-bert-80M-2k-retrieval",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.",
-    "displayName": "M2-BERT-Retrieval-32k",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/m2-bert-80M-32k-retrieval",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.",
-    "displayName": "M2-BERT-Retrieval-8k",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/m2-bert-80M-8k-retrieval",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": 4096,
-    "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling",
-    "displayName": "Upstage SOLAR Instruct v1 (11B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "upstage/SOLAR-10.7B-Instruct-v1.0",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Dreambooth model trained on a diverse set of analog photographs to provide an analog film effect. ",
-    "displayName": "Analog Diffusion",
-    "enabled": false,
-    "functionCall": false,
-    "id": "wavymulder/Analog-Diffusion",
-    "maxOutput": undefined,
-    "reasoning": false,
-    "tokens": undefined,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
-    "displayName": "01-ai Yi Chat (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "zero-one-ai/Yi-34B-Chat",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
-    "displayName": "01-ai Yi Base (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "zero-one-ai/Yi-34B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
-    "displayName": "01-ai Yi Base (6B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "zero-one-ai/Yi-6B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
-    "displayName": "Llama3 8B Chat HF INT4",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/Llama-3-8b-chat-hf-int4",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
-    "displayName": "Togethercomputer Llama3 8B Instruct Int8",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/Llama-3-8b-chat-hf-int8",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
-    "displayName": "Pythia (1B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/pythia-1b-v0",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "replit-code-v1-3b is a 2.7B Causal Language Model focused on Code Completion. The model has been trained on a subset of the Stack Dedup v1.2 dataset.",
-    "displayName": "Replit-Code-v1 (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "replit/replit-code-v1-3b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.",
-    "displayName": "Pythia-Chat-Base (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/Pythia-Chat-Base-7B-v0.16",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Decoder-style transformer pretrained from scratch on 1T tokens of English text and code.",
-    "displayName": "MPT (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mosaicml/mpt-7b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.",
-    "displayName": "MPT-Chat (30B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/mpt-30b-chat",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "T5 fine-tuned on more than 1000 additional tasks covering also more languages, making it better than T5 at majority of tasks. ",
-    "displayName": "Flan T5 XL (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "google/flan-t5-xl",
-    "maxOutput": 512,
-    "reasoning": false,
-    "tokens": 512,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Foundation model designed specifically for SQL generation tasks. Pre-trained for 3 epochs and fine-tuned for 10 epochs.",
-    "displayName": "NSQL (6B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NumbersStation/nsql-6B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.",
-    "displayName": "Koala (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/Koala-7B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
-    "displayName": "Pythia (6.9B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/pythia-6.9b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
-    "displayName": "Dolly v2 (12B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "databricks/dolly-v2-12b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
-    "displayName": "Dolly v2 (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "databricks/dolly-v2-3b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Autoregressive language model trained on the Pile. Its architecture intentionally resembles that of GPT-3, and is almost identical to that of GPT-J 6B.",
-    "displayName": "GPT-NeoX (20B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/gpt-neox-20b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
-    "displayName": "Pythia (2.8B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/pythia-2.8b-v0",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "LLaMA 13B fine-tuned on over 300,000 instructions. Designed for long responses, low hallucination rate, and absence of censorship mechanisms.",
-    "displayName": "Nous Hermes (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-13b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
-    "displayName": "Guanaco (65B) ",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/guanaco-65b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/llama-2-7b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.",
-    "displayName": "Vicuna-FastChat-T5 (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "lmsys/fastchat-t5-3b-v1.0",
-    "maxOutput": 512,
-    "reasoning": false,
-    "tokens": 512,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
-    "displayName": "LLaMA (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "huggyllama/llama-7b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ",
-    "displayName": "Open-Assistant StableLM SFT-7 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": true,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
-    "displayName": "Pythia (12B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/pythia-12b-v0",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.",
-    "displayName": "MPT-Chat (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/mpt-7b-chat",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Transformer model trained using Ben Wang's Mesh Transformer JAX. ",
-    "displayName": "GPT-J (6B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/gpt-j-6b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ",
-    "displayName": "Open-Assistant Pythia SFT-4 (12B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": true,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.",
-    "displayName": "Vicuna v1.3 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "lmsys/vicuna-7b-v1.3",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model is fine-tuned from CodeLlama-34B-Python and achieves 69.5% pass@1 on HumanEval.",
-    "displayName": "Phind Code LLaMA Python v1 (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Phind/Phind-CodeLlama-34B-Python-v1",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "NSQL is a family of autoregressive open-source large foundation models (FMs) designed specifically for SQL generation tasks",
-    "displayName": "NSQL LLaMA-2 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NumbersStation/nsql-llama-2-7B",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.",
-    "displayName": "Nous Hermes LLaMA-2 (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "NousResearch/Nous-Hermes-Llama2-70b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities.",
-    "displayName": "WizardLM v1.0 (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "WizardLM/WizardLM-70B-V1.0",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
-    "displayName": "LLaMA (65B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "huggyllama/llama-65b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.",
-    "displayName": "Vicuna v1.5 16K (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "lmsys/vicuna-13b-v1.5-16k",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.",
-    "displayName": "GPT-NeoXT-Chat-Base (20B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.",
-    "displayName": "Zephyr-7B-ß",
-    "enabled": false,
-    "functionCall": false,
-    "id": "HuggingFaceH4/zephyr-7b-beta",
-    "maxOutput": 32768,
-    "reasoning": false,
-    "tokens": 32768,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-13b-Python",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/llama-2-13b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-7b-Instruct",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
-    "displayName": "Guanaco (13B) ",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/guanaco-13b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-34b-Python",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Designed for short-form instruction following, finetuned on Dolly and Anthropic HH-RLHF and other datasets",
-    "displayName": "MPT-Instruct (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "mosaicml/mpt-7b-instruct",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 Chat (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/llama-2-70b-chat",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-34b-Instruct",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-34b",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An autoregressive language models for program synthesis.",
-    "displayName": "CodeGen2 (16B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Salesforce/codegen2-16B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An autoregressive language models for program synthesis.",
-    "displayName": "CodeGen2 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "Salesforce/codegen2-7B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Flan T5 XXL (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).",
-    "displayName": "Flan T5 XXL (11B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "google/flan-t5-xxl",
-    "maxOutput": 512,
-    "reasoning": false,
-    "tokens": 512,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 (70B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/llama-2-70b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-7b-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-13b-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Instruct (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-13b-Instruct",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 Chat (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/llama-2-13b-chat",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.",
-    "displayName": "Vicuna v1.3 (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "lmsys/vicuna-13b-v1.3",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
-    "displayName": "LLaMA (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "huggyllama/llama-13b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Fine-tuned from StarCoder to act as a helpful coding assistant. As an alpha release is only intended for educational or research purpopses.",
-    "displayName": "StarCoderChat Alpha (16B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "HuggingFaceH4/starchat-alpha",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
-    "displayName": "LLaMA (30B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "huggyllama/llama-30b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.",
-    "displayName": "StableLM-Base-Alpha (3B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "stabilityai/stablelm-base-alpha-3b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.",
-    "displayName": "StableLM-Base-Alpha (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "stabilityai/stablelm-base-alpha-7b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama Python (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/CodeLlama-7b-Python",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Defog's SQLCoder is a state-of-the-art LLM for converting natural language questions to SQL queries, fine-tuned from Bigcode's Starcoder 15B model.",
-    "displayName": "Sqlcoder (15B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "defog/sqlcoder",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Trained on 80+ coding languages, uses Multi Query Attention, an 8K context window, and was trained using the Fill-in-the-Middle objective on 1T tokens.",
-    "displayName": "StarCoder (16B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "bigcode/starcoder",
-    "maxOutput": 8192,
-    "reasoning": false,
-    "tokens": 8192,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
-    "displayName": "Dolly v2 (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "databricks/dolly-v2-7b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
-    "displayName": "Guanaco (33B) ",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/guanaco-33b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.",
-    "displayName": "Koala (13B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/Koala-13B",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Fork of GPT-J instruction tuned to excel at few-shot prompts (blog post).",
-    "displayName": "GPT-JT (6B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/GPT-JT-6B-v1",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
-    "displayName": "LLaMA-2 Chat (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/llama-2-7b-chat",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling",
-    "displayName": "Upstage SOLAR Instruct v1 (11B)-Int4",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ",
-    "displayName": "Guanaco (7B) ",
-    "enabled": false,
-    "functionCall": false,
-    "id": "togethercomputer/guanaco-7b",
-    "maxOutput": 2048,
-    "reasoning": false,
-    "tokens": 2048,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens.",
-    "displayName": "Llemma (7B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "EleutherAI/llemma_7b",
-    "maxOutput": 4096,
-    "reasoning": false,
-    "tokens": 4096,
-    "vision": false,
-  },
-  {
-    "contextWindowTokens": undefined,
-    "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
-    "displayName": "Code Llama (34B)",
-    "enabled": false,
-    "functionCall": false,
-    "id": "codellama/CodeLlama-34b-hf",
-    "maxOutput": 16384,
-    "reasoning": false,
-    "tokens": 16384,
-    "vision": false,
-  },
-]
-`;
diff --git a/src/middleware.ts b/src/middleware.ts
index da87efed30..b3dec2eb51 100644
--- a/src/middleware.ts
+++ b/src/middleware.ts
@@ -1,7 +1,6 @@
 import { clerkMiddleware, createRouteMatcher } from '@clerk/nextjs/server';
 import { NextRequest, NextResponse } from 'next/server';
 import { UAParser } from 'ua-parser-js';
-import urlJoin from 'url-join';
 
 import { authEnv } from '@/config/auth';
 import { LOBE_THEME_APPEARANCE } from '@/const/theme';
@@ -84,23 +83,21 @@ const defaultMiddleware = (request: NextRequest) => {
   if (['/api', '/trpc', '/webapi'].some((path) => url.pathname.startsWith(path)))
     return NextResponse.next();
 
-  // 处理 URL 重写
-  // 构建新路径: /${route}${originalPathname}
-  // 只对 GET 请求进行 URL 重写，确保其他类型的请求（包括 OPTIONS）不受影响
-  const nextPathname = `/${urlJoin(route, url.pathname)}`;
+  // refs: https://github.com/lobehub/lobe-chat/pull/5866
+  // rewrite the request onto the variant route segment: /${route}${originalPathname}
+  // / -> /zh-CN__0__dark
+  // /discover -> /zh-CN__0__dark/discover
+  const nextPathname = `/${route}` + (url.pathname === '/' ? '' : url.pathname);
   console.log(`[rewrite] ${url.pathname} -> ${nextPathname}`);
+
   url.pathname = nextPathname;
 
-  return NextResponse.rewrite(url);
+  return NextResponse.rewrite(url, { status: 200 });
 };
 
-const publicRoute = ['/', '/discover'];
-
 // Initialize an Edge compatible NextAuth middleware
 const nextAuthMiddleware = NextAuthEdge.auth((req) => {
   const response = defaultMiddleware(req);
-  // skip the '/' route
-  if (publicRoute.some((url) => req.nextUrl.pathname.startsWith(url))) return response;
 
   // Just check if session exists
   const session = req.auth;
diff --git a/src/services/chat.ts b/src/services/chat.ts
index aab22366a1..d06f1ef9a1 100644
--- a/src/services/chat.ts
+++ b/src/services/chat.ts
@@ -3,6 +3,7 @@ import { produce } from 'immer';
 import { merge } from 'lodash-es';
 
 import { DEFAULT_MODEL_PROVIDER_LIST } from '@/config/modelProviders';
+import { enableAuth } from '@/const/auth';
 import { INBOX_GUIDE_SYSTEMROLE } from '@/const/guide';
 import { INBOX_SESSION_ID } from '@/const/session';
 import { DEFAULT_AGENT_CONFIG } from '@/const/settings';
@@ -515,7 +516,7 @@ class ChatService {
      * if enable login and not signed in, return unauthorized error
      */
     const userStore = useUserStore.getState();
-    if (userStore.enableAuth() && !userStore.isSignedIn) {
+    if (enableAuth && !userStore.isSignedIn) {
       throw AgentRuntimeError.createError(ChatErrorType.InvalidAccessCode);
     }
 
diff --git a/src/store/user/slices/auth/action.test.ts b/src/store/user/slices/auth/action.test.ts
index 4bf79fdd1d..f9e72e5dce 100644
--- a/src/store/user/slices/auth/action.test.ts
+++ b/src/store/user/slices/auth/action.test.ts
@@ -89,7 +89,7 @@ describe('createAuthSlice', () => {
     });
 
     it('should call next-auth signOut when NextAuth is enabled', async () => {
-      useUserStore.setState({ enabledNextAuth: true });
+      enableNextAuth = true;
 
       const { result } = renderHook(() => useUserStore());
 
@@ -100,6 +100,7 @@ describe('createAuthSlice', () => {
       const { signOut } = await import('next-auth/react');
 
       expect(signOut).toHaveBeenCalled();
+      enableNextAuth = false;
     });
 
     it('should not call next-auth signOut when NextAuth is disabled', async () => {
@@ -143,7 +144,7 @@ describe('createAuthSlice', () => {
     });
 
     it('should call next-auth signIn when NextAuth is enabled', async () => {
-      useUserStore.setState({ enabledNextAuth: true });
+      enableNextAuth = true;
 
       const { result } = renderHook(() => useUserStore());
 
@@ -154,6 +155,7 @@ describe('createAuthSlice', () => {
       const { signIn } = await import('next-auth/react');
 
       expect(signIn).toHaveBeenCalled();
+      enableNextAuth = false;
     });
     it('should not call next-auth signIn when NextAuth is disabled', async () => {
       const { result } = renderHook(() => useUserStore());
diff --git a/src/store/user/slices/auth/action.ts b/src/store/user/slices/auth/action.ts
index 9166140a8c..ac86ee9fec 100644
--- a/src/store/user/slices/auth/action.ts
+++ b/src/store/user/slices/auth/action.ts
@@ -1,6 +1,6 @@
 import { StateCreator } from 'zustand/vanilla';
 
-import { enableClerk } from '@/const/auth';
+import { enableAuth, enableClerk, enableNextAuth } from '@/const/auth';
 
 import { UserStore } from '../../store';
 
@@ -23,7 +23,7 @@ export const createAuthSlice: StateCreator<
   UserAuthAction
 > = (set, get) => ({
   enableAuth: () => {
-    return enableClerk || get()?.enabledNextAuth || false;
+    return enableAuth;
   },
   logout: async () => {
     if (enableClerk) {
@@ -32,7 +32,6 @@ export const createAuthSlice: StateCreator<
       return;
     }
 
-    const enableNextAuth = get().enabledNextAuth;
     if (enableNextAuth) {
       const { signOut } = await import('next-auth/react');
       signOut();
@@ -50,7 +49,6 @@ export const createAuthSlice: StateCreator<
       return;
     }
 
-    const enableNextAuth = get().enabledNextAuth;
     if (enableNextAuth) {
       const { signIn } = await import('next-auth/react');
       // Check if only one provider is available
diff --git a/src/store/user/slices/auth/initialState.ts b/src/store/user/slices/auth/initialState.ts
index 8fb0b4cd4a..2925544769 100644
--- a/src/store/user/slices/auth/initialState.ts
+++ b/src/store/user/slices/auth/initialState.ts
@@ -16,7 +16,6 @@ export interface UserAuthState {
   clerkSignIn?: (props?: SignInProps) => void;
   clerkSignOut?: SignOut;
   clerkUser?: UserResource;
-  enabledNextAuth?: boolean;
   isLoaded?: boolean;
 
   isSignedIn?: boolean;
diff --git a/src/store/user/slices/auth/selectors.ts b/src/store/user/slices/auth/selectors.ts
index 2beab5fd7e..655853ca4d 100644
--- a/src/store/user/slices/auth/selectors.ts
+++ b/src/store/user/slices/auth/selectors.ts
@@ -1,6 +1,6 @@
 import { t } from 'i18next';
 
-import { enableClerk } from '@/const/auth';
+import { enableAuth, enableClerk, enableNextAuth } from '@/const/auth';
 import { BRANDING_NAME } from '@/const/branding';
 import { UserStore } from '@/store/user';
 import { LobeUser } from '@/types/user';
@@ -8,7 +8,7 @@ import { LobeUser } from '@/types/user';
 const DEFAULT_USERNAME = BRANDING_NAME;
 
 const nickName = (s: UserStore) => {
-  if (!s.enableAuth()) return t('userPanel.defaultNickname', { ns: 'common' });
+  if (!enableAuth) return t('userPanel.defaultNickname', { ns: 'common' });
 
   if (s.isSignedIn) return s.user?.fullName || s.user?.username;
 
@@ -16,7 +16,7 @@ const nickName = (s: UserStore) => {
 };
 
 const username = (s: UserStore) => {
-  if (!s.enableAuth()) return DEFAULT_USERNAME;
+  if (!enableAuth) return DEFAULT_USERNAME;
 
   if (s.isSignedIn) return s.user?.username;
 
@@ -36,17 +36,15 @@ export const userProfileSelectors = {
  */
 const isLogin = (s: UserStore) => {
   // 如果没有开启鉴权，说明不需要登录，默认是登录态
-  if (!s.enableAuth()) return true;
+  if (!enableAuth) return true;
 
   return s.isSignedIn;
 };
 
 export const authSelectors = {
-  enabledAuth: (s: UserStore): boolean => s.enableAuth(),
-  enabledNextAuth: (s: UserStore): boolean => !!s.enabledNextAuth,
   isLoaded: (s: UserStore) => s.isLoaded,
   isLogin,
   isLoginWithAuth: (s: UserStore) => s.isSignedIn,
   isLoginWithClerk: (s: UserStore): boolean => (s.isSignedIn && enableClerk) || false,
-  isLoginWithNextAuth: (s: UserStore): boolean => (s.isSignedIn && !!s.enabledNextAuth) || false,
+  isLoginWithNextAuth: (s: UserStore): boolean => (s.isSignedIn && !!enableNextAuth) || false,
 };
diff --git a/src/store/user/slices/common/action.ts b/src/store/user/slices/common/action.ts
index ad8487ed37..f9fda9f6b4 100644
--- a/src/store/user/slices/common/action.ts
+++ b/src/store/user/slices/common/action.ts
@@ -99,7 +99,6 @@ export const createCommonSlice: StateCreator<
             set(
               {
                 defaultSettings,
-                enabledNextAuth: serverConfig.enabledOAuthSSO,
                 isOnboard: data.isOnboard,
                 isShowPWAGuide: data.canEnablePWAGuide,
                 isUserCanEnableTrace: data.canEnableTrace,