From e529108ff6afd32efb56d9d36c534aed3cc9ee3b Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 8 Feb 2025 10:25:28 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20refactor=20the=20auth=20con?= =?UTF-8?q?dition=20in=20Next=20Auth=20(#5866)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This upgrade includes two changes: For users deploying with Vercel using next-auth, it is necessary to add the environment variable `NEXT_PUBLIC_ENABLE_NEXT_AUTH=1` to ensure Next Auth is enabled; other users are not affected. For users using clerk in self-built images, it is necessary to additionally configure `NEXT_PUBLIC_ENABLE_NEXT_AUTH=0` to disable Next Auth Other standard deployment scenarios (using Clerk in Vercel and using next-auth in Docker) are not affected For More detail, refer to https://github.com/lobehub/lobe-chat/issues/5804 本次升级存在两个变更: - 针对使用 Vercel 部署中使用 next-auth 的用户,需要额外添加 `NEXT_PUBLIC_ENABLE_NEXT_AUTH=1` 环境变量来确保开启 Next Auth - 针对使用自构建镜像中使用 clerk 的用户,需要额外配置 `NEXT_PUBLIC_ENABLE_NEXT_AUTH=0` 环境变量来关闭 Next Auth 其他标准部署场景(Vercel 中使用 Clerk 与 Docker 中使用 next-auth )不受影响 变更详情原因查看 https://github.com/lobehub/lobe-chat/issues/5804 --- .env.example | 1 + Dockerfile.database | 3 +- next.config.ts | 1 + .../me/(home)/__tests__/UserBanner.test.tsx | 5 +- .../me/(home)/__tests__/useCategory.test.tsx | 10 +- .../me/(home)/features/UserBanner.tsx | 9 +- .../me/(home)/features/useCategory.tsx | 4 +- .../(mobile)/me/profile/features/Category.tsx | 4 +- .../(main)/profile/(home)/Client.tsx | 4 +- .../(main)/profile/hooks/useCategory.tsx | 6 +- .../(main)/settings/_layout/Mobile/Header.tsx | 4 +- src/app/[variants]/page.tsx | 1 - src/config/auth.ts | 3 +- src/const/auth.ts | 3 +- src/features/User/UserPanel/PanelContent.tsx | 10 +- src/features/User/UserPanel/useMenu.tsx | 4 +- .../User/__tests__/PanelContent.test.tsx | 13 +- .../GlobalProvider/StoreInitialization.tsx | 3 +- .../__snapshots__/index.test.ts.snap | 2190 ----------------- src/middleware.ts | 17 +- src/services/chat.ts | 3 +- src/store/user/slices/auth/action.test.ts | 6 +- src/store/user/slices/auth/action.ts | 6 +- src/store/user/slices/auth/initialState.ts | 1 - src/store/user/slices/auth/selectors.ts | 12 +- src/store/user/slices/common/action.ts | 1 - 26 files changed, 58 insertions(+), 2266 deletions(-) delete mode 100644 src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap diff --git a/.env.example b/.env.example index c7829523d7..c0bd7fbb3c 100644 --- a/.env.example +++ b/.env.example @@ -190,6 +190,7 @@ OPENAI_API_KEY=sk-xxxxxxxxx # NextAuth related configurations +# NEXT_PUBLIC_ENABLE_NEXT_AUTH=1 # NEXT_AUTH_SECRET= # Auth0 configurations diff --git a/Dockerfile.database b/Dockerfile.database index feeefa0837..fc265363dd 100644 --- a/Dockerfile.database +++ b/Dockerfile.database @@ -38,6 +38,7 @@ FROM base AS builder ARG USE_CN_MIRROR ARG NEXT_PUBLIC_BASE_PATH ARG NEXT_PUBLIC_SERVICE_MODE +ARG NEXT_PUBLIC_ENABLE_NEXT_AUTH ARG NEXT_PUBLIC_SENTRY_DSN ARG NEXT_PUBLIC_ANALYTICS_POSTHOG ARG NEXT_PUBLIC_POSTHOG_HOST @@ -49,7 +50,7 @@ ARG NEXT_PUBLIC_UMAMI_WEBSITE_ID ENV NEXT_PUBLIC_BASE_PATH="${NEXT_PUBLIC_BASE_PATH}" ENV NEXT_PUBLIC_SERVICE_MODE="${NEXT_PUBLIC_SERVICE_MODE:-server}" \ - NEXT_PUBLIC_ENABLE_NEXT_AUTH="1" \ + NEXT_PUBLIC_ENABLE_NEXT_AUTH="${NEXT_PUBLIC_ENABLE_NEXT_AUTH:-1}" \ APP_URL="http://app.com" \ DATABASE_DRIVER="node" \ DATABASE_URL="postgres://postgres:password@localhost:5432/postgres" \ diff --git a/next.config.ts b/next.config.ts index 11f75c6dbe..59d058c4be 100644 --- a/next.config.ts +++ b/next.config.ts @@ -179,6 +179,7 @@ const nextConfig: NextConfig = { ], // when external packages in dev mode with turbopack, this config will lead to bundle error serverExternalPackages: isProd ? ['@electric-sql/pglite'] : undefined, + transpilePackages: ['pdfjs-dist', 'mermaid'], webpack(config) { diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx index b3b0fc826f..1da776414a 100644 --- a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx +++ b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/UserBanner.test.tsx @@ -49,8 +49,9 @@ afterEach(() => { describe('UserBanner', () => { it('should render UserInfo and DataStatistics when auth is disabled', () => { act(() => { - useUserStore.setState({ isSignedIn: false, enableAuth: () => false }); + useUserStore.setState({ isSignedIn: false }); }); + enableAuth = false; render(); @@ -75,7 +76,7 @@ describe('UserBanner', () => { it('should render UserLoginOrSignup when user is not logged in with auth enabled', () => { act(() => { - useUserStore.setState({ isSignedIn: false, enableAuth: () => true }); + useUserStore.setState({ isSignedIn: false }); }); enableClerk = true; diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx index 5c697ce4d9..d04ebbd07c 100644 --- a/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx +++ b/src/app/[variants]/(main)/(mobile)/me/(home)/__tests__/useCategory.test.tsx @@ -45,12 +45,10 @@ afterEach(() => { enableClerk = true; }); -// 目前对 enableAuth 的判定是在 useUserStore 中,所以需要 mock useUserStore -// 类型定义: enableAuth: () => boolean describe('useCategory', () => { it('should return correct items when the user is logged in with authentication', () => { act(() => { - useUserStore.setState({ isSignedIn: true, enableAuth: () => true }); + useUserStore.setState({ isSignedIn: true }); }); enableAuth = true; enableClerk = false; @@ -70,8 +68,9 @@ describe('useCategory', () => { it('should return correct items when the user is not logged in', () => { act(() => { - useUserStore.setState({ isSignedIn: false, enableAuth: () => true }); + useUserStore.setState({ isSignedIn: false }); }); + enableAuth = true; const { result } = renderHook(() => useCategory(), { wrapper }); @@ -88,9 +87,10 @@ describe('useCategory', () => { it('should handle settings for non-authenticated users', () => { act(() => { - useUserStore.setState({ isSignedIn: false, enableAuth: () => false }); + useUserStore.setState({ isSignedIn: false }); }); enableClerk = false; + enableAuth = false; const { result } = renderHook(() => useCategory(), { wrapper }); diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx index b71d1358a6..c46796af67 100644 --- a/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx +++ b/src/app/[variants]/(main)/(mobile)/me/(home)/features/UserBanner.tsx @@ -5,6 +5,7 @@ import { useRouter } from 'next/navigation'; import { memo } from 'react'; import { Flexbox } from 'react-layout-kit'; +import { enableAuth, enableNextAuth } from '@/const/auth'; import { isDeprecatedEdition } from '@/const/version'; import DataStatistics from '@/features/User/DataStatistics'; import UserInfo from '@/features/User/UserInfo'; @@ -15,11 +16,7 @@ import { authSelectors } from '@/store/user/selectors'; const UserBanner = memo(() => { const router = useRouter(); const isLoginWithAuth = useUserStore(authSelectors.isLoginWithAuth); - const [enableAuth, signIn, enabledNextAuth] = useUserStore((s) => [ - authSelectors.enabledAuth(s), - s.openLogin, - authSelectors.enabledNextAuth(s), - ]); + const [signIn] = useUserStore((s) => [s.openLogin]); return ( @@ -38,7 +35,7 @@ const UserBanner = memo(() => { { // If use NextAuth, call openLogin method directly - if (enabledNextAuth) { + if (enableNextAuth) { signIn(); return; } diff --git a/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx b/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx index 2dafbea2ec..93a4bf903c 100644 --- a/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx +++ b/src/app/[variants]/(main)/(mobile)/me/(home)/features/useCategory.tsx @@ -12,6 +12,7 @@ import { useRouter } from 'next/navigation'; import { useTranslation } from 'react-i18next'; import { CellProps } from '@/components/Cell'; +import { enableAuth } from '@/const/auth'; import { LOBE_CHAT_CLOUD } from '@/const/branding'; import { DOCUMENTS, FEEDBACK, OFFICIAL_URL, UTM_SOURCE } from '@/const/url'; import { isServerMode } from '@/const/version'; @@ -27,10 +28,9 @@ export const useCategory = () => { const { canInstall, install } = usePWAInstall(); const { t } = useTranslation(['common', 'setting', 'auth']); const { showCloudPromotion, hideDocs } = useServerConfigStore(featureFlagsSelectors); - const [isLogin, isLoginWithAuth, enableAuth] = useUserStore((s) => [ + const [isLogin, isLoginWithAuth] = useUserStore((s) => [ authSelectors.isLogin(s), authSelectors.isLoginWithAuth(s), - authSelectors.enabledAuth(s), ]); const profile: CellProps[] = [ diff --git a/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx b/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx index 54c61a3a23..04567d7046 100644 --- a/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx +++ b/src/app/[variants]/(main)/(mobile)/me/profile/features/Category.tsx @@ -6,15 +6,15 @@ import { memo } from 'react'; import { useTranslation } from 'react-i18next'; import Cell, { CellProps } from '@/components/Cell'; +import { enableAuth } from '@/const/auth'; import { isDeprecatedEdition } from '@/const/version'; import { ProfileTabs } from '@/store/global/initialState'; import { useUserStore } from '@/store/user'; import { authSelectors } from '@/store/user/selectors'; const Category = memo(() => { - const [isLogin, enableAuth, isLoginWithClerk, signOut] = useUserStore((s) => [ + const [isLogin, isLoginWithClerk, signOut] = useUserStore((s) => [ authSelectors.isLogin(s), - authSelectors.enabledAuth(s), authSelectors.isLoginWithClerk(s), s.logout, ]); diff --git a/src/app/[variants]/(main)/profile/(home)/Client.tsx b/src/app/[variants]/(main)/profile/(home)/Client.tsx index 13ef127c9d..c54895119f 100644 --- a/src/app/[variants]/(main)/profile/(home)/Client.tsx +++ b/src/app/[variants]/(main)/profile/(home)/Client.tsx @@ -4,6 +4,7 @@ import { Form, type ItemGroup } from '@lobehub/ui'; import { memo } from 'react'; import { useTranslation } from 'react-i18next'; +import { enableAuth } from '@/const/auth'; import { FORM_STYLE } from '@/const/layoutTokens'; import AvatarWithUpload from '@/features/AvatarWithUpload'; import UserAvatar from '@/features/User/UserAvatar'; @@ -14,8 +15,7 @@ type SettingItemGroup = ItemGroup; const Client = memo<{ mobile?: boolean }>(() => { const [isLoginWithNextAuth] = useUserStore((s) => [authSelectors.isLoginWithNextAuth(s)]); - const [enableAuth, nickname, username, userProfile] = useUserStore((s) => [ - s.enableAuth(), + const [nickname, username, userProfile] = useUserStore((s) => [ userProfileSelectors.nickName(s), userProfileSelectors.username(s), userProfileSelectors.userProfile(s), diff --git a/src/app/[variants]/(main)/profile/hooks/useCategory.tsx b/src/app/[variants]/(main)/profile/hooks/useCategory.tsx index 91aecee480..3fe30f6b0b 100644 --- a/src/app/[variants]/(main)/profile/hooks/useCategory.tsx +++ b/src/app/[variants]/(main)/profile/hooks/useCategory.tsx @@ -4,6 +4,7 @@ import Link from 'next/link'; import { useTranslation } from 'react-i18next'; import type { MenuProps } from '@/components/Menu'; +import { enableAuth } from '@/const/auth'; import { isDeprecatedEdition } from '@/const/version'; import { ProfileTabs } from '@/store/global/initialState'; import { useUserStore } from '@/store/user'; @@ -11,10 +12,7 @@ import { authSelectors } from '@/store/user/slices/auth/selectors'; export const useCategory = () => { const { t } = useTranslation('auth'); - const [enableAuth, isLoginWithClerk] = useUserStore((s) => [ - authSelectors.enabledAuth(s), - authSelectors.isLoginWithClerk(s), - ]); + const [isLoginWithClerk] = useUserStore((s) => [authSelectors.isLoginWithClerk(s)]); const cateItems: MenuProps['items'] = [ { diff --git a/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx b/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx index c96e5844ea..94451f03cc 100644 --- a/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx +++ b/src/app/[variants]/(main)/settings/_layout/Mobile/Header.tsx @@ -6,13 +6,12 @@ import { memo } from 'react'; import { useTranslation } from 'react-i18next'; import { Flexbox } from 'react-layout-kit'; +import { enableAuth } from '@/const/auth'; import { useActiveSettingsKey } from '@/hooks/useActiveTabKey'; import { useQueryRoute } from '@/hooks/useQueryRoute'; import { useShowMobileWorkspace } from '@/hooks/useShowMobileWorkspace'; import { SettingsTabs } from '@/store/global/initialState'; import { useSessionStore } from '@/store/session'; -import { useUserStore } from '@/store/user'; -import { authSelectors } from '@/store/user/selectors'; import { mobileHeaderSticky } from '@/styles/mobileHeader'; const Header = memo(() => { @@ -22,7 +21,6 @@ const Header = memo(() => { const showMobileWorkspace = useShowMobileWorkspace(); const activeSettingsKey = useActiveSettingsKey(); const isSessionActive = useSessionStore((s) => !!s.activeId); - const enableAuth = useUserStore(authSelectors.enabledAuth); const handleBackClick = () => { if (isSessionActive && showMobileWorkspace) { diff --git a/src/app/[variants]/page.tsx b/src/app/[variants]/page.tsx index c8723029ec..83d62474ac 100644 --- a/src/app/[variants]/page.tsx +++ b/src/app/[variants]/page.tsx @@ -7,4 +7,3 @@ export const metadata: Metadata = { }; export { default } from './loading'; - diff --git a/src/config/auth.ts b/src/config/auth.ts index d922a711e7..adee4abfb0 100644 --- a/src/config/auth.ts +++ b/src/config/auth.ts @@ -217,8 +217,7 @@ export const getAuthConfig = () => { CLERK_WEBHOOK_SECRET: process.env.CLERK_WEBHOOK_SECRET, // Next Auth - NEXT_PUBLIC_ENABLE_NEXT_AUTH: - !!process.env.NEXT_AUTH_SECRET || process.env.NEXT_PUBLIC_ENABLE_NEXT_AUTH === '1', + NEXT_PUBLIC_ENABLE_NEXT_AUTH: process.env.NEXT_PUBLIC_ENABLE_NEXT_AUTH === '1', NEXT_AUTH_SSO_PROVIDERS: process.env.NEXT_AUTH_SSO_PROVIDERS, NEXT_AUTH_SECRET: process.env.NEXT_AUTH_SECRET, NEXT_AUTH_DEBUG: !!process.env.NEXT_AUTH_DEBUG, diff --git a/src/const/auth.ts b/src/const/auth.ts index 9a633da545..43f462463e 100644 --- a/src/const/auth.ts +++ b/src/const/auth.ts @@ -2,8 +2,7 @@ import { authEnv } from '@/config/auth'; export const enableClerk = authEnv.NEXT_PUBLIC_ENABLE_CLERK_AUTH; export const enableNextAuth = authEnv.NEXT_PUBLIC_ENABLE_NEXT_AUTH; -export const enableAuth = - authEnv.NEXT_PUBLIC_ENABLE_CLERK_AUTH || authEnv.NEXT_PUBLIC_ENABLE_NEXT_AUTH; +export const enableAuth = enableClerk || enableNextAuth || false; export const LOBE_CHAT_AUTH_HEADER = 'X-lobe-chat-auth'; diff --git a/src/features/User/UserPanel/PanelContent.tsx b/src/features/User/UserPanel/PanelContent.tsx index a1ddccc00e..31606b230f 100644 --- a/src/features/User/UserPanel/PanelContent.tsx +++ b/src/features/User/UserPanel/PanelContent.tsx @@ -5,6 +5,7 @@ import { Flexbox } from 'react-layout-kit'; import BrandWatermark from '@/components/BrandWatermark'; import Menu from '@/components/Menu'; +import { enableAuth, enableNextAuth } from '@/const/auth'; import { isDeprecatedEdition } from '@/const/version'; import { useUserStore } from '@/store/user'; import { authSelectors } from '@/store/user/selectors'; @@ -19,12 +20,7 @@ import { useMenu } from './useMenu'; const PanelContent = memo<{ closePopover: () => void }>(({ closePopover }) => { const router = useRouter(); const isLoginWithAuth = useUserStore(authSelectors.isLoginWithAuth); - const [openSignIn, signOut, enableAuth, enabledNextAuth] = useUserStore((s) => [ - s.openLogin, - s.logout, - s.enableAuth(), - s.enabledNextAuth, - ]); + const [openSignIn, signOut] = useUserStore((s) => [s.openLogin, s.logout]); const { mainItems, logoutItems } = useMenu(); const handleSignIn = () => { @@ -36,7 +32,7 @@ const PanelContent = memo<{ closePopover: () => void }>(({ closePopover }) => { signOut(); closePopover(); // NextAuth doesn't need to redirect to login page - if (enabledNextAuth) return; + if (enableNextAuth) return; router.push('/login'); }; diff --git a/src/features/User/UserPanel/useMenu.tsx b/src/features/User/UserPanel/useMenu.tsx index dadf82d970..0d3b374caa 100644 --- a/src/features/User/UserPanel/useMenu.tsx +++ b/src/features/User/UserPanel/useMenu.tsx @@ -21,6 +21,7 @@ import { useTranslation } from 'react-i18next'; import { Flexbox } from 'react-layout-kit'; import type { MenuProps } from '@/components/Menu'; +import { enableAuth } from '@/const/auth'; import { LOBE_CHAT_CLOUD } from '@/const/branding'; import { DISCORD, @@ -68,8 +69,7 @@ export const useMenu = () => { const hasNewVersion = useNewVersion(); const { t } = useTranslation(['common', 'setting', 'auth']); const { showCloudPromotion, hideDocs } = useServerConfigStore(featureFlagsSelectors); - const [enableAuth, isLogin, isLoginWithAuth] = useUserStore((s) => [ - authSelectors.enabledAuth(s), + const [isLogin, isLoginWithAuth] = useUserStore((s) => [ authSelectors.isLogin(s), authSelectors.isLoginWithAuth(s), ]); diff --git a/src/features/User/__tests__/PanelContent.test.tsx b/src/features/User/__tests__/PanelContent.test.tsx index 35be35b8e2..dc8b2b164f 100644 --- a/src/features/User/__tests__/PanelContent.test.tsx +++ b/src/features/User/__tests__/PanelContent.test.tsx @@ -68,13 +68,12 @@ vi.mock('@/const/version', () => ({ // 定义一个变量来存储 enableAuth 的值 let enableAuth = true; -beforeEach(() => { - useUserStore.setState({ enableAuth: () => true }); -}); - -afterEach(() => { - enableAuth = true; -}); +// 模拟 @/const/auth 模块 +vi.mock('@/const/auth', () => ({ + get enableAuth() { + return enableAuth; + }, +})); describe('PanelContent', () => { const closePopover = vi.fn(); diff --git a/src/layout/GlobalProvider/StoreInitialization.tsx b/src/layout/GlobalProvider/StoreInitialization.tsx index a0baa68f4a..4627b28399 100644 --- a/src/layout/GlobalProvider/StoreInitialization.tsx +++ b/src/layout/GlobalProvider/StoreInitialization.tsx @@ -5,6 +5,7 @@ import { memo } from 'react'; import { useTranslation } from 'react-i18next'; import { createStoreUpdater } from 'zustand-utils'; +import { enableNextAuth } from '@/const/auth'; import { useIsMobile } from '@/hooks/useIsMobile'; import { useEnabledDataSync } from '@/hooks/useSyncData'; import { useAgentStore } from '@/store/agent'; @@ -39,8 +40,6 @@ const StoreInitialization = memo(() => { // Update NextAuth status const useUserStoreUpdater = createStoreUpdater(useUserStore); - const enableNextAuth = useServerConfigStore(serverConfigSelectors.enabledOAuthSSO); - useUserStoreUpdater('enabledNextAuth', enableNextAuth); const oAuthSSOProviders = useServerConfigStore(serverConfigSelectors.oAuthSSOProviders); useUserStoreUpdater('oAuthSSOProviders', oAuthSSOProviders); diff --git a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap deleted file mode 100644 index 98223d74d7..0000000000 --- a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap +++ /dev/null @@ -1,2190 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`LobeTogetherAI > models > should get models 1`] = ` -[ - { - "contextWindowTokens": undefined, - "description": "This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a narrative.", - "displayName": "Chronos Hermes (13B)", - "enabled": false, - "functionCall": false, - "id": "Austism/chronos-hermes-13b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding", - "displayName": "BAAI-Bge-Base-1p5", - "enabled": false, - "functionCall": false, - "id": "BAAI/bge-base-en-v1.5", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding", - "displayName": "BAAI-Bge-Large-1p5", - "enabled": false, - "functionCall": false, - "id": "BAAI/bge-large-en-v1.5", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": 4096, - "description": "MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique. The main difference with MythoMix is that I allowed more of Huginn to intermingle with the single tensors located at the front and end of a model", - "displayName": "MythoMax-L2 (13B)", - "enabled": false, - "functionCall": false, - "id": "Gryphe/MythoMax-L2-13b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama Guard: LLM-based Input-Output Safeguard for Human-AI Conversations", - "displayName": "Llama Guard (7B)", - "enabled": false, - "functionCall": false, - "id": "Meta-Llama/Llama-Guard-7b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "NexusRaven is an open-source and commercially viable function calling LLM that surpasses the state-of-the-art in function calling capabilities.", - "displayName": "NexusRaven (13B)", - "enabled": false, - "functionCall": true, - "id": "Nexusflow/NexusRaven-V2-13B", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "first Nous collection of dataset and models made by fine-tuning mostly on data created by Nous in-house", - "displayName": "Nous Capybara v1.9 (7B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Capybara-7B-V1p9", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Nous Hermes 2 on Mistral 7B DPO is the new flagship 7B Hermes! This model was DPO'd from Teknium/OpenHermes-2.5-Mistral-7B and has improved across the board on all benchmarks tested - AGIEval, BigBench Reasoning, GPT4All, and TruthfulQA.", - "displayName": "Nous Hermes 2 - Mistral DPO (7B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": 32768, - "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", - "displayName": "Nous Hermes 2 - Mixtral 8x7B-DPO ", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", - "displayName": "Nous Hermes 2 - Mixtral 8x7B-SFT", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune", - "displayName": "Nous Hermes-2 Yi (34B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-2-Yi-34B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", - "displayName": "Nous Hermes Llama-2 (13B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-Llama2-13b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Nous-Hermes-Llama2-7b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", - "displayName": "Nous Hermes LLaMA-2 (7B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-llama-2-7b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An OpenOrca dataset fine-tune on top of Mistral 7B by the OpenOrca team.", - "displayName": "OpenOrca Mistral (7B) 8K", - "enabled": false, - "functionCall": false, - "id": "Open-Orca/Mistral-7B-OpenOrca", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Phind-CodeLlama-34B-v1 trained on additional 1.5B tokens high-quality programming-related data proficient in Python, C/C++, TypeScript, Java, and more.", - "displayName": "Phind Code LLaMA v2 (34B)", - "enabled": false, - "functionCall": false, - "id": "Phind/Phind-CodeLlama-34B-v2", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (0.5B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-0.5B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (0.5B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-0.5B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (1.8B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-1.8B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (1.8B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-1.8B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (110B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-110B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (14B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-14B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (14B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-14B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (32B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-32B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (32B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-32B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (4B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-4B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (4B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-4B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (72B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-72B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (72B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-72B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 Chat (7B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-7B-Chat", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", - "displayName": "Qwen 1.5 (7B)", - "enabled": false, - "functionCall": false, - "id": "Qwen/Qwen1.5-7B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Fine-tune version of Stable Diffusion focused on photorealism.", - "displayName": "Realistic Vision 3.0", - "enabled": false, - "functionCall": false, - "id": "SG161222/Realistic_Vision_V3.0_VAE", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": true, - }, - { - "contextWindowTokens": undefined, - "description": "Arctic is a dense-MoE Hybrid transformer architecture pre-trained from scratch by the Snowflake AI Research Team.", - "displayName": "Snowflake Arctic Instruct", - "enabled": false, - "functionCall": false, - "id": "Snowflake/snowflake-arctic-instruct", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Re:MythoMax (ReMM) is a recreation trial of the original MythoMax-L2-B13 with updated models. This merge use SLERP [TESTING] to merge ReML and Huginn v1.2.", - "displayName": "ReMM SLERP L2 (13B)", - "enabled": false, - "functionCall": false, - "id": "Undi95/ReMM-SLERP-L2-13B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A merge of models built by Undi95 with the new task_arithmetic merge method from mergekit.", - "displayName": "Toppy M (7B)", - "enabled": false, - "functionCall": false, - "id": "Undi95/Toppy-M-7B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A universal English sentence embedding WhereIsAI/UAE-Large-V1 achieves SOTA on the MTEB Leaderboard with an average score of 64.64!", - "displayName": "UAE-Large-V1", - "enabled": false, - "functionCall": false, - "id": "WhereIsAI/UAE-Large-V1", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.", - "displayName": "WizardCoder v1.0 (15B)", - "enabled": false, - "functionCall": false, - "id": "WizardLM/WizardCoder-15B-V1.0", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.", - "displayName": "WizardCoder Python v1.0 (34B)", - "enabled": false, - "functionCall": false, - "id": "WizardLM/WizardCoder-Python-34B-V1.0", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities", - "displayName": "WizardLM v1.2 (13B)", - "enabled": false, - "functionCall": false, - "id": "WizardLM/WizardLM-13B-V1.2", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The OLMo models are trained on the Dolma dataset", - "displayName": "OLMo Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "allenai/OLMo-7B-Instruct", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The OLMo models are trained on the Dolma dataset", - "displayName": "OLMo Twin-2T (7B)", - "enabled": false, - "functionCall": false, - "id": "allenai/OLMo-7B-Twin-2T", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The OLMo models are trained on the Dolma dataset", - "displayName": "OLMo (7B)", - "enabled": false, - "functionCall": false, - "id": "allenai/OLMo-7B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "original BERT model", - "displayName": "Bert Base Uncased", - "enabled": false, - "functionCall": false, - "id": "bert-base-uncased", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (13B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-13b-Instruct-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (13B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-13b-Python-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": 16384, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (34B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-34b-Instruct-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (34B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-34b-Python-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (70B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-70b-Instruct-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (70B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-70b-Python-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama (70B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-70b-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-7b-Instruct-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (7B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-7b-Python-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This Dolphin is really good at coding, I trained with a lot of coding data. It is very obedient but it is not DPO tuned - so you still might need to encourage it in the system prompt as I show in the below examples.", - "displayName": "Dolphin 2.5 Mixtral 8x7b", - "enabled": false, - "functionCall": false, - "id": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": 32768, - "description": "DBRX Instruct is a mixture-of-experts (MoE) large language model trained from scratch by Databricks. DBRX Instruct specializes in few-turn interactions.", - "displayName": "DBRX Instruct", - "enabled": false, - "functionCall": false, - "id": "databricks/dbrx-instruct", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.", - "displayName": "Deepseek Coder Instruct (33B)", - "enabled": false, - "functionCall": false, - "id": "deepseek-ai/deepseek-coder-33b-instruct", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": 4096, - "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese", - "displayName": "DeepSeek LLM Chat (67B)", - "enabled": true, - "functionCall": false, - "id": "deepseek-ai/deepseek-llm-67b-chat", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An instruction fine-tuned LLaMA-2 (70B) model by merging Platypus2 (70B) by garage-bAInd and LLaMA-2 Instruct v2 (70B) by upstage.", - "displayName": "Platypus2 Instruct (70B)", - "enabled": false, - "functionCall": false, - "id": "garage-bAInd/Platypus2-70B-instruct", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": 8192, - "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", - "displayName": "Gemma Instruct (2B)", - "enabled": false, - "functionCall": false, - "id": "google/gemma-2b-it", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", - "displayName": "Gemma (2B)", - "enabled": false, - "functionCall": false, - "id": "google/gemma-2b", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", - "displayName": "Gemma Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "google/gemma-7b-it", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", - "displayName": "Gemma (7B)", - "enabled": false, - "functionCall": false, - "id": "google/gemma-7b", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", - "displayName": "Vicuna v1.5 (13B)", - "enabled": false, - "functionCall": false, - "id": "lmsys/vicuna-13b-v1.5", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", - "displayName": "Vicuna v1.5 (7B)", - "enabled": false, - "functionCall": false, - "id": "lmsys/vicuna-7b-v1.5", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": 4096, - "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 Chat (13B)", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-2-13b-chat-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 (13B)", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-2-13b-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 Chat (70B)", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-2-70b-chat-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": 4096, - "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 (70B)", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-2-70b-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 Chat (7B)", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-2-7b-chat-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 (7B)", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-2-7b-hf", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": 8192, - "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", - "displayName": "Meta Llama 3 70B Instruct", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-3-70b-chat-hf", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": 8192, - "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", - "displayName": "Meta Llama 3 8B Instruct", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-3-8b-chat-hf", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", - "displayName": "Meta Llama 3 8B", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Llama-3-8b-hf", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": null, - "displayName": "Meta Llama Guard 2 8B", - "enabled": false, - "functionCall": undefined, - "id": "meta-llama/LlamaGuard-2-8b", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", - "displayName": "Meta Llama 3 70B", - "enabled": false, - "functionCall": false, - "id": "meta-llama/Meta-Llama-3-70B", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": 65536, - "description": "WizardLM-2 8x22B is Wizard's most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models.", - "displayName": "WizardLM-2 (8x22B)", - "enabled": false, - "functionCall": false, - "id": "microsoft/WizardLM-2-8x22B", - "maxOutput": 65536, - "reasoning": false, - "tokens": 65536, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)", - "displayName": "Microsoft Phi-2", - "enabled": false, - "functionCall": false, - "id": "microsoft/phi-2", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": 8192, - "description": "instruct fine-tuned version of Mistral-7B-v0.1", - "displayName": "Mistral (7B) Instruct", - "enabled": false, - "functionCall": false, - "id": "mistralai/Mistral-7B-Instruct-v0.1", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": 32768, - "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.", - "displayName": "Mistral (7B) Instruct v0.2", - "enabled": false, - "functionCall": false, - "id": "mistralai/Mistral-7B-Instruct-v0.2", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": 8192, - "description": "7.3B parameter model that outperforms Llama 2 13B on all benchmarks, approaches CodeLlama 7B performance on code, Uses Grouped-query attention (GQA) for faster inference and Sliding Window Attention (SWA) to handle longer sequences at smaller cost", - "displayName": "Mistral (7B)", - "enabled": false, - "functionCall": false, - "id": "mistralai/Mistral-7B-v0.1", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": 65536, - "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.", - "displayName": "Mixtral-8x22B Instruct v0.1", - "enabled": true, - "functionCall": false, - "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", - "maxOutput": 65536, - "reasoning": false, - "tokens": 65536, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Mixtral-8x22B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", - "displayName": "Mixtral-8x22B", - "enabled": false, - "functionCall": false, - "id": "mistralai/Mixtral-8x22B", - "maxOutput": 65536, - "reasoning": false, - "tokens": 65536, - "vision": false, - }, - { - "contextWindowTokens": 32768, - "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", - "displayName": "Mixtral-8x7B Instruct v0.1", - "enabled": true, - "functionCall": false, - "id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": 32768, - "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", - "displayName": "Mixtral-8x7B v0.1", - "enabled": false, - "functionCall": false, - "id": "mistralai/Mixtral-8x7B-v0.1", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A merge of OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-quality instruction data, with a custom processing pipeline.", - "displayName": "OpenChat 3.5", - "enabled": false, - "functionCall": false, - "id": "openchat/openchat-3.5-1210", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An open source Stable Diffusion model fine tuned model on Midjourney images. ", - "displayName": "Openjourney v4", - "enabled": false, - "functionCall": false, - "id": "prompthero/openjourney", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", - "displayName": "Stable Diffusion 1.5", - "enabled": false, - "functionCall": false, - "id": "runwayml/stable-diffusion-v1-5", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A sentence-transformers model: it maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search.", - "displayName": "Sentence-BERT", - "enabled": false, - "functionCall": false, - "id": "sentence-transformers/msmarco-bert-base-dot-v5", - "maxOutput": 512, - "reasoning": false, - "tokens": 512, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A state-of-the-art model by Snorkel AI, DPO fine-tuned on Mistral-7B", - "displayName": "Snorkel Mistral PairRM DPO (7B)", - "enabled": false, - "functionCall": false, - "id": "snorkelai/Snorkel-Mistral-PairRM-DPO", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", - "displayName": "Stable Diffusion 2.1", - "enabled": false, - "functionCall": false, - "id": "stabilityai/stable-diffusion-2-1", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A text-to-image generative AI model that excels at creating 1024x1024 images.", - "displayName": "Stable Diffusion XL 1.0", - "enabled": false, - "functionCall": false, - "id": "stabilityai/stable-diffusion-xl-base-1.0", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "State of the art Mistral Fine-tuned on extensive public datasets", - "displayName": "OpenHermes-2-Mistral (7B)", - "enabled": false, - "functionCall": false, - "id": "teknium/OpenHermes-2-Mistral-7B", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Continuation of OpenHermes 2 Mistral model trained on additional code datasets", - "displayName": "OpenHermes-2.5-Mistral (7B)", - "enabled": false, - "functionCall": false, - "id": "teknium/OpenHermes-2p5-Mistral-7B", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This model can be used to moderate other chatbot models. Built using GPT-JT model fine-tuned on Ontocord.ai's OIG-moderation dataset v0.1.", - "displayName": "GPT-JT-Moderation (6B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/GPT-JT-Moderation-6B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations.", - "displayName": "LLaMA-2-32K (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/LLaMA-2-7B-32K", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations, instruction tuned by Together", - "displayName": "LLaMA-2-7B-32K-Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/Llama-2-7B-32K-Instruct", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).", - "displayName": "RedPajama-INCITE (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/RedPajama-INCITE-7B-Base", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-7B-v1 base model.", - "displayName": "RedPajama-INCITE Chat (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/RedPajama-INCITE-7B-Chat", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-7B-v1 base model.", - "displayName": "RedPajama-INCITE Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/RedPajama-INCITE-7B-Instruct", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).", - "displayName": "RedPajama-INCITE (3B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.", - "displayName": "RedPajama-INCITE Chat (3B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-3B-v1 base model.", - "displayName": "RedPajama-INCITE Instruct (3B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", - "displayName": "StripedHyena Hessian (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/StripedHyena-Hessian-7B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": 32768, - "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", - "displayName": "StripedHyena Nous (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/StripedHyena-Nous-7B", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ", - "displayName": "Alpaca (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/alpaca-7b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.", - "displayName": "Evo-1 Base (131K)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/evo-1-131k-base", - "maxOutput": 131073, - "reasoning": false, - "tokens": 131073, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.", - "displayName": "Evo-1 Base (8K)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/evo-1-8k-base", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "M2-BERT from the Monarch Mixer paper fine-tuned for retrieval", - "displayName": "M2-BERT-Retrieval-2K", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/m2-bert-80M-2k-retrieval", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.", - "displayName": "M2-BERT-Retrieval-32k", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/m2-bert-80M-32k-retrieval", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.", - "displayName": "M2-BERT-Retrieval-8k", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/m2-bert-80M-8k-retrieval", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": 4096, - "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", - "displayName": "Upstage SOLAR Instruct v1 (11B)", - "enabled": false, - "functionCall": false, - "id": "upstage/SOLAR-10.7B-Instruct-v1.0", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Dreambooth model trained on a diverse set of analog photographs to provide an analog film effect. ", - "displayName": "Analog Diffusion", - "enabled": false, - "functionCall": false, - "id": "wavymulder/Analog-Diffusion", - "maxOutput": undefined, - "reasoning": false, - "tokens": undefined, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", - "displayName": "01-ai Yi Chat (34B)", - "enabled": false, - "functionCall": false, - "id": "zero-one-ai/Yi-34B-Chat", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", - "displayName": "01-ai Yi Base (34B)", - "enabled": false, - "functionCall": false, - "id": "zero-one-ai/Yi-34B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", - "displayName": "01-ai Yi Base (6B)", - "enabled": false, - "functionCall": false, - "id": "zero-one-ai/Yi-6B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", - "displayName": "Llama3 8B Chat HF INT4", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/Llama-3-8b-chat-hf-int4", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", - "displayName": "Togethercomputer Llama3 8B Instruct Int8", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/Llama-3-8b-chat-hf-int8", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", - "displayName": "Pythia (1B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/pythia-1b-v0", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "replit-code-v1-3b is a 2.7B Causal Language Model focused on Code Completion. The model has been trained on a subset of the Stack Dedup v1.2 dataset.", - "displayName": "Replit-Code-v1 (3B)", - "enabled": false, - "functionCall": false, - "id": "replit/replit-code-v1-3b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.", - "displayName": "Pythia-Chat-Base (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/Pythia-Chat-Base-7B-v0.16", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Decoder-style transformer pretrained from scratch on 1T tokens of English text and code.", - "displayName": "MPT (7B)", - "enabled": false, - "functionCall": false, - "id": "mosaicml/mpt-7b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", - "displayName": "MPT-Chat (30B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/mpt-30b-chat", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "T5 fine-tuned on more than 1000 additional tasks covering also more languages, making it better than T5 at majority of tasks. ", - "displayName": "Flan T5 XL (3B)", - "enabled": false, - "functionCall": false, - "id": "google/flan-t5-xl", - "maxOutput": 512, - "reasoning": false, - "tokens": 512, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Foundation model designed specifically for SQL generation tasks. Pre-trained for 3 epochs and fine-tuned for 10 epochs.", - "displayName": "NSQL (6B)", - "enabled": false, - "functionCall": false, - "id": "NumbersStation/nsql-6B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", - "displayName": "Koala (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/Koala-7B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", - "displayName": "Pythia (6.9B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/pythia-6.9b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", - "displayName": "Dolly v2 (12B)", - "enabled": false, - "functionCall": false, - "id": "databricks/dolly-v2-12b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", - "displayName": "Dolly v2 (3B)", - "enabled": false, - "functionCall": false, - "id": "databricks/dolly-v2-3b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Autoregressive language model trained on the Pile. Its architecture intentionally resembles that of GPT-3, and is almost identical to that of GPT-J 6B.", - "displayName": "GPT-NeoX (20B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/gpt-neox-20b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", - "displayName": "Pythia (2.8B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/pythia-2.8b-v0", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "LLaMA 13B fine-tuned on over 300,000 instructions. Designed for long responses, low hallucination rate, and absence of censorship mechanisms.", - "displayName": "Nous Hermes (13B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-13b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", - "displayName": "Guanaco (65B) ", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/guanaco-65b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/llama-2-7b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.", - "displayName": "Vicuna-FastChat-T5 (3B)", - "enabled": false, - "functionCall": false, - "id": "lmsys/fastchat-t5-3b-v1.0", - "maxOutput": 512, - "reasoning": false, - "tokens": 512, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", - "displayName": "LLaMA (7B)", - "enabled": false, - "functionCall": false, - "id": "huggyllama/llama-7b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", - "displayName": "Open-Assistant StableLM SFT-7 (7B)", - "enabled": false, - "functionCall": false, - "id": "OpenAssistant/stablelm-7b-sft-v7-epoch-3", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": true, - }, - { - "contextWindowTokens": undefined, - "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", - "displayName": "Pythia (12B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/pythia-12b-v0", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", - "displayName": "MPT-Chat (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/mpt-7b-chat", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Transformer model trained using Ben Wang's Mesh Transformer JAX. ", - "displayName": "GPT-J (6B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/gpt-j-6b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", - "displayName": "Open-Assistant Pythia SFT-4 (12B)", - "enabled": false, - "functionCall": false, - "id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": true, - }, - { - "contextWindowTokens": undefined, - "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", - "displayName": "Vicuna v1.3 (7B)", - "enabled": false, - "functionCall": false, - "id": "lmsys/vicuna-7b-v1.3", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This model is fine-tuned from CodeLlama-34B-Python and achieves 69.5% pass@1 on HumanEval.", - "displayName": "Phind Code LLaMA Python v1 (34B)", - "enabled": false, - "functionCall": false, - "id": "Phind/Phind-CodeLlama-34B-Python-v1", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "NSQL is a family of autoregressive open-source large foundation models (FMs) designed specifically for SQL generation tasks", - "displayName": "NSQL LLaMA-2 (7B)", - "enabled": false, - "functionCall": false, - "id": "NumbersStation/nsql-llama-2-7B", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", - "displayName": "Nous Hermes LLaMA-2 (70B)", - "enabled": false, - "functionCall": false, - "id": "NousResearch/Nous-Hermes-Llama2-70b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities.", - "displayName": "WizardLM v1.0 (70B)", - "enabled": false, - "functionCall": false, - "id": "WizardLM/WizardLM-70B-V1.0", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", - "displayName": "LLaMA (65B)", - "enabled": false, - "functionCall": false, - "id": "huggyllama/llama-65b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", - "displayName": "Vicuna v1.5 16K (13B)", - "enabled": false, - "functionCall": false, - "id": "lmsys/vicuna-13b-v1.5-16k", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.", - "displayName": "GPT-NeoXT-Chat-Base (20B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.", - "displayName": "Zephyr-7B-ß", - "enabled": false, - "functionCall": false, - "id": "HuggingFaceH4/zephyr-7b-beta", - "maxOutput": 32768, - "reasoning": false, - "tokens": 32768, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (13B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-13b-Python", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 (13B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/llama-2-13b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-7b-Instruct", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", - "displayName": "Guanaco (13B) ", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/guanaco-13b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (34B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-34b-Python", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Designed for short-form instruction following, finetuned on Dolly and Anthropic HH-RLHF and other datasets", - "displayName": "MPT-Instruct (7B)", - "enabled": false, - "functionCall": false, - "id": "mosaicml/mpt-7b-instruct", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 Chat (70B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/llama-2-70b-chat", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (34B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-34b-Instruct", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama (34B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-34b", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An autoregressive language models for program synthesis.", - "displayName": "CodeGen2 (16B)", - "enabled": false, - "functionCall": false, - "id": "Salesforce/codegen2-16B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An autoregressive language models for program synthesis.", - "displayName": "CodeGen2 (7B)", - "enabled": false, - "functionCall": false, - "id": "Salesforce/codegen2-7B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Flan T5 XXL (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).", - "displayName": "Flan T5 XXL (11B)", - "enabled": false, - "functionCall": false, - "id": "google/flan-t5-xxl", - "maxOutput": 512, - "reasoning": false, - "tokens": 512, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 (70B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/llama-2-70b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama (7B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-7b-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama (13B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-13b-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Instruct (13B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-13b-Instruct", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 Chat (13B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/llama-2-13b-chat", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", - "displayName": "Vicuna v1.3 (13B)", - "enabled": false, - "functionCall": false, - "id": "lmsys/vicuna-13b-v1.3", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", - "displayName": "LLaMA (13B)", - "enabled": false, - "functionCall": false, - "id": "huggyllama/llama-13b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Fine-tuned from StarCoder to act as a helpful coding assistant. As an alpha release is only intended for educational or research purpopses.", - "displayName": "StarCoderChat Alpha (16B)", - "enabled": false, - "functionCall": false, - "id": "HuggingFaceH4/starchat-alpha", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", - "displayName": "LLaMA (30B)", - "enabled": false, - "functionCall": false, - "id": "huggyllama/llama-30b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.", - "displayName": "StableLM-Base-Alpha (3B)", - "enabled": false, - "functionCall": false, - "id": "stabilityai/stablelm-base-alpha-3b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.", - "displayName": "StableLM-Base-Alpha (7B)", - "enabled": false, - "functionCall": false, - "id": "stabilityai/stablelm-base-alpha-7b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama Python (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/CodeLlama-7b-Python", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Defog's SQLCoder is a state-of-the-art LLM for converting natural language questions to SQL queries, fine-tuned from Bigcode's Starcoder 15B model.", - "displayName": "Sqlcoder (15B)", - "enabled": false, - "functionCall": false, - "id": "defog/sqlcoder", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Trained on 80+ coding languages, uses Multi Query Attention, an 8K context window, and was trained using the Fill-in-the-Middle objective on 1T tokens.", - "displayName": "StarCoder (16B)", - "enabled": false, - "functionCall": false, - "id": "bigcode/starcoder", - "maxOutput": 8192, - "reasoning": false, - "tokens": 8192, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", - "displayName": "Dolly v2 (7B)", - "enabled": false, - "functionCall": false, - "id": "databricks/dolly-v2-7b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", - "displayName": "Guanaco (33B) ", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/guanaco-33b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", - "displayName": "Koala (13B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/Koala-13B", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Fork of GPT-J instruction tuned to excel at few-shot prompts (blog post).", - "displayName": "GPT-JT (6B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/GPT-JT-6B-v1", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", - "displayName": "LLaMA-2 Chat (7B)", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/llama-2-7b-chat", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", - "displayName": "Upstage SOLAR Instruct v1 (11B)-Int4", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ", - "displayName": "Guanaco (7B) ", - "enabled": false, - "functionCall": false, - "id": "togethercomputer/guanaco-7b", - "maxOutput": 2048, - "reasoning": false, - "tokens": 2048, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens.", - "displayName": "Llemma (7B)", - "enabled": false, - "functionCall": false, - "id": "EleutherAI/llemma_7b", - "maxOutput": 4096, - "reasoning": false, - "tokens": 4096, - "vision": false, - }, - { - "contextWindowTokens": undefined, - "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", - "displayName": "Code Llama (34B)", - "enabled": false, - "functionCall": false, - "id": "codellama/CodeLlama-34b-hf", - "maxOutput": 16384, - "reasoning": false, - "tokens": 16384, - "vision": false, - }, -] -`; diff --git a/src/middleware.ts b/src/middleware.ts index da87efed30..b3dec2eb51 100644 --- a/src/middleware.ts +++ b/src/middleware.ts @@ -1,7 +1,6 @@ import { clerkMiddleware, createRouteMatcher } from '@clerk/nextjs/server'; import { NextRequest, NextResponse } from 'next/server'; import { UAParser } from 'ua-parser-js'; -import urlJoin from 'url-join'; import { authEnv } from '@/config/auth'; import { LOBE_THEME_APPEARANCE } from '@/const/theme'; @@ -84,23 +83,21 @@ const defaultMiddleware = (request: NextRequest) => { if (['/api', '/trpc', '/webapi'].some((path) => url.pathname.startsWith(path))) return NextResponse.next(); - // 处理 URL 重写 - // 构建新路径: /${route}${originalPathname} - // 只对 GET 请求进行 URL 重写,确保其他类型的请求(包括 OPTIONS)不受影响 - const nextPathname = `/${urlJoin(route, url.pathname)}`; + // refs: https://github.com/lobehub/lobe-chat/pull/5866 + // new handle segment rewrite: /${route}${originalPathname} + // / -> /zh-CN__0__dark + // /discover -> /zh-CN__0__dark/discover + const nextPathname = `/${route}` + (url.pathname === '/' ? '' : url.pathname); console.log(`[rewrite] ${url.pathname} -> ${nextPathname}`); + url.pathname = nextPathname; - return NextResponse.rewrite(url); + return NextResponse.rewrite(url, { status: 200 }); }; -const publicRoute = ['/', '/discover']; - // Initialize an Edge compatible NextAuth middleware const nextAuthMiddleware = NextAuthEdge.auth((req) => { const response = defaultMiddleware(req); - // skip the '/' route - if (publicRoute.some((url) => req.nextUrl.pathname.startsWith(url))) return response; // Just check if session exists const session = req.auth; diff --git a/src/services/chat.ts b/src/services/chat.ts index aab22366a1..d06f1ef9a1 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -3,6 +3,7 @@ import { produce } from 'immer'; import { merge } from 'lodash-es'; import { DEFAULT_MODEL_PROVIDER_LIST } from '@/config/modelProviders'; +import { enableAuth } from '@/const/auth'; import { INBOX_GUIDE_SYSTEMROLE } from '@/const/guide'; import { INBOX_SESSION_ID } from '@/const/session'; import { DEFAULT_AGENT_CONFIG } from '@/const/settings'; @@ -515,7 +516,7 @@ class ChatService { * if enable login and not signed in, return unauthorized error */ const userStore = useUserStore.getState(); - if (userStore.enableAuth() && !userStore.isSignedIn) { + if (enableAuth && !userStore.isSignedIn) { throw AgentRuntimeError.createError(ChatErrorType.InvalidAccessCode); } diff --git a/src/store/user/slices/auth/action.test.ts b/src/store/user/slices/auth/action.test.ts index 4bf79fdd1d..f9e72e5dce 100644 --- a/src/store/user/slices/auth/action.test.ts +++ b/src/store/user/slices/auth/action.test.ts @@ -89,7 +89,7 @@ describe('createAuthSlice', () => { }); it('should call next-auth signOut when NextAuth is enabled', async () => { - useUserStore.setState({ enabledNextAuth: true }); + enableNextAuth = true; const { result } = renderHook(() => useUserStore()); @@ -100,6 +100,7 @@ describe('createAuthSlice', () => { const { signOut } = await import('next-auth/react'); expect(signOut).toHaveBeenCalled(); + enableNextAuth = false; }); it('should not call next-auth signOut when NextAuth is disabled', async () => { @@ -143,7 +144,7 @@ describe('createAuthSlice', () => { }); it('should call next-auth signIn when NextAuth is enabled', async () => { - useUserStore.setState({ enabledNextAuth: true }); + enableNextAuth = true; const { result } = renderHook(() => useUserStore()); @@ -154,6 +155,7 @@ describe('createAuthSlice', () => { const { signIn } = await import('next-auth/react'); expect(signIn).toHaveBeenCalled(); + enableNextAuth = false; }); it('should not call next-auth signIn when NextAuth is disabled', async () => { const { result } = renderHook(() => useUserStore()); diff --git a/src/store/user/slices/auth/action.ts b/src/store/user/slices/auth/action.ts index 9166140a8c..ac86ee9fec 100644 --- a/src/store/user/slices/auth/action.ts +++ b/src/store/user/slices/auth/action.ts @@ -1,6 +1,6 @@ import { StateCreator } from 'zustand/vanilla'; -import { enableClerk } from '@/const/auth'; +import { enableAuth, enableClerk, enableNextAuth } from '@/const/auth'; import { UserStore } from '../../store'; @@ -23,7 +23,7 @@ export const createAuthSlice: StateCreator< UserAuthAction > = (set, get) => ({ enableAuth: () => { - return enableClerk || get()?.enabledNextAuth || false; + return enableAuth; }, logout: async () => { if (enableClerk) { @@ -32,7 +32,6 @@ export const createAuthSlice: StateCreator< return; } - const enableNextAuth = get().enabledNextAuth; if (enableNextAuth) { const { signOut } = await import('next-auth/react'); signOut(); @@ -50,7 +49,6 @@ export const createAuthSlice: StateCreator< return; } - const enableNextAuth = get().enabledNextAuth; if (enableNextAuth) { const { signIn } = await import('next-auth/react'); // Check if only one provider is available diff --git a/src/store/user/slices/auth/initialState.ts b/src/store/user/slices/auth/initialState.ts index 8fb0b4cd4a..2925544769 100644 --- a/src/store/user/slices/auth/initialState.ts +++ b/src/store/user/slices/auth/initialState.ts @@ -16,7 +16,6 @@ export interface UserAuthState { clerkSignIn?: (props?: SignInProps) => void; clerkSignOut?: SignOut; clerkUser?: UserResource; - enabledNextAuth?: boolean; isLoaded?: boolean; isSignedIn?: boolean; diff --git a/src/store/user/slices/auth/selectors.ts b/src/store/user/slices/auth/selectors.ts index 2beab5fd7e..655853ca4d 100644 --- a/src/store/user/slices/auth/selectors.ts +++ b/src/store/user/slices/auth/selectors.ts @@ -1,6 +1,6 @@ import { t } from 'i18next'; -import { enableClerk } from '@/const/auth'; +import { enableAuth, enableClerk, enableNextAuth } from '@/const/auth'; import { BRANDING_NAME } from '@/const/branding'; import { UserStore } from '@/store/user'; import { LobeUser } from '@/types/user'; @@ -8,7 +8,7 @@ import { LobeUser } from '@/types/user'; const DEFAULT_USERNAME = BRANDING_NAME; const nickName = (s: UserStore) => { - if (!s.enableAuth()) return t('userPanel.defaultNickname', { ns: 'common' }); + if (!enableAuth) return t('userPanel.defaultNickname', { ns: 'common' }); if (s.isSignedIn) return s.user?.fullName || s.user?.username; @@ -16,7 +16,7 @@ const nickName = (s: UserStore) => { }; const username = (s: UserStore) => { - if (!s.enableAuth()) return DEFAULT_USERNAME; + if (!enableAuth) return DEFAULT_USERNAME; if (s.isSignedIn) return s.user?.username; @@ -36,17 +36,15 @@ export const userProfileSelectors = { */ const isLogin = (s: UserStore) => { // 如果没有开启鉴权,说明不需要登录,默认是登录态 - if (!s.enableAuth()) return true; + if (!enableAuth) return true; return s.isSignedIn; }; export const authSelectors = { - enabledAuth: (s: UserStore): boolean => s.enableAuth(), - enabledNextAuth: (s: UserStore): boolean => !!s.enabledNextAuth, isLoaded: (s: UserStore) => s.isLoaded, isLogin, isLoginWithAuth: (s: UserStore) => s.isSignedIn, isLoginWithClerk: (s: UserStore): boolean => (s.isSignedIn && enableClerk) || false, - isLoginWithNextAuth: (s: UserStore): boolean => (s.isSignedIn && !!s.enabledNextAuth) || false, + isLoginWithNextAuth: (s: UserStore): boolean => (s.isSignedIn && !!enableNextAuth) || false, }; diff --git a/src/store/user/slices/common/action.ts b/src/store/user/slices/common/action.ts index ad8487ed37..f9fda9f6b4 100644 --- a/src/store/user/slices/common/action.ts +++ b/src/store/user/slices/common/action.ts @@ -99,7 +99,6 @@ export const createCommonSlice: StateCreator< set( { defaultSettings, - enabledNextAuth: serverConfig.enabledOAuthSSO, isOnboard: data.isOnboard, isShowPWAGuide: data.canEnablePWAGuide, isUserCanEnableTrace: data.canEnableTrace,