mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-17 04:55:51 +00:00
💄 style: support web_search_preview & fix some bugs in the OpenAI Responses API (#8131)
* 💄 style: support `web_search_preview` for OpenAI Response API * ♻️ refactor: refactor annotation handling * 🐛 fix: fix `reasoning_effort` error in response api * 🐛 fix: fix `o` series calling in Response API * 🐛 fix: fix ci test * 💄 style: update `gpt-4o` ability tags * 💄 style: update `gpt-4o-mini` ability tags * ♻️ refactor: add `responsesAPIModels` to store responses api only * 🐛 fix: fix `computer-use-preview` calling * 🐛 fix: fix ci error * 🐛 fix: fix citations * 💄 style: add `o3-pro` rules, disable Stream & use Responses API * 🔨 chore: comment `o3-pro` from `disableStreamModels`
This commit is contained in:
@@ -59,6 +59,7 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 1_047_576,
|
||||
@@ -73,11 +74,15 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 8,
|
||||
},
|
||||
releasedAt: '2025-04-14',
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 1_047_576,
|
||||
@@ -93,6 +98,9 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 1.6,
|
||||
},
|
||||
releasedAt: '2025-04-14',
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
@@ -135,6 +143,28 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description:
|
||||
'o1 系列模型经过强化学习训练,能够在回答前进行思考,并执行复杂的推理任务。o1-pro 模型使用了更多计算资源,以进行更深入的思考,从而持续提供更优质的回答。',
|
||||
displayName: 'o1-pro',
|
||||
id: 'o1-pro',
|
||||
maxOutput: 100_000,
|
||||
pricing: {
|
||||
input: 150,
|
||||
output: 600,
|
||||
},
|
||||
releasedAt: '2025-03-19',
|
||||
settings: {
|
||||
extendParams: ['reasoningEffort'],
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
reasoning: true,
|
||||
@@ -158,6 +188,7 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
vision: true,
|
||||
},
|
||||
@@ -220,6 +251,7 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 128_000,
|
||||
@@ -234,6 +266,9 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 0.6,
|
||||
},
|
||||
releasedAt: '2024-07-18',
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
@@ -259,6 +294,29 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
//search: true,
|
||||
},
|
||||
contextWindowTokens: 128_000,
|
||||
description: 'GPT-4o mini Audio 模型,支持音频输入输出',
|
||||
displayName: 'GPT-4o mini Audio',
|
||||
id: 'gpt-4o-mini-audio-preview',
|
||||
maxOutput: 16_384,
|
||||
pricing: {
|
||||
input: 0.15,
|
||||
output: 0.6,
|
||||
},
|
||||
releasedAt: '2024-12-17',
|
||||
/*
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
*/
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 128_000,
|
||||
@@ -272,6 +330,9 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 10,
|
||||
},
|
||||
releasedAt: '2024-05-13',
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
@@ -297,6 +358,7 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 128_000,
|
||||
@@ -310,11 +372,15 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 10,
|
||||
},
|
||||
releasedAt: '2024-11-20',
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 128_000,
|
||||
@@ -327,9 +393,16 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 15,
|
||||
},
|
||||
releasedAt: '2024-05-13',
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
//search: true,
|
||||
},
|
||||
contextWindowTokens: 128_000,
|
||||
description: 'GPT-4o Audio 模型,支持音频输入输出',
|
||||
displayName: 'GPT-4o Audio',
|
||||
@@ -340,6 +413,11 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
output: 10,
|
||||
},
|
||||
releasedAt: '2024-10-01',
|
||||
/*
|
||||
settings: {
|
||||
searchImpl: 'params',
|
||||
},
|
||||
*/
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
@@ -545,6 +623,48 @@ export const openaiChatModels: AIChatModelCard[] = [
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description: 'codex-mini-latest 是 o4-mini 的微调版本,专门用于 Codex CLI。对于直接通过 API 使用,我们推荐从 gpt-4.1 开始。',
|
||||
displayName: 'Codex mini',
|
||||
id: 'codex-mini-latest',
|
||||
maxOutput: 100_000,
|
||||
pricing: {
|
||||
input: 1.5,
|
||||
output: 6,
|
||||
},
|
||||
releasedAt: '2025-06-01',
|
||||
settings: {
|
||||
extendParams: ['reasoningEffort'],
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 8192,
|
||||
description: 'computer-use-preview 模型是专为“计算机使用工具”设计的专用模型,经过训练以理解并执行计算机相关任务。',
|
||||
displayName: 'Computer Use Preview',
|
||||
id: 'computer-use-preview',
|
||||
maxOutput: 1024,
|
||||
pricing: {
|
||||
input: 3,
|
||||
output: 12,
|
||||
},
|
||||
releasedAt: '2025-03-11',
|
||||
settings: {
|
||||
extendParams: ['reasoningEffort'],
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
];
|
||||
|
||||
export const openaiEmbeddingModels: AIEmbeddingModelCard[] = [
|
||||
|
||||
+26
-1
@@ -6,7 +6,32 @@ export const systemToUserModels = new Set([
|
||||
]);
|
||||
|
||||
// TODO: 临时写法,后续要重构成 model card 展示配置
|
||||
export const disableStreamModels = new Set(['o1', 'o1-2024-12-17']);
|
||||
export const disableStreamModels = new Set([
|
||||
'o1',
|
||||
'o1-2024-12-17',
|
||||
'o1-pro',
|
||||
'o1-pro-2025-03-19',
|
||||
/*
|
||||
官网显示不支持,但是实际试下来支持 Streaming,暂时注释掉
|
||||
'o3-pro',
|
||||
'o3-pro-2025-06-10',
|
||||
*/
|
||||
'computer-use-preview',
|
||||
'computer-use-preview-2025-03-11',
|
||||
]);
|
||||
|
||||
/**
|
||||
* models use Responses API only
|
||||
*/
|
||||
export const responsesAPIModels = new Set([
|
||||
'o1-pro',
|
||||
'o1-pro-2025-03-19',
|
||||
'o3-pro',
|
||||
'o3-pro-2025-06-10',
|
||||
'codex-mini-latest',
|
||||
'computer-use-preview',
|
||||
'computer-use-preview-2025-03-11',
|
||||
]);
|
||||
|
||||
/**
|
||||
* models support context caching
|
||||
|
||||
@@ -2,21 +2,24 @@ import { ChatStreamPayload, ModelProvider } from '../types';
|
||||
import { processMultiProviderModelList } from '../utils/modelParse';
|
||||
import { createOpenAICompatibleRuntime } from '../utils/openaiCompatibleFactory';
|
||||
import { pruneReasoningPayload } from '../utils/openaiHelpers';
|
||||
import { responsesAPIModels } from '@/const/models';
|
||||
|
||||
export interface OpenAIModelCard {
|
||||
id: string;
|
||||
}
|
||||
|
||||
const prunePrefixes = ['o1', 'o3', 'o4'];
|
||||
const prunePrefixes = ['o1', 'o3', 'o4', 'codex', 'computer-use'];
|
||||
|
||||
const oaiSearchContextSize = process.env.OPENAI_SEARCH_CONTEXT_SIZE; // low, medium, high
|
||||
|
||||
export const LobeOpenAI = createOpenAICompatibleRuntime({
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
chatCompletion: {
|
||||
handlePayload: (payload) => {
|
||||
const { model } = payload;
|
||||
const { enabledSearch, model, ...rest } = payload;
|
||||
|
||||
if (model === 'o1-pro') {
|
||||
return { ...payload, apiMode: 'responses' } as ChatStreamPayload;
|
||||
if (responsesAPIModels.has(model) || enabledSearch) {
|
||||
return { ...rest, apiMode: 'responses', enabledSearch, model } as ChatStreamPayload;
|
||||
}
|
||||
|
||||
if (prunePrefixes.some((prefix) => model.startsWith(prefix))) {
|
||||
@@ -24,11 +27,10 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
|
||||
}
|
||||
|
||||
if (model.includes('-search-')) {
|
||||
const oaiSearchContextSize = process.env.OPENAI_SEARCH_CONTEXT_SIZE; // low, medium, high
|
||||
|
||||
return {
|
||||
...payload,
|
||||
...rest,
|
||||
frequency_penalty: undefined,
|
||||
model,
|
||||
presence_penalty: undefined,
|
||||
stream: payload.stream ?? true,
|
||||
temperature: undefined,
|
||||
@@ -41,7 +43,7 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
|
||||
} as any;
|
||||
}
|
||||
|
||||
return { ...payload, stream: payload.stream ?? true };
|
||||
return { ...rest, model, stream: payload.stream ?? true };
|
||||
},
|
||||
},
|
||||
debug: {
|
||||
@@ -57,17 +59,37 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
|
||||
},
|
||||
provider: ModelProvider.OpenAI,
|
||||
responses: {
|
||||
handlePayload: (payload: ChatStreamPayload) => {
|
||||
const { model } = payload;
|
||||
handlePayload: (payload) => {
|
||||
const { enabledSearch, model, tools, ...rest } = payload;
|
||||
|
||||
const openaiTools = enabledSearch
|
||||
? [
|
||||
...(tools || []),
|
||||
{
|
||||
type: 'web_search_preview',
|
||||
...(oaiSearchContextSize && {
|
||||
search_context_size: oaiSearchContextSize,
|
||||
}),
|
||||
},
|
||||
]
|
||||
: tools;
|
||||
|
||||
if (prunePrefixes.some((prefix) => model.startsWith(prefix))) {
|
||||
if (!payload.reasoning) {
|
||||
payload.reasoning = { summary: 'auto' };
|
||||
} else {
|
||||
payload.reasoning.summary = 'auto';
|
||||
}
|
||||
|
||||
// computer-use series must set truncation as auto
|
||||
if (model.startsWith('computer-use')) {
|
||||
payload.truncation = 'auto';
|
||||
}
|
||||
|
||||
return pruneReasoningPayload(payload) as any;
|
||||
}
|
||||
|
||||
return { ...payload, stream: payload.stream ?? true };
|
||||
return { ...rest, model, stream: payload.stream ?? true, tools: openaiTools } as any;
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -107,6 +107,7 @@ export interface ChatStreamPayload {
|
||||
effort?: string;
|
||||
summary?: string;
|
||||
};
|
||||
reasoning_effort?: 'low' | 'medium' | 'high';
|
||||
responseMode?: 'stream' | 'json';
|
||||
/**
|
||||
* @title 是否开启流式请求
|
||||
@@ -132,6 +133,7 @@ export interface ChatStreamPayload {
|
||||
* @default 1
|
||||
*/
|
||||
top_p?: number;
|
||||
truncation?: 'auto' | 'disabled';
|
||||
}
|
||||
|
||||
export interface ChatMethodOptions {
|
||||
|
||||
@@ -209,14 +209,9 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
|
||||
}
|
||||
|
||||
async chat(
|
||||
{ responseMode, apiMode, ...payload }: ChatStreamPayload,
|
||||
{ responseMode, ...payload }: ChatStreamPayload,
|
||||
options?: ChatMethodOptions,
|
||||
) {
|
||||
// new openai Response API
|
||||
if (apiMode === 'responses') {
|
||||
return this.handleResponseAPIMode(payload, options);
|
||||
}
|
||||
|
||||
try {
|
||||
const inputStartAt = Date.now();
|
||||
const postPayload = chatCompletion?.handlePayload
|
||||
@@ -226,6 +221,11 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
|
||||
stream: payload.stream ?? true,
|
||||
} as OpenAI.ChatCompletionCreateParamsStreaming);
|
||||
|
||||
// new openai Response API
|
||||
if ((postPayload as any).apiMode === 'responses') {
|
||||
return this.handleResponseAPIMode(payload, options);
|
||||
}
|
||||
|
||||
const messages = await convertOpenAIMessages(postPayload.messages);
|
||||
|
||||
let response: Stream<OpenAI.Chat.Completions.ChatCompletionChunk>;
|
||||
@@ -478,11 +478,12 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
|
||||
): Promise<Response> {
|
||||
const inputStartAt = Date.now();
|
||||
|
||||
const { messages, ...res } = responses?.handlePayload
|
||||
const { messages, reasoning_effort, tools, ...res } = responses?.handlePayload
|
||||
? (responses?.handlePayload(payload, this._options) as ChatStreamPayload)
|
||||
: payload;
|
||||
|
||||
// remove penalty params
|
||||
delete res.apiMode;
|
||||
delete res.frequency_penalty;
|
||||
delete res.presence_penalty;
|
||||
|
||||
@@ -490,9 +491,10 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
|
||||
|
||||
const postPayload = {
|
||||
...res,
|
||||
...(reasoning_effort ? { reasoning: { effort: reasoning_effort } } : {}),
|
||||
input,
|
||||
store: false,
|
||||
tools: payload.tools?.map((tool) => this.convertChatCompletionToolToResponseTool(tool)),
|
||||
tools: tools?.map((tool) => this.convertChatCompletionToolToResponseTool(tool)),
|
||||
} as OpenAI.Responses.ResponseCreateParamsStreaming;
|
||||
|
||||
if (debug?.responses?.()) {
|
||||
|
||||
+6
-6
@@ -86,11 +86,11 @@ exports[`OpenAIResponsesStream > Reasoning > summary 1`] = `
|
||||
"data: " analyzing"
|
||||
|
||||
",
|
||||
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
|
||||
"id: rs_684313b9774481908ee856625f82fb8c0b502bf083132d0d
|
||||
",
|
||||
"event: data
|
||||
"event: text
|
||||
",
|
||||
"data: {"type":"response.output_item.done","output_index":0,"item":{"id":"rs_684313b9774481908ee856625f82fb8c0b502bf083132d0d","type":"reasoning","summary":[{"type":"summary_text","text":"**Answering a numeric comparison**\\n\\nThe user is asking in Chinese which number is larger: 9.1 or 9.92. This is straightforward since 9.92 is clearly larger, as it's greater than 9.1. We can respond with \\"9.92大于9.1\\" without needing to search for more information. It's simple comparison, but I could also add a little explanation, noting that 9.92 is indeed 0.82 more than 9.1. However, keeping it simple with \\"9.92 > 9.1\\" is perfectly fine!"}]}}
|
||||
"data: null
|
||||
|
||||
",
|
||||
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
|
||||
@@ -128,11 +128,11 @@ exports[`OpenAIResponsesStream > Reasoning > summary 1`] = `
|
||||
"data: {"type":"response.content_part.done","item_id":"msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d","output_index":1,"content_index":0,"part":{"type":"output_text","annotations":[],"text":"9.92 比 9.1 大。"}}
|
||||
|
||||
",
|
||||
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
|
||||
"id: msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d
|
||||
",
|
||||
"event: data
|
||||
"event: text
|
||||
",
|
||||
"data: {"type":"response.output_item.done","output_index":1,"item":{"id":"msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"9.92 比 9. 大。"}],"role":"assistant"}}
|
||||
"data: null
|
||||
|
||||
",
|
||||
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import OpenAI from 'openai';
|
||||
import type { Stream } from 'openai/streaming';
|
||||
|
||||
import { ChatMessageError } from '@/types/message';
|
||||
import { ChatMessageError, CitationItem } from '@/types/message';
|
||||
|
||||
import { AgentRuntimeErrorType } from '../../../error';
|
||||
import { convertResponseUsage } from '../../usageConverter';
|
||||
@@ -20,7 +20,17 @@ import {
|
||||
import { OpenAIStreamOptions } from './openai';
|
||||
|
||||
const transformOpenAIStream = (
|
||||
chunk: OpenAI.Responses.ResponseStreamEvent,
|
||||
chunk: OpenAI.Responses.ResponseStreamEvent | {
|
||||
annotation: {
|
||||
end_index: number;
|
||||
start_index: number;
|
||||
title: string;
|
||||
type: 'url_citation';
|
||||
url: string;
|
||||
};
|
||||
item_id: string;
|
||||
type: 'response.output_text.annotation.added';
|
||||
},
|
||||
streamContext: StreamContext,
|
||||
): StreamProtocolChunk | StreamProtocolChunk[] => {
|
||||
// handle the first chunk error
|
||||
@@ -42,6 +52,7 @@ const transformOpenAIStream = (
|
||||
switch (chunk.type) {
|
||||
case 'response.created': {
|
||||
streamContext.id = chunk.response.id;
|
||||
streamContext.returnedCitationArray = [];
|
||||
|
||||
return { data: chunk.response.status, id: streamContext.id, type: 'data' };
|
||||
}
|
||||
@@ -106,6 +117,31 @@ const transformOpenAIStream = (
|
||||
return { data: chunk.delta, id: chunk.item_id, type: 'reasoning' };
|
||||
}
|
||||
|
||||
case 'response.output_text.annotation.added': {
|
||||
const citations = chunk.annotation;
|
||||
|
||||
if (streamContext.returnedCitationArray) {
|
||||
streamContext.returnedCitationArray.push({
|
||||
title: citations.title,
|
||||
url: citations.url,
|
||||
} as CitationItem);
|
||||
}
|
||||
|
||||
return { data: null, id: chunk.item_id, type: 'text' };
|
||||
}
|
||||
|
||||
case 'response.output_item.done': {
|
||||
if (streamContext.returnedCitationArray?.length) {
|
||||
return {
|
||||
data: { citations: streamContext.returnedCitationArray },
|
||||
id: chunk.item.id,
|
||||
type: 'grounding',
|
||||
}
|
||||
}
|
||||
|
||||
return { data: null, id: chunk.item.id, type: 'text' };
|
||||
}
|
||||
|
||||
case 'response.completed': {
|
||||
if (chunk.response.usage) {
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user