💄 style: support web_search_preview & fix some bugs from OpenAI Responses API (#8131)

* 💄 style: support `web_search_preview` for OpenAI Response API

* ♻️ refactor: refactor annotation handling

* 🐛 fix: fix `reasoning_effort` error in response api

* 🐛 fix: fix `o` series calling in Response API

* 🐛 fix: fix ci test

* 💄 style: update `gpt-4o` ability tags

* 💄 style: update `gpt-4o-mini` ability tags

* ♻️ refactor: add `responsesAPIModels` to store Responses-API-only models

* 🐛 fix: fix `computer-use-preview` calling

* 🐛 fix: fix ci error

* 🐛 fix: fix citations

* 💄 style: add `o3-pro` rules, disable Stream & use Responses API

* 🔨 chore: comment out `o3-pro` in `disableStreamModels`
This commit is contained in:
Zhijie He
2025-06-12 21:03:05 +08:00
committed by GitHub
parent 9d81cdca36
commit b2983f062e
7 changed files with 235 additions and 28 deletions
+120
View File
@@ -59,6 +59,7 @@ export const openaiChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
search: true,
vision: true,
},
contextWindowTokens: 1_047_576,
@@ -73,11 +74,15 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 8,
},
releasedAt: '2025-04-14',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,
search: true,
vision: true,
},
contextWindowTokens: 1_047_576,
@@ -93,6 +98,9 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 1.6,
},
releasedAt: '2025-04-14',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
@@ -135,6 +143,28 @@ export const openaiChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'o1 系列模型经过强化学习训练,能够在回答前进行思考,并执行复杂的推理任务。o1-pro 模型使用了更多计算资源,以进行更深入的思考,从而持续提供更优质的回答。',
displayName: 'o1-pro',
id: 'o1-pro',
maxOutput: 100_000,
pricing: {
input: 150,
output: 600,
},
releasedAt: '2025-03-19',
settings: {
extendParams: ['reasoningEffort'],
},
type: 'chat',
},
{
abilities: {
reasoning: true,
@@ -158,6 +188,7 @@ export const openaiChatModels: AIChatModelCard[] = [
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
@@ -220,6 +251,7 @@ export const openaiChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
search: true,
vision: true,
},
contextWindowTokens: 128_000,
@@ -234,6 +266,9 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 0.6,
},
releasedAt: '2024-07-18',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
@@ -259,6 +294,29 @@ export const openaiChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
//search: true,
},
contextWindowTokens: 128_000,
description: 'GPT-4o mini Audio 模型,支持音频输入输出',
displayName: 'GPT-4o mini Audio',
id: 'gpt-4o-mini-audio-preview',
maxOutput: 16_384,
pricing: {
input: 0.15,
output: 0.6,
},
releasedAt: '2024-12-17',
/*
settings: {
searchImpl: 'params',
},
*/
type: 'chat',
},
{
abilities: {
functionCall: true,
search: true,
vision: true,
},
contextWindowTokens: 128_000,
@@ -272,6 +330,9 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 10,
},
releasedAt: '2024-05-13',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
@@ -297,6 +358,7 @@ export const openaiChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
search: true,
vision: true,
},
contextWindowTokens: 128_000,
@@ -310,11 +372,15 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 10,
},
releasedAt: '2024-11-20',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,
search: true,
vision: true,
},
contextWindowTokens: 128_000,
@@ -327,9 +393,16 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 15,
},
releasedAt: '2024-05-13',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,
//search: true,
},
contextWindowTokens: 128_000,
description: 'GPT-4o Audio 模型,支持音频输入输出',
displayName: 'GPT-4o Audio',
@@ -340,6 +413,11 @@ export const openaiChatModels: AIChatModelCard[] = [
output: 10,
},
releasedAt: '2024-10-01',
/*
settings: {
searchImpl: 'params',
},
*/
type: 'chat',
},
{
@@ -545,6 +623,48 @@ export const openaiChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
contextWindowTokens: 200_000,
description: 'codex-mini-latest 是 o4-mini 的微调版本,专门用于 Codex CLI。对于直接通过 API 使用,我们推荐从 gpt-4.1 开始。',
displayName: 'Codex mini',
id: 'codex-mini-latest',
maxOutput: 100_000,
pricing: {
input: 1.5,
output: 6,
},
releasedAt: '2025-06-01',
settings: {
extendParams: ['reasoningEffort'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
contextWindowTokens: 8192,
description: 'computer-use-preview 模型是专为“计算机使用工具”设计的专用模型,经过训练以理解并执行计算机相关任务。',
displayName: 'Computer Use Preview',
id: 'computer-use-preview',
maxOutput: 1024,
pricing: {
input: 3,
output: 12,
},
releasedAt: '2025-03-11',
settings: {
extendParams: ['reasoningEffort'],
},
type: 'chat',
},
];
export const openaiEmbeddingModels: AIEmbeddingModelCard[] = [
+26 -1
View File
@@ -6,7 +6,32 @@ export const systemToUserModels = new Set([
]);
// TODO: 临时写法,后续要重构成 model card 展示配置
export const disableStreamModels = new Set(['o1', 'o1-2024-12-17']);
/**
 * Ids of models for which streaming is disabled.
 * Covers the `o1`, `o1-pro` and `computer-use-preview` families, each with its dated snapshot id.
 * NOTE(review): how consumers act on membership is not visible here — presumably
 * requests for these models are sent as non-streaming; confirm against callers.
 */
export const disableStreamModels = new Set([
'o1',
'o1-2024-12-17',
'o1-pro',
'o1-pro-2025-03-19',
/*
The official site lists these as not supporting streaming, but in practice Streaming works, so they are commented out for now
'o3-pro',
'o3-pro-2025-06-10',
*/
'computer-use-preview',
'computer-use-preview-2025-03-11',
]);
/**
 * Ids of models that use the Responses API only.
 *
 * Membership is checked by exact id (`Set.has`), so dated snapshot ids are
 * listed explicitly next to their base id. The OpenAI runtime's chat
 * `handlePayload` tags the payload with `apiMode: 'responses'` when the
 * requested model is in this set.
 */
export const responsesAPIModels = new Set([
'o1-pro',
'o1-pro-2025-03-19',
'o3-pro',
'o3-pro-2025-06-10',
'codex-mini-latest',
'computer-use-preview',
'computer-use-preview-2025-03-11',
]);
/**
* models support context caching
+33 -11
View File
@@ -2,21 +2,24 @@ import { ChatStreamPayload, ModelProvider } from '../types';
import { processMultiProviderModelList } from '../utils/modelParse';
import { createOpenAICompatibleRuntime } from '../utils/openaiCompatibleFactory';
import { pruneReasoningPayload } from '../utils/openaiHelpers';
import { responsesAPIModels } from '@/const/models';
export interface OpenAIModelCard {
id: string;
}
const prunePrefixes = ['o1', 'o3', 'o4'];
const prunePrefixes = ['o1', 'o3', 'o4', 'codex', 'computer-use'];
const oaiSearchContextSize = process.env.OPENAI_SEARCH_CONTEXT_SIZE; // low, medium, high
export const LobeOpenAI = createOpenAICompatibleRuntime({
baseURL: 'https://api.openai.com/v1',
chatCompletion: {
handlePayload: (payload) => {
const { model } = payload;
const { enabledSearch, model, ...rest } = payload;
if (model === 'o1-pro') {
return { ...payload, apiMode: 'responses' } as ChatStreamPayload;
if (responsesAPIModels.has(model) || enabledSearch) {
return { ...rest, apiMode: 'responses', enabledSearch, model } as ChatStreamPayload;
}
if (prunePrefixes.some((prefix) => model.startsWith(prefix))) {
@@ -24,11 +27,10 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
}
if (model.includes('-search-')) {
const oaiSearchContextSize = process.env.OPENAI_SEARCH_CONTEXT_SIZE; // low, medium, high
return {
...payload,
...rest,
frequency_penalty: undefined,
model,
presence_penalty: undefined,
stream: payload.stream ?? true,
temperature: undefined,
@@ -41,7 +43,7 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
} as any;
}
return { ...payload, stream: payload.stream ?? true };
return { ...rest, model, stream: payload.stream ?? true };
},
},
debug: {
@@ -57,17 +59,37 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
},
provider: ModelProvider.OpenAI,
responses: {
handlePayload: (payload: ChatStreamPayload) => {
const { model } = payload;
handlePayload: (payload) => {
const { enabledSearch, model, tools, ...rest } = payload;
const openaiTools = enabledSearch
? [
...(tools || []),
{
type: 'web_search_preview',
...(oaiSearchContextSize && {
search_context_size: oaiSearchContextSize,
}),
},
]
: tools;
if (prunePrefixes.some((prefix) => model.startsWith(prefix))) {
if (!payload.reasoning) {
payload.reasoning = { summary: 'auto' };
} else {
payload.reasoning.summary = 'auto';
}
// computer-use series must set truncation as auto
if (model.startsWith('computer-use')) {
payload.truncation = 'auto';
}
return pruneReasoningPayload(payload) as any;
}
return { ...payload, stream: payload.stream ?? true };
return { ...rest, model, stream: payload.stream ?? true, tools: openaiTools } as any;
},
},
});
+2
View File
@@ -107,6 +107,7 @@ export interface ChatStreamPayload {
effort?: string;
summary?: string;
};
reasoning_effort?: 'low' | 'medium' | 'high';
responseMode?: 'stream' | 'json';
/**
* @title 是否开启流式请求
@@ -132,6 +133,7 @@ export interface ChatStreamPayload {
* @default 1
*/
top_p?: number;
truncation?: 'auto' | 'disabled';
}
export interface ChatMethodOptions {
@@ -209,14 +209,9 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
}
async chat(
{ responseMode, apiMode, ...payload }: ChatStreamPayload,
{ responseMode, ...payload }: ChatStreamPayload,
options?: ChatMethodOptions,
) {
// new openai Response API
if (apiMode === 'responses') {
return this.handleResponseAPIMode(payload, options);
}
try {
const inputStartAt = Date.now();
const postPayload = chatCompletion?.handlePayload
@@ -226,6 +221,11 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
stream: payload.stream ?? true,
} as OpenAI.ChatCompletionCreateParamsStreaming);
// new openai Response API
if ((postPayload as any).apiMode === 'responses') {
return this.handleResponseAPIMode(payload, options);
}
const messages = await convertOpenAIMessages(postPayload.messages);
let response: Stream<OpenAI.Chat.Completions.ChatCompletionChunk>;
@@ -478,11 +478,12 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
): Promise<Response> {
const inputStartAt = Date.now();
const { messages, ...res } = responses?.handlePayload
const { messages, reasoning_effort, tools, ...res } = responses?.handlePayload
? (responses?.handlePayload(payload, this._options) as ChatStreamPayload)
: payload;
// remove penalty params
delete res.apiMode;
delete res.frequency_penalty;
delete res.presence_penalty;
@@ -490,9 +491,10 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
const postPayload = {
...res,
...(reasoning_effort ? { reasoning: { effort: reasoning_effort } } : {}),
input,
store: false,
tools: payload.tools?.map((tool) => this.convertChatCompletionToolToResponseTool(tool)),
tools: tools?.map((tool) => this.convertChatCompletionToolToResponseTool(tool)),
} as OpenAI.Responses.ResponseCreateParamsStreaming;
if (debug?.responses?.()) {
@@ -86,11 +86,11 @@ exports[`OpenAIResponsesStream > Reasoning > summary 1`] = `
"data: " analyzing"
",
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
"id: rs_684313b9774481908ee856625f82fb8c0b502bf083132d0d
",
"event: data
"event: text
",
"data: {"type":"response.output_item.done","output_index":0,"item":{"id":"rs_684313b9774481908ee856625f82fb8c0b502bf083132d0d","type":"reasoning","summary":[{"type":"summary_text","text":"**Answering a numeric comparison**\\n\\nThe user is asking in Chinese which number is larger: 9.1 or 9.92. This is straightforward since 9.92 is clearly larger, as it's greater than 9.1. We can respond with \\"9.92大于9.1\\" without needing to search for more information. It's simple comparison, but I could also add a little explanation, noting that 9.92 is indeed 0.82 more than 9.1. However, keeping it simple with \\"9.92 > 9.1\\" is perfectly fine!"}]}}
"data: null
",
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
@@ -128,11 +128,11 @@ exports[`OpenAIResponsesStream > Reasoning > summary 1`] = `
"data: {"type":"response.content_part.done","item_id":"msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d","output_index":1,"content_index":0,"part":{"type":"output_text","annotations":[],"text":"9.92 比 9.1 大。"}}
",
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
"id: msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d
",
"event: data
"event: text
",
"data: {"type":"response.output_item.done","output_index":1,"item":{"id":"msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"9.92 比 9. 大。"}],"role":"assistant"}}
"data: null
",
"id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
@@ -1,7 +1,7 @@
import OpenAI from 'openai';
import type { Stream } from 'openai/streaming';
import { ChatMessageError } from '@/types/message';
import { ChatMessageError, CitationItem } from '@/types/message';
import { AgentRuntimeErrorType } from '../../../error';
import { convertResponseUsage } from '../../usageConverter';
@@ -20,7 +20,17 @@ import {
import { OpenAIStreamOptions } from './openai';
const transformOpenAIStream = (
chunk: OpenAI.Responses.ResponseStreamEvent,
chunk: OpenAI.Responses.ResponseStreamEvent | {
annotation: {
end_index: number;
start_index: number;
title: string;
type: 'url_citation';
url: string;
};
item_id: string;
type: 'response.output_text.annotation.added';
},
streamContext: StreamContext,
): StreamProtocolChunk | StreamProtocolChunk[] => {
// handle the first chunk error
@@ -42,6 +52,7 @@ const transformOpenAIStream = (
switch (chunk.type) {
case 'response.created': {
streamContext.id = chunk.response.id;
streamContext.returnedCitationArray = [];
return { data: chunk.response.status, id: streamContext.id, type: 'data' };
}
@@ -106,6 +117,31 @@ const transformOpenAIStream = (
return { data: chunk.delta, id: chunk.item_id, type: 'reasoning' };
}
case 'response.output_text.annotation.added': {
const citations = chunk.annotation;
if (streamContext.returnedCitationArray) {
streamContext.returnedCitationArray.push({
title: citations.title,
url: citations.url,
} as CitationItem);
}
return { data: null, id: chunk.item_id, type: 'text' };
}
case 'response.output_item.done': {
if (streamContext.returnedCitationArray?.length) {
return {
data: { citations: streamContext.returnedCitationArray },
id: chunk.item.id,
type: 'grounding',
}
}
return { data: null, id: chunk.item.id, type: 'text' };
}
case 'response.completed': {
if (chunk.response.usage) {
return {