💄 style: support web_search_preview & fix some bugs from OpenAI Response API (#8131)

* 💄 style: support `web_search_preview` for OpenAI Response API * ♻️ refactor: refactor annotation handling * 🐛 fix: fix `reasoning_effort` error in response api * 🐛 fix: fix `o` series calling in Response API * 🐛 fix: fix ci test * 💄 style: update `gpt-4o` ability tags * 💄 style: update `gpt-4o-mini` ability tags * ♻️ refactor: add `responsesAPIModels` to store responses api only * 🐛 fix: fix `computer-use-preview` calling * 🐛 fix: fix ci error * 🐛 fix: fix citations * 💄 style: add `o3-pro` rules, disable Stream & use Responses API * 🔨 chore: comment `o3-pro` from `disableStreamModels`
2026-06-17 04:55:51 +00:00 · 2025-06-12 21:03:05 +08:00
parent 9d81cdca36
commit b2983f062e
7 changed files with 235 additions and 28 deletions
@@ -59,6 +59,7 @@ export const openaiChatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
+      search: true,
      vision: true,
    },
    contextWindowTokens: 1_047_576,
@@ -73,11 +74,15 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 8,
    },
    releasedAt: '2025-04-14',
+    settings: {
+      searchImpl: 'params',
+    },
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
+      search: true,
      vision: true,
    },
    contextWindowTokens: 1_047_576,
@@ -93,6 +98,9 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 1.6,
    },
    releasedAt: '2025-04-14',
+    settings: {
+      searchImpl: 'params',
+    },
    type: 'chat',
  },
  {
@@ -135,6 +143,28 @@ export const openaiChatModels: AIChatModelCard[] = [
    },
    type: 'chat',
  },
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+      vision: true,
+    },
+    contextWindowTokens: 200_000,
+    description:
+      'o1 系列模型经过强化学习训练，能够在回答前进行思考，并执行复杂的推理任务。o1-pro 模型使用了更多计算资源，以进行更深入的思考，从而持续提供更优质的回答。',
+    displayName: 'o1-pro',
+    id: 'o1-pro',
+    maxOutput: 100_000,
+    pricing: {
+      input: 150,
+      output: 600,
+    },
+    releasedAt: '2025-03-19',
+    settings: {
+      extendParams: ['reasoningEffort'],
+    },
+    type: 'chat',
+  },
  {
    abilities: {
      reasoning: true,
@@ -158,6 +188,7 @@ export const openaiChatModels: AIChatModelCard[] = [
  },
  {
    abilities: {
+      functionCall: true,
      reasoning: true,
      vision: true,
    },
@@ -220,6 +251,7 @@ export const openaiChatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
+      search: true,
      vision: true,
    },
    contextWindowTokens: 128_000,
@@ -234,6 +266,9 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 0.6,
    },
    releasedAt: '2024-07-18',
+    settings: {
+      searchImpl: 'params',
+    },
    type: 'chat',
  },
  {
@@ -259,6 +294,29 @@ export const openaiChatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
+      //search: true,
+    },
+    contextWindowTokens: 128_000,
+    description: 'GPT-4o mini Audio 模型，支持音频输入输出',
+    displayName: 'GPT-4o mini Audio',
+    id: 'gpt-4o-mini-audio-preview',
+    maxOutput: 16_384,
+    pricing: {
+      input: 0.15,
+      output: 0.6,
+    },
+    releasedAt: '2024-12-17',
+    /*
+    settings: {
+      searchImpl: 'params',
+    },
+    */
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
+      search: true,
      vision: true,
    },
    contextWindowTokens: 128_000,
@@ -272,6 +330,9 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 10,
    },
    releasedAt: '2024-05-13',
+    settings: {
+      searchImpl: 'params',
+    },
    type: 'chat',
  },
  {
@@ -297,6 +358,7 @@ export const openaiChatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
+      search: true,
      vision: true,
    },
    contextWindowTokens: 128_000,
@@ -310,11 +372,15 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 10,
    },
    releasedAt: '2024-11-20',
+    settings: {
+      searchImpl: 'params',
+    },
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
+      search: true,
      vision: true,
    },
    contextWindowTokens: 128_000,
@@ -327,9 +393,16 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 15,
    },
    releasedAt: '2024-05-13',
+    settings: {
+      searchImpl: 'params',
+    },
    type: 'chat',
  },
  {
+    abilities: {
+      functionCall: true,
+      //search: true,
+    },
    contextWindowTokens: 128_000,
    description: 'GPT-4o Audio 模型，支持音频输入输出',
    displayName: 'GPT-4o Audio',
@@ -340,6 +413,11 @@ export const openaiChatModels: AIChatModelCard[] = [
      output: 10,
    },
    releasedAt: '2024-10-01',
+    /*
+    settings: {
+      searchImpl: 'params',
+    },
+    */
    type: 'chat',
  },
  {
@@ -545,6 +623,48 @@ export const openaiChatModels: AIChatModelCard[] = [
    },
    type: 'chat',
  },
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+      vision: true,
+    },
+    contextWindowTokens: 200_000,
+    description: 'codex-mini-latest 是 o4-mini 的微调版本，专门用于 Codex CLI。对于直接通过 API 使用，我们推荐从 gpt-4.1 开始。',
+    displayName: 'Codex mini',
+    id: 'codex-mini-latest',
+    maxOutput: 100_000,
+    pricing: {
+      input: 1.5,
+      output: 6,
+    },
+    releasedAt: '2025-06-01',
+    settings: {
+      extendParams: ['reasoningEffort'],
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+      vision: true,
+    },
+    contextWindowTokens: 8192,
+    description: 'computer-use-preview 模型是专为“计算机使用工具”设计的专用模型，经过训练以理解并执行计算机相关任务。',
+    displayName: 'Computer Use Preview',
+    id: 'computer-use-preview',
+    maxOutput: 1024,
+    pricing: {
+      input: 3,
+      output: 12,
+    },
+    releasedAt: '2025-03-11',
+    settings: {
+      extendParams: ['reasoningEffort'],
+    },
+    type: 'chat',
+  },
 ];

 export const openaiEmbeddingModels: AIEmbeddingModelCard[] = [
@@ -6,7 +6,32 @@ export const systemToUserModels = new Set([
 ]);

 // TODO: 临时写法，后续要重构成 model card 展示配置
-export const disableStreamModels = new Set(['o1', 'o1-2024-12-17']);
+export const disableStreamModels = new Set([
+  'o1',
+  'o1-2024-12-17',
+  'o1-pro',
+  'o1-pro-2025-03-19',
+  /*
+  The official site lists these as not supporting streaming, but in practice streaming works, so they are commented out for now
+  'o3-pro',
+  'o3-pro-2025-06-10',
+  */
+  'computer-use-preview',
+  'computer-use-preview-2025-03-11',
+]);
+
+/**
+ * Model ids that are served through the Responses API only.
+ * The OpenAI runtime's chat `handlePayload` checks this set and switches
+ * matching requests to `apiMode: 'responses'`.
+ */
+export const responsesAPIModels = new Set([
+  'o1-pro',
+  'o1-pro-2025-03-19',
+  'o3-pro',
+  'o3-pro-2025-06-10',
+  'codex-mini-latest',
+  'computer-use-preview',
+  'computer-use-preview-2025-03-11',
+]);

 /**
 * models support context caching
@@ -2,21 +2,24 @@ import { ChatStreamPayload, ModelProvider } from '../types';
 import { processMultiProviderModelList } from '../utils/modelParse';
 import { createOpenAICompatibleRuntime } from '../utils/openaiCompatibleFactory';
 import { pruneReasoningPayload } from '../utils/openaiHelpers';
+import { responsesAPIModels } from '@/const/models';

 export interface OpenAIModelCard {
  id: string;
 }

-const prunePrefixes = ['o1', 'o3', 'o4'];
+// Model id prefixes (matched with `startsWith`) that get the reasoning-model payload handling in the handlers below.
+const prunePrefixes = ['o1', 'o3', 'o4', 'codex', 'computer-use'];
+
+// Optional `search_context_size` for the web search tool; read once from the environment at module load.
+const oaiSearchContextSize = process.env.OPENAI_SEARCH_CONTEXT_SIZE; // low, medium, high

 export const LobeOpenAI = createOpenAICompatibleRuntime({
  baseURL: 'https://api.openai.com/v1',
  chatCompletion: {
    handlePayload: (payload) => {
-      const { model } = payload;
+      const { enabledSearch, model, ...rest } = payload;

-      if (model === 'o1-pro') {
-        return { ...payload, apiMode: 'responses' } as ChatStreamPayload;
+      if (responsesAPIModels.has(model) || enabledSearch) {
+        return { ...rest, apiMode: 'responses', enabledSearch, model } as ChatStreamPayload;
      }

      if (prunePrefixes.some((prefix) => model.startsWith(prefix))) {
@@ -24,11 +27,10 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
      }

      if (model.includes('-search-')) {
-        const oaiSearchContextSize = process.env.OPENAI_SEARCH_CONTEXT_SIZE; // low, medium, high
-
        return {
-          ...payload,
+          ...rest,
          frequency_penalty: undefined,
+          model,
          presence_penalty: undefined,
          stream: payload.stream ?? true,
          temperature: undefined,
@@ -41,7 +43,7 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
        } as any;
      }

-      return { ...payload, stream: payload.stream ?? true };
+      return { ...rest, model, stream: payload.stream ?? true };
    },
  },
  debug: {
@@ -57,17 +59,37 @@ export const LobeOpenAI = createOpenAICompatibleRuntime({
  },
  provider: ModelProvider.OpenAI,
  responses: {
-    handlePayload: (payload: ChatStreamPayload) => {
-      const { model } = payload;
+    handlePayload: (payload) => {
+      const { enabledSearch, model, tools, ...rest } = payload;
+
+      const openaiTools = enabledSearch
+        ? [
+            ...(tools || []),
+            {
+              type: 'web_search_preview',
+              ...(oaiSearchContextSize && {
+                search_context_size: oaiSearchContextSize,
+              }),
+            },
+          ]
+        : tools;
+
      if (prunePrefixes.some((prefix) => model.startsWith(prefix))) {
        if (!payload.reasoning) {
          payload.reasoning = { summary: 'auto' };
        } else {
          payload.reasoning.summary = 'auto';
        }
+
+        // computer-use series must set truncation as auto
+        if (model.startsWith('computer-use')) {
+          payload.truncation = 'auto';
+        }
+
+        return pruneReasoningPayload(payload) as any;
      }

-      return { ...payload, stream: payload.stream ?? true };
+      return { ...rest, model, stream: payload.stream ?? true, tools: openaiTools } as any;
    },
  },
 });
@@ -107,6 +107,7 @@ export interface ChatStreamPayload {
    effort?: string;
    summary?: string;
  };
+  reasoning_effort?: 'low' | 'medium' | 'high';
  responseMode?: 'stream' | 'json';
  /**
   * @title 是否开启流式请求
@@ -132,6 +133,7 @@ export interface ChatStreamPayload {
   * @default 1
   */
  top_p?: number;
+  truncation?: 'auto' | 'disabled';
 }

 export interface ChatMethodOptions {
@@ -209,14 +209,9 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
    }

    async chat(
-      { responseMode, apiMode, ...payload }: ChatStreamPayload,
+      { responseMode, ...payload }: ChatStreamPayload,
      options?: ChatMethodOptions,
    ) {
-      // new openai Response API
-      if (apiMode === 'responses') {
-        return this.handleResponseAPIMode(payload, options);
-      }
-
      try {
        const inputStartAt = Date.now();
        const postPayload = chatCompletion?.handlePayload
@@ -226,6 +221,11 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
              stream: payload.stream ?? true,
            } as OpenAI.ChatCompletionCreateParamsStreaming);

+        // new openai Response API
+        if ((postPayload as any).apiMode === 'responses') {
+          return this.handleResponseAPIMode(payload, options);
+        }
+
        const messages = await convertOpenAIMessages(postPayload.messages);

        let response: Stream<OpenAI.Chat.Completions.ChatCompletionChunk>;
@@ -478,11 +478,12 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
    ): Promise<Response> {
      const inputStartAt = Date.now();

-      const { messages, ...res } = responses?.handlePayload
+      const { messages, reasoning_effort, tools, ...res } = responses?.handlePayload
        ? (responses?.handlePayload(payload, this._options) as ChatStreamPayload)
        : payload;

      // remove penalty params
+      delete res.apiMode;
      delete res.frequency_penalty;
      delete res.presence_penalty;

@@ -490,9 +491,10 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an

      const postPayload = {
        ...res,
+        ...(reasoning_effort ? { reasoning: { effort: reasoning_effort } } : {}),
        input,
        store: false,
-        tools: payload.tools?.map((tool) => this.convertChatCompletionToolToResponseTool(tool)),
+        tools: tools?.map((tool) => this.convertChatCompletionToolToResponseTool(tool)),
      } as OpenAI.Responses.ResponseCreateParamsStreaming;

      if (debug?.responses?.()) {
@@ -86,11 +86,11 @@ exports[`OpenAIResponsesStream > Reasoning > summary 1`] = `
  "data: " analyzing"

 ",
-  "id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
+  "id: rs_684313b9774481908ee856625f82fb8c0b502bf083132d0d
 ",
-  "event: data
+  "event: text
 ",
-  "data: {"type":"response.output_item.done","output_index":0,"item":{"id":"rs_684313b9774481908ee856625f82fb8c0b502bf083132d0d","type":"reasoning","summary":[{"type":"summary_text","text":"**Answering a numeric comparison**\\n\\nThe user is asking in Chinese which number is larger: 9.1 or 9.92. This is straightforward since 9.92 is clearly larger, as it's greater than 9.1. We can respond with \\"9.92大于9.1\\" without needing to search for more information. It's simple comparison, but I could also add a little explanation, noting that 9.92 is indeed 0.82 more than 9.1. However, keeping it simple with \\"9.92 > 9.1\\" is perfectly fine!"}]}}
+  "data: null

 ",
  "id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
@@ -128,11 +128,11 @@ exports[`OpenAIResponsesStream > Reasoning > summary 1`] = `
  "data: {"type":"response.content_part.done","item_id":"msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d","output_index":1,"content_index":0,"part":{"type":"output_text","annotations":[],"text":"9.92 比 9.1 大。"}}

 ",
-  "id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
+  "id: msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d
 ",
-  "event: data
+  "event: text
 ",
-  "data: {"type":"response.output_item.done","output_index":1,"item":{"id":"msg_684313bee2c88190b0f4b09621ad7dc60b502bf083132d0d","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"9.92 比 9. 大。"}],"role":"assistant"}}
+  "data: null

 ",
  "id: resp_684313b89200819087f27686e0c822260b502bf083132d0d
@@ -1,7 +1,7 @@
 import OpenAI from 'openai';
 import type { Stream } from 'openai/streaming';

-import { ChatMessageError } from '@/types/message';
+import { ChatMessageError, CitationItem } from '@/types/message';

 import { AgentRuntimeErrorType } from '../../../error';
 import { convertResponseUsage } from '../../usageConverter';
@@ -20,7 +20,17 @@ import {
 import { OpenAIStreamOptions } from './openai';

 const transformOpenAIStream = (
-  chunk: OpenAI.Responses.ResponseStreamEvent,
+  chunk: OpenAI.Responses.ResponseStreamEvent | {
+    annotation: {
+      end_index: number;
+      start_index: number;
+      title: string;
+      type: 'url_citation';
+      url: string;
+    };
+    item_id: string;
+    type: 'response.output_text.annotation.added';
+  },
  streamContext: StreamContext,
 ): StreamProtocolChunk | StreamProtocolChunk[] => {
  // handle the first chunk error
@@ -42,6 +52,7 @@ const transformOpenAIStream = (
    switch (chunk.type) {
      case 'response.created': {
        streamContext.id = chunk.response.id;
+        streamContext.returnedCitationArray = [];

        return { data: chunk.response.status, id: streamContext.id, type: 'data' };
      }
@@ -106,6 +117,31 @@ const transformOpenAIStream = (
        return { data: chunk.delta, id: chunk.item_id, type: 'reasoning' };
      }

+      // A URL citation was attached to the output text: remember its title and
+      // URL in the stream context (the array is created on `response.created`).
+      // The chunk emitted for this event carries no content (`data: null`).
+      case 'response.output_text.annotation.added': {
+        const citations = chunk.annotation;
+
+        if (streamContext.returnedCitationArray) {
+          streamContext.returnedCitationArray.push({
+            title: citations.title,
+            url: citations.url,
+          } as CitationItem);
+        }
+
+        return { data: null, id: chunk.item_id, type: 'text' };
+      }
+
+      // An output item finished: if any citations have been collected, emit them
+      // as a single `grounding` chunk; otherwise emit an empty text chunk.
+      // NOTE(review): the citation array is not cleared in this case, so each later
+      // `output_item.done` re-emits everything collected so far — confirm intended.
+      case 'response.output_item.done': {
+        if (streamContext.returnedCitationArray?.length) {
+          return {
+            data: { citations: streamContext.returnedCitationArray },
+            id: chunk.item.id,
+            type: 'grounding',
+          }
+        }
+
+        return { data: null, id: chunk.item.id, type: 'text' };
+      }
+
+
      case 'response.completed': {
        if (chunk.response.usage) {
          return {