feat: add more provider support for search & crawl (#8033)

*  feat: support tavily as search provider

*  feat: support tavily as crawl provider

* 🐛 fix: fix category mapping

*  feat: support firecrawl as crawl provider

*  feat: support firecrawl as search provider

* 🔨 chore: support firecrawl baseUrl for self-host

* 🐛 fix: fix build error

*  feat: support jina as search provider

* 🐛 fix: fix build error

*  feat: support BoChaAI & Exa as search & crawl provider

* 🐛 fix: fix build error

* 🔨 chore: rename to bocha

* 🐛 fix: fix typo in bocha impl

* ♻️ refactor: add `category` & `time_range` support for Exa

---------

Co-authored-by: Arvin Xu <arvinx@foxmail.com>
This commit is contained in:
Zhijie He
2025-06-03 21:38:36 +08:00
committed by GitHub
parent 3e02c2502e
commit 23fade3cf8
15 changed files with 1118 additions and 0 deletions
+93
View File
@@ -0,0 +1,93 @@
import { CrawlImpl, CrawlSuccessResult } from '../type';
import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
/**
 * One entry of the `results` array in the Exa `/contents` response.
 * The `exa` crawler reads only `text`, `title` and `url`; the other optional
 * fields are declared but not consumed by it.
 */
interface ExaResults {
author?: string;
favicon?: string;
id?: string;
image?: string;
publishedDate?: string;
summary?: string;
text: string;
title: string;
url: string;
}
/**
 * Top-level JSON body the `exa` crawler expects back from the Exa `/contents` endpoint.
 * Only `results` is read by the crawler.
 */
interface ExaResponse {
requestId?: string;
results: ExaResults[];
}
/**
 * Crawl implementation backed by the Exa `/contents` endpoint.
 *
 * Posts the target URL to Exa (API key taken from `EXA_API_KEY`) and turns the first
 * returned result into a `CrawlSuccessResult`.
 *
 * @param url - Page to crawl.
 * @returns The crawl result, or `undefined` when Exa returns no results, when the text is
 * missing or shorter than 100 characters, or when handling the response body fails
 * (that failure is logged with `console.error`).
 * @throws NetworkConnectionError when the request rejects with the message `fetch failed`.
 * @throws PageNotFoundError when Exa answers with HTTP 404.
 * @throws Error for any other non-OK status. Any other request error (for example a
 * `TimeoutError`) is rethrown unchanged.
 */
export const exa: CrawlImpl = async (url) => {
  const apiKey = process.env.EXA_API_KEY;

  const requestInit = {
    body: JSON.stringify({
      livecrawl: 'fallback', // the original author notes 'always' as the alternative value
      text: true,
      urls: [url],
    }),
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey ? apiKey : '',
    },
    method: 'POST',
  };

  let res: Response;
  try {
    res = await withTimeout(fetch('https://api.exa.ai/contents', requestInit), DEFAULT_TIMEOUT);
  } catch (e) {
    // Translate the generic `fetch failed` rejection into the crawler's own error type.
    if ((e as Error).message === 'fetch failed') {
      throw new NetworkConnectionError();
    }

    // Timeouts, like every other failure, are passed on to the caller untouched.
    if (e instanceof TimeoutError) {
      throw e;
    }

    throw e;
  }

  if (!res.ok) {
    throw res.status === 404
      ? new PageNotFoundError(res.statusText)
      : new Error(`Exa request failed with status ${res.status}: ${res.statusText}`);
  }

  try {
    const payload = (await res.json()) as ExaResponse;
    const results = payload.results;

    if (!results || results.length === 0) {
      console.warn('Exa API returned no results for URL:', url);
      return;
    }

    const page = results[0];
    const text = page.text;

    // Treat missing or very short text as "nothing useful was crawled".
    if (!text || text.length < 100) {
      return;
    }

    return {
      content: text,
      contentType: 'text',
      length: text.length,
      siteName: new URL(url).hostname,
      title: page.title,
      url: page.url || url,
    } satisfies CrawlSuccessResult;
  } catch (error) {
    console.error(error);
  }

  return;
};
@@ -0,0 +1,97 @@
import { CrawlImpl, CrawlSuccessResult } from '../type';
import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
/**
 * Metadata object attached to a Firecrawl scrape result.
 * The `firecrawl` crawler reads only `description` and `title` from it.
 */
interface FirecrawlMetadata {
description: string;
keywords: string;
language: string;
ogDescription?: string;
ogImage?: string;
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogTitle?: string;
ogUrl?: string;
robots: string;
statusCode: number;
sourceURL: string;
title: string;
}
/**
 * The `data` payload of a Firecrawl `/scrape` response.
 * The crawler requests the `markdown` format only, so `markdown` and `metadata` are the fields it reads.
 */
interface FirecrawlResults {
html?: string;
markdown?: string;
metadata: FirecrawlMetadata;
}
/**
 * Top-level JSON body the `firecrawl` crawler expects from the `/scrape` endpoint.
 * Note that the crawler does not inspect `success`; it goes straight to `data`.
 */
interface FirecrawlResponse {
success: boolean;
data: FirecrawlResults;
}
/**
 * Crawl implementation backed by the Firecrawl `/scrape` endpoint.
 *
 * The API key is read from `FIRECRAWL_API_KEY`. The API root defaults to
 * `https://api.firecrawl.dev/v1` and can be overridden with `FIRECRAWL_URL`.
 *
 * @param url - Page to crawl.
 * @returns The crawl result built from the returned markdown, or `undefined` when the
 * markdown is missing or shorter than 100 characters, or when handling the response body
 * fails (that failure is logged with `console.error`).
 * @throws NetworkConnectionError when the request rejects with the message `fetch failed`.
 * @throws PageNotFoundError when Firecrawl answers with HTTP 404.
 * @throws Error for any other non-OK status. Any other request error (for example a
 * `TimeoutError`) is rethrown unchanged.
 */
export const firecrawl: CrawlImpl = async (url) => {
  const apiKey = process.env.FIRECRAWL_API_KEY;
  const baseUrl = process.env.FIRECRAWL_URL || 'https://api.firecrawl.dev/v1';

  const requestInit = {
    body: JSON.stringify({
      formats: ['markdown'], // the original author notes ["markdown", "html"] as an alternative
      url,
    }),
    headers: {
      'Authorization': apiKey ? `Bearer ${apiKey}` : '',
      'Content-Type': 'application/json',
    },
    method: 'POST',
  };

  let res: Response;
  try {
    res = await withTimeout(fetch(`${baseUrl}/scrape`, requestInit), DEFAULT_TIMEOUT);
  } catch (e) {
    // Translate the generic `fetch failed` rejection into the crawler's own error type.
    if ((e as Error).message === 'fetch failed') {
      throw new NetworkConnectionError();
    }

    // Timeouts, like every other failure, are passed on to the caller untouched.
    if (e instanceof TimeoutError) {
      throw e;
    }

    throw e;
  }

  if (!res.ok) {
    throw res.status === 404
      ? new PageNotFoundError(res.statusText)
      : new Error(`Firecrawl request failed with status ${res.status}: ${res.statusText}`);
  }

  try {
    const payload = (await res.json()) as FirecrawlResponse;
    const page = payload.data;
    const markdown = page.markdown;

    // Treat missing or very short markdown as "nothing useful was crawled".
    if (!markdown || markdown.length < 100) {
      return;
    }

    return {
      content: markdown,
      contentType: 'text',
      description: page.metadata.description,
      length: markdown.length,
      siteName: new URL(url).hostname,
      title: page.metadata.title,
      url,
    } satisfies CrawlSuccessResult;
  } catch (error) {
    console.error(error);
  }

  return;
};
@@ -1,13 +1,19 @@
import { browserless } from './browserless';
import { exa } from './exa';
import { firecrawl } from './firecrawl';
import { jina } from './jina';
import { naive } from './naive';
import { search1api } from './search1api';
import { tavily } from './tavily';
/**
 * Registry of the available crawl implementations.
 * Each key is the implementation's name and each value is the crawler imported under that name.
 */
export const crawlImpls = {
browserless,
exa,
firecrawl,
jina,
naive,
search1api,
tavily,
};
/** Union of the keys of `crawlImpls`, i.e. the names of the registered crawl implementations. */
export type CrawlImplType = keyof typeof crawlImpls;
@@ -0,0 +1,94 @@
import { CrawlImpl, CrawlSuccessResult } from '../type';
import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
/**
 * One successfully extracted page in the Tavily `/extract` response.
 * The `tavily` crawler reads `raw_content` and `url`.
 */
interface TavilyResults {
images?: string[];
raw_content: string;
url: string;
}
/** One entry of `failed_results`; declared for completeness, not read by the crawler. */
interface TavilyFailedResults {
error?: string;
url: string;
}
/**
 * Top-level JSON body the `tavily` crawler expects from the `/extract` endpoint.
 * Only `results` is read by the crawler.
 */
interface TavilyResponse {
base_url: string;
failed_results?: TavilyFailedResults[];
response_time: number;
results: TavilyResults[];
}
/**
 * Crawl implementation backed by the Tavily `/extract` endpoint.
 *
 * The API key is read from `TAVILY_API_KEY`; the extraction depth comes from
 * `TAVILY_EXTRACT_DEPTH` and falls back to `basic`.
 *
 * @param url - Page to crawl.
 * @returns The crawl result built from the first entry's `raw_content`, or `undefined`
 * when Tavily returns no results, when the content is missing or shorter than 100
 * characters, or when handling the response body fails (that failure is logged with
 * `console.error`). The hostname of `url` is used for both `siteName` and `title`.
 * @throws NetworkConnectionError when the request rejects with the message `fetch failed`.
 * @throws PageNotFoundError when Tavily answers with HTTP 404.
 * @throws Error for any other non-OK status. Any other request error (for example a
 * `TimeoutError`) is rethrown unchanged.
 */
export const tavily: CrawlImpl = async (url) => {
  const apiKey = process.env.TAVILY_API_KEY;

  const requestInit = {
    body: JSON.stringify({
      extract_depth: process.env.TAVILY_EXTRACT_DEPTH || 'basic', // basic or advanced
      include_images: false,
      urls: url,
    }),
    headers: {
      'Authorization': apiKey ? `Bearer ${apiKey}` : '',
      'Content-Type': 'application/json',
    },
    method: 'POST',
  };

  let res: Response;
  try {
    res = await withTimeout(fetch('https://api.tavily.com/extract', requestInit), DEFAULT_TIMEOUT);
  } catch (e) {
    // Translate the generic `fetch failed` rejection into the crawler's own error type.
    if ((e as Error).message === 'fetch failed') {
      throw new NetworkConnectionError();
    }

    // Timeouts, like every other failure, are passed on to the caller untouched.
    if (e instanceof TimeoutError) {
      throw e;
    }

    throw e;
  }

  if (!res.ok) {
    throw res.status === 404
      ? new PageNotFoundError(res.statusText)
      : new Error(`Tavily request failed with status ${res.status}: ${res.statusText}`);
  }

  try {
    const payload = (await res.json()) as TavilyResponse;
    const results = payload.results;

    if (!results || results.length === 0) {
      console.warn('Tavily API returned no results for URL:', url);
      return;
    }

    const page = results[0];
    const rawContent = page.raw_content;

    // Treat missing or very short content as "nothing useful was crawled".
    if (!rawContent || rawContent.length < 100) {
      return;
    }

    return {
      content: rawContent,
      contentType: 'text',
      length: rawContent.length,
      siteName: new URL(url).hostname,
      title: new URL(url).hostname,
      url: page.url || url,
    } satisfies CrawlSuccessResult;
  } catch (error) {
    console.error(error);
  }

  return;
};
@@ -0,0 +1,124 @@
import { TRPCError } from '@trpc/server';
import debug from 'debug';
import urlJoin from 'url-join';
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
import { SearchServiceImpl } from '../type';
import { BochaSearchParameters, BochaResponse } from './type';
const log = debug('lobe-search:Bocha');

/** Translates the uniform `searchTimeRange` values into the values sent as Bocha's `freshness`. */
const timeRangeMapping = {
  day: 'oneDay',
  month: 'oneMonth',
  week: 'oneWeek',
  year: 'oneYear',
};

/**
 * Bocha implementation of the search service.
 *
 * Posts the query to Bocha's `/web-search` endpoint and maps the returned web pages onto
 * the uniform search result shape.
 */
export class BochaImpl implements SearchServiceImpl {
  /** API key read from `BOCHA_API_KEY`; `undefined` when the variable is not set. */
  private get apiKey(): string | undefined {
    return process.env.BOCHA_API_KEY;
  }

  /** Root URL of the Bocha API. */
  private get baseUrl(): string {
    return 'https://api.bochaai.com/v1';
  }

  /**
   * Extracts the hostname of a result URL without throwing.
   *
   * `new URL` throws for strings that are not valid absolute URLs; a single malformed
   * result must not turn the whole search into a parse failure.
   *
   * @param url - URL reported by the provider, possibly empty or malformed.
   * @returns The hostname, or `''` when the URL is missing or cannot be parsed.
   */
  private parseHostname(url?: string): string {
    if (!url) return '';

    try {
      return new URL(url).hostname;
    } catch {
      log('Ignoring malformed result URL: %s', url);
      return '';
    }
  }

  /**
   * Runs a web search against Bocha.
   *
   * @param query - Search terms.
   * @param params - Uniform search parameters; only `searchTimeRange` is used, mapped to
   * Bocha's `freshness` (omitted for `anytime` or unmapped values).
   * @returns The mapped results together with the request duration in milliseconds.
   * @throws TRPCError `SERVICE_UNAVAILABLE` when the request fails or Bocha answers with a
   * non-OK status; `INTERNAL_SERVER_ERROR` when the response body cannot be processed.
   */
  async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
    log('Starting Bocha query with query: "%s", params: %o', query, params);
    const endpoint = urlJoin(this.baseUrl, '/web-search');

    const defaultQueryParams: BochaSearchParameters = {
      count: 15,
      query,
      summary: true,
    };

    const body: BochaSearchParameters = {
      ...defaultQueryParams,
      freshness:
        params?.searchTimeRange && params.searchTimeRange !== 'anytime'
          ? timeRangeMapping[params.searchTimeRange as keyof typeof timeRangeMapping]
          : undefined,
    };

    log('Constructed request body: %o', body);

    let response: Response;
    const startAt = Date.now();
    let costTime = 0;
    try {
      log('Sending request to endpoint: %s', endpoint);
      response = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers: {
          'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
          'Content-Type': 'application/json',
        },
        method: 'POST',
      });
      log('Received response with status: %d', response.status);
      costTime = Date.now() - startAt;
    } catch (error) {
      log.extend('error')('Bocha fetch error: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'SERVICE_UNAVAILABLE',
        message: 'Failed to connect to Bocha.',
      });
    }

    if (!response.ok) {
      const errorBody = await response.text();
      // Only the first 200 characters of the error body are logged.
      log.extend('error')(
        `Bocha request failed with status ${response.status}: %s`,
        errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
      );
      throw new TRPCError({
        cause: errorBody,
        code: 'SERVICE_UNAVAILABLE',
        message: `Bocha request failed: ${response.statusText}`,
      });
    }

    try {
      const bochaResponse = (await response.json()) as BochaResponse;

      log('Parsed Bocha response: %o', bochaResponse);

      const mappedResults = (bochaResponse.data.webPages.value || []).map(
        (result): UniformSearchResult => ({
          category: 'general', // Bocha results are always reported as `general`
          content: result.summary || result.snippet || '', // prefer the summary, fall back to the snippet
          engines: ['bocha'],
          parsedUrl: this.parseHostname(result.url),
          score: 1, // no score is read from Bocha; every result gets 1
          title: result.name || '',
          url: result.url,
        }),
      );

      log('Mapped %d results to SearchResult format', mappedResults.length);

      return {
        costTime,
        query: query,
        resultNumbers: mappedResults.length,
        results: mappedResults,
      };
    } catch (error) {
      log.extend('error')('Error parsing Bocha response: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse Bocha response.',
      });
    }
  }
}
@@ -0,0 +1,47 @@
/**
 * JSON request body sent to Bocha's `/web-search` endpoint.
 * `BochaImpl.query` populates `count`, `query`, `summary` and (optionally) `freshness`.
 */
export interface BochaSearchParameters {
count?: number;
exclude?: string;
freshness?: string;
include?: string;
query: string;
summary?: boolean;
}
/** `queryContext` object of the response; declared for completeness, not read by `BochaImpl`. */
interface BochaQueryContext {
originalQuery: string;
}
/**
 * A single web page entry in `webPages.value`.
 * `BochaImpl` reads `summary`, `snippet`, `url` and `name` when mapping results.
 */
interface BochaValue {
cachedPageUrl?: string;
dateLastCrawled?: string;
displayUrl?: string;
id?: string | null;
isFamilyFriendly?: boolean;
isNavigational?: boolean;
language?: string;
name: string;
siteName?: string;
snippet?: string;
summary?: string;
url: string;
}
/** The `webPages` section of the response; `value` holds the individual results. */
interface BochaWebPages {
totalEstimatedMatches?: number;
value?: BochaValue[];
webSearchUrl?: string;
}
/** The `data` envelope of the response. `images` and `videos` are left untyped (`any`). */
interface BochaData {
images?: any;
queryContext?: BochaQueryContext;
videos?: any;
webPages: BochaWebPages;
}
/** Top-level JSON body that `BochaImpl` expects from the `/web-search` endpoint. */
export interface BochaResponse {
code?: number;
data: BochaData;
log_id?: string;
msg?: string | null;
}
@@ -0,0 +1,129 @@
import { TRPCError } from '@trpc/server';
import debug from 'debug';
import urlJoin from 'url-join';
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
import { SearchServiceImpl } from '../type';
import { ExaSearchParameters, ExaResponse } from './type';
const log = debug('lobe-search:Exa');

/**
 * Exa implementation of the search service.
 *
 * Posts the query to Exa's `/search` endpoint and maps the returned results onto the
 * uniform search result shape.
 */
export class ExaImpl implements SearchServiceImpl {
  /** API key read from `EXA_API_KEY`; `undefined` when the variable is not set. */
  private get apiKey(): string | undefined {
    return process.env.EXA_API_KEY;
  }

  /** Root URL of the Exa API. */
  private get baseUrl(): string {
    return 'https://api.exa.ai';
  }

  /**
   * Extracts the hostname of a result URL without throwing.
   *
   * `new URL` throws for strings that are not valid absolute URLs; a single malformed
   * result must not turn the whole search into a parse failure.
   *
   * @param url - URL reported by the provider, possibly empty or malformed.
   * @returns The hostname, or `''` when the URL is missing or cannot be parsed.
   */
  private parseHostname(url?: string): string {
    if (!url) return '';

    try {
      return new URL(url).hostname;
    } catch {
      log('Ignoring malformed result URL: %s', url);
      return '';
    }
  }

  /**
   * Runs a search against Exa.
   *
   * @param query - Search terms.
   * @param params - Uniform search parameters. `searchTimeRange` (other than `anytime`)
   * becomes a published-date window ending now; of `searchCategories` only `news` is
   * forwarded as Exa's `category`.
   * @returns The mapped results together with the request duration in milliseconds.
   * @throws TRPCError `SERVICE_UNAVAILABLE` when the request fails or Exa answers with a
   * non-OK status; `INTERNAL_SERVER_ERROR` when the response body cannot be processed.
   */
  async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
    log('Starting Exa query with query: "%s", params: %o', query, params);
    const endpoint = urlJoin(this.baseUrl, '/search');

    const defaultQueryParams: ExaSearchParameters = {
      numResults: 15,
      query,
      type: 'auto',
    };

    const body: ExaSearchParameters = {
      ...defaultQueryParams,
      // Convert the relative time range into absolute ISO timestamps (month = 30 days, year = 365 days).
      ...(params?.searchTimeRange && params.searchTimeRange !== 'anytime'
        ? (() => {
            const now = Date.now();
            const days = { day: 1, month: 30, week: 7, year: 365 }[params.searchTimeRange!];

            if (days === undefined) return {};

            return {
              endPublishedDate: new Date(now).toISOString(),
              startPublishedDate: new Date(now - days * 86_400 * 1000).toISOString(),
            };
          })()
        : {}),
      category:
        // Exa only supports the `news` category
        params?.searchCategories?.filter((cat) => ['news'].includes(cat))?.[0],
    };

    log('Constructed request body: %o', body);

    let response: Response;
    const startAt = Date.now();
    let costTime = 0;
    try {
      log('Sending request to endpoint: %s', endpoint);
      response = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': this.apiKey ? this.apiKey : '',
        },
        method: 'POST',
      });
      log('Received response with status: %d', response.status);
      costTime = Date.now() - startAt;
    } catch (error) {
      log.extend('error')('Exa fetch error: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'SERVICE_UNAVAILABLE',
        message: 'Failed to connect to Exa.',
      });
    }

    if (!response.ok) {
      const errorBody = await response.text();
      // Only the first 200 characters of the error body are logged.
      log.extend('error')(
        `Exa request failed with status ${response.status}: %s`,
        errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
      );
      throw new TRPCError({
        cause: errorBody,
        code: 'SERVICE_UNAVAILABLE',
        message: `Exa request failed: ${response.statusText}`,
      });
    }

    try {
      const exaResponse = (await response.json()) as ExaResponse;

      log('Parsed Exa response: %o', exaResponse);

      const mappedResults = (exaResponse.results || []).map(
        (result): UniformSearchResult => ({
          category: body.category || 'general', // echo the requested category, otherwise `general`
          content: result.text || '', // empty when the result carries no text
          engines: ['exa'],
          parsedUrl: this.parseHostname(result.url),
          score: result.score || 0, // 0 when Exa reports no score
          title: result.title || '',
          url: result.url,
        }),
      );

      log('Mapped %d results to SearchResult format', mappedResults.length);

      return {
        costTime,
        query: query,
        resultNumbers: mappedResults.length,
        results: mappedResults,
      };
    } catch (error) {
      log.extend('error')('Error parsing Exa response: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse Exa response.',
      });
    }
  }
}
@@ -0,0 +1,39 @@
/**
 * JSON request body sent to Exa's `/search` endpoint.
 * `ExaImpl.query` populates `numResults`, `query`, `type`, `category` and, when a time
 * range is requested, `startPublishedDate` / `endPublishedDate` (ISO strings).
 */
export interface ExaSearchParameters {
category?: string;
endCrawlDate?: string;
endPublishedDate?: string;
excludeDomains?: string[];
excludeText?: string[];
includeDomains?: string[];
includeText?: string[];
numResults?: number;
query: string;
startCrawlDate?: string;
startPublishedDate?: string;
type?: string;
}
/** `costDollars` object of the response; declared for completeness, not read by `ExaImpl`. */
interface ExaCostDollars {
total: number;
}
/**
 * A single entry of the `results` array in the Exa `/search` response.
 * `ExaImpl` reads `text`, `url`, `score` and `title` when mapping results.
 */
interface ExaResults {
  author?: string | null;
  favicon?: string;
  id?: string;
  image?: string;
  publishedDate?: string | null;
  score?: number | null;
  // Spelled `summary`, matching the crawl-side Exa result type (was misspelled `summery`).
  summary?: string;
  text: string;
  title: string;
  url: string;
}
/**
 * Top-level JSON body that `ExaImpl` expects from the `/search` endpoint.
 * Only `results` is read when mapping to the uniform result shape.
 */
export interface ExaResponse {
costDollars?: ExaCostDollars;
requestId?: string;
resolvedSearchType?: string;
results: ExaResults[];
searchType?: string;
}
@@ -0,0 +1,128 @@
import { TRPCError } from '@trpc/server';
import debug from 'debug';
import urlJoin from 'url-join';
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
import { SearchServiceImpl } from '../type';
import { FirecrawlSearchParameters, FirecrawlResponse } from './type';
const log = debug('lobe-search:Firecrawl');

/** Translates the uniform `searchTimeRange` values into the values sent as Firecrawl's `tbs`. */
const timeRangeMapping = {
  day: 'qdr:d',
  month: 'qdr:m',
  week: 'qdr:w',
  year: 'qdr:y',
};

/**
 * Firecrawl implementation of the search service.
 *
 * Posts the query to Firecrawl's `/search` endpoint and maps the returned entries onto
 * the uniform search result shape.
 */
export class FirecrawlImpl implements SearchServiceImpl {
  /** API key read from `FIRECRAWL_API_KEY`; `undefined` when the variable is not set. */
  private get apiKey(): string | undefined {
    return process.env.FIRECRAWL_API_KEY;
  }

  /** API root; `FIRECRAWL_URL` overrides the hosted default (e.g. for self-hosted instances). */
  private get baseUrl(): string {
    return process.env.FIRECRAWL_URL || 'https://api.firecrawl.dev/v1';
  }

  /**
   * Extracts the hostname of a result URL without throwing.
   *
   * `new URL` throws for strings that are not valid absolute URLs; a single malformed
   * result must not turn the whole search into a parse failure.
   *
   * @param url - URL reported by the provider, possibly empty or malformed.
   * @returns The hostname, or `''` when the URL is missing or cannot be parsed.
   */
  private parseHostname(url?: string): string {
    if (!url) return '';

    try {
      return new URL(url).hostname;
    } catch {
      log('Ignoring malformed result URL: %s', url);
      return '';
    }
  }

  /**
   * Runs a search against Firecrawl.
   *
   * @param query - Search terms.
   * @param params - Uniform search parameters; only `searchTimeRange` is used, mapped to
   * Firecrawl's `tbs` (omitted for `anytime` or unmapped values).
   * @returns The mapped results together with the request duration in milliseconds.
   * @throws TRPCError `SERVICE_UNAVAILABLE` when the request fails or Firecrawl answers
   * with a non-OK status; `INTERNAL_SERVER_ERROR` when the response body cannot be processed.
   */
  async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
    log('Starting Firecrawl query with query: "%s", params: %o', query, params);
    const endpoint = urlJoin(this.baseUrl, '/search');

    const defaultQueryParams: FirecrawlSearchParameters = {
      limit: 15,
      query,
      /*
      scrapeOptions: {
        formats: ["markdown"]
      },
      */
    };

    const body: FirecrawlSearchParameters = {
      ...defaultQueryParams,
      tbs:
        params?.searchTimeRange && params.searchTimeRange !== 'anytime'
          ? timeRangeMapping[params.searchTimeRange as keyof typeof timeRangeMapping]
          : undefined,
    };

    log('Constructed request body: %o', body);

    let response: Response;
    const startAt = Date.now();
    let costTime = 0;
    try {
      log('Sending request to endpoint: %s', endpoint);
      response = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers: {
          'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
          'Content-Type': 'application/json',
        },
        method: 'POST',
      });
      log('Received response with status: %d', response.status);
      costTime = Date.now() - startAt;
    } catch (error) {
      log.extend('error')('Firecrawl fetch error: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'SERVICE_UNAVAILABLE',
        message: 'Failed to connect to Firecrawl.',
      });
    }

    if (!response.ok) {
      const errorBody = await response.text();
      // Only the first 200 characters of the error body are logged.
      log.extend('error')(
        `Firecrawl request failed with status ${response.status}: %s`,
        errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
      );
      throw new TRPCError({
        cause: errorBody,
        code: 'SERVICE_UNAVAILABLE',
        message: `Firecrawl request failed: ${response.statusText}`,
      });
    }

    try {
      const firecrawlResponse = (await response.json()) as FirecrawlResponse;

      log('Parsed Firecrawl response: %o', firecrawlResponse);

      const mappedResults = (firecrawlResponse.data || []).map(
        (result): UniformSearchResult => ({
          category: 'general', // Firecrawl results are always reported as `general`
          content: result.description || '', // the description serves as the content
          engines: ['firecrawl'],
          parsedUrl: this.parseHostname(result.url),
          score: 1, // no score is read from Firecrawl; every result gets 1
          title: result.title || '',
          url: result.url,
        }),
      );

      log('Mapped %d results to SearchResult format', mappedResults.length);

      return {
        costTime,
        query: query,
        resultNumbers: mappedResults.length,
        results: mappedResults,
      };
    } catch (error) {
      log.extend('error')('Error parsing Firecrawl response: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse Firecrawl response.',
      });
    }
  }
}
@@ -0,0 +1,35 @@
/** Optional scrape settings for a search request; not populated by `FirecrawlImpl`. */
interface FirecrawlScrapeOptions {
formats: string[];
}
/**
 * JSON request body sent to Firecrawl's `/search` endpoint.
 * `FirecrawlImpl.query` populates `limit`, `query` and (optionally) `tbs`.
 */
export interface FirecrawlSearchParameters {
country?: string;
lang?: string;
limit?: number;
query: string;
scrapeOptions?: FirecrawlScrapeOptions;
tbs?: string;
timeout?: number;
}
/** Metadata optionally attached to a search entry; not read by `FirecrawlImpl`. */
interface FirecrawlMetadata {
description?: string;
sourceURL?: string;
statusCode?: number;
title: string;
}
/**
 * A single entry of the `data` array in the search response.
 * `FirecrawlImpl` reads `description`, `url` and `title` when mapping results.
 */
interface FirecrawlData {
description?: string;
html?: string;
links?: string[];
markdown?: string;
metadata?: FirecrawlMetadata;
title?: string;
url: string;
}
/**
 * Top-level JSON body that `FirecrawlImpl` expects from the `/search` endpoint.
 * Only `data` is read; `success` is not inspected.
 */
export interface FirecrawlResponse {
data: FirecrawlData[];
success?: boolean;
}
+31
View File
@@ -1,13 +1,24 @@
import { BochaImpl } from './bocha';
import { ExaImpl } from './exa';
import { FirecrawlImpl } from './firecrawl';
import { JinaImpl } from './jina';
import { Search1APIImpl } from './search1api';
import { SearXNGImpl } from './searxng';
import { TavilyImpl } from './tavily';
import { SearchServiceImpl } from './type';
/**
 * Available search service implementations.
 * Each member's string value is the lowercase identifier of the provider.
 */
export enum SearchImplType {
Bocha = 'bocha',
Exa = 'exa',
Firecrawl = 'firecrawl',
Jina = 'jina',
SearXNG = 'searxng',
Search1API = 'search1api',
Tavily = 'tavily',
}
/**
@@ -17,10 +28,30 @@ export const createSearchServiceImpl = (
type: SearchImplType = SearchImplType.SearXNG,
): SearchServiceImpl => {
switch (type) {
case SearchImplType.Bocha: {
return new BochaImpl();
}
case SearchImplType.Exa: {
return new ExaImpl();
}
case SearchImplType.Firecrawl: {
return new FirecrawlImpl();
}
case SearchImplType.Jina: {
return new JinaImpl();
}
case SearchImplType.SearXNG: {
return new SearXNGImpl();
}
case SearchImplType.Tavily: {
return new TavilyImpl();
}
default: {
return new Search1APIImpl();
}
@@ -0,0 +1,109 @@
import { TRPCError } from '@trpc/server';
import debug from 'debug';
import urlJoin from 'url-join';
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
import { SearchServiceImpl } from '../type';
import { JinaSearchParameters, JinaResponse } from './type';
const log = debug('lobe-search:Jina');

/**
 * Jina implementation of the search service.
 *
 * Posts the query to `https://s.jina.ai/` and maps the returned entries onto the uniform
 * search result shape.
 */
export class JinaImpl implements SearchServiceImpl {
  /** API key read from `JINA_READER_API_KEY`, falling back to `JINA_API_KEY`. */
  private get apiKey(): string | undefined {
    return process.env.JINA_READER_API_KEY || process.env.JINA_API_KEY;
  }

  /** Root URL of the Jina search API. */
  private get baseUrl(): string {
    return 'https://s.jina.ai';
  }

  /**
   * Extracts the hostname of a result URL without throwing.
   *
   * `new URL` throws for strings that are not valid absolute URLs; a single malformed
   * result must not turn the whole search into a parse failure.
   *
   * @param url - URL reported by the provider, possibly empty or malformed.
   * @returns The hostname, or `''` when the URL is missing or cannot be parsed.
   */
  private parseHostname(url?: string): string {
    if (!url) return '';

    try {
      return new URL(url).hostname;
    } catch {
      log('Ignoring malformed result URL: %s', url);
      return '';
    }
  }

  /**
   * Runs a search against Jina.
   *
   * @param query - Search terms, sent as `q`.
   * @param params - Uniform search parameters; they are logged but not forwarded to Jina.
   * @returns The mapped results together with the request duration in milliseconds.
   * @throws TRPCError `SERVICE_UNAVAILABLE` when the request fails or Jina answers with a
   * non-OK status; `INTERNAL_SERVER_ERROR` when the response body cannot be processed.
   */
  async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
    log('Starting Jina query with query: "%s", params: %o', query, params);
    const endpoint = urlJoin(this.baseUrl, '/');

    const body: JinaSearchParameters = {
      q: query,
    };

    log('Constructed request body: %o', body);

    let response: Response;
    const startAt = Date.now();
    let costTime = 0;
    try {
      log('Sending request to endpoint: %s', endpoint);
      response = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers: {
          'Accept': 'application/json',
          'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
          'Content-Type': 'application/json',
          'X-Respond-With': 'no-content',
        },
        method: 'POST',
      });
      log('Received response with status: %d', response.status);
      costTime = Date.now() - startAt;
    } catch (error) {
      log.extend('error')('Jina fetch error: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'SERVICE_UNAVAILABLE',
        message: 'Failed to connect to Jina.',
      });
    }

    if (!response.ok) {
      const errorBody = await response.text();
      // Only the first 200 characters of the error body are logged.
      log.extend('error')(
        `Jina request failed with status ${response.status}: %s`,
        errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
      );
      throw new TRPCError({
        cause: errorBody,
        code: 'SERVICE_UNAVAILABLE',
        message: `Jina request failed: ${response.statusText}`,
      });
    }

    try {
      const jinaResponse = (await response.json()) as JinaResponse;

      log('Parsed Jina response: %o', jinaResponse);

      const mappedResults = (jinaResponse.data || []).map(
        (result): UniformSearchResult => ({
          category: 'general', // Jina results are always reported as `general`
          content: result.description || '', // the description serves as the content
          engines: ['jina'],
          parsedUrl: this.parseHostname(result.url),
          score: 1, // no score is read from Jina; every result gets 1
          title: result.title || '',
          url: result.url,
        }),
      );

      log('Mapped %d results to SearchResult format', mappedResults.length);

      return {
        costTime,
        query: query,
        resultNumbers: mappedResults.length,
        results: mappedResults,
      };
    } catch (error) {
      log.extend('error')('Error parsing Jina response: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse Jina response.',
      });
    }
  }
}
@@ -0,0 +1,26 @@
/** JSON request body sent by `JinaImpl.query`; `q` carries the search terms. */
export interface JinaSearchParameters {
q: string;
}
/** Token usage counter; declared for completeness, not read by `JinaImpl`. */
interface JinaUsage {
tokens: number;
}
/** `meta` object of the response; not read by `JinaImpl`. */
interface JinaMeta {
usage: JinaUsage;
}
/**
 * A single entry of the `data` array in the response.
 * `JinaImpl` reads `description`, `url` and `title` when mapping results.
 */
interface JinaData {
content?: string;
description?: string;
title: string;
url: string;
usage?: JinaUsage;
}
/**
 * Top-level JSON body that `JinaImpl` expects from the search endpoint.
 * Only `data` is read when mapping to the uniform result shape.
 */
export interface JinaResponse {
code?: number;
data: JinaData[];
meta?: JinaMeta;
status?: number;
}
@@ -0,0 +1,124 @@
import { TRPCError } from '@trpc/server';
import debug from 'debug';
import urlJoin from 'url-join';
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
import { SearchServiceImpl } from '../type';
import { TavilySearchParameters, TavilyResponse } from './type';
const log = debug('lobe-search:Tavily');

/**
 * Tavily implementation of the search service.
 *
 * Posts the query to Tavily's `/search` endpoint and maps the returned results onto the
 * uniform search result shape.
 */
export class TavilyImpl implements SearchServiceImpl {
  /** API key read from `TAVILY_API_KEY`; `undefined` when the variable is not set. */
  private get apiKey(): string | undefined {
    return process.env.TAVILY_API_KEY;
  }

  /** Root URL of the Tavily API. */
  private get baseUrl(): string {
    return 'https://api.tavily.com';
  }

  /**
   * Extracts the hostname of a result URL without throwing.
   *
   * `new URL` throws for strings that are not valid absolute URLs; a single malformed
   * result must not turn the whole search into a parse failure.
   *
   * @param url - URL reported by the provider, possibly empty or malformed.
   * @returns The hostname, or `''` when the URL is missing or cannot be parsed.
   */
  private parseHostname(url?: string): string {
    if (!url) return '';

    try {
      return new URL(url).hostname;
    } catch {
      log('Ignoring malformed result URL: %s', url);
      return '';
    }
  }

  /**
   * Runs a search against Tavily.
   *
   * @param query - Search terms.
   * @param params - Uniform search parameters. `searchTimeRange` (other than `anytime`) is
   * forwarded as `time_range`; the first of `searchCategories` that is `news` or `general`
   * is forwarded as `topic`.
   * @returns The mapped results together with the request duration in milliseconds.
   * @throws TRPCError `SERVICE_UNAVAILABLE` when the request fails or Tavily answers with a
   * non-OK status; `INTERNAL_SERVER_ERROR` when the response body cannot be processed.
   */
  async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
    log('Starting Tavily query with query: "%s", params: %o', query, params);
    const endpoint = urlJoin(this.baseUrl, '/search');

    const defaultQueryParams: TavilySearchParameters = {
      include_answer: false,
      include_image_descriptions: true,
      include_images: false,
      include_raw_content: false,
      max_results: 15,
      query,
      search_depth: process.env.TAVILY_SEARCH_DEPTH || 'basic', // basic or advanced
    };

    const body: TavilySearchParameters = {
      ...defaultQueryParams,
      time_range:
        params?.searchTimeRange && params.searchTimeRange !== 'anytime'
          ? params.searchTimeRange
          : undefined,
      topic:
        // Tavily only supports the `news` and `general` topics
        params?.searchCategories?.filter((cat) => ['news', 'general'].includes(cat))?.[0],
    };

    log('Constructed request body: %o', body);

    let response: Response;
    const startAt = Date.now();
    let costTime = 0;
    try {
      log('Sending request to endpoint: %s', endpoint);
      response = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers: {
          'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
          'Content-Type': 'application/json',
        },
        method: 'POST',
      });
      log('Received response with status: %d', response.status);
      costTime = Date.now() - startAt;
    } catch (error) {
      log.extend('error')('Tavily fetch error: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'SERVICE_UNAVAILABLE',
        message: 'Failed to connect to Tavily.',
      });
    }

    if (!response.ok) {
      const errorBody = await response.text();
      // Only the first 200 characters of the error body are logged.
      log.extend('error')(
        `Tavily request failed with status ${response.status}: %s`,
        errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
      );
      throw new TRPCError({
        cause: errorBody,
        code: 'SERVICE_UNAVAILABLE',
        message: `Tavily request failed: ${response.statusText}`,
      });
    }

    try {
      const tavilyResponse = (await response.json()) as TavilyResponse;

      log('Parsed Tavily response: %o', tavilyResponse);

      const mappedResults = (tavilyResponse.results || []).map(
        (result): UniformSearchResult => ({
          category: body.topic || 'general', // echo the requested topic, otherwise `general`
          content: result.content || '', // empty when the result carries no content
          engines: ['tavily'],
          parsedUrl: this.parseHostname(result.url),
          score: result.score || 0, // 0 when Tavily reports no score
          title: result.title || '',
          url: result.url,
        }),
      );

      log('Mapped %d results to SearchResult format', mappedResults.length);

      return {
        costTime,
        query: query,
        resultNumbers: mappedResults.length,
        results: mappedResults,
      };
    } catch (error) {
      log.extend('error')('Error parsing Tavily response: %o', error);
      throw new TRPCError({
        cause: error,
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse Tavily response.',
      });
    }
  }
}
@@ -0,0 +1,36 @@
/**
 * JSON request body sent to Tavily's `/search` endpoint.
 * `TavilyImpl.query` populates `include_answer`, `include_image_descriptions`,
 * `include_images`, `include_raw_content`, `max_results`, `query`, `search_depth`,
 * `time_range` and `topic`; the remaining fields are declared but left unset.
 */
export interface TavilySearchParameters {
chunks_per_source?: number;
days?: number;
exclude_domains?: string[];
include_answer?: boolean | string;
include_domains?: string[];
include_image_descriptions?: boolean;
include_images?: boolean;
include_raw_content?: boolean;
max_results?: number;
query: string;
search_depth?: string;
time_range?: string;
topic?: string;
}
/** An entry of the response's `images` array; not read by `TavilyImpl`. */
interface TavilyImages {
description?: string;
url: string;
}
/**
 * A single entry of the `results` array in the response.
 * `TavilyImpl` reads `content`, `url`, `score` and `title` when mapping results.
 */
interface TavilyResults {
content?: string;
raw_content?: string | null;
score?: number;
title?: string;
url: string;
}
/**
 * Top-level JSON body that `TavilyImpl` expects from the `/search` endpoint.
 * Only `results` is read when mapping to the uniform result shape.
 */
export interface TavilyResponse {
answer?: string;
images?: TavilyImages[];
query: string;
response_time: number;
results: TavilyResults[];
}