mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-14 03:30:19 +00:00
✨ feat(model-runtime): split ProviderBizError into finer codes + reclassify catch-all at write time (#15286)
* ✨ feat(model-runtime): split ProviderBizError into finer codes + reclassify catch-all at write time Add UpstreamGatewayError (E8010), UpstreamMalformedResponse (E8011), and UpstreamHttpError (E8012), migrating the matching patterns out of the ProviderBizError catch-all. Add a refineErrorCode() step (message-pattern match + HTTP-status fallback) wired into formatErrorForState so generic ProviderBizError is reclassified into the correct existing code (rate-limit / quota / network / service-unavailable / model-not-found) instead of collapsing into one opaque 8xxx bucket. Production sampling showed ~72% of ProviderBizError actually belongs to existing codes and only ~5% is a true residual. * ✨ feat(model-runtime): add isFallback flag to mark catch-all error buckets Add an `isFallback` boolean to ErrorCodeSpec / ChatMessageError, set on the catch-all codes (ProviderBizError, UpstreamHttpError, AgentRuntimeError, DatabasePersistError). It flows onto agent_operations.error via the write-path enrichment so monitoring can track how much volume still lands in fallback buckets — the signal for where finer codes are still worth carving out. * ✅ test(model-runtime): add refineErrorCode to @lobechat/model-runtime mocks formatErrorForState now imports refineErrorCode, so the partial module mocks in AgentRuntimeService / RuntimeExecutors must expose it or vitest throws on access. * ✅ test(model-runtime): bump UpstreamGatewayError numericId to 8011 after canary 8010 collision canary claimed 8010 for ProviderContentPolicyViolation, so the Upstream* codes shifted to 8011/8012/8013 during rebase; update the refinement test assertion.
This commit is contained in:
@@ -32,5 +32,8 @@
|
||||
"QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.",
|
||||
"RateLimitExceeded": "Sorry, the token usage or request count has reached the rate limit for this key. Please try again later or increase the key's quota.",
|
||||
"StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.",
|
||||
"UpstreamGatewayError": "The upstream gateway or proxy returned an error. Please try again shortly; if it persists, check your proxy / endpoint configuration.",
|
||||
"UpstreamHttpError": "The provider returned an HTTP error without further detail. Please try again, or check your request and model configuration.",
|
||||
"UpstreamMalformedResponse": "The provider returned a malformed response that could not be parsed. Please retry; if it persists, try a different model or provider.",
|
||||
"UserConfigError": "Provider configuration is invalid (incorrect base URL, missing environment variable, virtual-key restriction, etc.). Please review the provider settings."
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
export { ErrorClassifier, type ErrorClassifierType } from './classifier';
|
||||
export { isUserSideError, matchErrorPattern, type MatchInput, type MatchResult } from './match';
|
||||
export { ERROR_PATTERNS, type ErrorPattern } from './patterns';
|
||||
export { refineErrorCode, type RefineErrorInput } from './refine';
|
||||
export {
|
||||
type CloudErrorCode,
|
||||
ERROR_CODE_SPECS,
|
||||
|
||||
@@ -218,6 +218,11 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
match: sub('reached your session usage limit, upgrade for higher limits'),
|
||||
note: 'Ollama cloud per-session cap',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.InsufficientQuota,
|
||||
match: sub('Weekly usage limit reached'),
|
||||
note: 'opencodecodingplan rolling weekly plan cap (resets in N days — not retryable)',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.InsufficientQuota,
|
||||
match: sub('This model is not available on your current plan'),
|
||||
@@ -501,6 +506,13 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
{ code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('request to https://') },
|
||||
{ code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('self-signed certificate') },
|
||||
{ code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('Network connection lost') },
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderNetworkError,
|
||||
// OpenAI/Anthropic SDK APIConnectionError wrapper — the underlying
|
||||
// ECONNREFUSED / socket failure is buried in the nested cause, only the
|
||||
// generic "Connection error." surfaces on the top-level message.
|
||||
match: sub('Connection error.'),
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// StateStorePersistError — Redis / Upstash agent-state store (NOT the LLM
|
||||
@@ -906,7 +918,69 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// ProviderBizError — generic upstream wrappers that don't fit elsewhere
|
||||
// UpstreamGatewayError — proxy / gateway-layer failure (openresty, litellm,
|
||||
// HTML error bodies, Cloudflare 525). Distinct from the provider's own
|
||||
// service; usually transient. Split out of the ProviderBizError catch-all.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamGatewayError,
|
||||
match: sub('<center>openresty</center>'),
|
||||
note: 'user-configured proxy returning HTML',
|
||||
},
|
||||
{ code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('litellm.') },
|
||||
{ code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('403 <!DOCTYPE html>') },
|
||||
{ code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('404 <!DOCTYPE html>') },
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamGatewayError,
|
||||
match: sub('525 <!DOCTYPE html>'),
|
||||
note: 'Cloudflare 525 SSL handshake',
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// UpstreamMalformedResponse — provider returned a malformed / unparseable
|
||||
// payload (Go re-marshal failure, bad tool-call JSON, upstream Python
|
||||
// TypeError). Not retryable. Split out of ProviderBizError.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
|
||||
match: sub('failed to marshal request body to JSON'),
|
||||
note: 'upstream Go gateway re-marshal failure on non-UTF-8 / lone-surrogate bytes',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
|
||||
match: sub('lone leading surrogate'),
|
||||
note: 'invalid conversation JSON: lone surrogate in tool-call output',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
|
||||
match: sub("Internal server error: unhashable type: '"),
|
||||
note: 'nvidia / nvidia_custom upstream Python TypeError',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
|
||||
match: sub('Failed to parse fc related info to json format'),
|
||||
note: 'internlm tool-call parser failure',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
|
||||
match: sub('codewhisperer#ValidationException'),
|
||||
note: 'kiro / AWS CodeWhisperer proxy malformed payload',
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// UpstreamHttpError — bare upstream HTTP error with no further context.
|
||||
// Split out of ProviderBizError. (400 / 422 here are candidates for a future
|
||||
// `request`-category split; tracked separately.)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('400 status code') },
|
||||
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('403 status code') },
|
||||
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('404 status code') },
|
||||
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('413 Request Entity Too Large') },
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// ProviderBizError — generic upstream wrappers that don't fit elsewhere. The
|
||||
// final provider catch-all; `refineErrorCode` + the HTTP-status fallback try
|
||||
// to reclassify these into a more specific code before this bucket is kept.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('Upstream request failed') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('Provider returned error') },
|
||||
@@ -915,52 +989,14 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('convert_request_failed') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('failed to parse request') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('upstream error: do request failed') },
|
||||
// Upstream Go gateway re-marshal failure — non-UTF-8 / lone-surrogate bytes
|
||||
// in model tool-call output.
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub('failed to marshal request body to JSON'),
|
||||
},
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('Internal Server Error (ref:') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('400 status code') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('403 status code') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('404 status code') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('413 Request Entity Too Large') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('403 <!DOCTYPE html>') },
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub('525 <!DOCTYPE html>'),
|
||||
note: 'Cloudflare 525 SSL handshake',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub('<center>openresty</center>'),
|
||||
note: 'user-configured proxy returning HTML',
|
||||
},
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('litellm.') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('410 status code (no body)') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('402 status code') },
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('404 <!DOCTYPE html>') },
|
||||
// Nvidia / nvidia_custom upstream Python crash — "unhashable type" TypeError.
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub("Internal server error: unhashable type: '"),
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub('[upstream:/v1/messages] Upstream returned HTTP'),
|
||||
},
|
||||
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('上游请求参数无效') },
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub('Failed to parse fc related info to json format'),
|
||||
note: 'internlm tool-call parser failure',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.ProviderBizError,
|
||||
match: sub('codewhisperer#ValidationException'),
|
||||
note: 'kiro / AWS CodeWhisperer proxy malformed payload',
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// ContextEnginePipelineError — a context-engine pipeline processor crashed.
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
import { AgentRuntimeErrorType } from '@lobechat/types';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { refineErrorCode } from './refine';
|
||||
|
||||
// Unit tests for `refineErrorCode`: only the `ProviderBizError` catch-all is
// reclassified, first by message pattern, then by HTTP status.
describe('refineErrorCode', () => {
  it('does not touch a specific (non-refinable) errorType', () => {
    expect(
      refineErrorCode({
        errorType: AgentRuntimeErrorType.InvalidProviderAPIKey,
        message: '429 status code (no body)',
      }),
    ).toBeUndefined();
  });

  describe('message-pattern pass', () => {
    it('reclassifies a rate-limit message into RateLimitExceeded', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: '429 status code (no body)',
        }),
      ).toBe(AgentRuntimeErrorType.RateLimitExceeded);
    });

    it('reclassifies a 503 service message into ProviderServiceUnavailable', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: '503 Service temporarily unavailable',
        }),
      ).toBe(AgentRuntimeErrorType.ProviderServiceUnavailable);
    });

    it('reclassifies the SDK "Connection error." wrapper into ProviderNetworkError', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: 'Connection error.',
        }),
      ).toBe(AgentRuntimeErrorType.ProviderNetworkError);
    });

    it('routes a rolling weekly cap to InsufficientQuota (not the 429 rate-limit fallback)', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: '429 Weekly usage limit reached. Resets in 2 days. To continue using this…',
        }),
      ).toBe(AgentRuntimeErrorType.InsufficientQuota);
    });

    it('routes gateway HTML / openresty to UpstreamGatewayError', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: '<center>openresty</center>',
        }),
      ).toBe(AgentRuntimeErrorType.UpstreamGatewayError);
    });

    it('routes a marshal failure to UpstreamMalformedResponse', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: 'failed to marshal request body to JSON',
        }),
      ).toBe(AgentRuntimeErrorType.UpstreamMalformedResponse);
    });

    it('routes a bare "400 status code" to UpstreamHttpError', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: '400 status code (no body)',
        }),
      ).toBe(AgentRuntimeErrorType.UpstreamHttpError);
    });
  });

  describe('HTTP-status fallback (no message match)', () => {
    it('uses the structured status when the message carries no pattern', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          httpStatus: 402,
          message: 'some opaque upstream text',
        }),
      ).toBe(AgentRuntimeErrorType.InsufficientQuota);
    });

    it('falls back to the leading status in the message when no structured status', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          message: '500 upstream blew up in a way we have never seen',
        }),
      ).toBe(AgentRuntimeErrorType.ProviderServiceUnavailable);
    });

    it('buckets other 4xx with no context into UpstreamHttpError', () => {
      expect(
        refineErrorCode({
          errorType: AgentRuntimeErrorType.ProviderBizError,
          httpStatus: 409,
          message: 'conflict, no details',
        }),
      ).toBe(AgentRuntimeErrorType.UpstreamHttpError);
    });
  });

  it('keeps a genuine ProviderBizError residual unrefined', () => {
    expect(
      refineErrorCode({
        errorType: AgentRuntimeErrorType.ProviderBizError,
        message: 'Upstream request failed',
      }),
    ).toBeUndefined();
  });
});
|
||||
@@ -0,0 +1,76 @@
|
||||
import { AgentRuntimeErrorType, type ILobeAgentRuntimeErrorType } from '@lobechat/types';
|
||||
|
||||
import { matchErrorPattern } from './match';
|
||||
|
||||
/**
 * Error codes that are generic enough to be worth re-deriving from the upstream
 * message / HTTP status. Specific codes assigned by a provider adapter are left
 * untouched — only the `ProviderBizError` catch-all, which absorbs any non-OK
 * upstream response the adapter couldn't name, is refined.
 *
 * Declared as `Set<string>` so it can be probed with an arbitrary `errorType`
 * string without a cast.
 */
const REFINABLE_CODES = new Set<string>([AgentRuntimeErrorType.ProviderBizError]);
|
||||
|
||||
/**
 * Last-resort mapping from a bare HTTP status to an error code, used only when
 * the message carried no recognizable pattern. Intentionally coarse: the rich
 * cases (quota keywords, moderation, model-not-found, …) are already handled by
 * `matchErrorPattern`, so this just buckets the context-less remainder by
 * status class.
 *
 * NOTE(review): the spec table assigns non-standard 470 / 471 statuses to
 * runtime-side codes; if such a value were passed here it would land in the
 * 4xx bucket below — confirm callers only supply upstream statuses.
 *
 * @param status - HTTP status to classify; `undefined`, `0` and `NaN` are
 *   treated as "no status".
 * @returns The code for the status class, or `undefined` for anything outside
 *   the 4xx / 5xx ranges.
 */
const codeFromHttpStatus = (status: number | undefined): ILobeAgentRuntimeErrorType | undefined => {
  if (!status) return undefined;

  // 429 / 402 have unambiguous semantics worth special-casing.
  if (status === 429) return AgentRuntimeErrorType.RateLimitExceeded;
  if (status === 402) return AgentRuntimeErrorType.InsufficientQuota;

  if (status >= 500 && status <= 599) return AgentRuntimeErrorType.ProviderServiceUnavailable;

  // Any other client error with no usable message → the bare-HTTP bucket.
  if (status >= 400 && status <= 499) return AgentRuntimeErrorType.UpstreamHttpError;

  return undefined;
};
|
||||
|
||||
/**
 * Runtime error messages are conventionally prefixed with the upstream HTTP
 * status (e.g. `"429 status code (no body)"`, `"503 Service temporarily
 * unavailable"`). Pull that leading status out as a fallback when the
 * structured status isn't available on the error object.
 *
 * @param message - Raw error message; may be `undefined` or empty.
 * @returns The leading 4xx / 5xx status as a number, or `undefined` when the
 *   message does not start with one.
 */
const leadingStatusFromMessage = (message: string | undefined): number | undefined => {
  if (!message) return undefined;

  // Optional leading whitespace, then exactly three digits starting with 4 or 5.
  // The trailing `\b` rejects longer numbers such as "4000".
  const match = /^\s*([45]\d{2})\b/.exec(message);

  return match ? Number(match[1]) : undefined;
};
|
||||
|
||||
/** Input accepted by `refineErrorCode`; every field is optional. */
export interface RefineErrorInput {
  /** The errorType the adapter assigned (only `ProviderBizError` is refined). */
  errorType?: string;
  /** Structured HTTP status from the upstream response, if known. */
  httpStatus?: number;
  /** Upstream error message; pattern-matched and scanned for a leading HTTP status. */
  message?: string;
  /** Provider identifier, forwarded to `matchErrorPattern`. */
  provider?: string;
}
|
||||
|
||||
/**
 * Reclassify a generic provider catch-all (`ProviderBizError`) into a more
 * specific code using the upstream message and HTTP status.
 *
 * Priority:
 *   1. `matchErrorPattern` over the message — most specific, covers the rich
 *      cases plus the migrated `Upstream*` patterns.
 *   2. Structured HTTP status, when it maps to a code.
 *   3. Leading HTTP status parsed from the message. This is also tried when a
 *      structured status is present but unusable (e.g. `0`, a 2xx / 3xx value),
 *      so a junk status cannot mask the status embedded in the message.
 *
 * @param input - The adapter-assigned errorType plus whatever upstream context
 *   is available.
 * @returns The refined code, or `undefined` when the errorType is not
 *   refinable or no better classification is found (caller keeps the original
 *   errorType).
 */
export const refineErrorCode = (
  input: RefineErrorInput,
): ILobeAgentRuntimeErrorType | undefined => {
  const { errorType, httpStatus, message, provider } = input;
  if (!errorType || !REFINABLE_CODES.has(errorType)) return undefined;

  // A pattern hit that merely re-confirms the catch-all is not a refinement;
  // keep going so the status fallback still gets a chance.
  const matched = matchErrorPattern({ errorType, message, provider });
  if (matched && matched.code !== errorType) return matched.code;

  const byStatus =
    codeFromHttpStatus(httpStatus) ?? codeFromHttpStatus(leadingStatusFromMessage(message));
  if (byStatus && byStatus !== errorType) return byStatus;

  return undefined;
};
|
||||
@@ -29,6 +29,14 @@ export interface ErrorCodeSpec {
|
||||
/** HTTP status code returned to the client. */
|
||||
httpStatus: number;
|
||||
|
||||
/**
|
||||
* Marks a catch-all / under-classified bucket (ProviderBizError,
|
||||
* UpstreamHttpError, AgentRuntimeError, DatabasePersistError, …). Orthogonal
|
||||
* to `category`: monitoring tracks total fallback volume to decide where
|
||||
* finer codes are still worth carving out. Omitted (falsy) for terminal codes.
|
||||
*/
|
||||
isFallback?: boolean;
|
||||
|
||||
/**
|
||||
* Stable numeric identifier surfaced as `E<numericId>` (e.g. `E1001`).
|
||||
*
|
||||
@@ -384,6 +392,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
|
||||
httpStatus: 500,
|
||||
retryable: false,
|
||||
countAsFailure: true,
|
||||
isFallback: true,
|
||||
description: 'Persistence-layer query / transaction failed (Drizzle "Failed query: …").',
|
||||
},
|
||||
[AgentRuntimeErrorType.StateStorePersistError]: {
|
||||
@@ -420,6 +429,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
|
||||
httpStatus: 470,
|
||||
retryable: false,
|
||||
countAsFailure: true,
|
||||
isFallback: true,
|
||||
description: 'Generic Agent Runtime module error.',
|
||||
},
|
||||
[AgentRuntimeErrorType.ProviderBizError]: {
|
||||
@@ -431,6 +441,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
|
||||
httpStatus: 471,
|
||||
retryable: false,
|
||||
countAsFailure: true,
|
||||
isFallback: true,
|
||||
description: 'Generic provider biz error (unclassified upstream failure).',
|
||||
},
|
||||
[AgentRuntimeErrorType.ProviderNoImageGenerated]: {
|
||||
@@ -521,6 +532,44 @@ export const ERROR_CODE_SPECS: SpecMap = {
|
||||
countAsFailure: false,
|
||||
description: 'Image-generation provider blocked the request due to content policy.',
|
||||
},
|
||||
[AgentRuntimeErrorType.UpstreamGatewayError]: {
|
||||
code: AgentRuntimeErrorType.UpstreamGatewayError,
|
||||
numericId: 8011,
|
||||
category: 'provider',
|
||||
severity: 'error',
|
||||
attribution: 'provider',
|
||||
httpStatus: 471,
|
||||
// Gateway hiccups (502/525/HTML bodies) are usually transient.
|
||||
retryable: true,
|
||||
countAsFailure: true,
|
||||
description:
|
||||
'Upstream proxy / gateway layer failed (openresty, litellm, HTML 5xx, Cloudflare 525).',
|
||||
},
|
||||
[AgentRuntimeErrorType.UpstreamMalformedResponse]: {
|
||||
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
|
||||
numericId: 8012,
|
||||
category: 'provider',
|
||||
severity: 'error',
|
||||
attribution: 'provider',
|
||||
httpStatus: 471,
|
||||
// Deterministic payload corruption — retrying the same request reproduces it.
|
||||
retryable: false,
|
||||
countAsFailure: true,
|
||||
description:
|
||||
'Provider returned a malformed / unparseable payload (marshal failure, bad tool-call JSON, upstream TypeError).',
|
||||
},
|
||||
[AgentRuntimeErrorType.UpstreamHttpError]: {
|
||||
code: AgentRuntimeErrorType.UpstreamHttpError,
|
||||
numericId: 8013,
|
||||
category: 'provider',
|
||||
severity: 'error',
|
||||
attribution: 'provider',
|
||||
httpStatus: 471,
|
||||
retryable: false,
|
||||
countAsFailure: true,
|
||||
isFallback: true,
|
||||
description: 'Bare upstream HTTP error with no further context (e.g. "400 status code").',
|
||||
},
|
||||
|
||||
// ─── 9xxx Config ──────────────────────────────────────────────────────
|
||||
[AgentRuntimeErrorType.InvalidOllamaArgs]: {
|
||||
|
||||
@@ -27,6 +27,8 @@ export {
|
||||
type MatchInput,
|
||||
type MatchResult,
|
||||
parseErrorRef,
|
||||
refineErrorCode,
|
||||
type RefineErrorInput,
|
||||
type SpecErrorCode,
|
||||
} from './errors';
|
||||
export * from './helpers';
|
||||
|
||||
@@ -85,6 +85,23 @@ export const AgentRuntimeErrorType = {
|
||||
CapabilityNotSupported: 'CapabilityNotSupported',
|
||||
/** Provider rejected the request as malformed (bad JSON, schema validation, etc.). */
|
||||
InvalidRequestFormat: 'InvalidRequestFormat',
|
||||
/**
|
||||
* Upstream proxy / gateway layer failed (openresty, litellm, HTML 5xx,
|
||||
* Cloudflare 525) — distinct from the provider's own service. Split out of
|
||||
* the `ProviderBizError` catch-all.
|
||||
*/
|
||||
UpstreamGatewayError: 'UpstreamGatewayError',
|
||||
/**
|
||||
* Provider returned a malformed / unparseable payload (Go re-marshal failure,
|
||||
* bad tool-call JSON, upstream Python TypeError). Not retryable. Split out of
|
||||
* `ProviderBizError`.
|
||||
*/
|
||||
UpstreamMalformedResponse: 'UpstreamMalformedResponse',
|
||||
/**
|
||||
* Bare upstream HTTP error with no further context (e.g. "400 status code").
|
||||
* The residual provider bucket once the richer codes have had their pass.
|
||||
*/
|
||||
UpstreamHttpError: 'UpstreamHttpError',
|
||||
/** User-side misconfiguration (wrong base URL, missing env var, virtual-key allowlist, etc.). */
|
||||
UserConfigError: 'UserConfigError',
|
||||
/** Gateway watchdog killed an idle agent operation — harness-side. */
|
||||
|
||||
@@ -30,6 +30,13 @@ export interface ChatMessageError {
|
||||
countAsFailure?: boolean;
|
||||
/** HTTP status the runtime returned (or would return) for this error. */
|
||||
httpStatus?: number;
|
||||
/**
|
||||
* Whether this code is a catch-all / under-classified bucket (e.g.
|
||||
* ProviderBizError, UpstreamHttpError, AgentRuntimeError, DatabasePersistError).
|
||||
* Monitoring tracks fallback-bucket volume to decide where finer codes are
|
||||
* still needed.
|
||||
*/
|
||||
isFallback?: boolean;
|
||||
message?: string;
|
||||
/** Stable `E<numericId>` reference for docs / support tickets. */
|
||||
numericId?: number;
|
||||
@@ -45,6 +52,7 @@ export const ChatMessageErrorSchema = z.object({
|
||||
category: z.string().optional(),
|
||||
countAsFailure: z.boolean().optional(),
|
||||
httpStatus: z.number().optional(),
|
||||
isFallback: z.boolean().optional(),
|
||||
message: z.string().optional(),
|
||||
numericId: z.number().optional(),
|
||||
retryable: z.boolean().optional(),
|
||||
|
||||
@@ -69,6 +69,12 @@ export default {
|
||||
'A temporary issue with the conversation state store interrupted this operation. Please try again; if it persists, contact support.',
|
||||
StreamChunkError:
|
||||
'Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.',
|
||||
UpstreamGatewayError:
|
||||
'The upstream gateway or proxy returned an error. Please try again shortly; if it persists, check your proxy / endpoint configuration.',
|
||||
UpstreamHttpError:
|
||||
'The provider returned an HTTP error without further detail. Please try again, or check your request and model configuration.',
|
||||
UpstreamMalformedResponse:
|
||||
'The provider returned a malformed response that could not be parsed. Please retry; if it persists, try a different model or provider.',
|
||||
UserConfigError:
|
||||
'Provider configuration is invalid (incorrect base URL, missing environment variable, virtual-key restriction, etc.). Please review the provider settings.',
|
||||
};
|
||||
|
||||
@@ -50,6 +50,7 @@ vi.mock('@lobechat/model-runtime', () => ({
|
||||
// retry classifier path.
|
||||
ERROR_CODE_SPECS: {},
|
||||
getErrorCodeSpec: () => undefined,
|
||||
refineErrorCode: () => undefined,
|
||||
}));
|
||||
|
||||
vi.mock('@/business/client/model-bank/loadModels', () => ({
|
||||
|
||||
@@ -46,6 +46,7 @@ describe('formatErrorForState', () => {
|
||||
category: 'quota',
|
||||
countAsFailure: false,
|
||||
httpStatus: 429,
|
||||
isFallback: false,
|
||||
numericId: 2001,
|
||||
retryable: false,
|
||||
severity: 'warning',
|
||||
@@ -116,4 +117,56 @@ describe('formatErrorForState', () => {
|
||||
expect(result.numericId).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Write-path checks: `formatErrorForState` should refine a generic
// `ProviderBizError` before enriching it with spec metadata.
describe('ProviderBizError refinement', () => {
  it('reclassifies a 429 ProviderBizError into RateLimitExceeded (retryable, not a failure)', () => {
    const result = formatErrorForState({
      error: { status: 429 },
      errorType: AgentRuntimeErrorType.ProviderBizError,
      message: '429 status code (no body)',
    });

    expect(result.type).toBe(AgentRuntimeErrorType.RateLimitExceeded);
    expect(result.numericId).toBe(3001);
    expect(result.retryable).toBe(true);
    expect(result.countAsFailure).toBe(false);
    // Original message is preserved for debugging.
    expect(result.message).toBe('429 status code (no body)');
  });

  it('reclassifies gateway HTML into UpstreamGatewayError (E8011)', () => {
    const result = formatErrorForState({
      errorType: AgentRuntimeErrorType.ProviderBizError,
      message: '<center>openresty</center>',
    });

    expect(result.type).toBe(AgentRuntimeErrorType.UpstreamGatewayError);
    expect(result.numericId).toBe(8011);
    expect(result.retryable).toBe(true);
  });

  it('uses the HTTP-status fallback for an opaque 402 body', () => {
    const result = formatErrorForState({
      error: { status: 402 },
      errorType: AgentRuntimeErrorType.ProviderBizError,
      message: 'opaque upstream message',
    });

    expect(result.type).toBe(AgentRuntimeErrorType.InsufficientQuota);
    expect(result.category).toBe('quota');
  });

  it('keeps a genuine residual as ProviderBizError (E8002)', () => {
    const result = formatErrorForState({
      errorType: AgentRuntimeErrorType.ProviderBizError,
      message: 'Upstream request failed',
    });

    expect(result.type).toBe(AgentRuntimeErrorType.ProviderBizError);
    expect(result.numericId).toBe(8002);
    // ProviderBizError is a catch-all — flagged so monitoring can track
    // how much volume still lands in fallback buckets.
    expect(result.isFallback).toBe(true);
  });
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,24 @@
|
||||
import { getErrorCodeSpec } from '@lobechat/model-runtime';
|
||||
import { getErrorCodeSpec, refineErrorCode } from '@lobechat/model-runtime';
|
||||
import { AgentRuntimeErrorType, ChatErrorType, type ChatMessageError } from '@lobechat/types';
|
||||
|
||||
/**
 * Pull a usable HTTP status out of the nested upstream error object.
 *
 * Candidates are checked in order — `status`, `statusCode`, `error.status` —
 * and the first one that is an integer in the HTTP range (100–599) wins. A
 * placeholder such as `0` or `NaN` is skipped instead of returned, so it cannot
 * hide a valid status held in a later field.
 *
 * @param body - Arbitrary upstream error payload.
 * @returns The first plausible HTTP status, or `undefined` when `body` is not
 *   an object or carries none.
 */
const extractHttpStatus = (body: unknown): number | undefined => {
  if (!body || typeof body !== 'object') return undefined;

  const b = body as { error?: { status?: unknown }; status?: unknown; statusCode?: unknown };
  const nested = b.error && typeof b.error === 'object' ? b.error.status : undefined;

  const isHttpStatus = (value: unknown): value is number =>
    typeof value === 'number' && Number.isInteger(value) && value >= 100 && value <= 599;

  return [b.status, b.statusCode, nested].find(isHttpStatus);
};
|
||||
|
||||
/**
 * Read the `provider` field off the upstream error object.
 *
 * @param body - Arbitrary upstream error payload.
 * @returns `body.provider` when it is a string; `undefined` when `body` is not
 *   an object or the field is missing / not a string.
 */
const extractProvider = (body: unknown): string | undefined => {
  if (!body || typeof body !== 'object') return undefined;

  const p = (body as { provider?: unknown }).provider;
  return typeof p === 'string' ? p : undefined;
};
|
||||
|
||||
/**
|
||||
* Merge classification metadata from `ERROR_CODE_SPECS` onto a normalized
|
||||
* `ChatMessageError`. Codes that aren't in the spec table (fallbacks like
|
||||
@@ -12,10 +30,21 @@ import { AgentRuntimeErrorType, ChatErrorType, type ChatMessageError } from '@lo
|
||||
* same shape without re-running pattern matching themselves.
|
||||
*/
|
||||
const enrichWithSpec = (formatted: ChatMessageError): ChatMessageError => {
|
||||
// Generic `ProviderBizError` is re-derived from the message / HTTP status into
|
||||
// a more specific code before enrichment, so the catch-all doesn't swallow
|
||||
// rate-limits, network drops, quota, etc. Specific codes pass through.
|
||||
const refined = refineErrorCode({
|
||||
errorType: String(formatted.type),
|
||||
httpStatus: extractHttpStatus(formatted.body),
|
||||
message: formatted.message,
|
||||
provider: extractProvider(formatted.body),
|
||||
});
|
||||
const type = (refined ?? formatted.type) as ChatMessageError['type'];
|
||||
|
||||
// `getErrorCodeSpec` is keyed by `ILobeAgentRuntimeErrorType` strings; coerce
|
||||
// because `ChatMessageError['type']` widens to include numeric `ChatErrorType`
|
||||
// values, which simply miss the lookup and pass through unenriched.
|
||||
const spec = getErrorCodeSpec(String(formatted.type));
|
||||
const spec = getErrorCodeSpec(String(type));
|
||||
if (!spec) return formatted;
|
||||
|
||||
return {
|
||||
@@ -24,9 +53,11 @@ const enrichWithSpec = (formatted: ChatMessageError): ChatMessageError => {
|
||||
category: spec.category,
|
||||
countAsFailure: spec.countAsFailure,
|
||||
httpStatus: spec.httpStatus,
|
||||
isFallback: spec.isFallback ?? false,
|
||||
numericId: spec.numericId,
|
||||
retryable: spec.retryable,
|
||||
severity: spec.severity,
|
||||
type,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ vi.mock('@lobechat/model-runtime', () => ({
|
||||
// retry classifier path.
|
||||
ERROR_CODE_SPECS: {},
|
||||
getErrorCodeSpec: () => undefined,
|
||||
refineErrorCode: () => undefined,
|
||||
}));
|
||||
|
||||
// Mock trusted client to avoid server-side env access
|
||||
|
||||
Reference in New Issue
Block a user