feat(model-runtime): split ProviderBizError into finer codes + reclassify catch-all at write time (#15286)

*  feat(model-runtime): split ProviderBizError into finer codes + reclassify catch-all at write time

Add UpstreamGatewayError, UpstreamMalformedResponse, and UpstreamHttpError
(originally E8010 / E8011 / E8012; renumbered to E8011 / E8012 / E8013 during
rebase — see the last note below), migrating the matching patterns out of the
ProviderBizError catch-all. Add a refineErrorCode() step (message-pattern match
+ HTTP-status fallback) wired into formatErrorForState so generic ProviderBizError
is reclassified into the correct existing code (rate-limit / quota / network /
service-unavailable / model-not-found) instead of collapsing into one opaque
8xxx bucket. Production sampling showed ~72% of ProviderBizError actually belongs
to existing codes and only ~5% is a true residual.

*  feat(model-runtime): add isFallback flag to mark catch-all error buckets

Add an `isFallback` boolean to ErrorCodeSpec / ChatMessageError, set on the
catch-all codes (ProviderBizError, UpstreamHttpError, AgentRuntimeError,
DatabasePersistError). It flows onto agent_operations.error via the write-path
enrichment so monitoring can track how much volume still lands in fallback
buckets — the signal for where finer codes are still worth carving out.

*  test(model-runtime): add refineErrorCode to @lobechat/model-runtime mocks

formatErrorForState now imports refineErrorCode, so the partial module mocks in
AgentRuntimeService / RuntimeExecutors must expose it or vitest throws on access.

*  test(model-runtime): bump UpstreamGatewayError numericId to 8011 after canary 8010 collision

canary claimed 8010 for ProviderContentPolicyViolation, so the Upstream* codes
shifted to 8011/8012/8013 during rebase; update the refinement test assertion.
This commit is contained in:
Arvin Xu
2026-05-28 17:02:39 +08:00
committed by GitHub
parent 1024ee961b
commit 1b74566b4c
14 changed files with 445 additions and 41 deletions
+3
View File
@@ -32,5 +32,8 @@
"QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.",
"RateLimitExceeded": "Sorry, the token usage or request count has reached the rate limit for this key. Please try again later or increase the key's quota.",
"StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.",
"UpstreamGatewayError": "The upstream gateway or proxy returned an error. Please try again shortly; if it persists, check your proxy / endpoint configuration.",
"UpstreamHttpError": "The provider returned an HTTP error without further detail. Please try again, or check your request and model configuration.",
"UpstreamMalformedResponse": "The provider returned a malformed response that could not be parsed. Please retry; if it persists, try a different model or provider.",
"UserConfigError": "Provider configuration is invalid (incorrect base URL, missing environment variable, virtual-key restriction, etc.). Please review the provider settings."
}
@@ -1,6 +1,7 @@
export { ErrorClassifier, type ErrorClassifierType } from './classifier';
export { isUserSideError, matchErrorPattern, type MatchInput, type MatchResult } from './match';
export { ERROR_PATTERNS, type ErrorPattern } from './patterns';
export { refineErrorCode, type RefineErrorInput } from './refine';
export {
type CloudErrorCode,
ERROR_CODE_SPECS,
+75 -39
View File
@@ -218,6 +218,11 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
match: sub('reached your session usage limit, upgrade for higher limits'),
note: 'Ollama cloud per-session cap',
},
{
code: AgentRuntimeErrorType.InsufficientQuota,
match: sub('Weekly usage limit reached'),
note: 'opencodecodingplan rolling weekly plan cap (resets in N days — not retryable)',
},
{
code: AgentRuntimeErrorType.InsufficientQuota,
match: sub('This model is not available on your current plan'),
@@ -501,6 +506,13 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
{ code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('request to https://') },
{ code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('self-signed certificate') },
{ code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('Network connection lost') },
{
code: AgentRuntimeErrorType.ProviderNetworkError,
// OpenAI/Anthropic SDK APIConnectionError wrapper — the underlying
// ECONNREFUSED / socket failure is buried in the nested cause, only the
// generic "Connection error." surfaces on the top-level message.
match: sub('Connection error.'),
},
// ─────────────────────────────────────────────────────────────────────────
// StateStorePersistError — Redis / Upstash agent-state store (NOT the LLM
@@ -906,7 +918,69 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
},
// ─────────────────────────────────────────────────────────────────────────
// ProviderBizError — generic upstream wrappers that don't fit elsewhere
// UpstreamGatewayError — proxy / gateway-layer failure (openresty, litellm,
// HTML error bodies, Cloudflare 525). Distinct from the provider's own
// service; usually transient. Split out of the ProviderBizError catch-all.
// ─────────────────────────────────────────────────────────────────────────
{
code: AgentRuntimeErrorType.UpstreamGatewayError,
match: sub('<center>openresty</center>'),
note: 'user-configured proxy returning HTML',
},
{ code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('litellm.') },
{ code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('403 <!DOCTYPE html>') },
{ code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('404 <!DOCTYPE html>') },
{
code: AgentRuntimeErrorType.UpstreamGatewayError,
match: sub('525 <!DOCTYPE html>'),
note: 'Cloudflare 525 SSL handshake',
},
// ─────────────────────────────────────────────────────────────────────────
// UpstreamMalformedResponse — provider returned a malformed / unparseable
// payload (Go re-marshal failure, bad tool-call JSON, upstream Python
// TypeError). Not retryable. Split out of ProviderBizError.
// ─────────────────────────────────────────────────────────────────────────
{
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
match: sub('failed to marshal request body to JSON'),
note: 'upstream Go gateway re-marshal failure on non-UTF-8 / lone-surrogate bytes',
},
{
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
match: sub('lone leading surrogate'),
note: 'invalid conversation JSON: lone surrogate in tool-call output',
},
{
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
match: sub("Internal server error: unhashable type: '"),
note: 'nvidia / nvidia_custom upstream Python TypeError',
},
{
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
match: sub('Failed to parse fc related info to json format'),
note: 'internlm tool-call parser failure',
},
{
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
match: sub('codewhisperer#ValidationException'),
note: 'kiro / AWS CodeWhisperer proxy malformed payload',
},
// ─────────────────────────────────────────────────────────────────────────
// UpstreamHttpError — bare upstream HTTP error with no further context.
// Split out of ProviderBizError. (400 / 422 here are candidates for a future
// `request`-category split; tracked separately.)
// ─────────────────────────────────────────────────────────────────────────
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('400 status code') },
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('403 status code') },
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('404 status code') },
{ code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('413 Request Entity Too Large') },
// ─────────────────────────────────────────────────────────────────────────
// ProviderBizError — generic upstream wrappers that don't fit elsewhere. The
// final provider catch-all; `refineErrorCode` + the HTTP-status fallback try
// to reclassify these into a more specific code before this bucket is kept.
// ─────────────────────────────────────────────────────────────────────────
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('Upstream request failed') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('Provider returned error') },
@@ -915,52 +989,14 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('convert_request_failed') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('failed to parse request') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('upstream error: do request failed') },
// Upstream Go gateway re-marshal failure — non-UTF-8 / lone-surrogate bytes
// in model tool-call output.
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub('failed to marshal request body to JSON'),
},
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('Internal Server Error (ref:') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('400 status code') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('403 status code') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('404 status code') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('413 Request Entity Too Large') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('403 <!DOCTYPE html>') },
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub('525 <!DOCTYPE html>'),
note: 'Cloudflare 525 SSL handshake',
},
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub('<center>openresty</center>'),
note: 'user-configured proxy returning HTML',
},
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('litellm.') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('410 status code (no body)') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('402 status code') },
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('404 <!DOCTYPE html>') },
// Nvidia / nvidia_custom upstream Python crash — "unhashable type" TypeError.
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub("Internal server error: unhashable type: '"),
},
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub('[upstream:/v1/messages] Upstream returned HTTP'),
},
{ code: AgentRuntimeErrorType.ProviderBizError, match: sub('上游请求参数无效') },
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub('Failed to parse fc related info to json format'),
note: 'internlm tool-call parser failure',
},
{
code: AgentRuntimeErrorType.ProviderBizError,
match: sub('codewhisperer#ValidationException'),
note: 'kiro / AWS CodeWhisperer proxy malformed payload',
},
// ─────────────────────────────────────────────────────────────────────────
// ContextEnginePipelineError — a context-engine pipeline processor crashed.
@@ -0,0 +1,120 @@
import { AgentRuntimeErrorType } from '@lobechat/types';
import { describe, expect, it } from 'vitest';
import { refineErrorCode } from './refine';
// Unit tests for `refineErrorCode`. Every case passes an `errorType` plus a
// message (and sometimes a structured `httpStatus`) and asserts either the
// refined code or `undefined` when the input should be left as-is.
describe('refineErrorCode', () => {
// A specific code must pass through untouched even when its message looks
// like something that would otherwise be reclassified.
it('does not touch a specific (non-refinable) errorType', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.InvalidProviderAPIKey,
message: '429 status code (no body)',
}),
).toBeUndefined();
});
// First pass: the message text alone decides the refined code.
describe('message-pattern pass', () => {
it('reclassifies a rate-limit message into RateLimitExceeded', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '429 status code (no body)',
}),
).toBe(AgentRuntimeErrorType.RateLimitExceeded);
});
it('reclassifies a 503 service message into ProviderServiceUnavailable', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '503 Service temporarily unavailable',
}),
).toBe(AgentRuntimeErrorType.ProviderServiceUnavailable);
});
it('reclassifies the SDK "Connection error." wrapper into ProviderNetworkError', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: 'Connection error.',
}),
).toBe(AgentRuntimeErrorType.ProviderNetworkError);
});
// The message starts with 429, yet the expected code is the quota one: the
// message match is expected to win over the status-based fallback.
it('routes a rolling weekly cap to InsufficientQuota (not the 429 rate-limit fallback)', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '429 Weekly usage limit reached. Resets in 2 days. To continue using this…',
}),
).toBe(AgentRuntimeErrorType.InsufficientQuota);
});
it('routes gateway HTML / openresty to UpstreamGatewayError', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '<center>openresty</center>',
}),
).toBe(AgentRuntimeErrorType.UpstreamGatewayError);
});
it('routes a marshal failure to UpstreamMalformedResponse', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: 'failed to marshal request body to JSON',
}),
).toBe(AgentRuntimeErrorType.UpstreamMalformedResponse);
});
it('routes a bare "400 status code" to UpstreamHttpError', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '400 status code (no body)',
}),
).toBe(AgentRuntimeErrorType.UpstreamHttpError);
});
});
// Second pass: messages here are chosen to carry no known pattern, so the
// result has to come from the structured status or the leading status digits.
describe('HTTP-status fallback (no message match)', () => {
it('uses the structured status when the message carries no pattern', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
httpStatus: 402,
message: 'some opaque upstream text',
}),
).toBe(AgentRuntimeErrorType.InsufficientQuota);
});
it('falls back to the leading status in the message when no structured status', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '500 upstream blew up in a way we have never seen',
}),
).toBe(AgentRuntimeErrorType.ProviderServiceUnavailable);
});
it('buckets other 4xx with no context into UpstreamHttpError', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
httpStatus: 409,
message: 'conflict, no details',
}),
).toBe(AgentRuntimeErrorType.UpstreamHttpError);
});
});
// No status is supplied and the message has no leading status digits, so
// nothing more specific is expected and the result is `undefined`.
it('keeps a genuine ProviderBizError residual unrefined', () => {
expect(
refineErrorCode({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: 'Upstream request failed',
}),
).toBeUndefined();
});
});
@@ -0,0 +1,76 @@
import { AgentRuntimeErrorType, type ILobeAgentRuntimeErrorType } from '@lobechat/types';
import { matchErrorPattern } from './match';
/**
 * Error codes that are generic enough to be worth re-deriving from the upstream
 * message / HTTP status. Specific codes assigned by a provider adapter are left
 * untouched — we only refine the `ProviderBizError` catch-all, which absorbs
 * any non-OK upstream response that the adapter couldn't name.
 *
 * Typed as `Set<string>` so a raw `errorType` string can be tested with `.has()`
 * without first narrowing it to a known error-type literal.
 */
const REFINABLE_CODES = new Set<string>([AgentRuntimeErrorType.ProviderBizError]);
/**
 * Coarse, last-resort translation of a bare HTTP status into an error code.
 *
 * Only meant for errors whose message matched no known pattern: the richer
 * cases (quota keywords, moderation, model-not-found, …) are handled by
 * `matchErrorPattern`, so this merely buckets the context-less remainder.
 *
 * @param status - HTTP status, or `undefined` when none is known.
 * @returns The code for that status, or `undefined` when the status is missing
 *   or is not a 4xx / 5xx value.
 */
const codeFromHttpStatus = (status: number | undefined): ILobeAgentRuntimeErrorType | undefined => {
  // The two statuses whose meaning is unambiguous get a dedicated code.
  switch (status) {
    case 429: {
      return AgentRuntimeErrorType.RateLimitExceeded;
    }
    case 402: {
      return AgentRuntimeErrorType.InsufficientQuota;
    }
    default: {
      break;
    }
  }

  // `undefined`, `0` and `NaN` fail every range check below and therefore fall
  // through to the final `undefined`.
  const within = (low: number, high: number): boolean =>
    status !== undefined && low <= status && status <= high;

  if (within(500, 599)) return AgentRuntimeErrorType.ProviderServiceUnavailable;

  // Every remaining client error with no usable message → the bare-HTTP bucket.
  return within(400, 499) ? AgentRuntimeErrorType.UpstreamHttpError : undefined;
};
/**
 * Read an HTTP status off the front of a runtime error message.
 *
 * Messages are conventionally prefixed with the upstream status (e.g.
 * `"429 status code (no body)"`, `"503 Service temporarily unavailable"`), which
 * serves as a fallback when no structured status is available on the error.
 *
 * @param message - Raw error message, possibly `undefined` or empty.
 * @returns The leading 4xx / 5xx status as a number, or `undefined` when the
 *   message does not start (after optional whitespace) with exactly three such
 *   digits followed by a word boundary.
 */
const leadingStatusFromMessage = (message: string | undefined): number | undefined => {
  const digits = message?.match(/^\s*([45]\d{2})\b/)?.[1];
  return digits === undefined ? undefined : Number(digits);
};
/** Input accepted by `refineErrorCode`. Every field is optional. */
export interface RefineErrorInput {
/** The errorType the adapter assigned (only `ProviderBizError` is refined). */
errorType?: string;
/** Structured HTTP status from the upstream response, if known. */
httpStatus?: number;
/**
 * Raw upstream error message. Used for pattern matching and, when it starts
 * with a 4xx / 5xx status, as a fallback source for the HTTP status.
 */
message?: string;
/** Provider identifier, forwarded to `matchErrorPattern` together with the message. */
provider?: string;
}
/**
 * Reclassify a generic provider catch-all (`ProviderBizError`) into a more
 * specific code using the upstream message and HTTP status.
 *
 * Priority:
 * 1. `matchErrorPattern` over the message — most specific, covers the rich
 *    cases plus the migrated `Upstream*` patterns.
 * 2. Structured `httpStatus`, when it maps to a code.
 * 3. The status prefixed to the message, when the structured status is absent
 *    or maps to nothing.
 *
 * @param input - The adapter-assigned error type plus whatever upstream
 *   context (status, message, provider) is available.
 * @returns The refined code, or `undefined` when the error type is not
 *   refinable or no better classification is found (caller keeps the original
 *   errorType).
 */
export const refineErrorCode = (
  input: RefineErrorInput,
): ILobeAgentRuntimeErrorType | undefined => {
  const { errorType, httpStatus, message, provider } = input;
  if (!errorType || !REFINABLE_CODES.has(errorType)) return undefined;

  const matched = matchErrorPattern({ errorType, message, provider });
  // A match on the catch-all itself is not a refinement; keep looking.
  if (matched && matched.code !== errorType) return matched.code;

  // Fall back to the message prefix whenever the structured status yields no
  // code — not only when it is missing. A numeric `status` that is not an HTTP
  // error (e.g. a vendor-specific `status: 0`, or a 200 carrying an error body)
  // must not suppress the status embedded in the message.
  const byStatus =
    codeFromHttpStatus(httpStatus) ?? codeFromHttpStatus(leadingStatusFromMessage(message));
  if (byStatus && byStatus !== errorType) return byStatus;

  return undefined;
};
@@ -29,6 +29,14 @@ export interface ErrorCodeSpec {
/** HTTP status code returned to the client. */
httpStatus: number;
/**
* Marks a catch-all / under-classified bucket (ProviderBizError,
* UpstreamHttpError, AgentRuntimeError, DatabasePersistError, …). Orthogonal
* to `category`: monitoring tracks total fallback volume to decide where
* finer codes are still worth carving out. Omitted (falsy) for terminal codes.
*/
isFallback?: boolean;
/**
* Stable numeric identifier surfaced as `E<numericId>` (e.g. `E1001`).
*
@@ -384,6 +392,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
httpStatus: 500,
retryable: false,
countAsFailure: true,
isFallback: true,
description: 'Persistence-layer query / transaction failed (Drizzle "Failed query: …").',
},
[AgentRuntimeErrorType.StateStorePersistError]: {
@@ -420,6 +429,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
httpStatus: 470,
retryable: false,
countAsFailure: true,
isFallback: true,
description: 'Generic Agent Runtime module error.',
},
[AgentRuntimeErrorType.ProviderBizError]: {
@@ -431,6 +441,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
httpStatus: 471,
retryable: false,
countAsFailure: true,
isFallback: true,
description: 'Generic provider biz error (unclassified upstream failure).',
},
[AgentRuntimeErrorType.ProviderNoImageGenerated]: {
@@ -521,6 +532,44 @@ export const ERROR_CODE_SPECS: SpecMap = {
countAsFailure: false,
description: 'Image-generation provider blocked the request due to content policy.',
},
[AgentRuntimeErrorType.UpstreamGatewayError]: {
code: AgentRuntimeErrorType.UpstreamGatewayError,
numericId: 8011,
category: 'provider',
severity: 'error',
attribution: 'provider',
httpStatus: 471,
// Gateway hiccups (502/525/HTML bodies) are usually transient.
retryable: true,
countAsFailure: true,
description:
'Upstream proxy / gateway layer failed (openresty, litellm, HTML 5xx, Cloudflare 525).',
},
[AgentRuntimeErrorType.UpstreamMalformedResponse]: {
code: AgentRuntimeErrorType.UpstreamMalformedResponse,
numericId: 8012,
category: 'provider',
severity: 'error',
attribution: 'provider',
httpStatus: 471,
// Deterministic payload corruption — retrying the same request reproduces it.
retryable: false,
countAsFailure: true,
description:
'Provider returned a malformed / unparseable payload (marshal failure, bad tool-call JSON, upstream TypeError).',
},
[AgentRuntimeErrorType.UpstreamHttpError]: {
code: AgentRuntimeErrorType.UpstreamHttpError,
numericId: 8013,
category: 'provider',
severity: 'error',
attribution: 'provider',
httpStatus: 471,
retryable: false,
countAsFailure: true,
isFallback: true,
description: 'Bare upstream HTTP error with no further context (e.g. "400 status code").',
},
// ─── 9xxx Config ──────────────────────────────────────────────────────
[AgentRuntimeErrorType.InvalidOllamaArgs]: {
+2
View File
@@ -27,6 +27,8 @@ export {
type MatchInput,
type MatchResult,
parseErrorRef,
refineErrorCode,
type RefineErrorInput,
type SpecErrorCode,
} from './errors';
export * from './helpers';
+17
View File
@@ -85,6 +85,23 @@ export const AgentRuntimeErrorType = {
CapabilityNotSupported: 'CapabilityNotSupported',
/** Provider rejected the request as malformed (bad JSON, schema validation, etc.). */
InvalidRequestFormat: 'InvalidRequestFormat',
/**
* Upstream proxy / gateway layer failed (openresty, litellm, HTML 5xx,
* Cloudflare 525) — distinct from the provider's own service. Split out of
* the `ProviderBizError` catch-all.
*/
UpstreamGatewayError: 'UpstreamGatewayError',
/**
* Provider returned a malformed / unparseable payload (Go re-marshal failure,
* bad tool-call JSON, upstream Python TypeError). Not retryable. Split out of
* `ProviderBizError`.
*/
UpstreamMalformedResponse: 'UpstreamMalformedResponse',
/**
* Bare upstream HTTP error with no further context (e.g. "400 status code").
* The residual provider bucket once the richer codes have had their pass.
*/
UpstreamHttpError: 'UpstreamHttpError',
/** User-side misconfiguration (wrong base URL, missing env var, virtual-key allowlist, etc.). */
UserConfigError: 'UserConfigError',
/** Gateway watchdog killed an idle agent operation — harness-side. */
@@ -30,6 +30,13 @@ export interface ChatMessageError {
countAsFailure?: boolean;
/** HTTP status the runtime returned (or would return) for this error. */
httpStatus?: number;
/**
* Whether this code is a catch-all / under-classified bucket (e.g.
* ProviderBizError, UpstreamHttpError, AgentRuntimeError, DatabasePersistError).
* Monitoring tracks fallback-bucket volume to decide where finer codes are
* still needed.
*/
isFallback?: boolean;
message?: string;
/** Stable `E<numericId>` reference for docs / support tickets. */
numericId?: number;
@@ -45,6 +52,7 @@ export const ChatMessageErrorSchema = z.object({
category: z.string().optional(),
countAsFailure: z.boolean().optional(),
httpStatus: z.number().optional(),
isFallback: z.boolean().optional(),
message: z.string().optional(),
numericId: z.number().optional(),
retryable: z.boolean().optional(),
+6
View File
@@ -69,6 +69,12 @@ export default {
'A temporary issue with the conversation state store interrupted this operation. Please try again; if it persists, contact support.',
StreamChunkError:
'Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.',
UpstreamGatewayError:
'The upstream gateway or proxy returned an error. Please try again shortly; if it persists, check your proxy / endpoint configuration.',
UpstreamHttpError:
'The provider returned an HTTP error without further detail. Please try again, or check your request and model configuration.',
UpstreamMalformedResponse:
'The provider returned a malformed response that could not be parsed. Please retry; if it persists, try a different model or provider.',
UserConfigError:
'Provider configuration is invalid (incorrect base URL, missing environment variable, virtual-key restriction, etc.). Please review the provider settings.',
};
@@ -50,6 +50,7 @@ vi.mock('@lobechat/model-runtime', () => ({
// retry classifier path.
ERROR_CODE_SPECS: {},
getErrorCodeSpec: () => undefined,
refineErrorCode: () => undefined,
}));
vi.mock('@/business/client/model-bank/loadModels', () => ({
@@ -46,6 +46,7 @@ describe('formatErrorForState', () => {
category: 'quota',
countAsFailure: false,
httpStatus: 429,
isFallback: false,
numericId: 2001,
retryable: false,
severity: 'warning',
@@ -116,4 +117,56 @@ describe('formatErrorForState', () => {
expect(result.numericId).toBeUndefined();
});
});
// Checks of `formatErrorForState` output when the incoming `errorType` is the
// generic `ProviderBizError`: each case asserts the resulting `type` and some
// of the metadata fields on the returned object.
describe('ProviderBizError refinement', () => {
it('reclassifies a 429 ProviderBizError into RateLimitExceeded (retryable, not a failure)', () => {
const result = formatErrorForState({
error: { status: 429 },
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '429 status code (no body)',
});
expect(result.type).toBe(AgentRuntimeErrorType.RateLimitExceeded);
expect(result.numericId).toBe(3001);
expect(result.retryable).toBe(true);
expect(result.countAsFailure).toBe(false);
// Original message is preserved for debugging.
expect(result.message).toBe('429 status code (no body)');
});
// No `error` object is passed here, so only the message is available.
it('reclassifies gateway HTML into UpstreamGatewayError (E8011)', () => {
const result = formatErrorForState({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: '<center>openresty</center>',
});
expect(result.type).toBe(AgentRuntimeErrorType.UpstreamGatewayError);
expect(result.numericId).toBe(8011);
expect(result.retryable).toBe(true);
});
// The message is deliberately uninformative; the 402 on `error.status` is the
// only hint pointing at a quota problem.
it('uses the HTTP-status fallback for an opaque 402 body', () => {
const result = formatErrorForState({
error: { status: 402 },
errorType: AgentRuntimeErrorType.ProviderBizError,
message: 'opaque upstream message',
});
expect(result.type).toBe(AgentRuntimeErrorType.InsufficientQuota);
expect(result.category).toBe('quota');
});
it('keeps a genuine residual as ProviderBizError (E8002)', () => {
const result = formatErrorForState({
errorType: AgentRuntimeErrorType.ProviderBizError,
message: 'Upstream request failed',
});
expect(result.type).toBe(AgentRuntimeErrorType.ProviderBizError);
expect(result.numericId).toBe(8002);
// ProviderBizError is a catch-all — flagged so monitoring can track
// how much volume still lands in fallback buckets.
expect(result.isFallback).toBe(true);
});
});
});
@@ -1,6 +1,24 @@
import { getErrorCodeSpec } from '@lobechat/model-runtime';
import { getErrorCodeSpec, refineErrorCode } from '@lobechat/model-runtime';
import { AgentRuntimeErrorType, ChatErrorType, type ChatMessageError } from '@lobechat/types';
/**
 * Find a numeric HTTP status on an upstream error object.
 *
 * Lookup order: `status`, then `statusCode`, then the nested `error.status`.
 * The first one that is a `number` wins; values of any other type are ignored.
 *
 * @param body - Arbitrary error payload; anything that is not a non-null
 *   object yields `undefined`.
 * @returns The first numeric status found, or `undefined`.
 */
const extractHttpStatus = (body: unknown): number | undefined => {
  if (body === null || typeof body !== 'object') return undefined;

  const record = body as { error?: unknown; status?: unknown; statusCode?: unknown };

  // Top-level fields take precedence over the nested error object.
  for (const key of ['status', 'statusCode'] as const) {
    const value = record[key];
    if (typeof value === 'number') return value;
  }

  const nested = record.error;
  if (nested !== null && typeof nested === 'object') {
    const nestedStatus = (nested as { status?: unknown }).status;
    if (typeof nestedStatus === 'number') return nestedStatus;
  }

  return undefined;
};
/**
 * Read the `provider` field off an upstream error object.
 *
 * @param body - Arbitrary error payload; anything that is not a non-null
 *   object yields `undefined`.
 * @returns `body.provider` when it is a string (including the empty string),
 *   otherwise `undefined`.
 */
const extractProvider = (body: unknown): string | undefined => {
  const isRecord = typeof body === 'object' && body !== null;
  if (!isRecord) return undefined;

  const candidate = (body as { provider?: unknown }).provider;
  if (typeof candidate !== 'string') return undefined;
  return candidate;
};
/**
* Merge classification metadata from `ERROR_CODE_SPECS` onto a normalized
* `ChatMessageError`. Codes that aren't in the spec table (fallbacks like
@@ -12,10 +30,21 @@ import { AgentRuntimeErrorType, ChatErrorType, type ChatMessageError } from '@lo
* same shape without re-running pattern matching themselves.
*/
const enrichWithSpec = (formatted: ChatMessageError): ChatMessageError => {
// Generic `ProviderBizError` is re-derived from the message / HTTP status into
// a more specific code before enrichment, so the catch-all doesn't swallow
// rate-limits, network drops, quota, etc. Specific codes pass through.
const refined = refineErrorCode({
errorType: String(formatted.type),
httpStatus: extractHttpStatus(formatted.body),
message: formatted.message,
provider: extractProvider(formatted.body),
});
const type = (refined ?? formatted.type) as ChatMessageError['type'];
// `getErrorCodeSpec` is keyed by `ILobeAgentRuntimeErrorType` strings; coerce
// because `ChatMessageError['type']` widens to include numeric `ChatErrorType`
// values, which simply miss the lookup and pass through unenriched.
const spec = getErrorCodeSpec(String(formatted.type));
const spec = getErrorCodeSpec(String(type));
if (!spec) return formatted;
return {
@@ -24,9 +53,11 @@ const enrichWithSpec = (formatted: ChatMessageError): ChatMessageError => {
category: spec.category,
countAsFailure: spec.countAsFailure,
httpStatus: spec.httpStatus,
isFallback: spec.isFallback ?? false,
numericId: spec.numericId,
retryable: spec.retryable,
severity: spec.severity,
type,
};
};
@@ -20,6 +20,7 @@ vi.mock('@lobechat/model-runtime', () => ({
// retry classifier path.
ERROR_CODE_SPECS: {},
getErrorCodeSpec: () => undefined,
refineErrorCode: () => undefined,
}));
// Mock trusted client to avoid server-side env access