✨ feat(model-runtime): split ProviderBizError into finer codes + reclassify catch-all at write time (#15286)

* ✨ feat(model-runtime): split ProviderBizError into finer codes + reclassify catch-all at write time

  Add UpstreamGatewayError (E8010), UpstreamMalformedResponse (E8011), and UpstreamHttpError (E8012), migrating the matching patterns out of the ProviderBizError catch-all. Add a refineErrorCode() step (message-pattern match + HTTP-status fallback) wired into formatErrorForState so generic ProviderBizError is reclassified into the correct existing code (rate-limit / quota / network / service-unavailable / model-not-found) instead of collapsing into one opaque 8xxx bucket. Production sampling showed ~72% of ProviderBizError actually belongs to existing codes and only ~5% is a true residual.

* ✨ feat(model-runtime): add isFallback flag to mark catch-all error buckets

  Add an `isFallback` boolean to ErrorCodeSpec / ChatMessageError, set on the catch-all codes (ProviderBizError, UpstreamHttpError, AgentRuntimeError, DatabasePersistError). It flows onto agent_operations.error via the write-path enrichment so monitoring can track how much volume still lands in fallback buckets — the signal for where finer codes are still worth carving out.

* ✅ test(model-runtime): add refineErrorCode to @lobechat/model-runtime mocks

  formatErrorForState now imports refineErrorCode, so the partial module mocks in AgentRuntimeService / RuntimeExecutors must expose it or vitest throws on access.

* ✅ test(model-runtime): bump UpstreamGatewayError numericId to 8011 after canary 8010 collision

  canary claimed 8010 for ProviderContentPolicyViolation, so the Upstream* codes shifted to 8011/8012/8013 during rebase; update the refinement test assertion.
2026-06-14 03:30:19 +00:00 · 2026-05-28 17:02:39 +08:00
parent 1024ee961b
commit 1b74566b4c
14 changed files with 445 additions and 41 deletions
@@ -32,5 +32,8 @@
  "QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.",
  "RateLimitExceeded": "Sorry, the token usage or request count has reached the rate limit for this key. Please try again later or increase the key's quota.",
  "StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.",
+  "UpstreamGatewayError": "The upstream gateway or proxy returned an error. Please try again shortly; if it persists, check your proxy / endpoint configuration.",
+  "UpstreamHttpError": "The provider returned an HTTP error without further detail. Please try again, or check your request and model configuration.",
+  "UpstreamMalformedResponse": "The provider returned a malformed response that could not be parsed. Please retry; if it persists, try a different model or provider.",
  "UserConfigError": "Provider configuration is invalid (incorrect base URL, missing environment variable, virtual-key restriction, etc.). Please review the provider settings."
 }
@@ -1,6 +1,7 @@
 export { ErrorClassifier, type ErrorClassifierType } from './classifier';
 export { isUserSideError, matchErrorPattern, type MatchInput, type MatchResult } from './match';
 export { ERROR_PATTERNS, type ErrorPattern } from './patterns';
+export { refineErrorCode, type RefineErrorInput } from './refine';
 export {
  type CloudErrorCode,
  ERROR_CODE_SPECS,
@@ -218,6 +218,11 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
    match: sub('reached your session usage limit, upgrade for higher limits'),
    note: 'Ollama cloud per-session cap',
  },
+  {
+    code: AgentRuntimeErrorType.InsufficientQuota,
+    match: sub('Weekly usage limit reached'),
+    note: 'opencodecodingplan rolling weekly plan cap (resets in N days — not retryable)',
+  },
  {
    code: AgentRuntimeErrorType.InsufficientQuota,
    match: sub('This model is not available on your current plan'),
@@ -501,6 +506,13 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
  { code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('request to https://') },
  { code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('self-signed certificate') },
  { code: AgentRuntimeErrorType.ProviderNetworkError, match: sub('Network connection lost') },
+  {
+    code: AgentRuntimeErrorType.ProviderNetworkError,
+    // OpenAI/Anthropic SDK APIConnectionError wrapper — the underlying
+    // ECONNREFUSED / socket failure is buried in the nested cause, only the
+    // generic "Connection error." surfaces on the top-level message.
+    match: sub('Connection error.'),
+  },

  // ─────────────────────────────────────────────────────────────────────────
  // StateStorePersistError — Redis / Upstash agent-state store (NOT the LLM
@@ -906,7 +918,69 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
  },

  // ─────────────────────────────────────────────────────────────────────────
-  // ProviderBizError — generic upstream wrappers that don't fit elsewhere
+  // UpstreamGatewayError — proxy / gateway-layer failure (openresty, litellm,
+  // HTML error bodies, Cloudflare 525). Distinct from the provider's own
+  // service; usually transient. Split out of the ProviderBizError catch-all.
+  // ─────────────────────────────────────────────────────────────────────────
+  {
+    code: AgentRuntimeErrorType.UpstreamGatewayError,
+    match: sub('<center>openresty</center>'),
+    note: 'user-configured proxy returning HTML',
+  },
+  { code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('litellm.') },
+  { code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('403 <!DOCTYPE html>') },
+  { code: AgentRuntimeErrorType.UpstreamGatewayError, match: sub('404 <!DOCTYPE html>') },
+  {
+    code: AgentRuntimeErrorType.UpstreamGatewayError,
+    match: sub('525 <!DOCTYPE html>'),
+    note: 'Cloudflare 525 SSL handshake',
+  },
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // UpstreamMalformedResponse — provider returned a malformed / unparseable
+  // payload (Go re-marshal failure, bad tool-call JSON, upstream Python
+  // TypeError). Not retryable. Split out of ProviderBizError.
+  // ─────────────────────────────────────────────────────────────────────────
+  {
+    code: AgentRuntimeErrorType.UpstreamMalformedResponse,
+    match: sub('failed to marshal request body to JSON'),
+    note: 'upstream Go gateway re-marshal failure on non-UTF-8 / lone-surrogate bytes',
+  },
+  {
+    code: AgentRuntimeErrorType.UpstreamMalformedResponse,
+    match: sub('lone leading surrogate'),
+    note: 'invalid conversation JSON: lone surrogate in tool-call output',
+  },
+  {
+    code: AgentRuntimeErrorType.UpstreamMalformedResponse,
+    match: sub("Internal server error: unhashable type: '"),
+    note: 'nvidia / nvidia_custom upstream Python TypeError',
+  },
+  {
+    code: AgentRuntimeErrorType.UpstreamMalformedResponse,
+    match: sub('Failed to parse fc related info to json format'),
+    note: 'internlm tool-call parser failure',
+  },
+  {
+    code: AgentRuntimeErrorType.UpstreamMalformedResponse,
+    match: sub('codewhisperer#ValidationException'),
+    note: 'kiro / AWS CodeWhisperer proxy malformed payload',
+  },
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // UpstreamHttpError — bare upstream HTTP error with no further context.
+  // Split out of ProviderBizError. (400 / 422 here are candidates for a future
+  // `request`-category split; tracked separately.)
+  // ─────────────────────────────────────────────────────────────────────────
+  { code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('400 status code') },
+  { code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('403 status code') },
+  { code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('404 status code') },
+  { code: AgentRuntimeErrorType.UpstreamHttpError, match: sub('413 Request Entity Too Large') },
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // ProviderBizError — generic upstream wrappers that don't fit elsewhere. The
+  // final provider catch-all; `refineErrorCode` + the HTTP-status fallback try
+  // to reclassify these into a more specific code before this bucket is kept.
  // ─────────────────────────────────────────────────────────────────────────
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('Upstream request failed') },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('Provider returned error') },
@@ -915,52 +989,14 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('convert_request_failed') },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('failed to parse request') },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('upstream error: do request failed') },
-  // Upstream Go gateway re-marshal failure — non-UTF-8 / lone-surrogate bytes
-  // in model tool-call output.
-  {
-    code: AgentRuntimeErrorType.ProviderBizError,
-    match: sub('failed to marshal request body to JSON'),
-  },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('Internal Server Error (ref:') },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('400 status code') },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('403 status code') },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('404 status code') },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('413 Request Entity Too Large') },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('403 <!DOCTYPE html>') },
-  {
-    code: AgentRuntimeErrorType.ProviderBizError,
-    match: sub('525 <!DOCTYPE html>'),
-    note: 'Cloudflare 525 SSL handshake',
-  },
-  {
-    code: AgentRuntimeErrorType.ProviderBizError,
-    match: sub('<center>openresty</center>'),
-    note: 'user-configured proxy returning HTML',
-  },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('litellm.') },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('410 status code (no body)') },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('402 status code') },
-  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('404 <!DOCTYPE html>') },
-  // Nvidia / nvidia_custom upstream Python crash — "unhashable type" TypeError.
-  {
-    code: AgentRuntimeErrorType.ProviderBizError,
-    match: sub("Internal server error: unhashable type: '"),
-  },
  {
    code: AgentRuntimeErrorType.ProviderBizError,
    match: sub('[upstream:/v1/messages] Upstream returned HTTP'),
  },
  { code: AgentRuntimeErrorType.ProviderBizError, match: sub('上游请求参数无效') },
-  {
-    code: AgentRuntimeErrorType.ProviderBizError,
-    match: sub('Failed to parse fc related info to json format'),
-    note: 'internlm tool-call parser failure',
-  },
-  {
-    code: AgentRuntimeErrorType.ProviderBizError,
-    match: sub('codewhisperer#ValidationException'),
-    note: 'kiro / AWS CodeWhisperer proxy malformed payload',
-  },

  // ─────────────────────────────────────────────────────────────────────────
  // ContextEnginePipelineError — a context-engine pipeline processor crashed.
@@ -0,0 +1,120 @@
+import { AgentRuntimeErrorType } from '@lobechat/types';
+import { describe, expect, it } from 'vitest';
+
+import { refineErrorCode } from './refine';
+
+describe('refineErrorCode', () => {
+  it('does not touch a specific (non-refinable) errorType', () => {
+    expect(
+      refineErrorCode({
+        errorType: AgentRuntimeErrorType.InvalidProviderAPIKey,
+        message: '429 status code (no body)',
+      }),
+    ).toBeUndefined();
+  });
+
+  describe('message-pattern pass', () => {
+    it('reclassifies a rate-limit message into RateLimitExceeded', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: '429 status code (no body)',
+        }),
+      ).toBe(AgentRuntimeErrorType.RateLimitExceeded);
+    });
+
+    it('reclassifies a 503 service message into ProviderServiceUnavailable', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: '503 Service temporarily unavailable',
+        }),
+      ).toBe(AgentRuntimeErrorType.ProviderServiceUnavailable);
+    });
+
+    it('reclassifies the SDK "Connection error." wrapper into ProviderNetworkError', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: 'Connection error.',
+        }),
+      ).toBe(AgentRuntimeErrorType.ProviderNetworkError);
+    });
+
+    it('routes a rolling weekly cap to InsufficientQuota (not the 429 rate-limit fallback)', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: '429 Weekly usage limit reached. Resets in 2 days. To continue using this…',
+        }),
+      ).toBe(AgentRuntimeErrorType.InsufficientQuota);
+    });
+
+    it('routes gateway HTML / openresty to UpstreamGatewayError', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: '<center>openresty</center>',
+        }),
+      ).toBe(AgentRuntimeErrorType.UpstreamGatewayError);
+    });
+
+    it('routes a marshal failure to UpstreamMalformedResponse', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: 'failed to marshal request body to JSON',
+        }),
+      ).toBe(AgentRuntimeErrorType.UpstreamMalformedResponse);
+    });
+
+    it('routes a bare "400 status code" to UpstreamHttpError', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: '400 status code (no body)',
+        }),
+      ).toBe(AgentRuntimeErrorType.UpstreamHttpError);
+    });
+  });
+
+  describe('HTTP-status fallback (no message match)', () => {
+    it('uses the structured status when the message carries no pattern', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          httpStatus: 402,
+          message: 'some opaque upstream text',
+        }),
+      ).toBe(AgentRuntimeErrorType.InsufficientQuota);
+    });
+
+    it('falls back to the leading status in the message when no structured status', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          message: '500 upstream blew up in a way we have never seen',
+        }),
+      ).toBe(AgentRuntimeErrorType.ProviderServiceUnavailable);
+    });
+
+    it('buckets other 4xx with no context into UpstreamHttpError', () => {
+      expect(
+        refineErrorCode({
+          errorType: AgentRuntimeErrorType.ProviderBizError,
+          httpStatus: 409,
+          message: 'conflict, no details',
+        }),
+      ).toBe(AgentRuntimeErrorType.UpstreamHttpError);
+    });
+  });
+
+  it('keeps a genuine ProviderBizError residual unrefined', () => {
+    expect(
+      refineErrorCode({
+        errorType: AgentRuntimeErrorType.ProviderBizError,
+        message: 'Upstream request failed',
+      }),
+    ).toBeUndefined();
+  });
+});
@@ -0,0 +1,76 @@
+import { AgentRuntimeErrorType, type ILobeAgentRuntimeErrorType } from '@lobechat/types';
+
+import { matchErrorPattern } from './match';
+
+/**
+ * Error codes that are generic enough to be worth re-deriving from the upstream
+ * message / HTTP status. Specific codes assigned by a provider adapter are left
+ * untouched — we only refine the `ProviderBizError` catch-all, which absorbs
+ * any non-OK upstream response that the adapter couldn't name.
+ */
+const REFINABLE_CODES = new Set<string>([AgentRuntimeErrorType.ProviderBizError]);
+
+/**
+ * Last-resort mapping from a bare HTTP status to a code, used only when the
+ * message carried no recognizable pattern. Intentionally coarse: the rich
+ * cases (quota keywords, moderation, model-not-found, …) are already handled by
+ * `matchErrorPattern`, so this just buckets the context-less remainder by
+ * status class.
+ */
+const codeFromHttpStatus = (status: number | undefined): ILobeAgentRuntimeErrorType | undefined => {
+  if (!status) return undefined;
+  // 429 / 402 have unambiguous semantics worth special-casing.
+  if (status === 429) return AgentRuntimeErrorType.RateLimitExceeded;
+  if (status === 402) return AgentRuntimeErrorType.InsufficientQuota;
+  if (status >= 500 && status <= 599) return AgentRuntimeErrorType.ProviderServiceUnavailable;
+  // Any other client error with no usable message → the bare-HTTP bucket.
+  if (status >= 400 && status <= 499) return AgentRuntimeErrorType.UpstreamHttpError;
+  return undefined;
+};
+
+/**
+ * Runtime error messages are conventionally prefixed with the upstream HTTP
+ * status (e.g. `"429 status code (no body)"`, `"503 Service temporarily
+ * unavailable"`). Pull that leading status out as a fallback when the structured
+ * status isn't available on the error object.
+ */
+const leadingStatusFromMessage = (message: string | undefined): number | undefined => {
+  if (!message) return undefined;
+  const match = /^\s*([45]\d{2})\b/.exec(message);
+  return match ? Number(match[1]) : undefined;
+};
+
+export interface RefineErrorInput {
+  /** The errorType the adapter assigned (only `ProviderBizError` is refined). */
+  errorType?: string;
+  /** Structured HTTP status from the upstream response, if known. */
+  httpStatus?: number;
+  message?: string;
+  provider?: string;
+}
+
+/**
+ * Reclassify a generic provider catch-all (`ProviderBizError`) into a more
+ * specific code using the upstream message and HTTP status. Returns the refined
+ * code, or `undefined` when no better classification is found (caller keeps the
+ * original errorType).
+ *
+ * Priority:
+ *   1. `matchErrorPattern` over the message — most specific, covers the rich
+ *      cases plus the migrated `Upstream*` patterns.
+ *   2. HTTP-status fallback for messages that matched nothing.
+ */
+export const refineErrorCode = (
+  input: RefineErrorInput,
+): ILobeAgentRuntimeErrorType | undefined => {
+  const { errorType, httpStatus, message, provider } = input;
+  if (!errorType || !REFINABLE_CODES.has(errorType)) return undefined;
+
+  const matched = matchErrorPattern({ errorType, message, provider });
+  if (matched && matched.code !== errorType) return matched.code;
+
+  const byStatus = codeFromHttpStatus(httpStatus ?? leadingStatusFromMessage(message));
+  if (byStatus && byStatus !== errorType) return byStatus;
+
+  return undefined;
+};
@@ -29,6 +29,14 @@ export interface ErrorCodeSpec {
  /** HTTP status code returned to the client. */
  httpStatus: number;

+  /**
+   * Marks a catch-all / under-classified bucket (ProviderBizError,
+   * UpstreamHttpError, AgentRuntimeError, DatabasePersistError, …). Orthogonal
+   * to `category`: monitoring tracks total fallback volume to decide where
+   * finer codes are still worth carving out. Omitted (falsy) for terminal codes.
+   */
+  isFallback?: boolean;
+
  /**
   * Stable numeric identifier surfaced as `E<numericId>` (e.g. `E1001`).
   *
@@ -384,6 +392,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
    httpStatus: 500,
    retryable: false,
    countAsFailure: true,
+    isFallback: true,
    description: 'Persistence-layer query / transaction failed (Drizzle "Failed query: …").',
  },
  [AgentRuntimeErrorType.StateStorePersistError]: {
@@ -420,6 +429,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
    httpStatus: 470,
    retryable: false,
    countAsFailure: true,
+    isFallback: true,
    description: 'Generic Agent Runtime module error.',
  },
  [AgentRuntimeErrorType.ProviderBizError]: {
@@ -431,6 +441,7 @@ export const ERROR_CODE_SPECS: SpecMap = {
    httpStatus: 471,
    retryable: false,
    countAsFailure: true,
+    isFallback: true,
    description: 'Generic provider biz error (unclassified upstream failure).',
  },
  [AgentRuntimeErrorType.ProviderNoImageGenerated]: {
@@ -521,6 +532,44 @@ export const ERROR_CODE_SPECS: SpecMap = {
    countAsFailure: false,
    description: 'Image-generation provider blocked the request due to content policy.',
  },
+  [AgentRuntimeErrorType.UpstreamGatewayError]: {
+    code: AgentRuntimeErrorType.UpstreamGatewayError,
+    numericId: 8011,
+    category: 'provider',
+    severity: 'error',
+    attribution: 'provider',
+    httpStatus: 471,
+    // Gateway hiccups (502/525/HTML bodies) are usually transient.
+    retryable: true,
+    countAsFailure: true,
+    description:
+      'Upstream proxy / gateway layer failed (openresty, litellm, HTML 5xx, Cloudflare 525).',
+  },
+  [AgentRuntimeErrorType.UpstreamMalformedResponse]: {
+    code: AgentRuntimeErrorType.UpstreamMalformedResponse,
+    numericId: 8012,
+    category: 'provider',
+    severity: 'error',
+    attribution: 'provider',
+    httpStatus: 471,
+    // Deterministic payload corruption — retrying the same request reproduces it.
+    retryable: false,
+    countAsFailure: true,
+    description:
+      'Provider returned a malformed / unparseable payload (marshal failure, bad tool-call JSON, upstream TypeError).',
+  },
+  [AgentRuntimeErrorType.UpstreamHttpError]: {
+    code: AgentRuntimeErrorType.UpstreamHttpError,
+    numericId: 8013,
+    category: 'provider',
+    severity: 'error',
+    attribution: 'provider',
+    httpStatus: 471,
+    retryable: false,
+    countAsFailure: true,
+    isFallback: true,
+    description: 'Bare upstream HTTP error with no further context (e.g. "400 status code").',
+  },

  // ─── 9xxx Config ──────────────────────────────────────────────────────
  [AgentRuntimeErrorType.InvalidOllamaArgs]: {
@@ -27,6 +27,8 @@ export {
  type MatchInput,
  type MatchResult,
  parseErrorRef,
+  refineErrorCode,
+  type RefineErrorInput,
  type SpecErrorCode,
 } from './errors';
 export * from './helpers';
@@ -85,6 +85,23 @@ export const AgentRuntimeErrorType = {
  CapabilityNotSupported: 'CapabilityNotSupported',
  /** Provider rejected the request as malformed (bad JSON, schema validation, etc.). */
  InvalidRequestFormat: 'InvalidRequestFormat',
+  /**
+   * Upstream proxy / gateway layer failed (openresty, litellm, HTML 5xx,
+   * Cloudflare 525) — distinct from the provider's own service. Split out of
+   * the `ProviderBizError` catch-all.
+   */
+  UpstreamGatewayError: 'UpstreamGatewayError',
+  /**
+   * Provider returned a malformed / unparseable payload (Go re-marshal failure,
+   * bad tool-call JSON, upstream Python TypeError). Not retryable. Split out of
+   * `ProviderBizError`.
+   */
+  UpstreamMalformedResponse: 'UpstreamMalformedResponse',
+  /**
+   * Bare upstream HTTP error with no further context (e.g. "400 status code").
+   * The residual provider bucket once the richer codes have had their pass.
+   */
+  UpstreamHttpError: 'UpstreamHttpError',
  /** User-side misconfiguration (wrong base URL, missing env var, virtual-key allowlist, etc.). */
  UserConfigError: 'UserConfigError',
  /** Gateway watchdog killed an idle agent operation — harness-side. */
@@ -30,6 +30,13 @@ export interface ChatMessageError {
  countAsFailure?: boolean;
  /** HTTP status the runtime returned (or would return) for this error. */
  httpStatus?: number;
+  /**
+   * Whether this code is a catch-all / under-classified bucket (e.g.
+   * ProviderBizError, UpstreamHttpError, AgentRuntimeError, DatabasePersistError).
+   * Monitoring tracks fallback-bucket volume to decide where finer codes are
+   * still needed.
+   */
+  isFallback?: boolean;
  message?: string;
  /** Stable `E<numericId>` reference for docs / support tickets. */
  numericId?: number;
@@ -45,6 +52,7 @@ export const ChatMessageErrorSchema = z.object({
  category: z.string().optional(),
  countAsFailure: z.boolean().optional(),
  httpStatus: z.number().optional(),
+  isFallback: z.boolean().optional(),
  message: z.string().optional(),
  numericId: z.number().optional(),
  retryable: z.boolean().optional(),
@@ -69,6 +69,12 @@ export default {
    'A temporary issue with the conversation state store interrupted this operation. Please try again; if it persists, contact support.',
  StreamChunkError:
    'Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.',
+  UpstreamGatewayError:
+    'The upstream gateway or proxy returned an error. Please try again shortly; if it persists, check your proxy / endpoint configuration.',
+  UpstreamHttpError:
+    'The provider returned an HTTP error without further detail. Please try again, or check your request and model configuration.',
+  UpstreamMalformedResponse:
+    'The provider returned a malformed response that could not be parsed. Please retry; if it persists, try a different model or provider.',
  UserConfigError:
    'Provider configuration is invalid (incorrect base URL, missing environment variable, virtual-key restriction, etc.). Please review the provider settings.',
 };
@@ -50,6 +50,7 @@ vi.mock('@lobechat/model-runtime', () => ({
  // retry classifier path.
  ERROR_CODE_SPECS: {},
  getErrorCodeSpec: () => undefined,
+  refineErrorCode: () => undefined,
 }));

 vi.mock('@/business/client/model-bank/loadModels', () => ({
@@ -46,6 +46,7 @@ describe('formatErrorForState', () => {
        category: 'quota',
        countAsFailure: false,
        httpStatus: 429,
+        isFallback: false,
        numericId: 2001,
        retryable: false,
        severity: 'warning',
@@ -116,4 +117,56 @@ describe('formatErrorForState', () => {
      expect(result.numericId).toBeUndefined();
    });
  });
+
+  describe('ProviderBizError refinement', () => {
+    it('reclassifies a 429 ProviderBizError into RateLimitExceeded (retryable, not a failure)', () => {
+      const result = formatErrorForState({
+        error: { status: 429 },
+        errorType: AgentRuntimeErrorType.ProviderBizError,
+        message: '429 status code (no body)',
+      });
+
+      expect(result.type).toBe(AgentRuntimeErrorType.RateLimitExceeded);
+      expect(result.numericId).toBe(3001);
+      expect(result.retryable).toBe(true);
+      expect(result.countAsFailure).toBe(false);
+      // Original message is preserved for debugging.
+      expect(result.message).toBe('429 status code (no body)');
+    });
+
+    it('reclassifies gateway HTML into UpstreamGatewayError (E8011)', () => {
+      const result = formatErrorForState({
+        errorType: AgentRuntimeErrorType.ProviderBizError,
+        message: '<center>openresty</center>',
+      });
+
+      expect(result.type).toBe(AgentRuntimeErrorType.UpstreamGatewayError);
+      expect(result.numericId).toBe(8011);
+      expect(result.retryable).toBe(true);
+    });
+
+    it('uses the HTTP-status fallback for an opaque 402 body', () => {
+      const result = formatErrorForState({
+        error: { status: 402 },
+        errorType: AgentRuntimeErrorType.ProviderBizError,
+        message: 'opaque upstream message',
+      });
+
+      expect(result.type).toBe(AgentRuntimeErrorType.InsufficientQuota);
+      expect(result.category).toBe('quota');
+    });
+
+    it('keeps a genuine residual as ProviderBizError (E8002)', () => {
+      const result = formatErrorForState({
+        errorType: AgentRuntimeErrorType.ProviderBizError,
+        message: 'Upstream request failed',
+      });
+
+      expect(result.type).toBe(AgentRuntimeErrorType.ProviderBizError);
+      expect(result.numericId).toBe(8002);
+      // ProviderBizError is a catch-all — flagged so monitoring can track
+      // how much volume still lands in fallback buckets.
+      expect(result.isFallback).toBe(true);
+    });
+  });
 });
@@ -1,6 +1,24 @@
-import { getErrorCodeSpec } from '@lobechat/model-runtime';
+import { getErrorCodeSpec, refineErrorCode } from '@lobechat/model-runtime';
 import { AgentRuntimeErrorType, ChatErrorType, type ChatMessageError } from '@lobechat/types';

+/** Pull a usable HTTP status out of the nested upstream error object. */
+const extractHttpStatus = (body: unknown): number | undefined => {
+  if (!body || typeof body !== 'object') return undefined;
+  const b = body as { error?: { status?: unknown }; status?: unknown; statusCode?: unknown };
+  if (typeof b.status === 'number') return b.status;
+  if (typeof b.statusCode === 'number') return b.statusCode;
+  if (b.error && typeof b.error === 'object' && typeof b.error.status === 'number') {
+    return b.error.status;
+  }
+  return undefined;
+};
+
+const extractProvider = (body: unknown): string | undefined => {
+  if (!body || typeof body !== 'object') return undefined;
+  const p = (body as { provider?: unknown }).provider;
+  return typeof p === 'string' ? p : undefined;
+};
+
 /**
 * Merge classification metadata from `ERROR_CODE_SPECS` onto a normalized
 * `ChatMessageError`. Codes that aren't in the spec table (fallbacks like
@@ -12,10 +30,21 @@ import { AgentRuntimeErrorType, ChatErrorType, type ChatMessageError } from '@lo
 * same shape without re-running pattern matching themselves.
 */
 const enrichWithSpec = (formatted: ChatMessageError): ChatMessageError => {
+  // Generic `ProviderBizError` is re-derived from the message / HTTP status into
+  // a more specific code before enrichment, so the catch-all doesn't swallow
+  // rate-limits, network drops, quota, etc. Specific codes pass through.
+  const refined = refineErrorCode({
+    errorType: String(formatted.type),
+    httpStatus: extractHttpStatus(formatted.body),
+    message: formatted.message,
+    provider: extractProvider(formatted.body),
+  });
+  const type = (refined ?? formatted.type) as ChatMessageError['type'];
+
  // `getErrorCodeSpec` is keyed by `ILobeAgentRuntimeErrorType` strings; coerce
  // because `ChatMessageError['type']` widens to include numeric `ChatErrorType`
  // values, which simply miss the lookup and pass through unenriched.
-  const spec = getErrorCodeSpec(String(formatted.type));
+  const spec = getErrorCodeSpec(String(type));
  if (!spec) return formatted;

  return {
@@ -24,9 +53,11 @@ const enrichWithSpec = (formatted: ChatMessageError): ChatMessageError => {
    category: spec.category,
    countAsFailure: spec.countAsFailure,
    httpStatus: spec.httpStatus,
+    isFallback: spec.isFallback ?? false,
    numericId: spec.numericId,
    retryable: spec.retryable,
    severity: spec.severity,
+    type,
  };
 };

@@ -20,6 +20,7 @@ vi.mock('@lobechat/model-runtime', () => ({
  // retry classifier path.
  ERROR_CODE_SPECS: {},
  getErrorCodeSpec: () => undefined,
+  refineErrorCode: () => undefined,
 }));

 // Mock trusted client to avoid server-side env access