mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-14 03:30:19 +00:00
🐛 fix(model-runtime): classify untyped Error throws via message patterns (#15767)
* 🐛 fix(model-runtime): classify untyped Error throws via message patterns `refineErrorCode` only re-derived a specific code when the incoming errorType was `ProviderBizError`, so raw `Error` throws — which `formatErrorForState` wraps as `InternalServerError` (HTTP 500) — never reached `matchErrorPattern`. Persistence-layer (`Failed query: …`) and state-store drops therefore landed as bare, un-classified 500s instead of `DatabasePersistError` etc. Add the two un-typed fallback wrappers (`InternalServerError`, `AgentRuntimeError`) to `REFINABLE_CODES` so their message runs through the pattern registry before falling back. The existing `Failed query:` pattern already classifies these; this just lets it run again. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * 🐛 fix(model-runtime): classify Upstash readonly-upgrade & dropped-caller drops Add `READONLY Writes are temporarily rejected` and `ERR caller gone` to the StateStorePersistError pattern block — both are Redis/Upstash state-store failures that otherwise fall through to a bare 500. They describe the connection/server condition rather than a specific command, so there is no read-vs-write signal to split on. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * 🐛 fix(model-runtime): split caller-gone state-store reads into StateStoreReadError `ERR caller gone` is an Upstash reply when an in-flight blocking READ (XREAD on the agent event stream, BLPOP on a tool result) is aborted because the originating caller disconnected — a benign client abandonment tied to the request lifecycle, not a write/persist fault. Bucketing it under StateStorePersistError mislabelled it as a harness failure (attribution: harness, countAsFailure: true). Add a dedicated StateStoreReadError (E7007, attribution: system, severity: warning, countAsFailure: false) and route `ERR caller gone` to it. The write-side rejection `READONLY Writes are temporarily rejected` stays under StateStorePersistError. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * 🐛 fix(model-runtime): scope HTTP-status fallback to provider catch-alls Opening the un-typed wrappers (InternalServerError / AgentRuntimeError) to the full refine path also let them hit the leadingStatusFromMessage / codeFromHttpStatus fallback. A harness/DB/Redis throw like `Error('429 …')` or `Error('500 …')` with no registered pattern would then be recast as RateLimitExceeded / ProviderServiceUnavailable — provider retry/failure semantics on a harness error. Split the sets: PATTERN_REFINABLE_CODES (message matching) stays open to the wrappers; STATUS_REFINABLE_CODES (the coarse HTTP-status bucket) is limited to ProviderBizError, where a leading status is a real upstream signal. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -132,6 +132,14 @@ describe('formatErrorForState', () => {
|
||||
expect(result.countAsFailure).toBeUndefined();
|
||||
expect(result.numericId).toBeUndefined();
|
||||
});
|
||||
|
||||
it('classifies a raw Drizzle "Failed query" Error via its message instead of a bare 500', () => {
|
||||
const result = formatErrorForState(new Error('Failed query: rollback\nparams: '));
|
||||
|
||||
expect(result.type).toBe(AgentRuntimeErrorType.DatabasePersistError);
|
||||
expect(result.numericId).toBe(7004);
|
||||
expect(result.attribution).toBe('harness');
|
||||
});
|
||||
});
|
||||
|
||||
describe('ProviderBizError refinement', () => {
|
||||
|
||||
@@ -69,6 +69,8 @@ export default {
|
||||
"Sorry, the token usage or request count has reached the rate limit for this key. Please try again later or increase the key's quota.",
|
||||
StateStorePersistError:
|
||||
'A temporary issue with the conversation state store interrupted this operation. Please try again; if it persists, contact support.',
|
||||
StateStoreReadError:
|
||||
'This operation was ended because the connection closed before it finished. This is usually harmless — reopen the conversation to continue.',
|
||||
StreamChunkError:
|
||||
'Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.',
|
||||
UpstreamGatewayError:
|
||||
|
||||
@@ -94,6 +94,20 @@ describe('matchErrorPattern', () => {
|
||||
).toBe(AgentRuntimeErrorType.StateStorePersistError);
|
||||
});
|
||||
|
||||
it('classifies the Upstash readonly-upgrade write rejection as StateStorePersistError', () => {
|
||||
expect(
|
||||
matchErrorPattern({
|
||||
message: 'READONLY Writes are temporarily rejected due to server upgrade',
|
||||
})?.code,
|
||||
).toBe(AgentRuntimeErrorType.StateStorePersistError);
|
||||
});
|
||||
|
||||
it('classifies a caller-gone blocking-read abort as StateStoreReadError (benign, not a persist failure)', () => {
|
||||
expect(matchErrorPattern({ message: 'ERR caller gone' })?.code).toBe(
|
||||
AgentRuntimeErrorType.StateStoreReadError,
|
||||
);
|
||||
});
|
||||
|
||||
it('classifies harness JS runtime crashes as AgentRuntimeError', () => {
|
||||
for (const message of [
|
||||
'e.trim is not a function',
|
||||
|
||||
@@ -514,9 +514,21 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
match: sub('Connection error.'),
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// StateStoreReadError — a blocking state-store READ (XREAD / BLPOP) aborted
|
||||
// because the caller disconnected. Benign client abandonment; must precede
|
||||
// StateStorePersistError so the write-side bucket doesn't claim it.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{
|
||||
code: AgentRuntimeErrorType.StateStoreReadError,
|
||||
match: sub('ERR caller gone'),
|
||||
note: 'Upstash aborts the in-flight blocking read (XREAD/BLPOP) when the originating request is already gone.',
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// StateStorePersistError — Redis / Upstash agent-state store (NOT the LLM
|
||||
// provider). ioredis aborts, request-size cap, suspended DB.
|
||||
// provider). ioredis aborts, request-size cap, suspended DB, readonly upgrade
|
||||
// window. Write / connection-level drops.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{
|
||||
code: AgentRuntimeErrorType.StateStorePersistError,
|
||||
@@ -531,6 +543,11 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
code: AgentRuntimeErrorType.StateStorePersistError,
|
||||
match: sub('database has been suspended'),
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.StateStorePersistError,
|
||||
match: sub('READONLY Writes are temporarily rejected'),
|
||||
note: 'Upstash rejects writes against the read-only replica during a server upgrade.',
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// NoAvailableChannel — router / proxy has no upstream
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { AgentRuntimeErrorType } from '@lobechat/types';
|
||||
import { AgentRuntimeErrorType, ChatErrorType } from '@lobechat/types';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { refineErrorCode } from './refine';
|
||||
@@ -13,6 +13,57 @@ describe('refineErrorCode', () => {
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
describe('un-typed throw wrappers', () => {
|
||||
// A raw `Error` (e.g. a Drizzle "Failed query: …" throw) is wrapped by
|
||||
// formatErrorForState as InternalServerError (HTTP 500). It must still reach
|
||||
// the message patterns, otherwise it persists as a bare, un-classified 500.
|
||||
it('reclassifies a 500-wrapped Drizzle throw into DatabasePersistError', () => {
|
||||
expect(
|
||||
refineErrorCode({
|
||||
errorType: String(ChatErrorType.InternalServerError),
|
||||
message: 'Failed query: rollback\nparams: ',
|
||||
}),
|
||||
).toBe(AgentRuntimeErrorType.DatabasePersistError);
|
||||
});
|
||||
|
||||
it('reclassifies an AgentRuntimeError-wrapped throw via its message', () => {
|
||||
expect(
|
||||
refineErrorCode({
|
||||
errorType: AgentRuntimeErrorType.AgentRuntimeError,
|
||||
message: 'Failed query: select "id" from "messages"',
|
||||
}),
|
||||
).toBe(AgentRuntimeErrorType.DatabasePersistError);
|
||||
});
|
||||
|
||||
it('leaves a 500 wrapper unrefined when nothing matches', () => {
|
||||
expect(
|
||||
refineErrorCode({
|
||||
errorType: String(ChatErrorType.InternalServerError),
|
||||
message: 'Agent state not found for operation op_xxx',
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
// The HTTP-status fallback is provider-only: a leading "429"/"500" in a
|
||||
// harness/DB/Redis throw is not a real upstream status and must NOT recast
|
||||
// the error with provider retry/failure semantics.
|
||||
it('does not apply the HTTP-status fallback to un-typed wrappers', () => {
|
||||
expect(
|
||||
refineErrorCode({
|
||||
errorType: String(ChatErrorType.InternalServerError),
|
||||
message: '429 some harness throw with no registered pattern',
|
||||
}),
|
||||
).toBeUndefined();
|
||||
expect(
|
||||
refineErrorCode({
|
||||
errorType: AgentRuntimeErrorType.AgentRuntimeError,
|
||||
httpStatus: 500,
|
||||
message: 'opaque internal failure',
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('message-pattern pass', () => {
|
||||
it('reclassifies a rate-limit message into RateLimitExceeded', () => {
|
||||
expect(
|
||||
|
||||
@@ -1,14 +1,35 @@
|
||||
import { AgentRuntimeErrorType, type ILobeAgentRuntimeErrorType } from '@lobechat/types';
|
||||
import {
|
||||
AgentRuntimeErrorType,
|
||||
ChatErrorType,
|
||||
type ILobeAgentRuntimeErrorType,
|
||||
} from '@lobechat/types';
|
||||
|
||||
import { matchErrorPattern } from './match';
|
||||
|
||||
/**
|
||||
* Error codes that are generic enough to be worth re-deriving from the upstream
|
||||
* message / HTTP status. Specific codes assigned by a provider adapter are left
|
||||
* untouched — we only refine the `ProviderBizError` catch-all, which absorbs
|
||||
* any non-OK upstream response that the adapter couldn't name.
|
||||
* Codes whose message is worth running through `matchErrorPattern`.
|
||||
*
|
||||
* Besides the `ProviderBizError` upstream catch-all, this covers the two
|
||||
* fallback wrappers `formatErrorForState` produces for un-typed throws: a raw
|
||||
* `Error` is wrapped as `InternalServerError` (HTTP 500) and any other value as
|
||||
* `AgentRuntimeError`. They must be pattern-refinable so persistence-layer
|
||||
* throws (`Failed query: …`) and state-store drops reach the registry — without
|
||||
* them those land as a bare, un-classified 500.
|
||||
*/
|
||||
const REFINABLE_CODES = new Set<string>([AgentRuntimeErrorType.ProviderBizError]);
|
||||
const PATTERN_REFINABLE_CODES = new Set<string>([
|
||||
AgentRuntimeErrorType.AgentRuntimeError,
|
||||
AgentRuntimeErrorType.ProviderBizError,
|
||||
String(ChatErrorType.InternalServerError),
|
||||
]);
|
||||
|
||||
/**
|
||||
* Codes eligible for the coarse HTTP-status fallback — provider catch-alls
|
||||
* only. A leading "429"/"500" in an upstream body is a real status signal, but
|
||||
* the same digits in a harness/DB/Redis throw (e.g. `Error('500 …')`) are not:
|
||||
* those must keep their original `InternalServerError` / `AgentRuntimeError`
|
||||
* code rather than being recast with provider retry/failure semantics.
|
||||
*/
|
||||
const STATUS_REFINABLE_CODES = new Set<string>([AgentRuntimeErrorType.ProviderBizError]);
|
||||
|
||||
/**
|
||||
* Last-resort mapping from a bare HTTP status to a code, used only when the
|
||||
@@ -50,27 +71,31 @@ export interface RefineErrorInput {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reclassify a generic provider catch-all (`ProviderBizError`) into a more
|
||||
* specific code using the upstream message and HTTP status. Returns the refined
|
||||
* code, or `undefined` when no better classification is found (caller keeps the
|
||||
* original errorType).
|
||||
* Reclassify a generic catch-all (`ProviderBizError`, or the
|
||||
* `InternalServerError` / `AgentRuntimeError` fallback wrappers) into a more
|
||||
* specific code using the message and HTTP status. Returns the refined code, or
|
||||
* `undefined` when no better classification is found (caller keeps the original
|
||||
* errorType).
|
||||
*
|
||||
* Priority:
|
||||
* 1. `matchErrorPattern` over the message — most specific, covers the rich
|
||||
* cases plus the migrated `Upstream*` patterns.
|
||||
* 2. HTTP-status fallback for messages that matched nothing.
|
||||
* cases plus the migrated `Upstream*` patterns. Open to all wrappers.
|
||||
* 2. HTTP-status fallback for messages that matched nothing — provider
|
||||
* catch-alls only (see `STATUS_REFINABLE_CODES`).
|
||||
*/
|
||||
export const refineErrorCode = (
|
||||
input: RefineErrorInput,
|
||||
): ILobeAgentRuntimeErrorType | undefined => {
|
||||
const { errorType, httpStatus, message, provider } = input;
|
||||
if (!errorType || !REFINABLE_CODES.has(errorType)) return undefined;
|
||||
if (!errorType || !PATTERN_REFINABLE_CODES.has(errorType)) return undefined;
|
||||
|
||||
const matched = matchErrorPattern({ errorType, message, provider });
|
||||
if (matched && matched.code !== errorType) return matched.code;
|
||||
|
||||
const byStatus = codeFromHttpStatus(httpStatus ?? leadingStatusFromMessage(message));
|
||||
if (byStatus && byStatus !== errorType) return byStatus;
|
||||
if (STATUS_REFINABLE_CODES.has(errorType)) {
|
||||
const byStatus = codeFromHttpStatus(httpStatus ?? leadingStatusFromMessage(message));
|
||||
if (byStatus && byStatus !== errorType) return byStatus;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
};
|
||||
|
||||
@@ -418,6 +418,18 @@ export const ERROR_CODE_SPECS: SpecMap = {
|
||||
description:
|
||||
'Context-engine pipeline processor crashed ("Processor [<name>] execution failed").',
|
||||
},
|
||||
[AgentRuntimeErrorType.StateStoreReadError]: {
|
||||
code: AgentRuntimeErrorType.StateStoreReadError,
|
||||
numericId: 7007,
|
||||
category: 'stream',
|
||||
severity: 'warning',
|
||||
attribution: 'system',
|
||||
httpStatus: 500,
|
||||
retryable: false,
|
||||
countAsFailure: false,
|
||||
description:
|
||||
'State-store (Redis / Upstash) blocking read (XREAD / BLPOP) aborted because the caller disconnected ("ERR caller gone") — benign client abandonment.',
|
||||
},
|
||||
|
||||
// ─── 8xxx Provider (catch-all) ────────────────────────────────────────
|
||||
[AgentRuntimeErrorType.AgentRuntimeError]: {
|
||||
|
||||
@@ -137,6 +137,15 @@ export const AgentRuntimeErrorType = {
|
||||
* DB, …). Harness-side infra — the agent state layer, not the LLM provider.
|
||||
*/
|
||||
StateStorePersistError: 'StateStorePersistError',
|
||||
/**
|
||||
* A blocking state-store read (XREAD / BLPOP, e.g. consuming the agent event
|
||||
* stream or waiting on a tool result) was aborted because the originating
|
||||
* caller disconnected — Upstash replies "ERR caller gone". Benign client
|
||||
* abandonment tied to the request lifecycle, not a harness fault; kept
|
||||
* distinct from the write-side StateStorePersistError so it is not counted
|
||||
* as a failure.
|
||||
*/
|
||||
StateStoreReadError: 'StateStoreReadError',
|
||||
/**
|
||||
* A context-engine pipeline processor threw while building the prompt
|
||||
* context ("Processor [<name>] execution failed"). Harness-side bug in the
|
||||
|
||||
Reference in New Issue
Block a user