mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-13 19:20:04 +00:00
🐛 fix(model-runtime): classify "Agent state not found" as StateStoreReadError (#15778)
When `coordinator.loadAgentState(operationId)` returns null, a raw
`Error("Agent state not found for operation …")` is thrown, which (after the
refine fix) would otherwise land as a bare 500. This is a state-store READ
failure, so route it to StateStoreReadError alongside the caller-gone abort.
Because losing an operation's state is a genuine system fault (not benign
client abandonment), promote StateStoreReadError to countAsFailure: true /
severity: error. `ERR caller gone` now counts as a failure too — an accepted
trade-off, since both are system-side read failures worth tracking.
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -70,7 +70,7 @@ export default {
|
||||
StateStorePersistError:
|
||||
'A temporary issue with the conversation state store interrupted this operation. Please try again; if it persists, contact support.',
|
||||
StateStoreReadError:
|
||||
'This operation was ended because the connection closed before it finished. This is usually harmless — reopen the conversation to continue.',
|
||||
'This operation could not be resumed because its session state was unavailable. Please reopen the conversation to continue; if it persists, contact support.',
|
||||
StreamChunkError:
|
||||
'Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.',
|
||||
UpstreamGatewayError:
|
||||
|
||||
@@ -102,12 +102,20 @@ describe('matchErrorPattern', () => {
|
||||
).toBe(AgentRuntimeErrorType.StateStorePersistError);
|
||||
});
|
||||
|
||||
it('classifies a caller-gone blocking-read abort as StateStoreReadError (benign, not a persist failure)', () => {
|
||||
it('classifies a caller-gone blocking-read abort as StateStoreReadError', () => {
|
||||
expect(matchErrorPattern({ message: 'ERR caller gone' })?.code).toBe(
|
||||
AgentRuntimeErrorType.StateStoreReadError,
|
||||
);
|
||||
});
|
||||
|
||||
it('classifies a missing-agent-state read as StateStoreReadError', () => {
|
||||
expect(
|
||||
matchErrorPattern({
|
||||
message: 'Agent state not found for operation op_1781276404066_agt_x_tpc_y_z',
|
||||
})?.code,
|
||||
).toBe(AgentRuntimeErrorType.StateStoreReadError);
|
||||
});
|
||||
|
||||
it('classifies harness JS runtime crashes as AgentRuntimeError', () => {
|
||||
for (const message of [
|
||||
'e.trim is not a function',
|
||||
|
||||
@@ -515,15 +515,21 @@ export const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// StateStoreReadError — a blocking state-store READ (XREAD / BLPOP) aborted
|
||||
// because the caller disconnected. Benign client abandonment; must precede
|
||||
// StateStorePersistError so the write-side bucket doesn't claim it.
|
||||
// StateStoreReadError — a state-store READ failed: either a blocking read
|
||||
// (XREAD / BLPOP) aborted because the caller disconnected, or the operation's
|
||||
// agent state could not be loaded. Must precede StateStorePersistError so the
|
||||
// write-side bucket doesn't claim it.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
{
|
||||
code: AgentRuntimeErrorType.StateStoreReadError,
|
||||
match: sub('ERR caller gone'),
|
||||
note: 'Upstash aborts the in-flight blocking read (XREAD/BLPOP) when the originating request is already gone.',
|
||||
},
|
||||
{
|
||||
code: AgentRuntimeErrorType.StateStoreReadError,
|
||||
match: sub('Agent state not found for operation'),
|
||||
note: 'coordinator.loadAgentState() returned null — the operation state was evicted/cleaned up before this read.',
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// StateStorePersistError — Redis / Upstash agent-state store (NOT the LLM
|
||||
|
||||
@@ -39,7 +39,7 @@ describe('refineErrorCode', () => {
|
||||
expect(
|
||||
refineErrorCode({
|
||||
errorType: String(ChatErrorType.InternalServerError),
|
||||
message: 'Agent state not found for operation op_xxx',
|
||||
message: 'some opaque internal failure with no registered pattern',
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
@@ -422,13 +422,13 @@ export const ERROR_CODE_SPECS: SpecMap = {
|
||||
code: AgentRuntimeErrorType.StateStoreReadError,
|
||||
numericId: 7007,
|
||||
category: 'stream',
|
||||
severity: 'warning',
|
||||
severity: 'error',
|
||||
attribution: 'system',
|
||||
httpStatus: 500,
|
||||
retryable: false,
|
||||
countAsFailure: false,
|
||||
countAsFailure: true,
|
||||
description:
|
||||
'State-store (Redis / Upstash) blocking read (XREAD / BLPOP) aborted because the caller disconnected ("ERR caller gone") — benign client abandonment.',
|
||||
'State-store (Redis / Upstash) read failed: a blocking read (XREAD / BLPOP) aborted because the caller disconnected ("ERR caller gone"), or the operation\'s agent state could not be loaded ("Agent state not found for operation …"). System-side — counts as a failure.',
|
||||
},
|
||||
|
||||
// ─── 8xxx Provider (catch-all) ────────────────────────────────────────
|
||||
|
||||
@@ -138,12 +138,12 @@ export const AgentRuntimeErrorType = {
|
||||
*/
|
||||
StateStorePersistError: 'StateStorePersistError',
|
||||
/**
|
||||
* A blocking state-store read (XREAD / BLPOP, e.g. consuming the agent event
|
||||
* stream or waiting on a tool result) was aborted because the originating
|
||||
* caller disconnected — Upstash replies "ERR caller gone". Benign client
|
||||
* abandonment tied to the request lifecycle, not a harness fault; kept
|
||||
* distinct from the write-side StateStorePersistError so it is not counted
|
||||
* as a failure.
|
||||
* A state-store (Redis / Upstash) READ failed: either a blocking read
|
||||
* (XREAD / BLPOP, consuming the agent event stream or waiting on a tool
|
||||
* result) was aborted because the caller disconnected ("ERR caller gone"), or
|
||||
* the operation's agent state could not be loaded ("Agent state not found for
|
||||
* operation …"). System-side read failure, kept distinct from the write-side
|
||||
* StateStorePersistError; counts as a failure.
|
||||
*/
|
||||
StateStoreReadError: 'StateStoreReadError',
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user