🐛 fix(agent-runtime): persist assistant reasoning to DB (#15690)

2026-06-14 03:30:19 +00:00 · 2026-06-11 21:05:23 +08:00
parent 813d756b9c
commit f6c23e3654
2 changed files with 218 additions and 8 deletions
@@ -202,6 +202,51 @@ const isEmptyModelCompletion = (params: {
  return true;
 };

+/**
+ * Minimal structural view of a message node, limited to the fields the
+ * reasoning stripper reads. Every field is optional.
+ */
+interface ReasoningReplayNode {
+  /** Nested nodes that are walked recursively. */
+  children?: ReasoningReplayNode[];
+  /** Member nodes that are walked recursively. */
+  members?: ReasoningReplayNode[];
+  /** Payload that is removed from the replayed copy; its shape is not inspected. */
+  reasoning?: unknown;
+}
+
+/**
+ * Returns `messages` with every `reasoning` property removed, descending into
+ * `children` and `members` recursively. No role check is performed: any node
+ * that carries a `reasoning` key is stripped.
+ *
+ * The input is never mutated. A node with nothing to strip (in itself or in
+ * its descendants) keeps its original identity, and the original array is
+ * returned as-is when no message changed.
+ *
+ * @param messages - Messages whose stored reasoning must not be replayed.
+ * @returns The same array when untouched, otherwise a new array of stripped copies.
+ */
+const stripAssistantReasoningForReplay = (messages: UIChatMessage[]): UIChatMessage[] => {
+  /** Strips each node of an optional list and reports whether any node was replaced. */
+  function stripNodes(nodes: ReasoningReplayNode[] | undefined) {
+    const stripped = nodes?.map((node) => stripNode(node));
+    const dirty = stripped?.some((node, index) => node !== nodes?.[index]) ?? false;
+
+    return { dirty, stripped };
+  }
+
+  /** Strips a single node, returning the node itself when nothing had to change. */
+  function stripNode<T extends ReasoningReplayNode>(node: T): T {
+    const nextChildren = stripNodes(node.children);
+    const nextMembers = stripNodes(node.members);
+    // `in` on purpose: a key that is present but holds `undefined` is still dropped.
+    const carriesReasoning = 'reasoning' in node;
+
+    if (!nextChildren.dirty && !nextMembers.dirty && !carriesReasoning) return node;
+
+    const { reasoning: _discarded, ...rest } = node;
+
+    // Only override the nested lists that existed on the source node.
+    const replacements: Pick<ReasoningReplayNode, 'children' | 'members'> = {};
+    if (nextChildren.stripped) replacements.children = nextChildren.stripped;
+    if (nextMembers.stripped) replacements.members = nextMembers.stripped;
+
+    return { ...rest, ...replacements } as T;
+  }
+
+  const sanitized = messages.map((message) => stripNode(message));
+  const untouched = sanitized.every((message, index) => message === messages[index]);
+
+  return untouched ? messages : sanitized;
+};
+
 // The `ToolType` value 'function'.
 // NOTE(review): the GEN_AI prefix suggests this labels tool calls in telemetry — confirm against usages.
 const GEN_AI_FUNCTION_TOOL_TYPE: ToolType = 'function';

 // Closed set of tool-failure kinds: 'replan', 'retry' or 'stop'.
 // How each kind is acted on is decided by code outside this view.
 type ToolFailureKind = 'replan' | 'retry' | 'stop';
@@ -704,13 +749,16 @@ export const createRuntimeExecutors = (
          modelSupportsPreserveThinking && typeof preserveThinkingConfigured === 'boolean'
            ? preserveThinkingConfigured
            : undefined;
+        const messagesForContext = shouldReplayAssistantReasoning
+          ? (llmPayload.messages as UIChatMessage[])
+          : stripAssistantReasoningForReplay(llmPayload.messages as UIChatMessage[]);

        // Extract <refer_topic> tags from messages and fetch summaries.
        // Skip if messages already contain injected topic_reference_context
        // (e.g., from client-side contextEngineering preprocessing) to avoid double injection.
        let topicReferences;
        const alreadyHasTopicRefs = (
-          llmPayload.messages as Array<{ content: string | unknown }>
+          messagesForContext as Array<{ content: string | unknown }>
        ).some(
          (m) => typeof m.content === 'string' && m.content.includes('topic_reference_context'),
        );
@@ -719,7 +767,7 @@ export const createRuntimeExecutors = (
          const topicModel = new TopicModel(ctx.serverDB, ctx.userId, ctx.workspaceId);
          const messageModel = new MessageModelClass(ctx.serverDB, ctx.userId, ctx.workspaceId);
          topicReferences = await resolveTopicReferences(
-            llmPayload.messages as Array<{ content: string | unknown }>,
+            messagesForContext as Array<{ content: string | unknown }>,
            async (topicId) => topicModel.findById(topicId),
            async (topicId) => {
              const topic = await topicModel.findById(topicId);
@@ -761,7 +809,7 @@ export const createRuntimeExecutors = (
          agentConfig?.slug === 'web-onboarding' ||
          resolved.enabledToolIds.includes('lobe-web-onboarding');
        const alreadyHasOnboardingContext = (
-          llmPayload.messages as Array<{ content: string | unknown }>
+          messagesForContext as Array<{ content: string | unknown }>
        ).some((message) => {
          if (typeof message.content !== 'string') return false;

@@ -1042,7 +1090,7 @@ export const createRuntimeExecutors = (
                name: kb.name ?? '',
              })),
          },
-          messages: llmPayload.messages as UIChatMessage[],
+          messages: messagesForContext,
          model,
          provider,
          systemRole: agentConfig.systemRole ?? undefined,
@@ -1070,14 +1118,14 @@ export const createRuntimeExecutors = (
          CONTEXT_ENGINEERING_SPAN_NAME,
          {
            attributes: buildContextEngineeringAttributes({
-              hasImages: (llmPayload.messages as Array<{ content?: unknown }>).some(
+              hasImages: (messagesForContext as Array<{ content?: unknown }>).some(
                (m) =>
                  Array.isArray(m.content) &&
                  (m.content as Array<{ type?: string }>).some((p) => p?.type === 'image_url'),
              ),
              historyCompressed:
-                Array.isArray(llmPayload.messages) &&
-                llmPayload.messages.some((m: { role?: string }) => m?.role === 'compressedGroup'),
+                Array.isArray(messagesForContext) &&
+                messagesForContext.some((m: { role?: string }) => m?.role === 'compressedGroup'),
              knowledgeCount:
                (contextEngineInput.knowledge?.knowledgeBases?.length ?? 0) +
                (contextEngineInput.knowledge?.fileContents?.length ?? 0),
@@ -1085,7 +1133,7 @@ export const createRuntimeExecutors = (
                (contextEngineInput.knowledge?.knowledgeBases?.length ?? 0) > 0 ||
                (contextEngineInput.knowledge?.fileContents?.length ?? 0) > 0,
              memoryInjected: Boolean(contextEngineInput.userMemory?.memories),
-              messageCount: llmPayload.messages.length,
+              messageCount: messagesForContext.length,
              operationId,
              stepIndex,
              systemRoleLength: contextEngineInput.systemRole?.length,
@@ -1611,6 +1611,168 @@ describe('RuntimeExecutors', () => {
        );
      });

+      it('should strip stored assistant reasoning before context processing when replay gate is off', async () => { // Top-level assistant message: reasoning must not reach the engine input.
+        const ctxWithConfig: RuntimeExecutorContext = {
+          ...ctx,
+          agentConfig: { // No chatConfig.preserveThinking is set in this case.
+            plugins: [],
+            systemRole: 'test',
+          },
+        };
+        const executors = createRuntimeExecutors(ctxWithConfig);
+        const state = createMockState();
+        const messages = [ // Kept in a variable so the input can be checked for mutation afterwards.
+          {
+            content: 'Previous answer',
+            reasoning: { content: 'stored reasoning should stay display-only' },
+            role: 'assistant',
+          },
+          { content: 'Continue', role: 'user' },
+        ];
+
+        await executors.call_llm!(
+          {
+            payload: {
+              messages,
+              model: 'gpt-4',
+              provider: 'openai',
+            },
+            type: 'call_llm' as const,
+          },
+          state,
+        );
+
+        const engineInput = engineSpy.mock.calls[0][0]; // First argument of the first recorded engineSpy call.
+        expect(engineInput.messages[0]).toEqual({ // Exact match: a leftover `reasoning` key would fail here.
+          content: 'Previous answer',
+          role: 'assistant',
+        });
+        expect(messages[0]).toEqual( // The caller's message object must still carry its reasoning (no mutation).
+          expect.objectContaining({
+            reasoning: { content: 'stored reasoning should stay display-only' },
+          }),
+        );
+      });
+
+      it('should strip stored reasoning from grouped assistant messages before context processing when replay gate is off', async () => { // Nested case: reasoning lives under `children` / `members`.
+        const ctxWithConfig: RuntimeExecutorContext = {
+          ...ctx,
+          agentConfig: { // No chatConfig.preserveThinking is set in this case.
+            plugins: [],
+            systemRole: 'test',
+          },
+        };
+        const executors = createRuntimeExecutors(ctxWithConfig);
+        const state = createMockState();
+        const groupedChild = { // Reached through messages[0].children.
+          content: 'Grouped answer',
+          id: 'group-child-1',
+          reasoning: { content: 'grouped child reasoning should stay display-only' },
+          role: 'assistant',
+        };
+        const councilMember = { // Reached through messages[1].members.
+          content: 'Council member answer',
+          id: 'member-1',
+          reasoning: { content: 'member reasoning should stay display-only' },
+          role: 'assistant',
+        };
+        const nestedCouncilChild = { // Reached through messages[1].members[1].children (two levels deep).
+          content: 'Nested council answer',
+          id: 'member-child-1',
+          reasoning: { content: 'nested member reasoning should stay display-only' },
+          role: 'assistant',
+        };
+        const messages = [
+          {
+            children: [groupedChild],
+            content: '',
+            id: 'group-1',
+            role: 'assistantGroup',
+          },
+          {
+            content: '',
+            id: 'council-1',
+            members: [
+              councilMember,
+              {
+                children: [nestedCouncilChild],
+                content: '',
+                id: 'member-group-1',
+                role: 'assistantGroup',
+              },
+            ],
+            role: 'agentCouncil',
+          },
+          { content: 'Continue', role: 'user' },
+        ];
+
+        await executors.call_llm!(
+          {
+            payload: {
+              messages,
+              model: 'gpt-4',
+              provider: 'openai',
+            },
+            type: 'call_llm' as const,
+          },
+          state,
+        );
+
+        const engineInput = engineSpy.mock.calls[0][0]; // First argument of the first recorded engineSpy call.
+        expect(engineInput.messages[0].children[0]).not.toHaveProperty('reasoning'); // children path
+        expect(engineInput.messages[1].members[0]).not.toHaveProperty('reasoning'); // members path
+        expect(engineInput.messages[1].members[1].children[0]).not.toHaveProperty('reasoning'); // members -> children path
+        expect(groupedChild).toHaveProperty('reasoning'); // The original fixture objects must be left unmutated.
+        expect(councilMember).toHaveProperty('reasoning');
+        expect(nestedCouncilChild).toHaveProperty('reasoning');
+      });
+
+      it('should keep stored assistant reasoning before context processing when replay gate is enabled', async () => { // Counterpart case: reasoning must pass through untouched.
+        const ctxWithConfig: RuntimeExecutorContext = {
+          ...ctx,
+          agentConfig: {
+            chatConfig: { preserveThinking: true }, // Differs from the gate-off cases, which set no chatConfig.
+            plugins: [],
+            systemRole: 'test',
+          },
+        };
+        const executors = createRuntimeExecutors(ctxWithConfig);
+        const state = createMockState({
+          modelRuntimeConfig: { // NOTE(review): presumably this model/provider pair supports preserve-thinking — confirm against the model capability lookup.
+            model: 'qwen3.6-plus',
+            provider: 'qwen',
+          },
+        });
+
+        await executors.call_llm!(
+          {
+            payload: {
+              messages: [
+                {
+                  content: 'Previous answer',
+                  reasoning: { content: 'reasoning to replay' },
+                  role: 'assistant',
+                },
+                { content: 'Continue', role: 'user' },
+              ],
+              model: 'qwen3.6-plus',
+              provider: 'qwen',
+            },
+            type: 'call_llm' as const,
+          },
+          state,
+        );
+
+        const engineInput = engineSpy.mock.calls[0][0]; // First argument of the first recorded engineSpy call.
+        expect(engineInput.messages[0]).toEqual( // objectContaining: extra keys are tolerated, but reasoning must be present.
+          expect.objectContaining({
+            content: 'Previous answer',
+            reasoning: { content: 'reasoning to replay' },
+            role: 'assistant',
+          }),
+        );
+      });
+
      it('should not call serverMessagesEngine when agentConfig is not set', async () => {
        const executors = createRuntimeExecutors(ctx); // ctx without agentConfig
        const state = createMockState();