🐛 fix(memory-user-memory): should pre-process date & time (#10979)

This commit is contained in:
Neko
2025-12-26 03:39:59 +08:00
committed by GitHub
parent 4f592ce100
commit c2bcf73f9d
3 changed files with 34 additions and 12 deletions
@@ -1,7 +1,6 @@
import { MemorySourceType } from '@lobechat/types';
import { convertLocomoFile } from '../../src/converters/locomo';
import { exit } from 'node:process';
import { writeFile } from 'node:fs/promises';
const baseUrl = process.env.MEMORY_USER_MEMORY_LOBEHUB_BASE_URL;
const benchmarkLoCoMoFile = process.env.MEMORY_USER_MEMORY_BENCHMARKS_LOCOMO_DATASETS;
@@ -38,12 +37,6 @@ async function main() {
speakerRoles: { defaultRole: 'user', speakerA: 'user', speakerB: 'assistant' },
topicIdPrefix: 'sample',
});
writeFile(
'locomo-ingest-payloads.json',
JSON.stringify(payloads, null, 2),
).catch(() => {
// ignore
});
console.log(
`[@lobechat/memory-user-memory/benchmarks/locomo] ingesting ${payloads.length} samples to ${baseUrl} (one user per sample)`,
+1
View File
@@ -20,6 +20,7 @@
"@lobechat/model-runtime": "workspace:*",
"@lobechat/prompts": "workspace:*",
"dotenv": "^17.2.3",
"dayjs": "^1.11.11",
"ora": "^9.0.0",
"unist-builder": "^4.0.0",
"xast-util-to-xml": "^4.0.0",
@@ -2,6 +2,9 @@ import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import type { MemorySourceType } from '@lobechat/types';
import { MemorySourceType as MemorySourceTypeEnum } from '@lobechat/types';
import dayjs from 'dayjs';
import customParseFormat from 'dayjs/plugin/customParseFormat';
import utc from 'dayjs/plugin/utc';
export type LocomoQASample = {
conversation: Record<string, unknown>;
@@ -63,6 +66,23 @@ export type BuildIngestOptions = {
const SESSION_KEY_REGEX = /^session_(\d+)$/;
dayjs.extend(customParseFormat);
dayjs.extend(utc);
/**
 * Converts a LoCoMo session timestamp into an ISO-8601 string.
 *
 * The input is first matched strictly, interpreted as UTC, against the two
 * layouts listed in `formats` (full and abbreviated month name, e.g.
 * "1:56 pm on 8 May, 2023"). If neither layout matches, a warning is logged
 * and the native `Date` parser is tried as a last resort.
 *
 * @param value - Raw timestamp text; may be missing.
 * @returns The ISO-8601 representation, or `undefined` when the value is
 *   missing, blank, or cannot be parsed by either strategy.
 */
const parseDate = (value?: string): string | undefined => {
  // Strict parsing rejects surrounding whitespace, so normalise it up front
  // instead of letting padded-but-valid timestamps fall through to the
  // much less predictable native parser.
  const input = value?.trim();
  if (!input) return undefined;

  const formats = ['h:mm a [on] D MMMM, YYYY', 'h:mm a [on] D MMM, YYYY'];
  for (const format of formats) {
    const parsed = dayjs.utc(input, format, true);
    if (parsed.isValid()) return parsed.toISOString();
  }

  console.warn(`[locomo converter] failed to parse date "${value}" with custom format, falling back to Date parser`);

  // NOTE(review): the native parser is implementation-defined for non-ISO
  // strings and typically assumes the local time zone when no offset is
  // present, so results from this branch are not guaranteed to be UTC-based
  // like the strict path above.
  const fallback = new Date(input);
  return Number.isNaN(fallback.getTime()) ? undefined : fallback.toISOString();
};
const normalizeArray = (value?: string | string[]) => {
if (!value) return [];
return Array.isArray(value) ? value.filter(Boolean) : [value];
@@ -77,7 +97,7 @@ const buildTurnText = (turn: LocomoTurn, includeImageCaptions?: boolean) => {
};
const extractSessions = (conversation: Record<string, unknown>): LocomoSession[] => {
const sessions: { session: LocomoSession; order: number }[] = [];
const sessions: { order: number, session: LocomoSession; }[] = [];
Object.entries(conversation).forEach(([key, value]) => {
const match = key.match(SESSION_KEY_REGEX);
@@ -114,11 +134,18 @@ export const buildIngestPayload = (
const speakerB = sample.conversation['speaker_b'] as string | undefined;
const sessions = extractSessions(sample.conversation);
const sessionPayloads: IngestSessionPayload[] = sessions.map((session) => ({
const sessionPayloads: IngestSessionPayload[] = sessions.map((session) => {
if (!session.dateTime) {
console.warn(`[locomo converter] session ${session.id} is missing dateTime, turns will have no createdAt`);
}
console.log(`[locomo converter] processing sample ${sample.sample_id} session ${session.id} (on ${session.dateTime ?? 'unknown dateTime'}) with ${session.turns.length} turns`);
return {
sessionId: session.id,
timestamp: session.dateTime,
timestamp: parseDate(session.dateTime),
turns: session.turns.map((turn) => ({
createdAt: session.dateTime,
createdAt: parseDate(session.dateTime),
diaId: turn.dia_id,
imageCaption: normalizeArray(turn.blip_caption).join('\n') || undefined,
imageUrls: normalizeArray(turn.img_url).length ? normalizeArray(turn.img_url) : undefined,
@@ -126,7 +153,8 @@ export const buildIngestPayload = (
speaker: turn.speaker,
text: buildTurnText(turn, options.includeImageCaptions),
})),
}));
}
});
return {
force: true,