mirror of
https://github.com/lobehub/lobe-chat.git
synced 2026-06-14 03:30:19 +00:00
🐛 fix(memory-user-memory): should pre-process date & time (#10979)
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
import { MemorySourceType } from '@lobechat/types';
|
||||
import { convertLocomoFile } from '../../src/converters/locomo';
|
||||
import { exit } from 'node:process';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
|
||||
const baseUrl = process.env.MEMORY_USER_MEMORY_LOBEHUB_BASE_URL;
|
||||
const benchmarkLoCoMoFile = process.env.MEMORY_USER_MEMORY_BENCHMARKS_LOCOMO_DATASETS;
|
||||
@@ -38,12 +37,6 @@ async function main() {
|
||||
speakerRoles: { defaultRole: 'user', speakerA: 'user', speakerB: 'assistant' },
|
||||
topicIdPrefix: 'sample',
|
||||
});
|
||||
writeFile(
|
||||
'locomo-ingest-payloads.json',
|
||||
JSON.stringify(payloads, null, 2),
|
||||
).catch(() => {
|
||||
// ignore
|
||||
});
|
||||
|
||||
console.log(
|
||||
`[@lobechat/memory-user-memory/benchmarks/locomo] ingesting ${payloads.length} samples to ${baseUrl} (one user per sample)`,
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
"@lobechat/model-runtime": "workspace:*",
|
||||
"@lobechat/prompts": "workspace:*",
|
||||
"dotenv": "^17.2.3",
|
||||
"dayjs": "^1.11.11",
|
||||
"ora": "^9.0.0",
|
||||
"unist-builder": "^4.0.0",
|
||||
"xast-util-to-xml": "^4.0.0",
|
||||
|
||||
@@ -2,6 +2,9 @@ import { readFileSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
import type { MemorySourceType } from '@lobechat/types';
|
||||
import { MemorySourceType as MemorySourceTypeEnum } from '@lobechat/types';
|
||||
import dayjs from 'dayjs';
|
||||
import customParseFormat from 'dayjs/plugin/customParseFormat';
|
||||
import utc from 'dayjs/plugin/utc';
|
||||
|
||||
export type LocomoQASample = {
|
||||
conversation: Record<string, unknown>;
|
||||
@@ -63,6 +66,23 @@ export type BuildIngestOptions = {
|
||||
|
||||
const SESSION_KEY_REGEX = /^session_(\d+)$/;
|
||||
|
||||
dayjs.extend(customParseFormat);
|
||||
dayjs.extend(utc);
|
||||
|
||||
/**
 * Normalises a session date string into an ISO-8601 timestamp.
 *
 * The value is first matched strictly against the custom formats
 * `h:mm a [on] D MMMM, YYYY` and `h:mm a [on] D MMM, YYYY`
 * (for example "1:56 pm on 8 May, 2023") and interpreted as UTC.
 * If neither format matches, a warning is logged and the native `Date`
 * parser is tried as a last resort.
 *
 * @param value - Raw date string; may be missing or empty.
 * @returns The ISO-8601 string, or `undefined` when the value is missing,
 *          empty, or cannot be parsed by either strategy.
 */
const parseDate = (value?: string): string | undefined => {
  if (!value) return undefined;

  // Strict mode requires the input to round-trip through the format exactly,
  // so stray leading/trailing whitespace would otherwise force the
  // fallback path below even for well-formed dates.
  const candidate = value.trim();

  const formats = ['h:mm a [on] D MMMM, YYYY', 'h:mm a [on] D MMM, YYYY'];
  for (const format of formats) {
    const parsed = dayjs.utc(candidate, format, true);
    if (parsed.isValid()) return parsed.toISOString();
  }

  console.warn(`[locomo converter] failed to parse date "${value}" with custom format, falling back to Date parser`);

  // NOTE(review): the native parser treats non-ISO strings as host-local time
  // (and its handling of them is implementation-defined), unlike the UTC path above.
  const fallback = new Date(value);
  return Number.isNaN(fallback.getTime()) ? undefined : fallback.toISOString();
};
|
||||
|
||||
const normalizeArray = (value?: string | string[]) => {
|
||||
if (!value) return [];
|
||||
return Array.isArray(value) ? value.filter(Boolean) : [value];
|
||||
@@ -77,7 +97,7 @@ const buildTurnText = (turn: LocomoTurn, includeImageCaptions?: boolean) => {
|
||||
};
|
||||
|
||||
const extractSessions = (conversation: Record<string, unknown>): LocomoSession[] => {
|
||||
const sessions: { session: LocomoSession; order: number }[] = [];
|
||||
const sessions: { order: number, session: LocomoSession; }[] = [];
|
||||
|
||||
Object.entries(conversation).forEach(([key, value]) => {
|
||||
const match = key.match(SESSION_KEY_REGEX);
|
||||
@@ -114,11 +134,18 @@ export const buildIngestPayload = (
|
||||
const speakerB = sample.conversation['speaker_b'] as string | undefined;
|
||||
|
||||
const sessions = extractSessions(sample.conversation);
|
||||
const sessionPayloads: IngestSessionPayload[] = sessions.map((session) => ({
|
||||
const sessionPayloads: IngestSessionPayload[] = sessions.map((session) => {
|
||||
if (!session.dateTime) {
|
||||
console.warn(`[locomo converter] session ${session.id} is missing dateTime, turns will have no createdAt`);
|
||||
}
|
||||
|
||||
console.log(`[locomo converter] processing sample ${sample.sample_id} session ${session.id} (on ${session.dateTime ?? 'unknown dateTime'}) with ${session.turns.length} turns`);
|
||||
|
||||
return {
|
||||
sessionId: session.id,
|
||||
timestamp: session.dateTime,
|
||||
timestamp: parseDate(session.dateTime),
|
||||
turns: session.turns.map((turn) => ({
|
||||
createdAt: session.dateTime,
|
||||
createdAt: parseDate(session.dateTime),
|
||||
diaId: turn.dia_id,
|
||||
imageCaption: normalizeArray(turn.blip_caption).join('\n') || undefined,
|
||||
imageUrls: normalizeArray(turn.img_url).length ? normalizeArray(turn.img_url) : undefined,
|
||||
@@ -126,7 +153,8 @@ export const buildIngestPayload = (
|
||||
speaker: turn.speaker,
|
||||
text: buildTurnText(turn, options.includeImageCaptions),
|
||||
})),
|
||||
}));
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
force: true,
|
||||
|
||||
Reference in New Issue
Block a user