2024-08-21 21:28:29 +08:00
|
|
|
// @vitest-environment node
|
2025-08-05 17:25:27 +08:00
|
|
|
import { eq } from 'drizzle-orm';
|
2024-12-03 21:54:14 +08:00
|
|
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
2024-08-21 21:28:29 +08:00
|
|
|
|
2026-02-05 21:40:43 +08:00
|
|
|
import { uuid } from '@/utils/uuid';
|
2024-08-21 21:28:29 +08:00
|
|
|
|
2026-02-05 21:40:43 +08:00
|
|
|
import { getTestDB } from '../../core/getTestDB';
|
2026-06-09 15:54:26 +08:00
|
|
|
import {
|
|
|
|
|
chunks,
|
|
|
|
|
embeddings,
|
|
|
|
|
fileChunks,
|
|
|
|
|
files,
|
|
|
|
|
unstructuredChunks,
|
|
|
|
|
users,
|
|
|
|
|
workspaces,
|
|
|
|
|
} from '../../schemas';
|
2026-02-05 21:40:43 +08:00
|
|
|
import type { LobeChatDatabase } from '../../type';
|
2025-03-29 23:15:24 +08:00
|
|
|
import { ChunkModel } from '../chunk';
|
2024-08-21 21:28:29 +08:00
|
|
|
import { codeEmbedding, designThinkingQuery, designThinkingQuery2 } from './fixtures/embedding';
|
|
|
|
|
|
2025-03-18 18:50:55 +08:00
|
|
|
// Database handle shared by the whole suite, resolved once through top-level await.
const serverDB: LobeChatDatabase = await getTestDB();

// Fixed identifiers reused by every test in this file.
const userId = 'chunk-model-test-user-id';
const workspaceId = 'chunk-model-workspace';

// Model under test, built from the shared database handle and the test user id.
const chunkModel = new ChunkModel(serverDB, userId);

// File rows inserted before each test; the tests attach chunks to them by id ('1', '2', '3').
// Every row belongs to the test user, so the owner is stamped on in a single pass.
const sharedFileList = [
  {
    fileType: 'application/pdf',
    id: '1',
    name: 'document.pdf',
    size: 1000,
    url: 'https://example.com/document.pdf',
  },
  {
    fileType: 'image/jpeg',
    id: '2',
    name: 'image.jpg',
    size: 500,
    url: 'https://example.com/image.jpg',
  },
  {
    fileType: 'audio/mpeg',
    id: '3',
    name: 'audio.mp3',
    size: 2000,
    url: 'https://example.com/audio.mp3',
  },
].map((file) => ({ ...file, userId }));
|
|
|
|
|
|
|
|
|
|
// Rebuild the fixture rows before every test: clear the users table, then insert the
// test user, a workspace owned by that user, and the shared file rows.
beforeEach(async () => {
  await serverDB.delete(users);

  const testUser = { id: userId };
  await serverDB.insert(users).values([testUser]);

  // The workspace id doubles as its slug.
  const testWorkspace = {
    id: workspaceId,
    name: 'Chunk Workspace',
    primaryOwnerId: userId,
    slug: workspaceId,
  };
  await serverDB.insert(workspaces).values(testWorkspace);

  await serverDB.insert(files).values(sharedFileList);
});
|
|
|
|
|
|
|
|
|
|
// Remove the test user once each test finishes.
// NOTE(review): presumably rows owned by the user are removed through cascading
// deletes — confirm against the schema.
afterEach(async () => {
  const isTestUser = eq(users.id, userId);
  await serverDB.delete(users).where(isTestUser);
});
|
|
|
|
|
|
|
|
|
|
describe('ChunkModel', () => {
|
|
|
|
|
// bulkCreate: inserting several chunks at once, the empty-input case, and failure handling.
describe('bulkCreate', () => {
  /** Reads back every chunk row owned by the test user. */
  const findUserChunks = () =>
    serverDB.query.chunks.findMany({
      where: eq(chunks.userId, userId),
    });

  it('should create multiple chunks', async () => {
    const params = [
      { text: 'Chunk 1', userId },
      { text: 'Chunk 2', userId },
    ];

    await chunkModel.bulkCreate(params, '1');

    const createdChunks = await findUserChunks();
    expect(createdChunks).toHaveLength(2);
    // The read-back query has no ORDER BY, so the row order is not guaranteed.
    // Together with the length check this asserts that exactly the two expected
    // rows exist, in any order.
    expect(createdChunks).toEqual(
      expect.arrayContaining(params.map((param) => expect.objectContaining(param))),
    );
  });

  // Empty input: nothing to insert, so the result should be empty as well.
  it('should handle empty params array', async () => {
    const result = await chunkModel.bulkCreate([], '1');
    expect(result).toHaveLength(0);
  });

  // A failing row must not leave the valid row behind.
  it('should rollback transaction on error', async () => {
    // `as any` is deliberate: the second row carries a non-numeric `index`, which the
    // type checker would otherwise reject before the insert could fail at runtime.
    const invalidParams = [
      { text: 'Chunk 1', userId },
      { index: 'abc', userId }, // This will cause an error
    ] as any;

    await expect(chunkModel.bulkCreate(invalidParams, '1')).rejects.toThrow();

    const createdChunks = await findUserChunks();
    expect(createdChunks).toHaveLength(0);
  });
});
|
|
|
|
|
|
|
|
|
|
// delete: removing a single chunk by its id.
describe('delete', () => {
  it('should delete a chunk by id', async () => {
    // Seed one chunk directly through the database so only `delete` is under test.
    const insertedRows = await serverDB
      .insert(chunks)
      .values({ text: 'Test Chunk', userId })
      .returning();
    const { id } = insertedRows[0];

    await chunkModel.delete(id);

    // The row must no longer be retrievable.
    const remaining = await serverDB.query.chunks.findFirst({
      where: eq(chunks.id, id),
    });
    expect(remaining).toBeUndefined();
  });
});
|
|
|
|
|
|
2024-08-24 01:15:27 +08:00
|
|
|
// deleteOrphanChunks: chunks without a fileChunks link are expected to be removed,
// while chunks that are linked to a file are expected to stay.
describe('deleteOrphanChunks', () => {
  it('should delete orphaned chunks', async () => {
    // Two chunks that never receive a fileChunks row, i.e. the orphans.
    const orphanRows = [
      { text: 'Orphan Chunk 1', userId },
      { text: 'Orphan Chunk 2', userId },
    ];
    await serverDB.insert(chunks).values(orphanRows).returning();

    // One chunk linked to file '1'; it has to survive the cleanup.
    const [linkedChunk] = await serverDB
      .insert(chunks)
      .values([{ text: 'Non-Orphan Chunk', userId }])
      .returning();
    const link = { fileId: '1', chunkId: linkedChunk.id, userId };
    await serverDB.insert(fileChunks).values([link]);

    await chunkModel.deleteOrphanChunks();

    // Only the linked chunk may remain.
    const survivors = await serverDB.query.chunks.findMany();
    expect(survivors).toHaveLength(1);
    expect(survivors[0].id).toBe(linkedChunk.id);
  });

  it('should not delete any chunks when there are no orphans', async () => {
    // Both chunks get linked to a file, so neither is an orphan.
    const [first, second] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId },
        { text: 'Chunk 2', userId },
      ])
      .returning();

    const links = [
      { fileId: '1', chunkId: first.id, userId },
      { fileId: '2', chunkId: second.id, userId },
    ];
    await serverDB.insert(fileChunks).values(links);

    await chunkModel.deleteOrphanChunks();

    // Nothing should have been removed.
    const survivors = await serverDB.query.chunks.findMany();
    expect(survivors).toHaveLength(2);
  });

  it('should not throw an error when the database is empty', async () => {
    // Start from empty chunk and link tables.
    await serverDB.delete(chunks);
    await serverDB.delete(fileChunks);

    const cleanup = chunkModel.deleteOrphanChunks();
    await expect(cleanup).resolves.not.toThrow();
  });
});
|
2024-08-21 21:28:29 +08:00
|
|
|
|
|
|
|
|
// semanticSearch: embedding-based lookup, with and without a file filter.
describe('semanticSearch', () => {
  it('should perform semantic search and return results', async () => {
    const fileId = '1';
    const seeded = await serverDB
      .insert(chunks)
      .values([
        { text: 'Test Chunk 1', userId },
        { text: 'Test Chunk 2', userId },
      ])
      .returning();
    const [designChunk, codeChunk] = seeded;

    // Attach both chunks to the file that the search is filtered by.
    const links = [
      { fileId, chunkId: designChunk.id, userId },
      { fileId, chunkId: codeChunk.id, userId },
    ];
    await serverDB.insert(fileChunks).values(links);

    // NOTE(review): going by the fixture names, designThinkingQuery is presumably much
    // closer to the designThinkingQuery2 search vector than codeEmbedding is — confirm
    // in ./fixtures/embedding.
    await serverDB.insert(embeddings).values([
      { chunkId: designChunk.id, embeddings: designThinkingQuery, userId },
      { chunkId: codeChunk.id, embeddings: codeEmbedding, userId },
    ]);

    const result = await chunkModel.semanticSearch({
      embedding: designThinkingQuery2,
      fileIds: [fileId],
      query: 'design thinking',
    });

    // Both chunks come back, with the design-thinking chunk ranked first.
    expect(result).toHaveLength(2);
    const [top, runnerUp] = result;
    expect(top.id).toBe(designChunk.id);
    expect(runnerUp.id).toBe(codeChunk.id);
    expect(top.similarity).toBeGreaterThan(runnerUp.similarity);
  });

  // Same data as above but without any fileChunks rows and without a file filter.
  it('should perform semantic search without fileIds', async () => {
    const [designChunk, codeChunk] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Test Chunk 1', userId },
        { text: 'Test Chunk 2', userId },
      ])
      .returning();

    await serverDB.insert(embeddings).values([
      { chunkId: designChunk.id, embeddings: designThinkingQuery, userId },
      { chunkId: codeChunk.id, embeddings: codeEmbedding, userId },
    ]);

    const searchParams = {
      embedding: designThinkingQuery2,
      fileIds: undefined,
      query: 'design thinking',
    };
    const result = await chunkModel.semanticSearch(searchParams);

    expect(result).toBeDefined();
    expect(result).toHaveLength(2);
  });

  // Filtering by a file id that has no chunks yields no results.
  it('should return empty array when no matches found', async () => {
    const searchParams = {
      embedding: designThinkingQuery,
      fileIds: ['non-existent-file'],
      query: 'no matches',
    };
    const result = await chunkModel.semanticSearch(searchParams);

    expect(result).toHaveLength(0);
  });
});
|
|
|
|
|
|
|
|
|
|
// bulkCreateUnstructuredChunks: inserting several unstructured chunks in one call.
describe('bulkCreateUnstructuredChunks', () => {
  it('should create multiple unstructured chunks', async () => {
    // The unstructured chunks reference an existing chunk through `compositeId`.
    const [chunk] = await serverDB
      .insert(chunks)
      .values([{ text: 'Chunk 1', userId, index: 0 }])
      .returning();

    const params = [
      { text: 'Unstructured Chunk 1', userId, fileId: '1', parentId: '1', compositeId: chunk.id },
      { text: 'Unstructured Chunk 2', userId, fileId: '1', parentId: '1', compositeId: chunk.id },
    ];

    await chunkModel.bulkCreateUnstructuredChunks(params);

    const createdChunks = await serverDB.query.unstructuredChunks.findMany({
      where: eq(unstructuredChunks.userId, userId),
    });
    expect(createdChunks).toHaveLength(2);
    // The read-back query has no ORDER BY, so the row order is not guaranteed.
    // Together with the length check this asserts that exactly the two expected
    // rows exist, in any order.
    expect(createdChunks).toEqual(
      expect.arrayContaining(params.map((param) => expect.objectContaining(param))),
    );
  });
});
|
|
|
|
|
|
|
|
|
|
// findByFileId: listing the chunks of a file, including how `metadata` is surfaced.
describe('findByFileId', () => {
  it('should find chunks by file id with pagination', async () => {
    const fileId = '1';
    const seeded = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId, index: 0 },
        { text: 'Chunk 2', userId, index: 1 },
        { text: 'Chunk 3', userId, index: 2 },
      ])
      .returning();
    const [first, second, third] = seeded;

    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: first.id, userId },
      { fileId, chunkId: second.id, userId },
      { fileId, chunkId: third.id, userId },
    ]);

    // NOTE(review): the second argument is presumably the page number — confirm
    // against ChunkModel.findByFileId.
    const result = await chunkModel.findByFileId(fileId, 0);

    // All three chunks are returned, in ascending `index` order.
    expect(result).toHaveLength(3);
    const [head, middle, tail] = result;
    expect(head.index).toBe(0);
    expect(middle.index).toBe(1);
    expect(tail.index).toBe(2);
  });

  it('should handle chunks with null metadata and return undefined pageNumber', async () => {
    const fileId = '1';
    const nullMetadataRow = { text: 'Chunk with null metadata', userId, index: 0, metadata: null };
    const [chunk] = await serverDB.insert(chunks).values([nullMetadataRow]).returning();

    const link = { fileId, chunkId: chunk.id, userId };
    await serverDB.insert(fileChunks).values([link]);

    const result = await chunkModel.findByFileId(fileId, 0);

    // Null metadata is passed through and no page number is derived from it.
    expect(result).toHaveLength(1);
    const [found] = result;
    expect(found.metadata).toBeNull();
    expect(found.pageNumber).toBeUndefined();
  });

  it('should handle chunks with metadata containing pageNumber', async () => {
    const fileId = '1';
    const pagedRow = {
      text: 'Chunk with pageNumber',
      userId,
      index: 0,
      // Cast needed because the literal does not match the declared metadata type.
      metadata: { pageNumber: 5 } as any,
    };
    const [chunk] = await serverDB.insert(chunks).values([pagedRow]).returning();

    const link = { fileId, chunkId: chunk.id, userId };
    await serverDB.insert(fileChunks).values([link]);

    const result = await chunkModel.findByFileId(fileId, 0);

    // The page number stored in metadata is exposed on the result row.
    expect(result).toHaveLength(1);
    const [found] = result;
    expect(found.pageNumber).toBe(5);
  });
});
|
|
|
|
|
|
|
|
|
|
// getChunksTextByFileId: fetching the text of every chunk attached to a file.
describe('getChunksTextByFileId', () => {
  it('should get chunks text by file id', async () => {
    const fileId = '1';
    const seeded = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId },
        { text: 'Chunk 2', userId },
      ])
      .returning();
    const [first, second] = seeded;

    // Link both chunks to the file being queried.
    const links = [
      { fileId, chunkId: first.id, userId },
      { fileId, chunkId: second.id, userId },
    ];
    await serverDB.insert(fileChunks).values(links);

    const result = await chunkModel.getChunksTextByFileId(fileId);

    expect(result).toHaveLength(2);
    const [head, tail] = result;
    expect(head.text).toBe('Chunk 1');
    expect(tail.text).toBe('Chunk 2');
  });
});
|
|
|
|
|
|
|
|
|
|
// countByFileIds: per-file chunk counts for a list of file ids.
describe('countByFileIds', () => {
  it('should count chunks by file ids', async () => {
    const fileIds = ['1', '2'];
    const seeded = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId, index: 0 },
        { text: 'Chunk 2', userId, index: 1 },
        { text: 'Chunk 3', userId, index: 2 },
      ])
      .returning();
    const [first, second, third] = seeded;

    // Two chunks go to file '1' and one chunk goes to file '2'.
    await serverDB.insert(fileChunks).values([
      { fileId: '1', chunkId: first.id, userId },
      { fileId: '1', chunkId: second.id, userId },
      { fileId: '2', chunkId: third.id, userId },
    ]);

    const result = await chunkModel.countByFileIds(fileIds);

    expect(result).toHaveLength(2);
    const countForFileOne = result.find((r) => r.id === '1')?.count;
    const countForFileTwo = result.find((r) => r.id === '2')?.count;
    expect(countForFileOne).toBe(2);
    expect(countForFileTwo).toBe(1);
  });

  it('should return empty array for empty file ids', async () => {
    const result = await chunkModel.countByFileIds([]);

    expect(result).toHaveLength(0);
  });

  it('should not count workspace chunks from personal scope', async () => {
    // A file, a chunk and their link that all carry the workspace id.
    const workspaceFile = {
      id: 'workspace-file',
      name: 'workspace.pdf',
      url: 'https://example.com/workspace.pdf',
      size: 1000,
      fileType: 'application/pdf',
      userId,
      workspaceId,
    };
    await serverDB.insert(files).values(workspaceFile);

    const [workspaceChunk] = await serverDB
      .insert(chunks)
      .values({ text: 'Workspace Chunk', userId, workspaceId })
      .returning();

    const workspaceLink = {
      chunkId: workspaceChunk.id,
      fileId: 'workspace-file',
      userId,
      workspaceId,
    };
    await serverDB.insert(fileChunks).values(workspaceLink);

    // The suite's model was constructed without a workspace id, and it must not
    // report a count for the workspace-scoped file.
    const counts = chunkModel.countByFileIds(['workspace-file']);
    await expect(counts).resolves.toHaveLength(0);
  });
});
|
|
|
|
|
|
|
|
|
|
// countByFileId: the chunk count for a single file.
describe('countByFileId', () => {
  it('should count chunks by file id', async () => {
    const fileId = '1';
    // Only two rows are inserted, so only two bindings are destructured.
    const [chunk1, chunk2] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId, index: 0 },
        { text: 'Chunk 2', userId, index: 1 },
      ])
      .returning();

    // Link both chunks to the file whose count is requested.
    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: chunk1.id, userId },
      { fileId, chunkId: chunk2.id, userId },
    ]);

    const result = await chunkModel.countByFileId(fileId);

    expect(result).toBe(2);
  });

  // A file id without any linked chunks yields a count of zero rather than an error.
  it('should return 0 for non-existent file id', async () => {
    const result = await chunkModel.countByFileId('non-existent');

    expect(result).toBe(0);
  });
});
|
|
|
|
|
|
|
|
|
|
// semanticSearchForChat: embedding-based lookup used for chat, filtered by file ids.
describe('semanticSearchForChat', () => {
  it('should perform semantic search for chat and return results', async () => {
    const fileId = '1';
    const seeded = await serverDB
      .insert(chunks)
      .values([
        { text: 'Test Chunk 1', userId },
        { text: 'Test Chunk 2', userId },
      ])
      .returning();
    const [designChunk, codeChunk] = seeded;

    // Attach both chunks to the file that the search is filtered by.
    const links = [
      { fileId, chunkId: designChunk.id, userId },
      { fileId, chunkId: codeChunk.id, userId },
    ];
    await serverDB.insert(fileChunks).values(links);

    // NOTE(review): going by the fixture names, designThinkingQuery is presumably much
    // closer to the designThinkingQuery2 search vector than codeEmbedding is — confirm
    // in ./fixtures/embedding.
    await serverDB.insert(embeddings).values([
      { chunkId: designChunk.id, embeddings: designThinkingQuery, userId },
      { chunkId: codeChunk.id, embeddings: codeEmbedding, userId },
    ]);

    const result = await chunkModel.semanticSearchForChat({
      embedding: designThinkingQuery2,
      fileIds: [fileId],
      query: 'design thinking',
    });

    // Both chunks come back, with the design-thinking chunk ranked first.
    expect(result).toHaveLength(2);
    const [top, runnerUp] = result;
    expect(top.id).toBe(designChunk.id);
    expect(runnerUp.id).toBe(codeChunk.id);
    expect(top.similarity).toBeGreaterThan(runnerUp.similarity);
  });
});
|
|
|
|
|
|
|
|
|
|
// mapChunkText: a private helper, reached through bracket access, that converts a
// chunk into the text handed to callers.
describe('mapChunkText', () => {
  it('should map chunk text correctly for non-Table type', () => {
    const plainChunk = { text: 'Normal text', type: 'Text', metadata: {} };

    const mapped = chunkModel['mapChunkText'](plainChunk);

    // Non-table chunks keep their text unchanged.
    expect(mapped).toBe('Normal text');
  });

  it('should map chunk text correctly for Table type', () => {
    const tableChunk = {
      text: 'Table text',
      type: 'Table',
      metadata: { text_as_html: '<table>...</table>' },
    };

    const mapped = chunkModel['mapChunkText'](tableChunk);

    // The expected string is whitespace-sensitive: the continuation lines of the
    // template literal must stay at column 0.
    expect(mapped).toBe(`Table text

content in Table html is below:
<table>...</table>
`);
  });

  it('should handle null text', () => {
    const emptyChunk = { text: null, type: 'Text', metadata: {} };

    const mapped = chunkModel['mapChunkText'](emptyChunk);

    expect(mapped).toBeNull();
  });

  it('should handle missing metadata for Table type', () => {
    const tableWithoutHtml = { text: 'Table text', type: 'Table', metadata: {} };

    const mapped = chunkModel['mapChunkText'](tableWithoutHtml);

    expect(mapped).toContain('Table text');
    expect(mapped).toContain('content in Table html is below:');
    // metadata.text_as_html is undefined, so the literal word ends up in the output.
    expect(mapped).toContain('undefined');
  });
});
|
|
|
|
|
|
|
|
|
|
// findById: looking up a single chunk by its id.
describe('findById', () => {
  it('should find a chunk by id', async () => {
    // Seed one chunk directly through the database so only `findById` is under test.
    const [chunk] = await serverDB
      .insert(chunks)
      .values({ text: 'Test Chunk', userId })
      .returning();

    const result = await chunkModel.findById(chunk.id);

    expect(result).toBeDefined();
    expect(result?.id).toBe(chunk.id);
    expect(result?.text).toBe('Test Chunk');
  });

  // The title now matches the assertion: a missing row comes back as `undefined`, not `null`.
  it('should return undefined for non-existent id', async () => {
    // A freshly generated uuid cannot match any existing row.
    const result = await chunkModel.findById(uuid());
    expect(result).toBeUndefined();
  });
});
|
|
|
|
|
|
|
|
|
|
// semanticSearchForChat edge cases: an empty file filter and the cap on result size.
describe('semanticSearchForChat', () => {
  // An empty file id list should yield no results.
  it('should return empty array when fileIds is empty', async () => {
    const result = await chunkModel.semanticSearchForChat({
      embedding: designThinkingQuery,
      fileIds: [],
      query: 'test',
    });

    expect(result).toHaveLength(0);
  });

  // More matching chunks exist than the search is expected to return.
  it('should limit results to 15 items', async () => {
    const fileId = '1';

    // Create 24 chunks. The map function of Array.from builds each row directly,
    // so no placeholder fill is needed.
    const chunkResult = await serverDB
      .insert(chunks)
      .values(Array.from({ length: 24 }, (_, i) => ({ text: `Test Chunk ${i}`, userId })))
      .returning();

    // Link every chunk to the file that the search is filtered by.
    await serverDB.insert(fileChunks).values(
      chunkResult.map((chunk) => ({
        fileId,
        chunkId: chunk.id,
        userId,
      })),
    );

    // Every chunk gets the same embedding, so all 24 are equally good matches.
    await serverDB.insert(embeddings).values(
      chunkResult.map((chunk) => ({
        chunkId: chunk.id,
        embeddings: designThinkingQuery,
        userId,
      })),
    );

    const result = await chunkModel.semanticSearchForChat({
      embedding: designThinkingQuery2,
      fileIds: [fileId],
      query: 'test',
    });

    expect(result).toHaveLength(15);
  });
});
|
|
|
|
|
});
|