diff --git a/src/memory/manager.embedding-batches.test.ts b/src/memory/manager.embedding-batches.test.ts index db709ce8541..3471ca09924 100644 --- a/src/memory/manager.embedding-batches.test.ts +++ b/src/memory/manager.embedding-batches.test.ts @@ -1,7 +1,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; +import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; const embedBatch = vi.fn(async (texts: string[]) => texts.map(() => [0, 1, 0])); @@ -33,50 +33,23 @@ vi.mock("./embeddings.js", () => ({ describe("memory embedding batches", () => { let fixtureRoot: string; - let caseId = 0; let workspaceDir: string; - let indexPath: string; - let manager: MemoryIndexManager | null = null; + let memoryDir: string; + let indexPathLarge: string; + let indexPathSmall: string; + let managerLarge: MemoryIndexManager | null = null; + let managerSmall: MemoryIndexManager | null = null; - beforeAll(async () => { - fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-")); - }); - - afterAll(async () => { - await fs.rm(fixtureRoot, { recursive: true, force: true }); - }); - - beforeEach(async () => { - embedBatch.mockClear(); - embedQuery.mockClear(); - workspaceDir = path.join(fixtureRoot, `case-${++caseId}`); - indexPath = path.join(workspaceDir, "index.sqlite"); - await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true }); - }); - - afterEach(async () => { - if (manager) { - await manager.close(); - manager = null; - } - }); - - it("splits large files across multiple embedding batches", async () => { - // Keep this small but above the embedding batch byte threshold (8k) so we - // exercise multi-batch behavior without generating lots of chunks/DB rows. - const line = "a".repeat(5000); - const content = [line, line].join("\n"); - await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-03.md"), content); - - const cfg = { + function createCfg(params: { indexPath: string; tokens: number }) { + return { agents: { defaults: { workspace: workspaceDir, memorySearch: { provider: "openai", model: "mock-embed", - store: { path: indexPath, vector: { enabled: false } }, - chunking: { tokens: 1250, overlap: 0 }, + store: { path: params.indexPath, vector: { enabled: false } }, + chunking: { tokens: params.tokens, overlap: 0 }, sync: { watch: false, onSessionStart: false, onSearch: false }, query: { minScore: 0, hybrid: { enabled: false } }, }, @@ -84,21 +57,86 @@ describe("memory embedding batches", () => { list: [{ id: "main", default: true }], }, }; + } - const result = await getMemorySearchManager({ cfg, agentId: "main" }); - expect(result.manager).not.toBeNull(); - if (!result.manager) { + beforeAll(async () => { + fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-")); + workspaceDir = path.join(fixtureRoot, "workspace"); + memoryDir = path.join(workspaceDir, "memory"); + indexPathLarge = path.join(fixtureRoot, "index.large.sqlite"); + indexPathSmall = path.join(fixtureRoot, "index.small.sqlite"); + await fs.mkdir(memoryDir, { recursive: true }); + + const large = await getMemorySearchManager({ + cfg: createCfg({ indexPath: indexPathLarge, tokens: 1250 }), + agentId: "main", + }); + expect(large.manager).not.toBeNull(); + if (!large.manager) { + throw new Error("manager missing"); + } + managerLarge = large.manager; + + const small = await getMemorySearchManager({ + cfg: createCfg({ indexPath: indexPathSmall, tokens: 200 }), + agentId: "main", + }); + expect(small.manager).not.toBeNull(); + if (!small.manager) { + throw new Error("manager missing"); + } + managerSmall = small.manager; + }); + + afterAll(async () => { + if (managerLarge) { + await managerLarge.close(); + managerLarge = null; + } + if (managerSmall) { + await managerSmall.close(); + managerSmall = null; + } + await fs.rm(fixtureRoot, { recursive: true, force: true }); + }); + + beforeEach(async () => { + embedBatch.mockClear(); + embedQuery.mockClear(); + embedBatch.mockImplementation(async (texts: string[]) => texts.map(() => [0, 1, 0])); + embedQuery.mockImplementation(async () => [0, 1, 0]); + + await fs.rm(memoryDir, { recursive: true, force: true }); + await fs.mkdir(memoryDir, { recursive: true }); + + const reset = (manager: MemoryIndexManager | null) => { + if (!manager) { + throw new Error("manager missing"); + } + (manager as unknown as { resetIndex: () => void }).resetIndex(); + (manager as unknown as { dirty: boolean }).dirty = true; + }; + reset(managerLarge); + reset(managerSmall); + }); + + it("splits large files across multiple embedding batches", async () => { + // Keep this small but above the embedding batch byte threshold (8k) so we + // exercise multi-batch behavior without generating lots of chunks/DB rows. + const line = "a".repeat(5000); + const content = [line, line].join("\n"); + await fs.writeFile(path.join(memoryDir, "2026-01-03.md"), content); + if (!managerLarge) { throw new Error("manager missing"); } - manager = result.manager; const updates: Array<{ completed: number; total: number; label?: string }> = []; - await manager.sync({ + await managerLarge.sync({ progress: (update) => { updates.push(update); }, }); - const status = manager.status(); + const status = managerLarge.status(); const totalTexts = embedBatch.mock.calls.reduce((sum, call) => sum + (call[0]?.length ?? 0), 0); expect(totalTexts).toBe(status.chunks); expect(embedBatch.mock.calls.length).toBeGreaterThan(1); @@ -112,32 +150,11 @@ describe("memory embedding batches", () => { it("keeps small files in a single embedding batch", async () => { const line = "b".repeat(120); const content = Array.from({ length: 4 }, () => line).join("\n"); - await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-04.md"), content); - - const cfg = { - agents: { - defaults: { - workspace: workspaceDir, - memorySearch: { - provider: "openai", - model: "mock-embed", - store: { path: indexPath, vector: { enabled: false } }, - chunking: { tokens: 200, overlap: 0 }, - sync: { watch: false, onSessionStart: false, onSearch: false }, - query: { minScore: 0, hybrid: { enabled: false } }, - }, - }, - list: [{ id: "main", default: true }], - }, - }; - - const result = await getMemorySearchManager({ cfg, agentId: "main" }); - expect(result.manager).not.toBeNull(); - if (!result.manager) { + await fs.writeFile(path.join(memoryDir, "2026-01-04.md"), content); + if (!managerSmall) { throw new Error("manager missing"); } - manager = result.manager; - await manager.sync({ reason: "test" }); + await managerSmall.sync({ reason: "test" }); expect(embedBatch.mock.calls.length).toBe(1); }); @@ -145,7 +162,7 @@ describe("memory embedding batches", () => { it("retries embeddings on transient rate limit and 5xx errors", async () => { const line = "d".repeat(120); const content = Array.from({ length: 4 }, () => line).join("\n"); - await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-06.md"), content); + await fs.writeFile(path.join(memoryDir, "2026-01-06.md"), content); const transientErrors = [ "openai embeddings failed: 429 rate limit", @@ -173,32 +190,11 @@ describe("memory embedding batches", () => { } return realSetTimeout(handler, delay, ...args); }) as typeof setTimeout); - - const cfg = { - agents: { - defaults: { - workspace: workspaceDir, - memorySearch: { - provider: "openai", - model: "mock-embed", - store: { path: indexPath, vector: { enabled: false } }, - chunking: { tokens: 200, overlap: 0 }, - sync: { watch: false, onSessionStart: false, onSearch: false }, - query: { minScore: 0, hybrid: { enabled: false } }, - }, - }, - list: [{ id: "main", default: true }], - }, - }; - - const result = await getMemorySearchManager({ cfg, agentId: "main" }); - expect(result.manager).not.toBeNull(); - if (!result.manager) { + if (!managerSmall) { throw new Error("manager missing"); } - manager = result.manager; try { - await manager.sync({ reason: "test" }); + await managerSmall.sync({ reason: "test" }); } finally { setTimeoutSpy.mockRestore(); } @@ -207,31 +203,11 @@ describe("memory embedding batches", () => { }, 10000); it("skips empty chunks so embeddings input stays valid", async () => { - await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "\n\n\n"); - - const cfg = { - agents: { - defaults: { - workspace: workspaceDir, - memorySearch: { - provider: "openai", - model: "mock-embed", - store: { path: indexPath, vector: { enabled: false } }, - sync: { watch: false, onSessionStart: false, onSearch: false }, - query: { minScore: 0, hybrid: { enabled: false } }, - }, - }, - list: [{ id: "main", default: true }], - }, - }; - - const result = await getMemorySearchManager({ cfg, agentId: "main" }); - expect(result.manager).not.toBeNull(); - if (!result.manager) { + await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), "\n\n\n"); + if (!managerSmall) { throw new Error("manager missing"); } - manager = result.manager; - await manager.sync({ reason: "test" }); + await managerSmall.sync({ reason: "test" }); const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []); expect(inputs).not.toContain("");