From 71c1d09f22db7762f1a300793ad5cb5b39cbf217 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 15 Feb 2026 15:07:09 +0000 Subject: [PATCH] refactor(test): share memory embedding fixture --- src/memory/embedding-manager.test-harness.ts | 105 ++++++++++++++ src/memory/manager.embedding-batches.test.ts | 136 +++++------------- .../manager.embedding-token-limit.test.ts | 124 ++++------------ 3 files changed, 165 insertions(+), 200 deletions(-) create mode 100644 src/memory/embedding-manager.test-harness.ts diff --git a/src/memory/embedding-manager.test-harness.ts b/src/memory/embedding-manager.test-harness.ts new file mode 100644 index 00000000000..fe50368f022 --- /dev/null +++ b/src/memory/embedding-manager.test-harness.ts @@ -0,0 +1,105 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterAll, beforeAll, beforeEach, expect } from "vitest"; +import { getEmbedBatchMock, resetEmbeddingMocks } from "./embedding.test-mocks.js"; +import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; + +export function installEmbeddingManagerFixture(opts: { + fixturePrefix: string; + largeTokens: number; + smallTokens: number; + createCfg: (params: { workspaceDir: string; indexPath: string; tokens: number }) => unknown; + resetIndexEachTest?: boolean; +}) { + const embedBatch = getEmbedBatchMock(); + const resetIndexEachTest = opts.resetIndexEachTest ?? 
true; + + let fixtureRoot: string | undefined; + let workspaceDir: string | undefined; + let memoryDir: string | undefined; + let managerLarge: MemoryIndexManager | undefined; + let managerSmall: MemoryIndexManager | undefined; + + const resetManager = (manager: MemoryIndexManager) => { + (manager as unknown as { resetIndex: () => void }).resetIndex(); + (manager as unknown as { dirty: boolean }).dirty = true; + }; + + const requireValue = <T>(value: T | undefined, name: string): T => { + if (!value) { + throw new Error(`${name} missing`); + } + return value; + }; + + beforeAll(async () => { + fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), opts.fixturePrefix)); + workspaceDir = path.join(fixtureRoot, "workspace"); + memoryDir = path.join(workspaceDir, "memory"); + await fs.mkdir(memoryDir, { recursive: true }); + + const indexPathLarge = path.join(fixtureRoot, "index.large.sqlite"); + const indexPathSmall = path.join(fixtureRoot, "index.small.sqlite"); + + const large = await getMemorySearchManager({ + cfg: opts.createCfg({ + workspaceDir, + indexPath: indexPathLarge, + tokens: opts.largeTokens, + }), + agentId: "main", + }); + expect(large.manager).not.toBeNull(); + managerLarge = large.manager ?? undefined; + + const small = await getMemorySearchManager({ + cfg: opts.createCfg({ + workspaceDir, + indexPath: indexPathSmall, + tokens: opts.smallTokens, + }), + agentId: "main", + }); + expect(small.manager).not.toBeNull(); + managerSmall = small.manager ?? 
undefined; + }); + + afterAll(async () => { + if (managerLarge) { + await managerLarge.close(); + managerLarge = undefined; + } + if (managerSmall) { + await managerSmall.close(); + managerSmall = undefined; + } + if (fixtureRoot) { + await fs.rm(fixtureRoot, { recursive: true, force: true }); + fixtureRoot = undefined; + } + }); + + beforeEach(async () => { + resetEmbeddingMocks(); + + const dir = requireValue(memoryDir, "memoryDir"); + await fs.rm(dir, { recursive: true, force: true }); + await fs.mkdir(dir, { recursive: true }); + + if (resetIndexEachTest) { + resetManager(requireValue(managerLarge, "managerLarge")); + resetManager(requireValue(managerSmall, "managerSmall")); + } + }); + + return { + embedBatch, + getFixtureRoot: () => requireValue(fixtureRoot, "fixtureRoot"), + getWorkspaceDir: () => requireValue(workspaceDir, "workspaceDir"), + getMemoryDir: () => requireValue(memoryDir, "memoryDir"), + getManagerLarge: () => requireValue(managerLarge, "managerLarge"), + getManagerSmall: () => requireValue(managerSmall, "managerSmall"), + resetManager, + }; +} diff --git a/src/memory/manager.embedding-batches.test.ts b/src/memory/manager.embedding-batches.test.ts index 13caa1906d6..1fc1dbad2c9 100644 --- a/src/memory/manager.embedding-batches.test.ts +++ b/src/memory/manager.embedding-batches.test.ts @@ -1,106 +1,40 @@ import fs from "node:fs/promises"; -import os from "node:os"; import path from "node:path"; -import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; -import { getEmbedBatchMock, resetEmbeddingMocks } from "./embedding.test-mocks.js"; -import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; +import { describe, expect, it, vi } from "vitest"; +import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness.js"; -const embedBatch = getEmbedBatchMock(); +const fx = installEmbeddingManagerFixture({ + fixturePrefix: "openclaw-mem-", + largeTokens: 1250, + smallTokens: 200, + createCfg: 
({ workspaceDir, indexPath, tokens }) => ({ + agents: { + defaults: { + workspace: workspaceDir, + memorySearch: { + provider: "openai", + model: "mock-embed", + store: { path: indexPath, vector: { enabled: false } }, + chunking: { tokens, overlap: 0 }, + sync: { watch: false, onSessionStart: false, onSearch: false }, + query: { minScore: 0, hybrid: { enabled: false } }, + }, + }, + list: [{ id: "main", default: true }], + }, + }), +}); +const { embedBatch } = fx; describe("memory embedding batches", () => { - let fixtureRoot: string; - let workspaceDir: string; - let memoryDir: string; - let indexPathLarge: string; - let indexPathSmall: string; - let managerLarge: MemoryIndexManager | null = null; - let managerSmall: MemoryIndexManager | null = null; - - function resetManagerForTest(manager: MemoryIndexManager | null) { - if (!manager) { - throw new Error("manager missing"); - } - (manager as unknown as { resetIndex: () => void }).resetIndex(); - (manager as unknown as { dirty: boolean }).dirty = true; - } - - function createCfg(params: { indexPath: string; tokens: number }) { - return { - agents: { - defaults: { - workspace: workspaceDir, - memorySearch: { - provider: "openai", - model: "mock-embed", - store: { path: params.indexPath, vector: { enabled: false } }, - chunking: { tokens: params.tokens, overlap: 0 }, - sync: { watch: false, onSessionStart: false, onSearch: false }, - query: { minScore: 0, hybrid: { enabled: false } }, - }, - }, - list: [{ id: "main", default: true }], - }, - }; - } - - beforeAll(async () => { - fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-")); - workspaceDir = path.join(fixtureRoot, "workspace"); - memoryDir = path.join(workspaceDir, "memory"); - indexPathLarge = path.join(fixtureRoot, "index.large.sqlite"); - indexPathSmall = path.join(fixtureRoot, "index.small.sqlite"); - await fs.mkdir(memoryDir, { recursive: true }); - - const large = await getMemorySearchManager({ - cfg: createCfg({ indexPath: 
indexPathLarge, tokens: 1250 }), - agentId: "main", - }); - expect(large.manager).not.toBeNull(); - if (!large.manager) { - throw new Error("manager missing"); - } - managerLarge = large.manager; - - const small = await getMemorySearchManager({ - cfg: createCfg({ indexPath: indexPathSmall, tokens: 200 }), - agentId: "main", - }); - expect(small.manager).not.toBeNull(); - if (!small.manager) { - throw new Error("manager missing"); - } - managerSmall = small.manager; - }); - - afterAll(async () => { - if (managerLarge) { - await managerLarge.close(); - managerLarge = null; - } - if (managerSmall) { - await managerSmall.close(); - managerSmall = null; - } - await fs.rm(fixtureRoot, { recursive: true, force: true }); - }); - - beforeEach(async () => { - resetEmbeddingMocks(); - - await fs.rm(memoryDir, { recursive: true, force: true }); - await fs.mkdir(memoryDir, { recursive: true }); - }); - it("splits large files across multiple embedding batches", async () => { + const memoryDir = fx.getMemoryDir(); + const managerLarge = fx.getManagerLarge(); // Keep this small but above the embedding batch byte threshold (8k) so we // exercise multi-batch behavior without generating lots of chunks/DB rows. 
const line = "a".repeat(4200); const content = [line, line].join("\n"); await fs.writeFile(path.join(memoryDir, "2026-01-03.md"), content); - resetManagerForTest(managerLarge); - if (!managerLarge) { - throw new Error("manager missing"); - } const updates: Array<{ completed: number; total: number; label?: string }> = []; await managerLarge.sync({ progress: (update) => { @@ -120,19 +54,19 @@ describe("memory embedding batches", () => { }); it("keeps small files in a single embedding batch", async () => { + const memoryDir = fx.getMemoryDir(); + const managerSmall = fx.getManagerSmall(); const line = "b".repeat(120); const content = Array.from({ length: 4 }, () => line).join("\n"); await fs.writeFile(path.join(memoryDir, "2026-01-04.md"), content); - resetManagerForTest(managerSmall); - if (!managerSmall) { - throw new Error("manager missing"); - } await managerSmall.sync({ reason: "test" }); expect(embedBatch.mock.calls.length).toBe(1); }); it("retries embeddings on transient rate limit and 5xx errors", async () => { + const memoryDir = fx.getMemoryDir(); + const managerSmall = fx.getManagerSmall(); const line = "d".repeat(120); const content = Array.from({ length: 4 }, () => line).join("\n"); await fs.writeFile(path.join(memoryDir, "2026-01-06.md"), content); @@ -163,10 +97,6 @@ describe("memory embedding batches", () => { } return realSetTimeout(handler, delay, ...args); }) as typeof setTimeout); - resetManagerForTest(managerSmall); - if (!managerSmall) { - throw new Error("manager missing"); - } try { await managerSmall.sync({ reason: "test" }); } finally { @@ -177,11 +107,9 @@ describe("memory embedding batches", () => { }, 10000); it("skips empty chunks so embeddings input stays valid", async () => { + const memoryDir = fx.getMemoryDir(); + const managerSmall = fx.getManagerSmall(); await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), "\n\n\n"); - resetManagerForTest(managerSmall); - if (!managerSmall) { - throw new Error("manager missing"); - } await 
managerSmall.sync({ reason: "test" }); const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []); diff --git a/src/memory/manager.embedding-token-limit.test.ts b/src/memory/manager.embedding-token-limit.test.ts index 41a1aaa0469..51b0de421b6 100644 --- a/src/memory/manager.embedding-token-limit.test.ts +++ b/src/memory/manager.embedding-token-limit.test.ts @@ -1,104 +1,37 @@ import fs from "node:fs/promises"; -import os from "node:os"; import path from "node:path"; -import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest"; -import { getEmbedBatchMock, resetEmbeddingMocks } from "./embedding.test-mocks.js"; -import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; +import { describe, expect, it } from "vitest"; +import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness.js"; -const embedBatch = getEmbedBatchMock(); +const fx = installEmbeddingManagerFixture({ + fixturePrefix: "openclaw-mem-token-", + largeTokens: 10_000, + smallTokens: 1000, + createCfg: ({ workspaceDir, indexPath, tokens }) => ({ + agents: { + defaults: { + workspace: workspaceDir, + memorySearch: { + provider: "openai", + model: "mock-embed", + store: { path: indexPath, vector: { enabled: false } }, + chunking: { tokens, overlap: 0 }, + sync: { watch: false, onSessionStart: false, onSearch: false }, + query: { minScore: 0 }, + }, + }, + list: [{ id: "main", default: true }], + }, + }), +}); +const { embedBatch } = fx; describe("memory embedding token limits", () => { - let fixtureRoot: string; - let workspaceDir: string; - let memoryDir: string; - let indexPathLarge: string; - let indexPathSmall: string; - let managerLarge: MemoryIndexManager | null = null; - let managerSmall: MemoryIndexManager | null = null; - - function createCfg(params: { indexPath: string; tokens: number }) { - return { - agents: { - defaults: { - workspace: workspaceDir, - memorySearch: { - provider: "openai", - model: "mock-embed", - store: { path: 
params.indexPath, vector: { enabled: false } }, - chunking: { tokens: params.tokens, overlap: 0 }, - sync: { watch: false, onSessionStart: false, onSearch: false }, - query: { minScore: 0 }, - }, - }, - list: [{ id: "main", default: true }], - }, - }; - } - - beforeAll(async () => { - fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-token-")); - workspaceDir = path.join(fixtureRoot, "workspace"); - memoryDir = path.join(workspaceDir, "memory"); - indexPathLarge = path.join(fixtureRoot, "index.large.sqlite"); - indexPathSmall = path.join(fixtureRoot, "index.small.sqlite"); - await fs.mkdir(memoryDir, { recursive: true }); - - const large = await getMemorySearchManager({ - cfg: createCfg({ indexPath: indexPathLarge, tokens: 10_000 }), - agentId: "main", - }); - expect(large.manager).not.toBeNull(); - if (!large.manager) { - throw new Error("manager missing"); - } - managerLarge = large.manager; - - const small = await getMemorySearchManager({ - cfg: createCfg({ indexPath: indexPathSmall, tokens: 1000 }), - agentId: "main", - }); - expect(small.manager).not.toBeNull(); - if (!small.manager) { - throw new Error("manager missing"); - } - managerSmall = small.manager; - }); - - afterAll(async () => { - if (managerLarge) { - await managerLarge.close(); - managerLarge = null; - } - if (managerSmall) { - await managerSmall.close(); - managerSmall = null; - } - await fs.rm(fixtureRoot, { recursive: true, force: true }); - }); - - beforeEach(async () => { - resetEmbeddingMocks(); - - await fs.rm(memoryDir, { recursive: true, force: true }); - await fs.mkdir(memoryDir, { recursive: true }); - - const reset = (manager: MemoryIndexManager | null) => { - if (!manager) { - throw new Error("manager missing"); - } - (manager as unknown as { resetIndex: () => void }).resetIndex(); - (manager as unknown as { dirty: boolean }).dirty = true; - }; - reset(managerLarge); - reset(managerSmall); - }); - it("splits oversized chunks so each embedding input stays <= 8192 
UTF-8 bytes", async () => { + const memoryDir = fx.getMemoryDir(); + const managerLarge = fx.getManagerLarge(); const content = "x".repeat(9500); await fs.writeFile(path.join(memoryDir, "2026-01-09.md"), content); - if (!managerLarge) { - throw new Error("manager missing"); - } await managerLarge.sync({ reason: "test" }); const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []); @@ -109,12 +42,11 @@ describe("memory embedding token limits", () => { }); it("uses UTF-8 byte estimates when batching multibyte chunks", async () => { + const memoryDir = fx.getMemoryDir(); + const managerSmall = fx.getManagerSmall(); const line = "😀".repeat(1800); const content = `${line}\n${line}\n${line}`; await fs.writeFile(path.join(memoryDir, "2026-01-10.md"), content); - if (!managerSmall) { - throw new Error("manager missing"); - } await managerSmall.sync({ reason: "test" }); const batchSizes = embedBatch.mock.calls.map(