mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 01:58:26 +00:00
perf(test): reuse managers in embedding batches suite
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import os from "node:os";
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||||
|
|
||||||
const embedBatch = vi.fn(async (texts: string[]) => texts.map(() => [0, 1, 0]));
|
const embedBatch = vi.fn(async (texts: string[]) => texts.map(() => [0, 1, 0]));
|
||||||
@@ -33,50 +33,23 @@ vi.mock("./embeddings.js", () => ({
|
|||||||
|
|
||||||
describe("memory embedding batches", () => {
|
describe("memory embedding batches", () => {
|
||||||
let fixtureRoot: string;
|
let fixtureRoot: string;
|
||||||
let caseId = 0;
|
|
||||||
let workspaceDir: string;
|
let workspaceDir: string;
|
||||||
let indexPath: string;
|
let memoryDir: string;
|
||||||
let manager: MemoryIndexManager | null = null;
|
let indexPathLarge: string;
|
||||||
|
let indexPathSmall: string;
|
||||||
|
let managerLarge: MemoryIndexManager | null = null;
|
||||||
|
let managerSmall: MemoryIndexManager | null = null;
|
||||||
|
|
||||||
beforeAll(async () => {
|
function createCfg(params: { indexPath: string; tokens: number }) {
|
||||||
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-"));
|
return {
|
||||||
});
|
|
||||||
|
|
||||||
afterAll(async () => {
|
|
||||||
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
|
||||||
});
|
|
||||||
|
|
||||||
beforeEach(async () => {
|
|
||||||
embedBatch.mockClear();
|
|
||||||
embedQuery.mockClear();
|
|
||||||
workspaceDir = path.join(fixtureRoot, `case-${++caseId}`);
|
|
||||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
|
||||||
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
|
|
||||||
});
|
|
||||||
|
|
||||||
afterEach(async () => {
|
|
||||||
if (manager) {
|
|
||||||
await manager.close();
|
|
||||||
manager = null;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it("splits large files across multiple embedding batches", async () => {
|
|
||||||
// Keep this small but above the embedding batch byte threshold (8k) so we
|
|
||||||
// exercise multi-batch behavior without generating lots of chunks/DB rows.
|
|
||||||
const line = "a".repeat(5000);
|
|
||||||
const content = [line, line].join("\n");
|
|
||||||
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-03.md"), content);
|
|
||||||
|
|
||||||
const cfg = {
|
|
||||||
agents: {
|
agents: {
|
||||||
defaults: {
|
defaults: {
|
||||||
workspace: workspaceDir,
|
workspace: workspaceDir,
|
||||||
memorySearch: {
|
memorySearch: {
|
||||||
provider: "openai",
|
provider: "openai",
|
||||||
model: "mock-embed",
|
model: "mock-embed",
|
||||||
store: { path: indexPath, vector: { enabled: false } },
|
store: { path: params.indexPath, vector: { enabled: false } },
|
||||||
chunking: { tokens: 1250, overlap: 0 },
|
chunking: { tokens: params.tokens, overlap: 0 },
|
||||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||||
query: { minScore: 0, hybrid: { enabled: false } },
|
query: { minScore: 0, hybrid: { enabled: false } },
|
||||||
},
|
},
|
||||||
@@ -84,21 +57,86 @@ describe("memory embedding batches", () => {
|
|||||||
list: [{ id: "main", default: true }],
|
list: [{ id: "main", default: true }],
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
beforeAll(async () => {
|
||||||
expect(result.manager).not.toBeNull();
|
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-"));
|
||||||
if (!result.manager) {
|
workspaceDir = path.join(fixtureRoot, "workspace");
|
||||||
|
memoryDir = path.join(workspaceDir, "memory");
|
||||||
|
indexPathLarge = path.join(fixtureRoot, "index.large.sqlite");
|
||||||
|
indexPathSmall = path.join(fixtureRoot, "index.small.sqlite");
|
||||||
|
await fs.mkdir(memoryDir, { recursive: true });
|
||||||
|
|
||||||
|
const large = await getMemorySearchManager({
|
||||||
|
cfg: createCfg({ indexPath: indexPathLarge, tokens: 1250 }),
|
||||||
|
agentId: "main",
|
||||||
|
});
|
||||||
|
expect(large.manager).not.toBeNull();
|
||||||
|
if (!large.manager) {
|
||||||
|
throw new Error("manager missing");
|
||||||
|
}
|
||||||
|
managerLarge = large.manager;
|
||||||
|
|
||||||
|
const small = await getMemorySearchManager({
|
||||||
|
cfg: createCfg({ indexPath: indexPathSmall, tokens: 200 }),
|
||||||
|
agentId: "main",
|
||||||
|
});
|
||||||
|
expect(small.manager).not.toBeNull();
|
||||||
|
if (!small.manager) {
|
||||||
|
throw new Error("manager missing");
|
||||||
|
}
|
||||||
|
managerSmall = small.manager;
|
||||||
|
});
|
||||||
|
|
||||||
|
afterAll(async () => {
|
||||||
|
if (managerLarge) {
|
||||||
|
await managerLarge.close();
|
||||||
|
managerLarge = null;
|
||||||
|
}
|
||||||
|
if (managerSmall) {
|
||||||
|
await managerSmall.close();
|
||||||
|
managerSmall = null;
|
||||||
|
}
|
||||||
|
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
embedBatch.mockClear();
|
||||||
|
embedQuery.mockClear();
|
||||||
|
embedBatch.mockImplementation(async (texts: string[]) => texts.map(() => [0, 1, 0]));
|
||||||
|
embedQuery.mockImplementation(async () => [0, 1, 0]);
|
||||||
|
|
||||||
|
await fs.rm(memoryDir, { recursive: true, force: true });
|
||||||
|
await fs.mkdir(memoryDir, { recursive: true });
|
||||||
|
|
||||||
|
const reset = (manager: MemoryIndexManager | null) => {
|
||||||
|
if (!manager) {
|
||||||
|
throw new Error("manager missing");
|
||||||
|
}
|
||||||
|
(manager as unknown as { resetIndex: () => void }).resetIndex();
|
||||||
|
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||||
|
};
|
||||||
|
reset(managerLarge);
|
||||||
|
reset(managerSmall);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("splits large files across multiple embedding batches", async () => {
|
||||||
|
// Keep this small but above the embedding batch byte threshold (8k) so we
|
||||||
|
// exercise multi-batch behavior without generating lots of chunks/DB rows.
|
||||||
|
const line = "a".repeat(5000);
|
||||||
|
const content = [line, line].join("\n");
|
||||||
|
await fs.writeFile(path.join(memoryDir, "2026-01-03.md"), content);
|
||||||
|
if (!managerLarge) {
|
||||||
throw new Error("manager missing");
|
throw new Error("manager missing");
|
||||||
}
|
}
|
||||||
manager = result.manager;
|
|
||||||
const updates: Array<{ completed: number; total: number; label?: string }> = [];
|
const updates: Array<{ completed: number; total: number; label?: string }> = [];
|
||||||
await manager.sync({
|
await managerLarge.sync({
|
||||||
progress: (update) => {
|
progress: (update) => {
|
||||||
updates.push(update);
|
updates.push(update);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const status = manager.status();
|
const status = managerLarge.status();
|
||||||
const totalTexts = embedBatch.mock.calls.reduce((sum, call) => sum + (call[0]?.length ?? 0), 0);
|
const totalTexts = embedBatch.mock.calls.reduce((sum, call) => sum + (call[0]?.length ?? 0), 0);
|
||||||
expect(totalTexts).toBe(status.chunks);
|
expect(totalTexts).toBe(status.chunks);
|
||||||
expect(embedBatch.mock.calls.length).toBeGreaterThan(1);
|
expect(embedBatch.mock.calls.length).toBeGreaterThan(1);
|
||||||
@@ -112,32 +150,11 @@ describe("memory embedding batches", () => {
|
|||||||
it("keeps small files in a single embedding batch", async () => {
|
it("keeps small files in a single embedding batch", async () => {
|
||||||
const line = "b".repeat(120);
|
const line = "b".repeat(120);
|
||||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||||
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-04.md"), content);
|
await fs.writeFile(path.join(memoryDir, "2026-01-04.md"), content);
|
||||||
|
if (!managerSmall) {
|
||||||
const cfg = {
|
|
||||||
agents: {
|
|
||||||
defaults: {
|
|
||||||
workspace: workspaceDir,
|
|
||||||
memorySearch: {
|
|
||||||
provider: "openai",
|
|
||||||
model: "mock-embed",
|
|
||||||
store: { path: indexPath, vector: { enabled: false } },
|
|
||||||
chunking: { tokens: 200, overlap: 0 },
|
|
||||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
|
||||||
query: { minScore: 0, hybrid: { enabled: false } },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
list: [{ id: "main", default: true }],
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
|
||||||
expect(result.manager).not.toBeNull();
|
|
||||||
if (!result.manager) {
|
|
||||||
throw new Error("manager missing");
|
throw new Error("manager missing");
|
||||||
}
|
}
|
||||||
manager = result.manager;
|
await managerSmall.sync({ reason: "test" });
|
||||||
await manager.sync({ reason: "test" });
|
|
||||||
|
|
||||||
expect(embedBatch.mock.calls.length).toBe(1);
|
expect(embedBatch.mock.calls.length).toBe(1);
|
||||||
});
|
});
|
||||||
@@ -145,7 +162,7 @@ describe("memory embedding batches", () => {
|
|||||||
it("retries embeddings on transient rate limit and 5xx errors", async () => {
|
it("retries embeddings on transient rate limit and 5xx errors", async () => {
|
||||||
const line = "d".repeat(120);
|
const line = "d".repeat(120);
|
||||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||||
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-06.md"), content);
|
await fs.writeFile(path.join(memoryDir, "2026-01-06.md"), content);
|
||||||
|
|
||||||
const transientErrors = [
|
const transientErrors = [
|
||||||
"openai embeddings failed: 429 rate limit",
|
"openai embeddings failed: 429 rate limit",
|
||||||
@@ -173,32 +190,11 @@ describe("memory embedding batches", () => {
|
|||||||
}
|
}
|
||||||
return realSetTimeout(handler, delay, ...args);
|
return realSetTimeout(handler, delay, ...args);
|
||||||
}) as typeof setTimeout);
|
}) as typeof setTimeout);
|
||||||
|
if (!managerSmall) {
|
||||||
const cfg = {
|
|
||||||
agents: {
|
|
||||||
defaults: {
|
|
||||||
workspace: workspaceDir,
|
|
||||||
memorySearch: {
|
|
||||||
provider: "openai",
|
|
||||||
model: "mock-embed",
|
|
||||||
store: { path: indexPath, vector: { enabled: false } },
|
|
||||||
chunking: { tokens: 200, overlap: 0 },
|
|
||||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
|
||||||
query: { minScore: 0, hybrid: { enabled: false } },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
list: [{ id: "main", default: true }],
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
|
||||||
expect(result.manager).not.toBeNull();
|
|
||||||
if (!result.manager) {
|
|
||||||
throw new Error("manager missing");
|
throw new Error("manager missing");
|
||||||
}
|
}
|
||||||
manager = result.manager;
|
|
||||||
try {
|
try {
|
||||||
await manager.sync({ reason: "test" });
|
await managerSmall.sync({ reason: "test" });
|
||||||
} finally {
|
} finally {
|
||||||
setTimeoutSpy.mockRestore();
|
setTimeoutSpy.mockRestore();
|
||||||
}
|
}
|
||||||
@@ -207,31 +203,11 @@ describe("memory embedding batches", () => {
|
|||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
it("skips empty chunks so embeddings input stays valid", async () => {
|
it("skips empty chunks so embeddings input stays valid", async () => {
|
||||||
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "\n\n\n");
|
await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), "\n\n\n");
|
||||||
|
if (!managerSmall) {
|
||||||
const cfg = {
|
|
||||||
agents: {
|
|
||||||
defaults: {
|
|
||||||
workspace: workspaceDir,
|
|
||||||
memorySearch: {
|
|
||||||
provider: "openai",
|
|
||||||
model: "mock-embed",
|
|
||||||
store: { path: indexPath, vector: { enabled: false } },
|
|
||||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
|
||||||
query: { minScore: 0, hybrid: { enabled: false } },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
list: [{ id: "main", default: true }],
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
|
||||||
expect(result.manager).not.toBeNull();
|
|
||||||
if (!result.manager) {
|
|
||||||
throw new Error("manager missing");
|
throw new Error("manager missing");
|
||||||
}
|
}
|
||||||
manager = result.manager;
|
await managerSmall.sync({ reason: "test" });
|
||||||
await manager.sync({ reason: "test" });
|
|
||||||
|
|
||||||
const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
|
const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
|
||||||
expect(inputs).not.toContain("");
|
expect(inputs).not.toContain("");
|
||||||
|
|||||||
Reference in New Issue
Block a user