mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-30 04:35:03 +00:00
- Add safety comment for RELATIONSHIP_TYPE_PATTERN Cypher interpolation - Add concurrency batching (8) to findDuplicateClusters vector queries - Bounds-validate memory_recall limit parameter (1-50) - Fix maxRetries comment (default 2 = 3 attempts, not 1 = 2) - Fix countByExtractionStatus passing undefined agentId to Cypher - Fix assistant auto-capture silently disabled when extraction disabled - Add agentId scoping to findSimilar (dedup + auto-capture) - Fix BM25 single-result normalization (0.5 instead of inflated 1.0) - Wrap pruneMemories in retryOnTransient for resilience - Use UNWIND batch update in reindex instead of N individual queries - Raise auto-delete threshold from 0.9 to 0.95 to reduce false positives Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
482 lines
17 KiB
TypeScript
482 lines
17 KiB
TypeScript
/**
 * Tests for embeddings.ts — Embedding Provider.
 *
 * Tests the Embeddings class with a mocked OpenAI client and a mocked fetch for Ollama.
 */
|
|
|
|
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
|
|
|
// ============================================================================
// Constructor
// ============================================================================
|
|
|
|
/**
 * Constructor contract: building an OpenAI-backed instance without an API key
 * must throw, while an Ollama-backed instance can be built without one.
 */
describe("Embeddings constructor", () => {
  it("should throw when OpenAI provider is used without API key", async () => {
    const mod = await import("./embeddings.js");

    // Wrap construction in a thunk so expect() can observe the synchronous throw.
    const buildWithoutKey = () =>
      new mod.Embeddings(undefined, "text-embedding-3-small", "openai");

    expect(buildWithoutKey).toThrow("API key required for OpenAI embeddings");
  });

  it("should not require API key for ollama provider", async () => {
    const mod = await import("./embeddings.js");

    const instance = new mod.Embeddings(undefined, "mxbai-embed-large", "ollama");

    expect(instance).toBeDefined();
  });
});
|
|
|
|
// ============================================================================
// Ollama embed
// ============================================================================
|
|
|
|
/**
 * Single-text embedding through the Ollama provider. `globalThis.fetch` is
 * swapped for a vitest mock in each test and restored afterwards.
 */
describe("Embeddings - Ollama provider", () => {
  const OLLAMA_MODEL = "mxbai-embed-large";
  const realFetch = globalThis.fetch;

  /** Install a fetch mock that resolves to an `ok` response whose JSON body is `payload`. */
  function stubFetchJson(payload: unknown): void {
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(payload),
    });
  }

  /**
   * Build an Ollama client with `baseUrl`, embed "test", and assert that the
   * request went to `expectedEndpoint`.
   */
  async function expectEndpointForBaseUrl(
    baseUrl: string,
    vector: number[],
    expectedEndpoint: string,
  ): Promise<void> {
    const { Embeddings } = await import("./embeddings.js");
    stubFetchJson({ embeddings: [vector] });

    const client = new Embeddings(undefined, OLLAMA_MODEL, "ollama", baseUrl);
    await client.embed("test");

    expect(globalThis.fetch).toHaveBeenCalledWith(expectedEndpoint, expect.any(Object));
  }

  afterEach(() => {
    globalThis.fetch = realFetch;
  });

  it("should call Ollama API with correct request body", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const vector = [0.1, 0.2, 0.3, 0.4];
    stubFetchJson({ embeddings: [vector] });

    const client = new Embeddings(undefined, OLLAMA_MODEL, "ollama");
    const embedded = await client.embed("test text");

    expect(embedded).toEqual(vector);

    const expectedRequest = expect.objectContaining({
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: "mxbai-embed-large",
        input: "test text",
      }),
    });
    expect(globalThis.fetch).toHaveBeenCalledWith(
      "http://localhost:11434/api/embed",
      expectedRequest,
    );
  });

  it("should use custom baseUrl for Ollama", async () => {
    await expectEndpointForBaseUrl(
      "http://my-host:11434",
      [0.5, 0.6],
      "http://my-host:11434/api/embed",
    );
  });

  it("should strip trailing slashes from baseUrl", async () => {
    await expectEndpointForBaseUrl(
      "http://my-host:11434/",
      [0.1, 0.2],
      "http://my-host:11434/api/embed",
    );
  });

  it("should strip multiple trailing slashes from baseUrl", async () => {
    await expectEndpointForBaseUrl(
      "http://my-host:11434///",
      [0.1, 0.2],
      "http://my-host:11434/api/embed",
    );
  });

  it("should throw when Ollama returns error status", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const failedResponse = {
      ok: false,
      status: 500,
      text: () => Promise.resolve("Internal Server Error"),
    };
    globalThis.fetch = vi.fn().mockResolvedValue(failedResponse);

    const client = new Embeddings(undefined, OLLAMA_MODEL, "ollama");

    await expect(client.embed("test")).rejects.toThrow("Ollama embedding failed: 500");
  });

  it("should throw when Ollama returns no embeddings", async () => {
    const { Embeddings } = await import("./embeddings.js");
    // Successful response, but the embeddings list is empty.
    stubFetchJson({ embeddings: [] });

    const client = new Embeddings(undefined, OLLAMA_MODEL, "ollama");

    await expect(client.embed("test")).rejects.toThrow("No embedding returned from Ollama");
  });

  it("should throw when Ollama returns null embeddings", async () => {
    const { Embeddings } = await import("./embeddings.js");
    // Successful response whose body has no `embeddings` field at all.
    stubFetchJson({});

    const client = new Embeddings(undefined, OLLAMA_MODEL, "ollama");

    await expect(client.embed("test")).rejects.toThrow("No embedding returned from Ollama");
  });

  it("should propagate fetch errors for Ollama", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const networkFailure = new Error("Network error");
    globalThis.fetch = vi.fn().mockRejectedValue(networkFailure);

    const client = new Embeddings(undefined, OLLAMA_MODEL, "ollama");

    await expect(client.embed("test")).rejects.toThrow("Network error");
  });
});
|
|
|
|
// ============================================================================
// OpenAI embed (via mocked client internals)
// ============================================================================
|
|
|
|
/**
 * Smoke tests for the OpenAI provider: construction with an API key succeeds
 * and the instance exposes the embedding methods. No request is issued here.
 */
describe("Embeddings - OpenAI provider", () => {
  /** Import the module under test and build an OpenAI-backed instance with a dummy key. */
  async function buildOpenAiEmbeddings() {
    const { Embeddings } = await import("./embeddings.js");
    return new Embeddings("sk-test-key", "text-embedding-3-small", "openai");
  }

  it("should create instance with OpenAI provider when API key provided", async () => {
    // Construction alone is the behavior under test.
    const instance = await buildOpenAiEmbeddings();

    expect(instance).toBeDefined();
  });

  it("should have embed and embedBatch methods", async () => {
    const instance = await buildOpenAiEmbeddings();

    expect(typeof instance.embed).toBe("function");
    expect(typeof instance.embedBatch).toBe("function");
  });
});
|
|
|
|
// ============================================================================
// Batch embedding
// ============================================================================
|
|
|
|
/**
 * Batch embedding: empty input yields an empty result for both providers, and
 * the Ollama provider issues one fetch per input text.
 */
describe("Embeddings - embedBatch", () => {
  const realFetch = globalThis.fetch;

  afterEach(() => {
    globalThis.fetch = realFetch;
  });

  it("should return empty array for empty input (openai)", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const client = new Embeddings("sk-test", "text-embedding-3-small", "openai");

    const vectors = await client.embedBatch([]);

    expect(vectors).toEqual([]);
  });

  it("should return empty array for empty input (ollama)", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const client = new Embeddings(undefined, "mxbai-embed-large", "ollama");

    const vectors = await client.embedBatch([]);

    expect(vectors).toEqual([]);
  });

  it("should use sequential calls for Ollama batch (no native batch support)", async () => {
    const { Embeddings } = await import("./embeddings.js");

    // Counts fetch invocations; the vector is derived from the counter when
    // json() is invoked, not when fetch is called.
    let invocations = 0;
    const fakeFetch = vi.fn().mockImplementation(() => {
      invocations += 1;
      const response = {
        ok: true,
        json: () => Promise.resolve({ embeddings: [[invocations * 0.1, invocations * 0.2]] }),
      };
      return Promise.resolve(response);
    });
    globalThis.fetch = fakeFetch;

    const client = new Embeddings(undefined, "mxbai-embed-large", "ollama");
    const vectors = await client.embedBatch(["text1", "text2", "text3"]);

    // One request per input text.
    expect(globalThis.fetch).toHaveBeenCalledTimes(3);
    expect(vectors).toHaveLength(3);

    // Every entry is a two-element vector.
    vectors.forEach((vector) => {
      expect(Array.isArray(vector)).toBe(true);
      expect(vector.length).toBe(2);
    });
  });
});
|
|
|
|
// ============================================================================
// Ollama context-length truncation
// ============================================================================
|
|
|
|
/**
 * Input truncation for the Ollama provider. Each test inspects the JSON body
 * handed to the mocked fetch to see what text was actually sent.
 */
describe("Embeddings - Ollama context-length truncation", () => {
  const realFetch = globalThis.fetch;

  // The tests below treat mxbai-embed-large as having a 512-token context,
  // giving a character budget of 512 * 3 = 1536.
  const MXBAI_MAX_CHARS = 512 * 3;

  /** Recorded calls of the currently installed fetch mock. */
  function fetchCalls() {
    return (globalThis.fetch as ReturnType<typeof vi.fn>).mock.calls;
  }

  /** Parse the JSON request body passed to the `callIndex`-th fetch invocation. */
  function sentBody(callIndex = 0) {
    const init = fetchCalls()[callIndex][1];
    return JSON.parse(init.body as string);
  }

  beforeEach(() => {
    const okResponse = {
      ok: true,
      json: () => Promise.resolve({ embeddings: [[0.1, 0.2, 0.3]] }),
    };
    globalThis.fetch = vi.fn().mockResolvedValue(okResponse);
  });

  afterEach(() => {
    globalThis.fetch = realFetch;
  });

  it("should truncate long input before calling Ollama embed", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const client = new Embeddings(undefined, "mxbai-embed-large", "ollama");

    // ~2500 chars, well above the 1536-char budget.
    const oversized = "word ".repeat(500);
    await client.embed(oversized);

    // The text that reached Ollama must fit the budget.
    const payload = sentBody();
    expect(payload.input.length).toBeLessThanOrEqual(MXBAI_MAX_CHARS);
  });

  it("should truncate at word boundary (not mid-word)", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const client = new Embeddings(undefined, "mxbai-embed-large", "ollama");

    // Each "abcdefghij " is 11 chars; 200 repeats = 2200 chars (exceeds 1536).
    const oversized = "abcdefghij ".repeat(200);
    await client.embed(oversized);

    const transmitted = sentBody().input as string;

    expect(transmitted.length).toBeLessThanOrEqual(MXBAI_MAX_CHARS);
    // With this repeating pattern, a cut on a word boundary leaves the text
    // ending in a complete "abcdefghij" with no trailing partial word.
    expect(transmitted).toMatch(/abcdefghij$/);
    // The transmitted text must be a true prefix of the original.
    expect(oversized.startsWith(transmitted)).toBe(true);
  });

  it("should pass short input through unchanged", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const client = new Embeddings(undefined, "mxbai-embed-large", "ollama");

    const brief = "This is a short text that fits within context length.";
    await client.embed(brief);

    expect(sentBody().input).toBe(brief);
  });

  it("should use model-specific context length for truncation", async () => {
    const { Embeddings } = await import("./embeddings.js");
    // The test treats nomic-embed-text as having context length 8192,
    // i.e. a budget of 8192 * 3 = 24576 chars.
    const client = new Embeddings(undefined, "nomic-embed-text", "ollama");

    // ~2400 chars: over the mxbai budget (1536) but under the nomic one (24576).
    const midSized = "hello ".repeat(400);
    await client.embed(midSized);

    // 2400 < 24576, so the text must arrive untouched.
    expect(sentBody().input).toBe(midSized);
  });

  it("should truncate each item individually in embedBatch", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const client = new Embeddings(undefined, "mxbai-embed-large", "ollama");

    const oversized = "word ".repeat(500); // ~2500 chars, exceeds the budget
    const brief = "short text"; // well under the budget

    await client.embedBatch([oversized, brief]);

    expect(fetchCalls()).toHaveLength(2);

    // First request: the long text was cut down.
    const firstPayload = sentBody(0);
    expect(firstPayload.input.length).toBeLessThanOrEqual(MXBAI_MAX_CHARS);
    expect(firstPayload.input.length).toBeLessThan(oversized.length);

    // Second request: the short text is forwarded as-is.
    const secondPayload = sentBody(1);
    expect(secondPayload.input).toBe(brief);
  });
});
|
|
|
|
// ============================================================================
// OpenAI embed — functional tests with mocked OpenAI client
// ============================================================================
|
|
|
|
/**
 * Functional tests for the OpenAI provider. The `openai` package is replaced
 * with a stub whose `embeddings.create` is a vitest mock, and `embeddings.js`
 * is re-imported after the stub is registered so it picks the stub up.
 */
describe("Embeddings - OpenAI functional", () => {
  const OPENAI_MODEL = "text-embedding-3-small";

  /**
   * Register a stub `openai` module backed by `create`, then import the module
   * under test and build an OpenAI-backed Embeddings instance.
   *
   * @param create - vitest mock standing in for `client.embeddings.create`.
   * @returns a fresh Embeddings instance wired to the stubbed client.
   */
  async function createEmbeddingsWithMockedOpenAI(create: ReturnType<typeof vi.fn>) {
    vi.doMock("openai", () => ({
      default: class MockOpenAI {
        embeddings = { create };
      },
    }));

    const { Embeddings } = await import("./embeddings.js");
    return new Embeddings("sk-test-key", OPENAI_MODEL, "openai");
  }

  beforeEach(() => {
    // Drop cached modules so the next import of embeddings.js sees this test's mock.
    vi.resetModules();
  });

  afterEach(() => {
    // restoreAllMocks() does not remove module mocks registered via doMock(),
    // so unregister the stub explicitly and drop the embeddings.js instance
    // that was built on top of it.
    vi.doUnmock("openai");
    vi.resetModules();
    vi.restoreAllMocks();
  });

  it("embed() should call OpenAI API with correct model and input", async () => {
    const mockCreate = vi.fn().mockResolvedValue({
      data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
    });

    const emb = await createEmbeddingsWithMockedOpenAI(mockCreate);
    const result = await emb.embed("hello world");

    expect(result).toEqual([0.1, 0.2, 0.3]);
    expect(mockCreate).toHaveBeenCalledWith({
      model: "text-embedding-3-small",
      input: "hello world",
    });
  });

  it("embedBatch() should send all texts in a single API call and return correctly ordered results", async () => {
    const mockCreate = vi.fn().mockResolvedValue({
      // Returned out of order on purpose to verify sorting by index.
      data: [
        { index: 2, embedding: [0.7, 0.8, 0.9] },
        { index: 0, embedding: [0.1, 0.2, 0.3] },
        { index: 1, embedding: [0.4, 0.5, 0.6] },
      ],
    });

    const emb = await createEmbeddingsWithMockedOpenAI(mockCreate);
    const results = await emb.embedBatch(["first", "second", "third"]);

    // Exactly one API call carrying all texts.
    expect(mockCreate).toHaveBeenCalledTimes(1);
    expect(mockCreate).toHaveBeenCalledWith({
      model: "text-embedding-3-small",
      input: ["first", "second", "third"],
    });

    // Results come back ordered by index (0, 1, 2).
    expect(results).toEqual([
      [0.1, 0.2, 0.3],
      [0.4, 0.5, 0.6],
      [0.7, 0.8, 0.9],
    ]);
  });

  it("embed() should propagate OpenAI API errors", async () => {
    const mockCreate = vi.fn().mockRejectedValue(new Error("API rate limit exceeded"));

    const emb = await createEmbeddingsWithMockedOpenAI(mockCreate);

    await expect(emb.embed("test")).rejects.toThrow("API rate limit exceeded");
  });

  it("embed() should return cached result on second call for same text", async () => {
    const mockCreate = vi.fn().mockResolvedValue({
      data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
    });

    const emb = await createEmbeddingsWithMockedOpenAI(mockCreate);

    const result1 = await emb.embed("cached text");
    const result2 = await emb.embed("cached text");

    expect(result1).toEqual([0.1, 0.2, 0.3]);
    expect(result2).toEqual([0.1, 0.2, 0.3]);
    // Only one API call — the second embed() is served from cache.
    expect(mockCreate).toHaveBeenCalledTimes(1);
  });

  it("embedBatch() should use cache for previously embedded texts", async () => {
    const mockCreate = vi
      .fn()
      .mockResolvedValueOnce({
        data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
      })
      .mockResolvedValueOnce({
        data: [{ index: 0, embedding: [0.7, 0.8, 0.9] }],
      });

    const emb = await createEmbeddingsWithMockedOpenAI(mockCreate);

    // First: embed "alpha" to populate the cache.
    await emb.embed("alpha");
    expect(mockCreate).toHaveBeenCalledTimes(1);

    // Now batch "alpha" (cached) together with "beta" (uncached).
    const results = await emb.embedBatch(["alpha", "beta"]);

    // Only one more API call, and only for "beta".
    expect(mockCreate).toHaveBeenCalledTimes(2);
    expect(mockCreate).toHaveBeenLastCalledWith({
      model: "text-embedding-3-small",
      input: ["beta"],
    });
    expect(results).toEqual([
      [0.1, 0.2, 0.3], // cached
      [0.7, 0.8, 0.9], // freshly computed
    ]);
  });
});
|