// openclaw/extensions/memory-neo4j/embeddings.test.ts

/**
 * Tests for embeddings.ts — Embedding Provider.
 *
 * Tests the Embeddings class with mocked OpenAI client and mocked fetch for Ollama.
 */

import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";

// ============================================================================
// Constructor
// ============================================================================

describe("Embeddings constructor", () => {
  /** Imports the module on demand and hands back the class under test. */
  const loadEmbeddings = async () => (await import("./embeddings.js")).Embeddings;

  it("should throw when OpenAI provider is used without API key", async () => {
    const Embeddings = await loadEmbeddings();
    // Wrap construction in a thunk so the thrown error is observed by `toThrow`.
    const constructWithoutKey = () =>
      new Embeddings(undefined, "text-embedding-3-small", "openai");

    expect(constructWithoutKey).toThrow("API key required for OpenAI embeddings");
  });

  it("should not require API key for ollama provider", async () => {
    const Embeddings = await loadEmbeddings();
    const instance = new Embeddings(undefined, "mxbai-embed-large", "ollama");

    expect(instance).toBeDefined();
  });
});

// ============================================================================
// Ollama embed
// ============================================================================

describe("Embeddings - Ollama provider", () => {
  const OLLAMA_MODEL = "mxbai-embed-large";
  const realFetch = globalThis.fetch;

  /** Imports the module on demand and hands back the class under test. */
  const loadEmbeddings = async () => (await import("./embeddings.js")).Embeddings;

  /** Replaces global fetch with a mock resolving to the given response-like object. */
  const stubFetch = (response: Record<string, unknown>) => {
    globalThis.fetch = vi.fn().mockResolvedValue(response);
  };

  /** Stubs a successful (`ok: true`) response whose JSON body is `payload`. */
  const stubFetchJson = (payload: unknown) => {
    stubFetch({ ok: true, json: () => Promise.resolve(payload) });
  };

  // Put the genuine fetch back after every case so mocks never bleed between tests.
  afterEach(() => {
    globalThis.fetch = realFetch;
  });

  it("should call Ollama API with correct request body", async () => {
    const Embeddings = await loadEmbeddings();
    const vector = [0.1, 0.2, 0.3, 0.4];
    stubFetchJson({ embeddings: [vector] });

    const provider = new Embeddings(undefined, OLLAMA_MODEL, "ollama");
    const embedded = await provider.embed("test text");

    const expectedBody = JSON.stringify({
      model: "mxbai-embed-large",
      input: "test text",
    });
    expect(embedded).toEqual(vector);
    expect(globalThis.fetch).toHaveBeenCalledWith(
      "http://localhost:11434/api/embed",
      expect.objectContaining({
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: expectedBody,
      }),
    );
  });

  // Each base URL variant must resolve to the same endpoint, regardless of trailing slashes.
  const baseUrlCases = [
    {
      title: "should use custom baseUrl for Ollama",
      baseUrl: "http://my-host:11434",
      vector: [0.5, 0.6],
    },
    {
      title: "should strip trailing slashes from baseUrl",
      baseUrl: "http://my-host:11434/",
      vector: [0.1, 0.2],
    },
    {
      title: "should strip multiple trailing slashes from baseUrl",
      baseUrl: "http://my-host:11434///",
      vector: [0.1, 0.2],
    },
  ];

  for (const { title, baseUrl, vector } of baseUrlCases) {
    it(title, async () => {
      const Embeddings = await loadEmbeddings();
      stubFetchJson({ embeddings: [vector] });

      const provider = new Embeddings(undefined, OLLAMA_MODEL, "ollama", baseUrl);
      await provider.embed("test");

      expect(globalThis.fetch).toHaveBeenCalledWith(
        "http://my-host:11434/api/embed",
        expect.any(Object),
      );
    });
  }

  it("should throw when Ollama returns error status", async () => {
    const Embeddings = await loadEmbeddings();
    stubFetch({
      ok: false,
      status: 500,
      text: () => Promise.resolve("Internal Server Error"),
    });

    const provider = new Embeddings(undefined, OLLAMA_MODEL, "ollama");
    const pending = provider.embed("test");

    await expect(pending).rejects.toThrow("Ollama embedding failed: 500");
  });

  it("should throw when Ollama returns no embeddings", async () => {
    const Embeddings = await loadEmbeddings();
    stubFetchJson({ embeddings: [] });

    const provider = new Embeddings(undefined, OLLAMA_MODEL, "ollama");
    const pending = provider.embed("test");

    await expect(pending).rejects.toThrow("No embedding returned from Ollama");
  });

  it("should throw when Ollama returns null embeddings", async () => {
    const Embeddings = await loadEmbeddings();
    // The response body carries no `embeddings` key at all.
    stubFetchJson({});

    const provider = new Embeddings(undefined, OLLAMA_MODEL, "ollama");
    const pending = provider.embed("test");

    await expect(pending).rejects.toThrow("No embedding returned from Ollama");
  });

  it("should propagate fetch errors for Ollama", async () => {
    const Embeddings = await loadEmbeddings();
    globalThis.fetch = vi.fn().mockRejectedValue(new Error("Network error"));

    const provider = new Embeddings(undefined, OLLAMA_MODEL, "ollama");
    const pending = provider.embed("test");

    await expect(pending).rejects.toThrow("Network error");
  });
});

// ============================================================================
// OpenAI provider — construction and public surface (no mocking in this block)
// ============================================================================

describe("Embeddings - OpenAI provider", () => {
  /** Imports the module on demand and builds an OpenAI-backed instance with a placeholder key. */
  const createOpenAiEmbeddings = async () => {
    const { Embeddings } = await import("./embeddings.js");
    return new Embeddings("sk-test-key", "text-embedding-3-small", "openai");
  };

  it("should create instance with OpenAI provider when API key provided", async () => {
    // Successful construction with valid parameters is the only behavior checked here.
    const instance = await createOpenAiEmbeddings();

    expect(instance).toBeDefined();
  });

  it("should have embed and embedBatch methods", async () => {
    const instance = await createOpenAiEmbeddings();
    const memberKinds = [typeof instance.embed, typeof instance.embedBatch];

    expect(memberKinds).toEqual(["function", "function"]);
  });
});

// ============================================================================
// Batch embedding
// ============================================================================

describe("Embeddings - embedBatch", () => {
  const originalFetch = globalThis.fetch;

  // Restore the genuine fetch after every case so mocks never bleed between tests.
  afterEach(() => {
    globalThis.fetch = originalFetch;
  });

  it("should return empty array for empty input (openai)", async () => {
    const { Embeddings } = await import("./embeddings.js");
    const emb = new Embeddings("sk-test", "text-embedding-3-small", "openai");
    const results = await emb.embedBatch([]);
    expect(results).toEqual([]);
  });

  it("should return empty array for empty input (ollama)", async () => {
    const { Embeddings } = await import("./embeddings.js");
    // Install a spy so an accidental request fails the test instead of hitting the network.
    globalThis.fetch = vi.fn();

    const emb = new Embeddings(undefined, "mxbai-embed-large", "ollama");
    const results = await emb.embedBatch([]);

    expect(results).toEqual([]);
    expect(globalThis.fetch).not.toHaveBeenCalled();
  });

  it("should use sequential calls for Ollama batch (no native batch support)", async () => {
    const { Embeddings } = await import("./embeddings.js");

    // Key each vector on the request input so the response never depends on
    // when `json()` happens to be evaluated relative to other calls.
    const vectorsByText: Record<string, number[]> = {
      text1: [0.1, 0.2],
      text2: [0.3, 0.4],
      text3: [0.5, 0.6],
    };
    globalThis.fetch = vi.fn().mockImplementation((_url: unknown, init: { body: string }) => {
      const { input } = JSON.parse(init.body) as { input: string };
      return Promise.resolve({
        ok: true,
        json: () => Promise.resolve({ embeddings: [vectorsByText[input]] }),
      });
    });

    const emb = new Embeddings(undefined, "mxbai-embed-large", "ollama");
    const results = await emb.embedBatch(["text1", "text2", "text3"]);

    // Should make 3 separate calls, one per text, in input order
    const fetchMock = globalThis.fetch as ReturnType<typeof vi.fn>;
    expect(fetchMock).toHaveBeenCalledTimes(3);
    const sentInputs = fetchMock.mock.calls.map(
      (call) => (JSON.parse(call[1].body as string) as { input: string }).input,
    );
    expect(sentInputs).toEqual(["text1", "text2", "text3"]);

    // Each result should be the vector belonging to the text at the same position
    expect(results).toEqual([
      [0.1, 0.2],
      [0.3, 0.4],
      [0.5, 0.6],
    ]);
  });
});

// ============================================================================
// Ollama context-length truncation
// ============================================================================

describe("Embeddings - Ollama context-length truncation", () => {
  // Character budget these tests expect for mxbai-embed-large: context length 512 * 3.
  const MXBAI_MAX_CHARS = 512 * 3;
  const realFetch = globalThis.fetch;

  /** Recorded argument lists of the mocked fetch. */
  const fetchCalls = () => (globalThis.fetch as ReturnType<typeof vi.fn>).mock.calls;

  /** Parses the request body of the given fetch call and returns its `input` field. */
  const sentInput = (callIndex = 0) => {
    const parsedBody = JSON.parse(fetchCalls()[callIndex][1].body as string);
    return parsedBody.input;
  };

  /** Imports the module on demand and builds an Ollama-backed instance for `model`. */
  const createOllamaEmbeddings = async (model = "mxbai-embed-large") => {
    const { Embeddings } = await import("./embeddings.js");
    return new Embeddings(undefined, model, "ollama");
  };

  // Every case gets a fresh mock that answers with a fixed three-dimensional vector.
  beforeEach(() => {
    const cannedResponse = {
      ok: true,
      json: () => Promise.resolve({ embeddings: [[0.1, 0.2, 0.3]] }),
    };
    globalThis.fetch = vi.fn().mockResolvedValue(cannedResponse);
  });

  afterEach(() => {
    globalThis.fetch = realFetch;
  });

  it("should truncate long input before calling Ollama embed", async () => {
    const provider = await createOllamaEmbeddings();

    // 500 repetitions of "word " = 2500 chars, well above the 1536-char budget.
    const oversized = "word ".repeat(500);
    await provider.embed(oversized);

    // The text that reached Ollama must fit in the budget.
    expect(sentInput().length).toBeLessThanOrEqual(MXBAI_MAX_CHARS);
  });

  it("should truncate at word boundary (not mid-word)", async () => {
    const provider = await createOllamaEmbeddings();

    // "abcdefghij " is 11 chars; 200 repetitions = 2200 chars (exceeds 1536).
    const oversized = "abcdefghij ".repeat(200);
    await provider.embed(oversized);

    const delivered = sentInput() as string;

    expect(delivered.length).toBeLessThanOrEqual(MXBAI_MAX_CHARS);
    // With this repeating pattern, a cut on a word boundary leaves the text
    // ending in a complete "abcdefghij" rather than a partial word.
    expect(delivered).toMatch(/abcdefghij$/);
    // The delivered text must also be a genuine prefix of what was submitted.
    expect(oversized.startsWith(delivered)).toBe(true);
  });

  it("should pass short input through unchanged", async () => {
    const provider = await createOllamaEmbeddings();

    const brief = "This is a short text that fits within context length.";
    await provider.embed(brief);

    expect(sentInput()).toBe(brief);
  });

  it("should use model-specific context length for truncation", async () => {
    // nomic-embed-text has context length 8192, so its budget is 8192 * 3 = 24576 chars.
    const provider = await createOllamaEmbeddings("nomic-embed-text");

    // 2400 chars: over the mxbai budget (1536) yet inside the nomic budget (24576).
    const midSized = "hello ".repeat(400);
    await provider.embed(midSized);

    // No truncation expected because 2400 < 24576.
    expect(sentInput()).toBe(midSized);
  });

  it("should truncate each item individually in embedBatch", async () => {
    const provider = await createOllamaEmbeddings();

    const oversized = "word ".repeat(500); // 2500 chars, exceeds the 1536-char budget
    const brief = "short text"; // well under the budget

    await provider.embedBatch([oversized, brief]);

    expect(fetchCalls()).toHaveLength(2);

    // First request: the oversized text arrives shortened.
    const firstInput = sentInput(0);
    expect(firstInput.length).toBeLessThanOrEqual(MXBAI_MAX_CHARS);
    expect(firstInput.length).toBeLessThan(oversized.length);

    // Second request: the brief text arrives untouched.
    expect(sentInput(1)).toBe(brief);
  });
});

// ============================================================================
// OpenAI embed — functional tests with mocked OpenAI client
// ============================================================================

describe("Embeddings - OpenAI functional", () => {
  const MODEL = "text-embedding-3-small";

  /**
   * Registers a mock for the `openai` module whose client exposes
   * `embeddings.create` backed by the supplied mock function, then imports the
   * module under test and returns an OpenAI-backed `Embeddings` instance.
   *
   * @param create - mock standing in for `client.embeddings.create`
   * @returns an `Embeddings` instance constructed against the mocked client
   */
  const createWithMockedClient = async (create: ReturnType<typeof vi.fn>) => {
    vi.doMock("openai", () => ({
      default: class MockOpenAI {
        embeddings = { create };
      },
    }));

    const { Embeddings } = await import("./embeddings.js");
    return new Embeddings("sk-test-key", MODEL, "openai");
  };

  // Drop cached modules so each case re-imports embeddings.js against its own mock.
  beforeEach(() => {
    vi.resetModules();
  });

  afterEach(() => {
    // `vi.restoreAllMocks()` does not undo `vi.doMock`; unregister the module
    // mock explicitly and clear the module cache so neither the mocked
    // `openai` nor an embeddings.js bound to it leaks into later tests.
    vi.doUnmock("openai");
    vi.resetModules();
    vi.restoreAllMocks();
  });

  it("embed() should call OpenAI API with correct model and input", async () => {
    const mockCreate = vi.fn().mockResolvedValue({
      data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
    });

    const emb = await createWithMockedClient(mockCreate);
    const result = await emb.embed("hello world");

    expect(result).toEqual([0.1, 0.2, 0.3]);
    expect(mockCreate).toHaveBeenCalledWith({
      model: "text-embedding-3-small",
      input: "hello world",
    });
  });

  it("embedBatch() should send all texts in a single API call and return correctly ordered results", async () => {
    const mockCreate = vi.fn().mockResolvedValue({
      // Return out-of-order to verify sorting by index
      data: [
        { index: 2, embedding: [0.7, 0.8, 0.9] },
        { index: 0, embedding: [0.1, 0.2, 0.3] },
        { index: 1, embedding: [0.4, 0.5, 0.6] },
      ],
    });

    const emb = await createWithMockedClient(mockCreate);
    const results = await emb.embedBatch(["first", "second", "third"]);

    // Should have made exactly one API call with all texts
    expect(mockCreate).toHaveBeenCalledTimes(1);
    expect(mockCreate).toHaveBeenCalledWith({
      model: "text-embedding-3-small",
      input: ["first", "second", "third"],
    });

    // Results should be sorted by index (0, 1, 2)
    expect(results).toEqual([
      [0.1, 0.2, 0.3],
      [0.4, 0.5, 0.6],
      [0.7, 0.8, 0.9],
    ]);
  });

  it("embed() should propagate OpenAI API errors", async () => {
    const mockCreate = vi.fn().mockRejectedValue(new Error("API rate limit exceeded"));

    const emb = await createWithMockedClient(mockCreate);

    await expect(emb.embed("test")).rejects.toThrow("API rate limit exceeded");
  });

  it("embed() should return cached result on second call for same text", async () => {
    const mockCreate = vi.fn().mockResolvedValue({
      data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
    });

    const emb = await createWithMockedClient(mockCreate);

    const result1 = await emb.embed("cached text");
    const result2 = await emb.embed("cached text");

    expect(result1).toEqual([0.1, 0.2, 0.3]);
    expect(result2).toEqual([0.1, 0.2, 0.3]);
    // Should only make one API call — second call uses cache
    expect(mockCreate).toHaveBeenCalledTimes(1);
  });

  it("embedBatch() should use cache for previously embedded texts", async () => {
    // First response serves the single embed() call, second serves the batch remainder.
    const mockCreate = vi
      .fn()
      .mockResolvedValueOnce({
        data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
      })
      .mockResolvedValueOnce({
        data: [{ index: 0, embedding: [0.7, 0.8, 0.9] }],
      });

    const emb = await createWithMockedClient(mockCreate);

    // First: embed "alpha" to populate cache
    await emb.embed("alpha");
    expect(mockCreate).toHaveBeenCalledTimes(1);

    // Now batch with "alpha" (cached) and "beta" (uncached)
    const results = await emb.embedBatch(["alpha", "beta"]);
    // Should only call API once more for "beta"
    expect(mockCreate).toHaveBeenCalledTimes(2);
    expect(mockCreate).toHaveBeenLastCalledWith({
      model: "text-embedding-3-small",
      input: ["beta"],
    });
    expect(results).toEqual([
      [0.1, 0.2, 0.3], // cached
      [0.7, 0.8, 0.9], // freshly computed
    ]);
  });
});