fix: L2-normalize local embedding vectors to fix semantic search (#5332)

* fix: L2-normalize local embedding vectors to fix semantic search

* fix: handle non‑finite magnitude in L2 normalization and remove stale test reset

* refactor: add braces to l2Normalize guard clause in embeddings

* fix: sanitize local embeddings (#5332) (thanks @akramcodez)

---------

Co-authored-by: Gustavo Madeira Santana <gumadeiras@gmail.com>
This commit is contained in:
Sk Akram
2026-02-02 09:26:44 +05:30
committed by GitHub
parent b9910ab037
commit 5020bfa2a9
2 changed files with 165 additions and 2 deletions

View File

@@ -6,6 +6,15 @@ import { createGeminiEmbeddingProvider, type GeminiEmbeddingClient } from "./emb
import { createOpenAiEmbeddingProvider, type OpenAiEmbeddingClient } from "./embeddings-openai.js";
import { importNodeLlamaCpp } from "./node-llama.js";
function sanitizeAndNormalizeEmbedding(vec: number[]): number[] {
const sanitized = vec.map((value) => (Number.isFinite(value) ? value : 0));
const magnitude = Math.sqrt(sanitized.reduce((sum, value) => sum + value * value, 0));
if (magnitude < 1e-10) {
return sanitized;
}
return sanitized.map((value) => value / magnitude);
}
export type { GeminiEmbeddingClient } from "./embeddings-gemini.js";
export type { OpenAiEmbeddingClient } from "./embeddings-openai.js";
@@ -98,14 +107,14 @@ async function createLocalEmbeddingProvider(
embedQuery: async (text) => {
const ctx = await ensureContext();
const embedding = await ctx.getEmbeddingFor(text);
return Array.from(embedding.vector);
return sanitizeAndNormalizeEmbedding(Array.from(embedding.vector));
},
embedBatch: async (texts) => {
const ctx = await ensureContext();
const embeddings = await Promise.all(
texts.map(async (text) => {
const embedding = await ctx.getEmbeddingFor(text);
return Array.from(embedding.vector);
return sanitizeAndNormalizeEmbedding(Array.from(embedding.vector));
}),
);
return embeddings;