mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 08:21:26 +00:00
fix: L2-normalize local embedding vectors to fix semantic search (#5332)
* fix: L2-normalize local embedding vectors to fix semantic search * fix: handle non‑finite magnitude in L2 normalization and remove stale test reset * refactor: add braces to l2Normalize guard clause in embeddings * fix: sanitize local embeddings (#5332) (thanks @akramcodez) --------- Co-authored-by: Gustavo Madeira Santana <gumadeiras@gmail.com>
This commit is contained in:
@@ -6,6 +6,15 @@ import { createGeminiEmbeddingProvider, type GeminiEmbeddingClient } from "./emb
|
||||
import { createOpenAiEmbeddingProvider, type OpenAiEmbeddingClient } from "./embeddings-openai.js";
|
||||
import { importNodeLlamaCpp } from "./node-llama.js";
|
||||
|
||||
/**
 * Cleans an embedding vector and scales it to unit (L2) length.
 *
 * Non-finite components (`NaN`, `Infinity`, `-Infinity`) are replaced with 0
 * before the magnitude is computed. The input array is never mutated.
 *
 * @param vec - Raw embedding components.
 * @returns A new array holding the sanitized vector divided by its L2 norm.
 *   When the norm is below 1e-10 (including the empty vector) the sanitized
 *   vector is returned without scaling, because dividing by a near-zero norm
 *   would only amplify noise.
 */
function sanitizeAndNormalizeEmbedding(vec: number[]): number[] {
  const sanitized = vec.map((value) => (Number.isFinite(value) ? value : 0));
  const magnitude = Math.sqrt(sanitized.reduce((sum, value) => sum + value * value, 0));
  if (magnitude < 1e-10) {
    return sanitized;
  }
  if (Number.isFinite(magnitude)) {
    return sanitized.map((value) => value / magnitude);
  }

  // Every component is finite, yet the sum of squares overflowed to Infinity.
  // Dividing by Infinity would silently yield an all-zero vector, so rescale
  // by the largest absolute component first: the rescaled components lie in
  // [-1, 1], which keeps the second norm computation finite.
  const maxAbs = sanitized.reduce((max, value) => Math.max(max, Math.abs(value)), 0);
  const scaled = sanitized.map((value) => value / maxAbs);
  const scaledMagnitude = Math.sqrt(scaled.reduce((sum, value) => sum + value * value, 0));
  return scaled.map((value) => value / scaledMagnitude);
}
|
||||
|
||||
export type { GeminiEmbeddingClient } from "./embeddings-gemini.js";
|
||||
export type { OpenAiEmbeddingClient } from "./embeddings-openai.js";
|
||||
|
||||
@@ -98,14 +107,14 @@ async function createLocalEmbeddingProvider(
|
||||
embedQuery: async (text) => {
|
||||
const ctx = await ensureContext();
|
||||
const embedding = await ctx.getEmbeddingFor(text);
|
||||
return Array.from(embedding.vector);
|
||||
return sanitizeAndNormalizeEmbedding(Array.from(embedding.vector));
|
||||
},
|
||||
embedBatch: async (texts) => {
|
||||
const ctx = await ensureContext();
|
||||
const embeddings = await Promise.all(
|
||||
texts.map(async (text) => {
|
||||
const embedding = await ctx.getEmbeddingFor(text);
|
||||
return Array.from(embedding.vector);
|
||||
return sanitizeAndNormalizeEmbedding(Array.from(embedding.vector));
|
||||
}),
|
||||
);
|
||||
return embeddings;
|
||||
|
||||
Reference in New Issue
Block a user