fix(memory): add input_type to Voyage AI embeddings for improved retrieval (#10818)

* fix(memory): add input_type to Voyage AI embeddings for improved retrieval

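Voyage AI recommends passing input_type='document' when indexing and
input_type='query' when searching. When input_type is set, Voyage prepends
a role-specific prompt to each input before embedding it, which improves
retrieval quality. Per Voyage's documentation, embeddings generated with
and without input_type remain compatible with each other.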

Changes:
- embedQuery now passes input_type: 'query'
- embedBatch now passes input_type: 'document'
- Batch API request_params includes input_type: 'document'
- Tests updated to verify input_type is passed correctly

* Changelog: note Voyage embeddings input_type fix (#10818) (thanks @mcinteerj)

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Jake
2026-02-07 16:55:09 +13:00
committed by GitHub
parent 4c1da23a71
commit e78ae48e69
5 changed files with 57 additions and 8 deletions

View File

@@ -1,5 +1,5 @@
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
export type VoyageEmbeddingClient = {
baseUrl: string;
@@ -23,12 +23,18 @@ export async function createVoyageEmbeddingProvider(
const client = await resolveVoyageEmbeddingClient(options);
const url = `${client.baseUrl.replace(/\/$/, "")}/embeddings`;
const embed = async (input: string[]): Promise<number[][]> => {
const embed = async (input: string[], input_type?: "query" | "document"): Promise<number[][]> => {
if (input.length === 0) return [];
const body: { model: string; input: string[]; input_type?: "query" | "document" } = {
model: client.model,
input,
};
if (input_type) body.input_type = input_type;
const res = await fetch(url, {
method: "POST",
headers: client.headers,
body: JSON.stringify({ model: client.model, input }),
body: JSON.stringify(body),
});
if (!res.ok) {
const text = await res.text();
@@ -46,10 +52,10 @@ export async function createVoyageEmbeddingProvider(
id: "voyage",
model: client.model,
embedQuery: async (text) => {
const [vec] = await embed([text]);
const [vec] = await embed([text], "query");
return vec ?? [];
},
embedBatch: embed,
embedBatch: async (texts) => embed(texts, "document"),
},
client,
};