refactor(memory): consolidate embeddings and batch helpers

This commit is contained in:
Peter Steinberger
2026-02-17 00:10:32 +00:00
parent 423b7a0f28
commit 9bfd3ca195
11 changed files with 443 additions and 423 deletions

View File

@@ -4,8 +4,9 @@ import type { VoyageEmbeddingClient } from "./embeddings-voyage.js";
import { extractBatchErrorMessage, formatUnavailableBatchError } from "./batch-error-utils.js";
import { postJsonWithRetry } from "./batch-http.js";
import { applyEmbeddingBatchOutputLine } from "./batch-output.js";
import { buildBatchHeaders, normalizeBatchBaseUrl, splitBatchRequests } from "./batch-utils.js";
import { hashText, runWithConcurrency } from "./internal.js";
import { runEmbeddingBatchGroups } from "./batch-runner.js";
import { uploadBatchJsonlFile } from "./batch-upload.js";
import { buildBatchHeaders, normalizeBatchBaseUrl } from "./batch-utils.js";
/**
* Voyage Batch API Input Line format.
@@ -47,36 +48,18 @@ async function submitVoyageBatch(params: {
agentId: string;
}): Promise<VoyageBatchStatus> {
const baseUrl = normalizeBatchBaseUrl(params.client);
const jsonl = params.requests.map((request) => JSON.stringify(request)).join("\n");
const form = new FormData();
form.append("purpose", "batch");
form.append(
"file",
new Blob([jsonl], { type: "application/jsonl" }),
`memory-embeddings.${hashText(String(Date.now()))}.jsonl`,
);
// 1. Upload file using Voyage Files API
const fileRes = await fetch(`${baseUrl}/files`, {
method: "POST",
headers: buildBatchHeaders(params.client, { json: false }),
body: form,
const inputFileId = await uploadBatchJsonlFile({
client: params.client,
requests: params.requests,
errorPrefix: "voyage batch file upload failed",
});
if (!fileRes.ok) {
const text = await fileRes.text();
throw new Error(`voyage batch file upload failed: ${fileRes.status} ${text}`);
}
const filePayload = (await fileRes.json()) as { id?: string };
if (!filePayload.id) {
throw new Error("voyage batch file upload failed: missing file id");
}
// 2. Create batch job using Voyage Batches API
return await postJsonWithRetry<VoyageBatchStatus>({
url: `${baseUrl}/batches`,
headers: buildBatchHeaders(params.client, { json: true }),
body: {
input_file_id: filePayload.id,
input_file_id: inputFileId,
endpoint: VOYAGE_BATCH_ENDPOINT,
completion_window: VOYAGE_BATCH_COMPLETION_WINDOW,
request_params: {
@@ -192,99 +175,95 @@ export async function runVoyageEmbeddingBatches(params: {
concurrency: number;
debug?: (message: string, data?: Record<string, unknown>) => void;
}): Promise<Map<string, number[]>> {
if (params.requests.length === 0) {
return new Map();
}
const groups = splitBatchRequests(params.requests, VOYAGE_BATCH_MAX_REQUESTS);
const byCustomId = new Map<string, number[]>();
const tasks = groups.map((group, groupIndex) => async () => {
const batchInfo = await submitVoyageBatch({
client: params.client,
requests: group,
agentId: params.agentId,
});
if (!batchInfo.id) {
throw new Error("voyage batch create failed: missing batch id");
}
params.debug?.("memory embeddings: voyage batch created", {
batchId: batchInfo.id,
status: batchInfo.status,
group: groupIndex + 1,
groups: groups.length,
requests: group.length,
});
if (!params.wait && batchInfo.status !== "completed") {
throw new Error(
`voyage batch ${batchInfo.id} submitted; enable remote.batch.wait to await completion`,
);
}
const completed =
batchInfo.status === "completed"
? {
outputFileId: batchInfo.output_file_id ?? "",
errorFileId: batchInfo.error_file_id ?? undefined,
}
: await waitForVoyageBatch({
client: params.client,
batchId: batchInfo.id,
wait: params.wait,
pollIntervalMs: params.pollIntervalMs,
timeoutMs: params.timeoutMs,
debug: params.debug,
initial: batchInfo,
});
if (!completed.outputFileId) {
throw new Error(`voyage batch ${batchInfo.id} completed without output file`);
}
const baseUrl = normalizeBatchBaseUrl(params.client);
const contentRes = await fetch(`${baseUrl}/files/${completed.outputFileId}/content`, {
headers: buildBatchHeaders(params.client, { json: true }),
});
if (!contentRes.ok) {
const text = await contentRes.text();
throw new Error(`voyage batch file content failed: ${contentRes.status} ${text}`);
}
const errors: string[] = [];
const remaining = new Set(group.map((request) => request.custom_id));
if (contentRes.body) {
const reader = createInterface({
input: Readable.fromWeb(contentRes.body as unknown as import("stream/web").ReadableStream),
terminal: false,
});
for await (const rawLine of reader) {
if (!rawLine.trim()) {
continue;
}
const line = JSON.parse(rawLine) as VoyageBatchOutputLine;
applyEmbeddingBatchOutputLine({ line, remaining, errors, byCustomId });
}
}
if (errors.length > 0) {
throw new Error(`voyage batch ${batchInfo.id} failed: ${errors.join("; ")}`);
}
if (remaining.size > 0) {
throw new Error(`voyage batch ${batchInfo.id} missing ${remaining.size} embedding responses`);
}
});
params.debug?.("memory embeddings: voyage batch submit", {
requests: params.requests.length,
groups: groups.length,
return await runEmbeddingBatchGroups({
requests: params.requests,
maxRequests: VOYAGE_BATCH_MAX_REQUESTS,
wait: params.wait,
concurrency: params.concurrency,
pollIntervalMs: params.pollIntervalMs,
timeoutMs: params.timeoutMs,
});
concurrency: params.concurrency,
debug: params.debug,
debugLabel: "memory embeddings: voyage batch submit",
runGroup: async ({ group, groupIndex, groups, byCustomId }) => {
const batchInfo = await submitVoyageBatch({
client: params.client,
requests: group,
agentId: params.agentId,
});
if (!batchInfo.id) {
throw new Error("voyage batch create failed: missing batch id");
}
await runWithConcurrency(tasks, params.concurrency);
return byCustomId;
params.debug?.("memory embeddings: voyage batch created", {
batchId: batchInfo.id,
status: batchInfo.status,
group: groupIndex + 1,
groups,
requests: group.length,
});
if (!params.wait && batchInfo.status !== "completed") {
throw new Error(
`voyage batch ${batchInfo.id} submitted; enable remote.batch.wait to await completion`,
);
}
const completed =
batchInfo.status === "completed"
? {
outputFileId: batchInfo.output_file_id ?? "",
errorFileId: batchInfo.error_file_id ?? undefined,
}
: await waitForVoyageBatch({
client: params.client,
batchId: batchInfo.id,
wait: params.wait,
pollIntervalMs: params.pollIntervalMs,
timeoutMs: params.timeoutMs,
debug: params.debug,
initial: batchInfo,
});
if (!completed.outputFileId) {
throw new Error(`voyage batch ${batchInfo.id} completed without output file`);
}
const baseUrl = normalizeBatchBaseUrl(params.client);
const contentRes = await fetch(`${baseUrl}/files/${completed.outputFileId}/content`, {
headers: buildBatchHeaders(params.client, { json: true }),
});
if (!contentRes.ok) {
const text = await contentRes.text();
throw new Error(`voyage batch file content failed: ${contentRes.status} ${text}`);
}
const errors: string[] = [];
const remaining = new Set(group.map((request) => request.custom_id));
if (contentRes.body) {
const reader = createInterface({
input: Readable.fromWeb(
contentRes.body as unknown as import("stream/web").ReadableStream,
),
terminal: false,
});
for await (const rawLine of reader) {
if (!rawLine.trim()) {
continue;
}
const line = JSON.parse(rawLine) as VoyageBatchOutputLine;
applyEmbeddingBatchOutputLine({ line, remaining, errors, byCustomId });
}
}
if (errors.length > 0) {
throw new Error(`voyage batch ${batchInfo.id} failed: ${errors.join("; ")}`);
}
if (remaining.size > 0) {
throw new Error(
`voyage batch ${batchInfo.id} missing ${remaining.size} embedding responses`,
);
}
},
});
}