mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 03:42:43 +00:00
memory: add multimodal image and audio indexing
This commit is contained in:
@@ -284,9 +284,45 @@ Notes:
|
|||||||
|
|
||||||
- Paths can be absolute or workspace-relative.
|
- Paths can be absolute or workspace-relative.
|
||||||
- Directories are scanned recursively for `.md` files.
|
- Directories are scanned recursively for `.md` files.
|
||||||
- Only Markdown files are indexed.
|
- By default, only Markdown files are indexed.
|
||||||
|
- If `memorySearch.multimodal.enabled = true`, OpenClaw also indexes supported image/audio files under `extraPaths` only. Default memory roots (`MEMORY.md`, `memory.md`, `memory/**/*.md`) stay Markdown-only.
|
||||||
- Symlinks are ignored (files or directories).
|
- Symlinks are ignored (files or directories).
|
||||||
|
|
||||||
|
### Multimodal memory files (Gemini image + audio)
|
||||||
|
|
||||||
|
OpenClaw can index image and audio files from `memorySearch.extraPaths` when the Gemini embedding model `gemini-embedding-2-preview` is configured:
|
||||||
|
|
||||||
|
```json5
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
extraPaths: ["assets/reference", "voice-notes"],
|
||||||
|
multimodal: {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"], // or ["all"]
|
||||||
|
maxFileBytes: 10000000
|
||||||
|
},
|
||||||
|
remote: {
|
||||||
|
apiKey: "YOUR_GEMINI_API_KEY"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- Multimodal memory is currently supported only for `gemini-embedding-2-preview`.
|
||||||
|
- Multimodal indexing applies only to files discovered through `memorySearch.extraPaths`.
|
||||||
|
- Supported modalities in this phase: image and audio.
|
||||||
|
- `memorySearch.fallback` must stay `"none"` while multimodal memory is enabled.
|
||||||
|
- Supported image extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.gif`, `.heic`, `.heif`.
|
||||||
|
- Supported audio extensions: `.mp3`, `.wav`, `.ogg`, `.opus`, `.m4a`, `.aac`, `.flac`.
|
||||||
|
- Search queries remain text, but Gemini can compare those text queries against indexed image/audio embeddings.
|
||||||
|
- `memory_get` still reads Markdown only; binary files are searchable but not returned as raw file contents.
|
||||||
|
|
||||||
### Gemini embeddings (native)
|
### Gemini embeddings (native)
|
||||||
|
|
||||||
Set the provider to `gemini` to use the Gemini embeddings API directly:
|
Set the provider to `gemini` to use the Gemini embeddings API directly:
|
||||||
|
|||||||
@@ -131,6 +131,65 @@ describe("memory search config", () => {
|
|||||||
expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
|
expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// The "all" shorthand in `modalities` is expected to resolve to the concrete
// list ["image", "audio"]; `enabled` and `maxFileBytes` are expected to pass
// through unchanged.
it("normalizes multimodal settings", () => {
  const cfg = asConfig({
    agents: {
      defaults: {
        memorySearch: {
          provider: "gemini",
          model: "gemini-embedding-2-preview",
          multimodal: {
            enabled: true,
            modalities: ["all"],
            maxFileBytes: 8192,
          },
        },
      },
    },
  });
  const resolved = resolveMemorySearchConfig(cfg, "main");
  expect(resolved?.multimodal).toEqual({
    enabled: true,
    modalities: ["image", "audio"],
    maxFileBytes: 8192,
  });
});
|
||||||
|
|
||||||
|
// Enabling multimodal with a non-Gemini provider/model must make config
// resolution throw rather than silently ignoring the setting.
it("rejects multimodal memory on unsupported providers", () => {
  const cfg = asConfig({
    agents: {
      defaults: {
        memorySearch: {
          provider: "openai",
          model: "text-embedding-3-small",
          multimodal: { enabled: true, modalities: ["image"] },
        },
      },
    },
  });
  expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
    /memorySearch\.multimodal requires memorySearch\.provider = "gemini"/,
  );
});
|
||||||
|
|
||||||
|
// Even with a supported Gemini provider/model, any fallback other than "none"
// must make config resolution throw while multimodal is enabled.
it("rejects multimodal memory when fallback is configured", () => {
  const cfg = asConfig({
    agents: {
      defaults: {
        memorySearch: {
          provider: "gemini",
          model: "gemini-embedding-2-preview",
          fallback: "openai",
          multimodal: { enabled: true, modalities: ["image"] },
        },
      },
    },
  });
  expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
    /memorySearch\.multimodal does not support memorySearch\.fallback/,
  );
});
|
||||||
|
|
||||||
it("includes batch defaults for openai without remote overrides", () => {
|
it("includes batch defaults for openai without remote overrides", () => {
|
||||||
const cfg = configWithDefaultProvider("openai");
|
const cfg = configWithDefaultProvider("openai");
|
||||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
|||||||
@@ -3,6 +3,11 @@ import path from "node:path";
|
|||||||
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
|
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
|
||||||
import { resolveStateDir } from "../config/paths.js";
|
import { resolveStateDir } from "../config/paths.js";
|
||||||
import type { SecretInput } from "../config/types.secrets.js";
|
import type { SecretInput } from "../config/types.secrets.js";
|
||||||
|
import {
|
||||||
|
normalizeMemoryMultimodalSettings,
|
||||||
|
supportsMemoryMultimodalEmbeddings,
|
||||||
|
type MemoryMultimodalSettings,
|
||||||
|
} from "../memory/multimodal.js";
|
||||||
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
|
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
|
||||||
import { resolveAgentConfig } from "./agent-scope.js";
|
import { resolveAgentConfig } from "./agent-scope.js";
|
||||||
|
|
||||||
@@ -10,6 +15,7 @@ export type ResolvedMemorySearchConfig = {
|
|||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
sources: Array<"memory" | "sessions">;
|
sources: Array<"memory" | "sessions">;
|
||||||
extraPaths: string[];
|
extraPaths: string[];
|
||||||
|
multimodal: MemoryMultimodalSettings;
|
||||||
provider: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama" | "auto";
|
provider: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama" | "auto";
|
||||||
remote?: {
|
remote?: {
|
||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
@@ -204,6 +210,11 @@ function mergeConfig(
|
|||||||
.map((value) => value.trim())
|
.map((value) => value.trim())
|
||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
const extraPaths = Array.from(new Set(rawPaths));
|
const extraPaths = Array.from(new Set(rawPaths));
|
||||||
|
const multimodal = normalizeMemoryMultimodalSettings({
|
||||||
|
enabled: overrides?.multimodal?.enabled ?? defaults?.multimodal?.enabled,
|
||||||
|
modalities: overrides?.multimodal?.modalities ?? defaults?.multimodal?.modalities,
|
||||||
|
maxFileBytes: overrides?.multimodal?.maxFileBytes ?? defaults?.multimodal?.maxFileBytes,
|
||||||
|
});
|
||||||
const vector = {
|
const vector = {
|
||||||
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
|
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
|
||||||
extensionPath:
|
extensionPath:
|
||||||
@@ -307,6 +318,7 @@ function mergeConfig(
|
|||||||
enabled,
|
enabled,
|
||||||
sources,
|
sources,
|
||||||
extraPaths,
|
extraPaths,
|
||||||
|
multimodal,
|
||||||
provider,
|
provider,
|
||||||
remote,
|
remote,
|
||||||
experimental: {
|
experimental: {
|
||||||
@@ -365,5 +377,21 @@ export function resolveMemorySearchConfig(
|
|||||||
if (!resolved.enabled) {
|
if (!resolved.enabled) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
if (
|
||||||
|
resolved.multimodal.enabled &&
|
||||||
|
!supportsMemoryMultimodalEmbeddings({
|
||||||
|
provider: resolved.provider,
|
||||||
|
model: resolved.model,
|
||||||
|
})
|
||||||
|
) {
|
||||||
|
throw new Error(
|
||||||
|
'agents.*.memorySearch.multimodal requires memorySearch.provider = "gemini" and model = "gemini-embedding-2-preview".',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (resolved.multimodal.enabled && resolved.fallback !== "none") {
|
||||||
|
throw new Error(
|
||||||
|
'agents.*.memorySearch.multimodal does not support memorySearch.fallback. Set fallback to "none".',
|
||||||
|
);
|
||||||
|
}
|
||||||
return resolved;
|
return resolved;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,10 @@ const TARGET_KEYS = [
|
|||||||
"agents.defaults.memorySearch.fallback",
|
"agents.defaults.memorySearch.fallback",
|
||||||
"agents.defaults.memorySearch.sources",
|
"agents.defaults.memorySearch.sources",
|
||||||
"agents.defaults.memorySearch.extraPaths",
|
"agents.defaults.memorySearch.extraPaths",
|
||||||
|
"agents.defaults.memorySearch.multimodal",
|
||||||
|
"agents.defaults.memorySearch.multimodal.enabled",
|
||||||
|
"agents.defaults.memorySearch.multimodal.modalities",
|
||||||
|
"agents.defaults.memorySearch.multimodal.maxFileBytes",
|
||||||
"agents.defaults.memorySearch.experimental.sessionMemory",
|
"agents.defaults.memorySearch.experimental.sessionMemory",
|
||||||
"agents.defaults.memorySearch.remote.baseUrl",
|
"agents.defaults.memorySearch.remote.baseUrl",
|
||||||
"agents.defaults.memorySearch.remote.apiKey",
|
"agents.defaults.memorySearch.remote.apiKey",
|
||||||
|
|||||||
@@ -778,7 +778,15 @@ export const FIELD_HELP: Record<string, string> = {
|
|||||||
"agents.defaults.memorySearch.sources":
|
"agents.defaults.memorySearch.sources":
|
||||||
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
|
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
|
||||||
"agents.defaults.memorySearch.extraPaths":
|
"agents.defaults.memorySearch.extraPaths":
|
||||||
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; keep paths small and intentional to avoid noisy recall.",
|
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.",
|
||||||
|
"agents.defaults.memorySearch.multimodal":
|
||||||
|
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled.',
|
||||||
|
"agents.defaults.memorySearch.multimodal.enabled":
|
||||||
|
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, and disables memory-search fallback providers.",
|
||||||
|
"agents.defaults.memorySearch.multimodal.modalities":
|
||||||
|
'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.',
|
||||||
|
"agents.defaults.memorySearch.multimodal.maxFileBytes":
|
||||||
|
"Sets the maximum bytes allowed per multimodal file before it is skipped during memory indexing. Use this to cap upload cost and indexing latency, or raise it for short high-quality audio clips.",
|
||||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||||
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
|
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
|
||||||
"agents.defaults.memorySearch.provider":
|
"agents.defaults.memorySearch.provider":
|
||||||
|
|||||||
@@ -319,6 +319,10 @@ export const FIELD_LABELS: Record<string, string> = {
|
|||||||
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
||||||
"agents.defaults.memorySearch.sources": "Memory Search Sources",
|
"agents.defaults.memorySearch.sources": "Memory Search Sources",
|
||||||
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
|
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
|
||||||
|
"agents.defaults.memorySearch.multimodal": "Memory Search Multimodal",
|
||||||
|
"agents.defaults.memorySearch.multimodal.enabled": "Enable Memory Search Multimodal",
|
||||||
|
"agents.defaults.memorySearch.multimodal.modalities": "Memory Search Multimodal Modalities",
|
||||||
|
"agents.defaults.memorySearch.multimodal.maxFileBytes": "Memory Search Multimodal Max File Bytes",
|
||||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||||
"Memory Search Session Index (Experimental)",
|
"Memory Search Session Index (Experimental)",
|
||||||
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
||||||
|
|||||||
@@ -319,6 +319,15 @@ export type MemorySearchConfig = {
|
|||||||
sources?: Array<"memory" | "sessions">;
|
sources?: Array<"memory" | "sessions">;
|
||||||
/** Extra paths to include in memory search (directories or .md files). */
|
/** Extra paths to include in memory search (directories or .md files). */
|
||||||
extraPaths?: string[];
|
extraPaths?: string[];
|
||||||
|
/** Optional multimodal file indexing for selected extra paths. */
|
||||||
|
multimodal?: {
|
||||||
|
/** Enable image/audio embeddings from extraPaths. */
|
||||||
|
enabled?: boolean;
|
||||||
|
/** Which non-text file types to index. */
|
||||||
|
modalities?: Array<"image" | "audio" | "all">;
|
||||||
|
/** Max bytes allowed per multimodal file before it is skipped. */
|
||||||
|
maxFileBytes?: number;
|
||||||
|
};
|
||||||
/** Experimental memory search settings. */
|
/** Experimental memory search settings. */
|
||||||
experimental?: {
|
experimental?: {
|
||||||
/** Enable session transcript indexing (experimental, default: false). */
|
/** Enable session transcript indexing (experimental, default: false). */
|
||||||
|
|||||||
@@ -553,6 +553,16 @@ export const MemorySearchSchema = z
|
|||||||
enabled: z.boolean().optional(),
|
enabled: z.boolean().optional(),
|
||||||
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
|
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
|
||||||
extraPaths: z.array(z.string()).optional(),
|
extraPaths: z.array(z.string()).optional(),
|
||||||
|
multimodal: z
|
||||||
|
.object({
|
||||||
|
enabled: z.boolean().optional(),
|
||||||
|
modalities: z
|
||||||
|
.array(z.union([z.literal("image"), z.literal("audio"), z.literal("all")]))
|
||||||
|
.optional(),
|
||||||
|
maxFileBytes: z.number().int().positive().optional(),
|
||||||
|
})
|
||||||
|
.strict()
|
||||||
|
.optional(),
|
||||||
experimental: z
|
experimental: z
|
||||||
.object({
|
.object({
|
||||||
sessionMemory: z.boolean().optional(),
|
sessionMemory: z.boolean().optional(),
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ const EXT_BY_MIME: Record<string, string> = {
|
|||||||
"image/gif": ".gif",
|
"image/gif": ".gif",
|
||||||
"audio/ogg": ".ogg",
|
"audio/ogg": ".ogg",
|
||||||
"audio/mpeg": ".mp3",
|
"audio/mpeg": ".mp3",
|
||||||
|
"audio/wav": ".wav",
|
||||||
|
"audio/flac": ".flac",
|
||||||
|
"audio/aac": ".aac",
|
||||||
|
"audio/opus": ".opus",
|
||||||
"audio/x-m4a": ".m4a",
|
"audio/x-m4a": ".m4a",
|
||||||
"audio/mp4": ".m4a",
|
"audio/mp4": ".m4a",
|
||||||
"video/mp4": ".mp4",
|
"video/mp4": ".mp4",
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
|
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
|
||||||
|
import { hasNonTextEmbeddingParts } from "./embedding-inputs.js";
|
||||||
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
|
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
|
||||||
import type { EmbeddingProvider } from "./embeddings.js";
|
import type { EmbeddingProvider } from "./embeddings.js";
|
||||||
import { hashText, type MemoryChunk } from "./internal.js";
|
import { hashText, type MemoryChunk } from "./internal.js";
|
||||||
@@ -16,6 +17,10 @@ export function enforceEmbeddingMaxInputTokens(
|
|||||||
const out: MemoryChunk[] = [];
|
const out: MemoryChunk[] = [];
|
||||||
|
|
||||||
for (const chunk of chunks) {
|
for (const chunk of chunks) {
|
||||||
|
if (hasNonTextEmbeddingParts(chunk.embeddingInput)) {
|
||||||
|
out.push(chunk);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (estimateUtf8Bytes(chunk.text) <= maxInputTokens) {
|
if (estimateUtf8Bytes(chunk.text) <= maxInputTokens) {
|
||||||
out.push(chunk);
|
out.push(chunk);
|
||||||
continue;
|
continue;
|
||||||
@@ -27,6 +32,7 @@ export function enforceEmbeddingMaxInputTokens(
|
|||||||
endLine: chunk.endLine,
|
endLine: chunk.endLine,
|
||||||
text,
|
text,
|
||||||
hash: hashText(text),
|
hash: hashText(text),
|
||||||
|
embeddingInput: { text },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||||
|
|
||||||
// Helpers for enforcing embedding model input size limits.
|
// Helpers for enforcing embedding model input size limits.
|
||||||
//
|
//
|
||||||
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
|
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
|
||||||
@@ -11,6 +13,22 @@ export function estimateUtf8Bytes(text: string): number {
|
|||||||
return Buffer.byteLength(text, "utf8");
|
return Buffer.byteLength(text, "utf8");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Estimates the size of a structured embedding input in UTF-8 bytes.
 *
 * When the input has no parts (missing or empty `parts`), only `input.text`
 * is measured. Otherwise the parts are summed and `input.text` is ignored:
 * a text part contributes its `text`, and any other part contributes its
 * `mimeType` plus its `data` string.
 *
 * @param input - Embedding input to measure.
 * @returns The summed `estimateUtf8Bytes` results for the measured strings.
 */
export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number {
  if (!input.parts?.length) {
    return estimateUtf8Bytes(input.text);
  }
  let total = 0;
  for (const part of input.parts) {
    if (part.type === "text") {
      total += estimateUtf8Bytes(part.text);
      continue;
    }
    // Non-text part: count both the MIME label and the payload string.
    total += estimateUtf8Bytes(part.mimeType);
    total += estimateUtf8Bytes(part.data);
  }
  return total;
}
|
||||||
|
|
||||||
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
|
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
|
||||||
if (maxUtf8Bytes <= 0) {
|
if (maxUtf8Bytes <= 0) {
|
||||||
return [text];
|
return [text];
|
||||||
|
|||||||
34
src/memory/embedding-inputs.ts
Normal file
34
src/memory/embedding-inputs.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/** Text segment of a structured embedding input. */
export type EmbeddingInputTextPart = {
  type: "text";
  text: string;
};

/** Inline payload segment of a structured embedding input, labelled with a MIME type. */
export type EmbeddingInputInlineDataPart = {
  type: "inline-data";
  mimeType: string;
  // NOTE(review): presumably the base64-encoded file contents — confirm against the producer.
  data: string;
};

/** Any segment of a structured embedding input, discriminated by `type`. */
export type EmbeddingInputPart = EmbeddingInputTextPart | EmbeddingInputInlineDataPart;

/**
 * Input handed to an embedding provider: always carries `text`, and may
 * additionally carry structured `parts`.
 */
export type EmbeddingInput = {
  text: string;
  parts?: EmbeddingInputPart[];
};

/**
 * Wraps a plain string as a text-only embedding input.
 *
 * @param text - Text to embed.
 * @returns An input with `text` set and no `parts`.
 */
export function buildTextEmbeddingInput(text: string): EmbeddingInput {
  return { text };
}

/**
 * Type guard that narrows a part to the inline-data variant.
 *
 * @param part - Part to inspect.
 * @returns `true` when `part.type` is `"inline-data"`.
 */
export function isInlineDataEmbeddingInputPart(
  part: EmbeddingInputPart,
): part is EmbeddingInputInlineDataPart {
  return part.type === "inline-data";
}

/**
 * Reports whether an embedding input contains at least one inline-data part.
 *
 * @param input - Input to inspect; `undefined` is accepted.
 * @returns `false` for `undefined`, missing, or empty `parts`; otherwise
 *   whether any part is inline data.
 */
export function hasNonTextEmbeddingParts(input: EmbeddingInput | undefined): boolean {
  if (!input?.parts?.length) {
    return false;
  }
  return input.parts.some((part) => isInlineDataEmbeddingInputPart(part));
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
import * as authModule from "../agents/model-auth.js";
|
import * as authModule from "../agents/model-auth.js";
|
||||||
import {
|
import {
|
||||||
|
buildGeminiEmbeddingRequest,
|
||||||
buildFileDataPart,
|
buildFileDataPart,
|
||||||
buildGeminiParts,
|
buildGeminiParts,
|
||||||
buildGeminiTextEmbeddingRequest,
|
buildGeminiTextEmbeddingRequest,
|
||||||
@@ -113,6 +114,35 @@ describe("buildGeminiTextEmbeddingRequest", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("buildGeminiEmbeddingRequest", () => {
  // Structured parts are expected to map one-to-one onto Gemini parts:
  // text -> { text }, inline-data -> { inlineData: { mimeType, data } },
  // with `modelPath` surfacing as `model` in the request.
  it("builds a multimodal request from structured input parts", () => {
    expect(
      buildGeminiEmbeddingRequest({
        input: {
          text: "Image file: diagram.png",
          parts: [
            { type: "text", text: "Image file: diagram.png" },
            { type: "inline-data", mimeType: "image/png", data: "abc123" },
          ],
        },
        taskType: "RETRIEVAL_DOCUMENT",
        modelPath: "models/gemini-embedding-2-preview",
        outputDimensionality: 1536,
      }),
    ).toEqual({
      model: "models/gemini-embedding-2-preview",
      content: {
        parts: [
          { text: "Image file: diagram.png" },
          { inlineData: { mimeType: "image/png", data: "abc123" } },
        ],
      },
      taskType: "RETRIEVAL_DOCUMENT",
      outputDimensionality: 1536,
    });
  });
});
|
||||||
|
|
||||||
// ---------- Model detection ----------
|
// ---------- Model detection ----------
|
||||||
|
|
||||||
describe("isGeminiEmbedding2Model", () => {
|
describe("isGeminiEmbedding2Model", () => {
|
||||||
@@ -341,6 +371,63 @@ describe("gemini-embedding-2-preview provider", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Sends one image input and one audio input through `embedBatchInputs` and
// checks the batch request body that reached the stubbed `fetch`.
it("supports multimodal embedBatchInputs requests", async () => {
  const fetchMock = createGeminiBatchFetchMock(2);
  vi.stubGlobal("fetch", fetchMock);
  mockResolvedProviderKey();

  const { provider } = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    fallback: "none",
  });

  expect(provider.embedBatchInputs).toBeDefined();
  await provider.embedBatchInputs?.([
    {
      text: "Image file: diagram.png",
      parts: [
        { type: "text", text: "Image file: diagram.png" },
        { type: "inline-data", mimeType: "image/png", data: "img" },
      ],
    },
    {
      text: "Audio file: note.wav",
      parts: [
        { type: "text", text: "Audio file: note.wav" },
        { type: "inline-data", mimeType: "audio/wav", data: "aud" },
      ],
    },
  ]);

  const body = parseFetchBody(fetchMock);
  // NOTE(review): 3072 is not passed above — presumably the provider's default
  // output dimensionality for this model; confirm against the provider code.
  expect(body.requests).toEqual([
    {
      model: "models/gemini-embedding-2-preview",
      content: {
        parts: [
          { text: "Image file: diagram.png" },
          { inlineData: { mimeType: "image/png", data: "img" } },
        ],
      },
      taskType: "RETRIEVAL_DOCUMENT",
      outputDimensionality: 3072,
    },
    {
      model: "models/gemini-embedding-2-preview",
      content: {
        parts: [
          { text: "Audio file: note.wav" },
          { inlineData: { mimeType: "audio/wav", data: "aud" } },
        ],
      },
      taskType: "RETRIEVAL_DOCUMENT",
      outputDimensionality: 3072,
    },
  ]);
});
|
||||||
|
|
||||||
it("throws for invalid outputDimensionality", async () => {
|
it("throws for invalid outputDimensionality", async () => {
|
||||||
mockResolvedProviderKey();
|
mockResolvedProviderKey();
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js
|
|||||||
import { parseGeminiAuth } from "../infra/gemini-auth.js";
|
import { parseGeminiAuth } from "../infra/gemini-auth.js";
|
||||||
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
||||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||||
|
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||||
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
||||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
||||||
import { buildRemoteBaseUrlPolicy, withRemoteHttpResponse } from "./remote-http.js";
|
import { buildRemoteBaseUrlPolicy, withRemoteHttpResponse } from "./remote-http.js";
|
||||||
@@ -54,12 +55,13 @@ export type GeminiFilePart = {
|
|||||||
fileData: { mimeType: string; fileUri: string };
|
fileData: { mimeType: string; fileUri: string };
|
||||||
};
|
};
|
||||||
export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart;
|
export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart;
|
||||||
export type GeminiTextEmbeddingRequest = {
|
export type GeminiEmbeddingRequest = {
|
||||||
content: { parts: GeminiTextPart[] };
|
content: { parts: GeminiPart[] };
|
||||||
taskType: GeminiTaskType;
|
taskType: GeminiTaskType;
|
||||||
outputDimensionality?: number;
|
outputDimensionality?: number;
|
||||||
model?: string;
|
model?: string;
|
||||||
};
|
};
|
||||||
|
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||||
|
|
||||||
/** Convert a string or pre-built parts array into `GeminiPart[]`. */
|
/** Convert a string or pre-built parts array into `GeminiPart[]`. */
|
||||||
export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] {
|
export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] {
|
||||||
@@ -86,8 +88,30 @@ export function buildGeminiTextEmbeddingRequest(params: {
|
|||||||
outputDimensionality?: number;
|
outputDimensionality?: number;
|
||||||
modelPath?: string;
|
modelPath?: string;
|
||||||
}): GeminiTextEmbeddingRequest {
|
}): GeminiTextEmbeddingRequest {
|
||||||
const request: GeminiTextEmbeddingRequest = {
|
return buildGeminiEmbeddingRequest({
|
||||||
content: { parts: [{ text: params.text }] },
|
input: { text: params.text },
|
||||||
|
taskType: params.taskType,
|
||||||
|
outputDimensionality: params.outputDimensionality,
|
||||||
|
modelPath: params.modelPath,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildGeminiEmbeddingRequest(params: {
|
||||||
|
input: EmbeddingInput;
|
||||||
|
taskType: GeminiTaskType;
|
||||||
|
outputDimensionality?: number;
|
||||||
|
modelPath?: string;
|
||||||
|
}): GeminiEmbeddingRequest {
|
||||||
|
const request: GeminiEmbeddingRequest = {
|
||||||
|
content: {
|
||||||
|
parts: params.input.parts?.map((part) =>
|
||||||
|
part.type === "text"
|
||||||
|
? ({ text: part.text } satisfies GeminiTextPart)
|
||||||
|
: ({
|
||||||
|
inlineData: { mimeType: part.mimeType, data: part.data },
|
||||||
|
} satisfies GeminiInlinePart),
|
||||||
|
) ?? [{ text: params.input.text }],
|
||||||
|
},
|
||||||
taskType: params.taskType,
|
taskType: params.taskType,
|
||||||
};
|
};
|
||||||
if (params.modelPath) {
|
if (params.modelPath) {
|
||||||
@@ -143,7 +167,7 @@ function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
|||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeGeminiModel(model: string): string {
|
export function normalizeGeminiModel(model: string): string {
|
||||||
const trimmed = model.trim();
|
const trimmed = model.trim();
|
||||||
if (!trimmed) {
|
if (!trimmed) {
|
||||||
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
||||||
@@ -158,6 +182,46 @@ function normalizeGeminiModel(model: string): string {
|
|||||||
return withoutPrefix;
|
return withoutPrefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * POSTs a JSON body to a Gemini embeddings endpoint and returns the parsed
 * JSON response.
 *
 * The call is run through `executeWithApiKeyRotation` using the client's API
 * keys. For each key tried, request headers are built from
 * `parseGeminiAuth(apiKey)` and then overlaid with `client.headers`, so a
 * client header wins when both define the same name.
 *
 * @param params.client - Supplies the API keys, extra headers and SSRF policy.
 * @param params.endpoint - URL to POST to.
 * @param params.body - Value serialized with `JSON.stringify` as the request body.
 * @returns The response JSON, typed as an optional single `embedding` and/or
 *   an optional `embeddings` array. The shape is asserted, not validated.
 * @throws Error with the HTTP status and response text when the response is not OK.
 */
async function fetchGeminiEmbeddingPayload(params: {
  client: GeminiEmbeddingClient;
  endpoint: string;
  body: unknown;
}): Promise<{
  embedding?: { values?: number[] };
  embeddings?: Array<{ values?: number[] }>;
}> {
  return await executeWithApiKeyRotation({
    provider: "google",
    apiKeys: params.client.apiKeys,
    execute: async (apiKey) => {
      const authHeaders = parseGeminiAuth(apiKey);
      // Spread order matters: client-level headers override auth-derived ones.
      const headers = {
        ...authHeaders.headers,
        ...params.client.headers,
      };
      return await withRemoteHttpResponse({
        url: params.endpoint,
        ssrfPolicy: params.client.ssrfPolicy,
        init: {
          method: "POST",
          headers,
          body: JSON.stringify(params.body),
        },
        onResponse: async (res) => {
          if (!res.ok) {
            const text = await res.text();
            throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
          }
          return (await res.json()) as {
            embedding?: { values?: number[] };
            embeddings?: Array<{ values?: number[] }>;
          };
        },
      });
    },
  });
}
|
||||||
|
|
||||||
function normalizeGeminiBaseUrl(raw: string): string {
|
function normalizeGeminiBaseUrl(raw: string): string {
|
||||||
const trimmed = raw.replace(/\/+$/, "");
|
const trimmed = raw.replace(/\/+$/, "");
|
||||||
const openAiIndex = trimmed.indexOf("/openai");
|
const openAiIndex = trimmed.indexOf("/openai");
|
||||||
@@ -181,71 +245,50 @@ export async function createGeminiEmbeddingProvider(
|
|||||||
const isV2 = isGeminiEmbedding2Model(client.model);
|
const isV2 = isGeminiEmbedding2Model(client.model);
|
||||||
const outputDimensionality = client.outputDimensionality;
|
const outputDimensionality = client.outputDimensionality;
|
||||||
|
|
||||||
const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => {
|
|
||||||
const authHeaders = parseGeminiAuth(apiKey);
|
|
||||||
const headers = {
|
|
||||||
...authHeaders.headers,
|
|
||||||
...client.headers,
|
|
||||||
};
|
|
||||||
const payload = await withRemoteHttpResponse({
|
|
||||||
url: endpoint,
|
|
||||||
ssrfPolicy: client.ssrfPolicy,
|
|
||||||
init: {
|
|
||||||
method: "POST",
|
|
||||||
headers,
|
|
||||||
body: JSON.stringify(body),
|
|
||||||
},
|
|
||||||
onResponse: async (res) => {
|
|
||||||
if (!res.ok) {
|
|
||||||
const text = await res.text();
|
|
||||||
throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
|
|
||||||
}
|
|
||||||
return (await res.json()) as {
|
|
||||||
embedding?: { values?: number[] };
|
|
||||||
embeddings?: Array<{ values?: number[] }>;
|
|
||||||
};
|
|
||||||
},
|
|
||||||
});
|
|
||||||
return payload;
|
|
||||||
};
|
|
||||||
|
|
||||||
const embedQuery = async (text: string): Promise<number[]> => {
|
const embedQuery = async (text: string): Promise<number[]> => {
|
||||||
if (!text.trim()) {
|
if (!text.trim()) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const body = buildGeminiTextEmbeddingRequest({
|
const payload = await fetchGeminiEmbeddingPayload({
|
||||||
text,
|
client,
|
||||||
taskType: options.taskType ?? "RETRIEVAL_QUERY",
|
endpoint: embedUrl,
|
||||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
body: buildGeminiTextEmbeddingRequest({
|
||||||
});
|
text,
|
||||||
const payload = await executeWithApiKeyRotation({
|
taskType: options.taskType ?? "RETRIEVAL_QUERY",
|
||||||
provider: "google",
|
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||||
apiKeys: client.apiKeys,
|
}),
|
||||||
execute: (apiKey) => fetchWithGeminiAuth(apiKey, embedUrl, body),
|
|
||||||
});
|
});
|
||||||
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
|
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
|
||||||
};
|
};
|
||||||
|
|
||||||
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise<number[][]> => {
|
||||||
if (texts.length === 0) {
|
if (inputs.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const requests = texts.map((text) =>
|
const payload = await fetchGeminiEmbeddingPayload({
|
||||||
buildGeminiTextEmbeddingRequest({
|
client,
|
||||||
text,
|
endpoint: batchUrl,
|
||||||
modelPath: client.modelPath,
|
body: {
|
||||||
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
|
requests: inputs.map((input) =>
|
||||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
buildGeminiEmbeddingRequest({
|
||||||
}),
|
input,
|
||||||
);
|
modelPath: client.modelPath,
|
||||||
const batchBody = { requests };
|
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
|
||||||
const payload = await executeWithApiKeyRotation({
|
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||||
provider: "google",
|
}),
|
||||||
apiKeys: client.apiKeys,
|
),
|
||||||
execute: (apiKey) => fetchWithGeminiAuth(apiKey, batchUrl, batchBody),
|
},
|
||||||
});
|
});
|
||||||
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
|
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
|
||||||
return texts.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
||||||
|
};
|
||||||
|
|
||||||
|
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
||||||
|
return await embedBatchInputs(
|
||||||
|
texts.map((text) => ({
|
||||||
|
text,
|
||||||
|
})),
|
||||||
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -255,6 +298,7 @@ export async function createGeminiEmbeddingProvider(
|
|||||||
maxInputTokens: GEMINI_MAX_INPUT_TOKENS[client.model],
|
maxInputTokens: GEMINI_MAX_INPUT_TOKENS[client.model],
|
||||||
embedQuery,
|
embedQuery,
|
||||||
embedBatch,
|
embedBatch,
|
||||||
|
embedBatchInputs,
|
||||||
},
|
},
|
||||||
client,
|
client,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import type { SecretInput } from "../config/types.secrets.js";
|
|||||||
import { formatErrorMessage } from "../infra/errors.js";
|
import { formatErrorMessage } from "../infra/errors.js";
|
||||||
import { resolveUserPath } from "../utils.js";
|
import { resolveUserPath } from "../utils.js";
|
||||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||||
|
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||||
import {
|
import {
|
||||||
createGeminiEmbeddingProvider,
|
createGeminiEmbeddingProvider,
|
||||||
type GeminiEmbeddingClient,
|
type GeminiEmbeddingClient,
|
||||||
@@ -31,6 +32,7 @@ export type EmbeddingProvider = {
|
|||||||
maxInputTokens?: number;
|
maxInputTokens?: number;
|
||||||
embedQuery: (text: string) => Promise<number[]>;
|
embedQuery: (text: string) => Promise<number[]>;
|
||||||
embedBatch: (texts: string[]) => Promise<number[][]>;
|
embedBatch: (texts: string[]) => Promise<number[][]>;
|
||||||
|
embedBatchInputs?: (inputs: EmbeddingInput[]) => Promise<number[][]>;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import { randomUUID } from "node:crypto";
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import os from "node:os";
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
@@ -6,6 +7,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
|||||||
import "./test-runtime-mocks.js";
|
import "./test-runtime-mocks.js";
|
||||||
|
|
||||||
let embedBatchCalls = 0;
|
let embedBatchCalls = 0;
|
||||||
|
let embedBatchInputCalls = 0;
|
||||||
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
|
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
|
||||||
|
|
||||||
vi.mock("./embeddings.js", () => {
|
vi.mock("./embeddings.js", () => {
|
||||||
@@ -13,7 +15,9 @@ vi.mock("./embeddings.js", () => {
|
|||||||
const lower = text.toLowerCase();
|
const lower = text.toLowerCase();
|
||||||
const alpha = lower.split("alpha").length - 1;
|
const alpha = lower.split("alpha").length - 1;
|
||||||
const beta = lower.split("beta").length - 1;
|
const beta = lower.split("beta").length - 1;
|
||||||
return [alpha, beta];
|
const image = lower.split("image").length - 1;
|
||||||
|
const audio = lower.split("audio").length - 1;
|
||||||
|
return [alpha, beta, image, audio];
|
||||||
};
|
};
|
||||||
return {
|
return {
|
||||||
createEmbeddingProvider: async (options: {
|
createEmbeddingProvider: async (options: {
|
||||||
@@ -38,6 +42,32 @@ vi.mock("./embeddings.js", () => {
|
|||||||
embedBatchCalls += 1;
|
embedBatchCalls += 1;
|
||||||
return texts.map(embedText);
|
return texts.map(embedText);
|
||||||
},
|
},
|
||||||
|
...(providerId === "gemini"
|
||||||
|
? {
|
||||||
|
embedBatchInputs: async (
|
||||||
|
inputs: Array<{
|
||||||
|
text: string;
|
||||||
|
parts?: Array<
|
||||||
|
{ type: "text"; text: string } | { type: "inline-data"; mimeType: string }
|
||||||
|
>;
|
||||||
|
}>,
|
||||||
|
) => {
|
||||||
|
embedBatchInputCalls += 1;
|
||||||
|
return inputs.map((input) => {
|
||||||
|
const mimeType = input.parts?.find(
|
||||||
|
(part) => part.type === "inline-data",
|
||||||
|
)?.mimeType;
|
||||||
|
if (mimeType?.startsWith("image/")) {
|
||||||
|
return [0, 0, 1, 0];
|
||||||
|
}
|
||||||
|
if (mimeType?.startsWith("audio/")) {
|
||||||
|
return [0, 0, 0, 1];
|
||||||
|
}
|
||||||
|
return embedText(input.text);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
},
|
},
|
||||||
...(providerId === "gemini"
|
...(providerId === "gemini"
|
||||||
? {
|
? {
|
||||||
@@ -64,6 +94,7 @@ describe("memory index", () => {
|
|||||||
let indexVectorPath = "";
|
let indexVectorPath = "";
|
||||||
let indexMainPath = "";
|
let indexMainPath = "";
|
||||||
let indexExtraPath = "";
|
let indexExtraPath = "";
|
||||||
|
let indexMultimodalPath = "";
|
||||||
let indexStatusPath = "";
|
let indexStatusPath = "";
|
||||||
let indexSourceChangePath = "";
|
let indexSourceChangePath = "";
|
||||||
let indexModelPath = "";
|
let indexModelPath = "";
|
||||||
@@ -97,6 +128,7 @@ describe("memory index", () => {
|
|||||||
indexMainPath = path.join(workspaceDir, "index-main.sqlite");
|
indexMainPath = path.join(workspaceDir, "index-main.sqlite");
|
||||||
indexVectorPath = path.join(workspaceDir, "index-vector.sqlite");
|
indexVectorPath = path.join(workspaceDir, "index-vector.sqlite");
|
||||||
indexExtraPath = path.join(workspaceDir, "index-extra.sqlite");
|
indexExtraPath = path.join(workspaceDir, "index-extra.sqlite");
|
||||||
|
indexMultimodalPath = path.join(workspaceDir, "index-multimodal.sqlite");
|
||||||
indexStatusPath = path.join(workspaceDir, "index-status.sqlite");
|
indexStatusPath = path.join(workspaceDir, "index-status.sqlite");
|
||||||
indexSourceChangePath = path.join(workspaceDir, "index-source-change.sqlite");
|
indexSourceChangePath = path.join(workspaceDir, "index-source-change.sqlite");
|
||||||
indexModelPath = path.join(workspaceDir, "index-model-change.sqlite");
|
indexModelPath = path.join(workspaceDir, "index-model-change.sqlite");
|
||||||
@@ -119,6 +151,7 @@ describe("memory index", () => {
|
|||||||
// Keep atomic reindex tests on the safe path.
|
// Keep atomic reindex tests on the safe path.
|
||||||
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
|
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
|
||||||
embedBatchCalls = 0;
|
embedBatchCalls = 0;
|
||||||
|
embedBatchInputCalls = 0;
|
||||||
providerCalls = [];
|
providerCalls = [];
|
||||||
|
|
||||||
// Keep the workspace stable to allow manager reuse across tests.
|
// Keep the workspace stable to allow manager reuse across tests.
|
||||||
@@ -149,6 +182,11 @@ describe("memory index", () => {
|
|||||||
provider?: "openai" | "gemini";
|
provider?: "openai" | "gemini";
|
||||||
model?: string;
|
model?: string;
|
||||||
outputDimensionality?: number;
|
outputDimensionality?: number;
|
||||||
|
multimodal?: {
|
||||||
|
enabled?: boolean;
|
||||||
|
modalities?: Array<"image" | "audio" | "all">;
|
||||||
|
maxFileBytes?: number;
|
||||||
|
};
|
||||||
vectorEnabled?: boolean;
|
vectorEnabled?: boolean;
|
||||||
cacheEnabled?: boolean;
|
cacheEnabled?: boolean;
|
||||||
minScore?: number;
|
minScore?: number;
|
||||||
@@ -172,6 +210,7 @@ describe("memory index", () => {
|
|||||||
},
|
},
|
||||||
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
||||||
extraPaths: params.extraPaths,
|
extraPaths: params.extraPaths,
|
||||||
|
multimodal: params.multimodal,
|
||||||
sources: params.sources,
|
sources: params.sources,
|
||||||
experimental: { sessionMemory: params.sessionMemory ?? false },
|
experimental: { sessionMemory: params.sessionMemory ?? false },
|
||||||
},
|
},
|
||||||
@@ -247,6 +286,31 @@ describe("memory index", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("indexes multimodal image and audio files from extra paths with Gemini structured inputs", async () => {
|
||||||
|
const mediaDir = path.join(workspaceDir, "media-memory");
|
||||||
|
await fs.mkdir(mediaDir, { recursive: true });
|
||||||
|
await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));
|
||||||
|
await fs.writeFile(path.join(mediaDir, "meeting.wav"), Buffer.from("wav"));
|
||||||
|
|
||||||
|
const cfg = createCfg({
|
||||||
|
storePath: indexMultimodalPath,
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
extraPaths: [mediaDir],
|
||||||
|
multimodal: { enabled: true, modalities: ["image", "audio"] },
|
||||||
|
});
|
||||||
|
const manager = await getPersistentManager(cfg);
|
||||||
|
await manager.sync({ reason: "test" });
|
||||||
|
|
||||||
|
expect(embedBatchInputCalls).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const imageResults = await manager.search("image");
|
||||||
|
expect(imageResults.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
|
||||||
|
|
||||||
|
const audioResults = await manager.search("audio");
|
||||||
|
expect(audioResults.some((result) => result.path.endsWith("meeting.wav"))).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
it("keeps dirty false in status-only manager after prior indexing", async () => {
|
it("keeps dirty false in status-only manager after prior indexing", async () => {
|
||||||
const cfg = createCfg({ storePath: indexStatusPath });
|
const cfg = createCfg({ storePath: indexStatusPath });
|
||||||
|
|
||||||
@@ -433,6 +497,82 @@ describe("memory index", () => {
|
|||||||
await secondManager.close?.();
|
await secondManager.close?.();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("reindexes when extraPaths change", async () => {
|
||||||
|
const storePath = path.join(workspaceDir, `index-scope-extra-${randomUUID()}.sqlite`);
|
||||||
|
const firstExtraDir = path.join(workspaceDir, "scope-extra-a");
|
||||||
|
const secondExtraDir = path.join(workspaceDir, "scope-extra-b");
|
||||||
|
await fs.rm(firstExtraDir, { recursive: true, force: true });
|
||||||
|
await fs.rm(secondExtraDir, { recursive: true, force: true });
|
||||||
|
await fs.mkdir(firstExtraDir, { recursive: true });
|
||||||
|
await fs.mkdir(secondExtraDir, { recursive: true });
|
||||||
|
await fs.writeFile(path.join(firstExtraDir, "a.md"), "alpha only");
|
||||||
|
await fs.writeFile(path.join(secondExtraDir, "b.md"), "beta only");
|
||||||
|
|
||||||
|
const first = await getMemorySearchManager({
|
||||||
|
cfg: createCfg({
|
||||||
|
storePath,
|
||||||
|
extraPaths: [firstExtraDir],
|
||||||
|
}),
|
||||||
|
agentId: "main",
|
||||||
|
});
|
||||||
|
const firstManager = requireManager(first);
|
||||||
|
await firstManager.sync?.({ reason: "test" });
|
||||||
|
await firstManager.close?.();
|
||||||
|
|
||||||
|
const second = await getMemorySearchManager({
|
||||||
|
cfg: createCfg({
|
||||||
|
storePath,
|
||||||
|
extraPaths: [secondExtraDir],
|
||||||
|
}),
|
||||||
|
agentId: "main",
|
||||||
|
});
|
||||||
|
const secondManager = requireManager(second);
|
||||||
|
await secondManager.sync?.({ reason: "test" });
|
||||||
|
const results = await secondManager.search("beta");
|
||||||
|
expect(results.some((result) => result.path.endsWith("scope-extra-b/b.md"))).toBe(true);
|
||||||
|
expect(results.some((result) => result.path.endsWith("scope-extra-a/a.md"))).toBe(false);
|
||||||
|
await secondManager.close?.();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("reindexes when multimodal settings change", async () => {
|
||||||
|
const storePath = path.join(workspaceDir, `index-scope-multimodal-${randomUUID()}.sqlite`);
|
||||||
|
const mediaDir = path.join(workspaceDir, "scope-media");
|
||||||
|
await fs.rm(mediaDir, { recursive: true, force: true });
|
||||||
|
await fs.mkdir(mediaDir, { recursive: true });
|
||||||
|
await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));
|
||||||
|
|
||||||
|
const first = await getMemorySearchManager({
|
||||||
|
cfg: createCfg({
|
||||||
|
storePath,
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
extraPaths: [mediaDir],
|
||||||
|
}),
|
||||||
|
agentId: "main",
|
||||||
|
});
|
||||||
|
const firstManager = requireManager(first);
|
||||||
|
await firstManager.sync?.({ reason: "test" });
|
||||||
|
const multimodalCallsAfterFirstSync = embedBatchInputCalls;
|
||||||
|
await firstManager.close?.();
|
||||||
|
|
||||||
|
const second = await getMemorySearchManager({
|
||||||
|
cfg: createCfg({
|
||||||
|
storePath,
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
extraPaths: [mediaDir],
|
||||||
|
multimodal: { enabled: true, modalities: ["image"] },
|
||||||
|
}),
|
||||||
|
agentId: "main",
|
||||||
|
});
|
||||||
|
const secondManager = requireManager(second);
|
||||||
|
await secondManager.sync?.({ reason: "test" });
|
||||||
|
expect(embedBatchInputCalls).toBeGreaterThan(multimodalCallsAfterFirstSync);
|
||||||
|
const results = await secondManager.search("image");
|
||||||
|
expect(results.some((result) => result.path.endsWith("scope-media/diagram.png"))).toBe(true);
|
||||||
|
await secondManager.close?.();
|
||||||
|
});
|
||||||
|
|
||||||
it("reuses cached embeddings on forced reindex", async () => {
|
it("reuses cached embeddings on forced reindex", async () => {
|
||||||
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
|
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
|
||||||
const manager = await getPersistentManager(cfg);
|
const manager = await getPersistentManager(cfg);
|
||||||
|
|||||||
@@ -9,6 +9,10 @@ import {
|
|||||||
normalizeExtraMemoryPaths,
|
normalizeExtraMemoryPaths,
|
||||||
remapChunkLines,
|
remapChunkLines,
|
||||||
} from "./internal.js";
|
} from "./internal.js";
|
||||||
|
import {
|
||||||
|
DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||||
|
type MemoryMultimodalSettings,
|
||||||
|
} from "./multimodal.js";
|
||||||
|
|
||||||
function setupTempDirLifecycle(prefix: string): () => string {
|
function setupTempDirLifecycle(prefix: string): () => string {
|
||||||
let tmpDir = "";
|
let tmpDir = "";
|
||||||
@@ -38,6 +42,11 @@ describe("normalizeExtraMemoryPaths", () => {
|
|||||||
|
|
||||||
describe("listMemoryFiles", () => {
|
describe("listMemoryFiles", () => {
|
||||||
const getTmpDir = setupTempDirLifecycle("memory-test-");
|
const getTmpDir = setupTempDirLifecycle("memory-test-");
|
||||||
|
const multimodal: MemoryMultimodalSettings = {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"],
|
||||||
|
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||||
|
};
|
||||||
|
|
||||||
it("includes files from additional paths (directory)", async () => {
|
it("includes files from additional paths (directory)", async () => {
|
||||||
const tmpDir = getTmpDir();
|
const tmpDir = getTmpDir();
|
||||||
@@ -131,10 +140,29 @@ describe("listMemoryFiles", () => {
|
|||||||
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
|
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
|
||||||
expect(memoryMatches).toHaveLength(1);
|
expect(memoryMatches).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("includes image and audio files from extra paths when multimodal is enabled", async () => {
|
||||||
|
const tmpDir = getTmpDir();
|
||||||
|
const extraDir = path.join(tmpDir, "media");
|
||||||
|
await fs.mkdir(extraDir, { recursive: true });
|
||||||
|
await fs.writeFile(path.join(extraDir, "diagram.png"), Buffer.from("png"));
|
||||||
|
await fs.writeFile(path.join(extraDir, "note.wav"), Buffer.from("wav"));
|
||||||
|
await fs.writeFile(path.join(extraDir, "ignore.bin"), Buffer.from("bin"));
|
||||||
|
|
||||||
|
const files = await listMemoryFiles(tmpDir, [extraDir], multimodal);
|
||||||
|
expect(files.some((file) => file.endsWith("diagram.png"))).toBe(true);
|
||||||
|
expect(files.some((file) => file.endsWith("note.wav"))).toBe(true);
|
||||||
|
expect(files.some((file) => file.endsWith("ignore.bin"))).toBe(false);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("buildFileEntry", () => {
|
describe("buildFileEntry", () => {
|
||||||
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
|
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
|
||||||
|
const multimodal: MemoryMultimodalSettings = {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"],
|
||||||
|
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||||
|
};
|
||||||
|
|
||||||
it("returns null when the file disappears before reading", async () => {
|
it("returns null when the file disappears before reading", async () => {
|
||||||
const tmpDir = getTmpDir();
|
const tmpDir = getTmpDir();
|
||||||
@@ -154,6 +182,26 @@ describe("buildFileEntry", () => {
|
|||||||
expect(entry?.path).toBe("note.md");
|
expect(entry?.path).toBe("note.md");
|
||||||
expect(entry?.size).toBeGreaterThan(0);
|
expect(entry?.size).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("returns multimodal metadata for eligible image files", async () => {
|
||||||
|
const tmpDir = getTmpDir();
|
||||||
|
const target = path.join(tmpDir, "diagram.png");
|
||||||
|
await fs.writeFile(target, Buffer.from("png"));
|
||||||
|
|
||||||
|
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||||
|
|
||||||
|
expect(entry).toMatchObject({
|
||||||
|
path: "diagram.png",
|
||||||
|
kind: "multimodal",
|
||||||
|
modality: "image",
|
||||||
|
mimeType: "image/png",
|
||||||
|
contentText: "Image file: diagram.png",
|
||||||
|
});
|
||||||
|
expect(entry?.embeddingInput?.parts).toEqual([
|
||||||
|
{ type: "text", text: "Image file: diagram.png" },
|
||||||
|
expect.objectContaining({ type: "inline-data", mimeType: "image/png" }),
|
||||||
|
]);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("chunkMarkdown", () => {
|
describe("chunkMarkdown", () => {
|
||||||
|
|||||||
@@ -2,8 +2,16 @@ import crypto from "node:crypto";
|
|||||||
import fsSync from "node:fs";
|
import fsSync from "node:fs";
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
import { detectMime } from "../media/mime.js";
|
||||||
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
|
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
|
||||||
|
import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js";
|
||||||
import { isFileMissingError } from "./fs-utils.js";
|
import { isFileMissingError } from "./fs-utils.js";
|
||||||
|
import {
|
||||||
|
classifyMemoryMultimodalPath,
|
||||||
|
isMemoryMultimodalEnabled,
|
||||||
|
type MemoryMultimodalModality,
|
||||||
|
type MemoryMultimodalSettings,
|
||||||
|
} from "./multimodal.js";
|
||||||
|
|
||||||
export type MemoryFileEntry = {
|
export type MemoryFileEntry = {
|
||||||
path: string;
|
path: string;
|
||||||
@@ -11,6 +19,11 @@ export type MemoryFileEntry = {
|
|||||||
mtimeMs: number;
|
mtimeMs: number;
|
||||||
size: number;
|
size: number;
|
||||||
hash: string;
|
hash: string;
|
||||||
|
kind?: "markdown" | "multimodal";
|
||||||
|
contentText?: string;
|
||||||
|
embeddingInput?: EmbeddingInput;
|
||||||
|
modality?: MemoryMultimodalModality;
|
||||||
|
mimeType?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type MemoryChunk = {
|
export type MemoryChunk = {
|
||||||
@@ -18,6 +31,13 @@ export type MemoryChunk = {
|
|||||||
endLine: number;
|
endLine: number;
|
||||||
text: string;
|
text: string;
|
||||||
hash: string;
|
hash: string;
|
||||||
|
embeddingInput?: EmbeddingInput;
|
||||||
|
};
|
||||||
|
|
||||||
|
const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = {
|
||||||
|
enabled: false,
|
||||||
|
modalities: [],
|
||||||
|
maxFileBytes: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
export function ensureDir(dir: string): string {
|
export function ensureDir(dir: string): string {
|
||||||
@@ -56,7 +76,16 @@ export function isMemoryPath(relPath: string): boolean {
|
|||||||
return normalized.startsWith("memory/");
|
return normalized.startsWith("memory/");
|
||||||
}
|
}
|
||||||
|
|
||||||
async function walkDir(dir: string, files: string[]) {
|
function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean {
|
||||||
|
if (filePath.endsWith(".md")) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
classifyMemoryMultimodalPath(filePath, multimodal ?? DISABLED_MULTIMODAL_SETTINGS) !== null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function walkDir(dir: string, files: string[], multimodal?: MemoryMultimodalSettings) {
|
||||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||||
for (const entry of entries) {
|
for (const entry of entries) {
|
||||||
const full = path.join(dir, entry.name);
|
const full = path.join(dir, entry.name);
|
||||||
@@ -64,13 +93,13 @@ async function walkDir(dir: string, files: string[]) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (entry.isDirectory()) {
|
if (entry.isDirectory()) {
|
||||||
await walkDir(full, files);
|
await walkDir(full, files, multimodal);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!entry.isFile()) {
|
if (!entry.isFile()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!entry.name.endsWith(".md")) {
|
if (!isAllowedMemoryFilePath(full, multimodal)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
files.push(full);
|
files.push(full);
|
||||||
@@ -80,6 +109,7 @@ async function walkDir(dir: string, files: string[]) {
|
|||||||
export async function listMemoryFiles(
|
export async function listMemoryFiles(
|
||||||
workspaceDir: string,
|
workspaceDir: string,
|
||||||
extraPaths?: string[],
|
extraPaths?: string[],
|
||||||
|
multimodal?: MemoryMultimodalSettings,
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
const result: string[] = [];
|
const result: string[] = [];
|
||||||
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
||||||
@@ -117,10 +147,10 @@ export async function listMemoryFiles(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (stat.isDirectory()) {
|
if (stat.isDirectory()) {
|
||||||
await walkDir(inputPath, result);
|
await walkDir(inputPath, result, multimodal);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (stat.isFile() && inputPath.endsWith(".md")) {
|
if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) {
|
||||||
result.push(inputPath);
|
result.push(inputPath);
|
||||||
}
|
}
|
||||||
} catch {}
|
} catch {}
|
||||||
@@ -152,6 +182,7 @@ export function hashText(value: string): string {
|
|||||||
export async function buildFileEntry(
|
export async function buildFileEntry(
|
||||||
absPath: string,
|
absPath: string,
|
||||||
workspaceDir: string,
|
workspaceDir: string,
|
||||||
|
multimodal?: MemoryMultimodalSettings,
|
||||||
): Promise<MemoryFileEntry | null> {
|
): Promise<MemoryFileEntry | null> {
|
||||||
let stat;
|
let stat;
|
||||||
try {
|
try {
|
||||||
@@ -162,6 +193,63 @@ export async function buildFileEntry(
|
|||||||
}
|
}
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/");
|
||||||
|
const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
|
||||||
|
const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
|
||||||
|
if (modality) {
|
||||||
|
if (!isMemoryMultimodalEnabled(multimodalSettings)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (stat.size > multimodalSettings.maxFileBytes) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
let buffer: Buffer;
|
||||||
|
try {
|
||||||
|
buffer = await fs.readFile(absPath);
|
||||||
|
} catch (err) {
|
||||||
|
if (isFileMissingError(err)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath });
|
||||||
|
if (!mimeType || !mimeType.startsWith(`${modality}/`)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const contentText = `${modality === "image" ? "Image" : "Audio"} file: ${normalizedPath}`;
|
||||||
|
const embeddingInput: EmbeddingInput = {
|
||||||
|
text: contentText,
|
||||||
|
parts: [
|
||||||
|
{ type: "text", text: contentText },
|
||||||
|
{
|
||||||
|
type: "inline-data",
|
||||||
|
mimeType,
|
||||||
|
data: buffer.toString("base64"),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
|
||||||
|
const chunkHash = hashText(
|
||||||
|
JSON.stringify({
|
||||||
|
path: normalizedPath,
|
||||||
|
contentText,
|
||||||
|
mimeType,
|
||||||
|
dataHash,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
path: normalizedPath,
|
||||||
|
absPath,
|
||||||
|
mtimeMs: stat.mtimeMs,
|
||||||
|
size: stat.size,
|
||||||
|
hash: chunkHash,
|
||||||
|
kind: "multimodal",
|
||||||
|
contentText,
|
||||||
|
embeddingInput,
|
||||||
|
modality,
|
||||||
|
mimeType,
|
||||||
|
};
|
||||||
|
}
|
||||||
let content: string;
|
let content: string;
|
||||||
try {
|
try {
|
||||||
content = await fs.readFile(absPath, "utf-8");
|
content = await fs.readFile(absPath, "utf-8");
|
||||||
@@ -173,11 +261,12 @@ export async function buildFileEntry(
|
|||||||
}
|
}
|
||||||
const hash = hashText(content);
|
const hash = hashText(content);
|
||||||
return {
|
return {
|
||||||
path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
|
path: normalizedPath,
|
||||||
absPath,
|
absPath,
|
||||||
mtimeMs: stat.mtimeMs,
|
mtimeMs: stat.mtimeMs,
|
||||||
size: stat.size,
|
size: stat.size,
|
||||||
hash,
|
hash,
|
||||||
|
kind: "markdown",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -213,6 +302,7 @@ export function chunkMarkdown(
|
|||||||
endLine,
|
endLine,
|
||||||
text,
|
text,
|
||||||
hash: hashText(text),
|
hash: hashText(text),
|
||||||
|
embeddingInput: buildTextEmbeddingInput(text),
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -8,8 +8,12 @@ import {
|
|||||||
} from "./batch-openai.js";
|
} from "./batch-openai.js";
|
||||||
import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js";
|
import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js";
|
||||||
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
|
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
|
||||||
import { estimateUtf8Bytes } from "./embedding-input-limits.js";
|
import {
|
||||||
import { buildGeminiTextEmbeddingRequest } from "./embeddings-gemini.js";
|
estimateStructuredEmbeddingInputBytes,
|
||||||
|
estimateUtf8Bytes,
|
||||||
|
} from "./embedding-input-limits.js";
|
||||||
|
import { type EmbeddingInput, hasNonTextEmbeddingParts } from "./embedding-inputs.js";
|
||||||
|
import { buildGeminiEmbeddingRequest } from "./embeddings-gemini.js";
|
||||||
import {
|
import {
|
||||||
chunkMarkdown,
|
chunkMarkdown,
|
||||||
hashText,
|
hashText,
|
||||||
@@ -53,7 +57,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
let currentTokens = 0;
|
let currentTokens = 0;
|
||||||
|
|
||||||
for (const chunk of chunks) {
|
for (const chunk of chunks) {
|
||||||
const estimate = estimateUtf8Bytes(chunk.text);
|
const estimate = chunk.embeddingInput
|
||||||
|
? estimateStructuredEmbeddingInputBytes(chunk.embeddingInput)
|
||||||
|
: estimateUtf8Bytes(chunk.text);
|
||||||
const wouldExceed =
|
const wouldExceed =
|
||||||
current.length > 0 && currentTokens + estimate > EMBEDDING_BATCH_MAX_TOKENS;
|
current.length > 0 && currentTokens + estimate > EMBEDDING_BATCH_MAX_TOKENS;
|
||||||
if (wouldExceed) {
|
if (wouldExceed) {
|
||||||
@@ -188,9 +194,22 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
const missingChunks = missing.map((m) => m.chunk);
|
const missingChunks = missing.map((m) => m.chunk);
|
||||||
const batches = this.buildEmbeddingBatches(missingChunks);
|
const batches = this.buildEmbeddingBatches(missingChunks);
|
||||||
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
||||||
|
const provider = this.provider;
|
||||||
|
if (!provider) {
|
||||||
|
throw new Error("Cannot embed batch in FTS-only mode (no embedding provider)");
|
||||||
|
}
|
||||||
let cursor = 0;
|
let cursor = 0;
|
||||||
for (const batch of batches) {
|
for (const batch of batches) {
|
||||||
const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
|
const inputs = batch.map((chunk) => chunk.embeddingInput ?? { text: chunk.text });
|
||||||
|
const hasStructuredInputs = inputs.some((input) => hasNonTextEmbeddingParts(input));
|
||||||
|
if (hasStructuredInputs && !provider.embedBatchInputs) {
|
||||||
|
throw new Error(
|
||||||
|
`Embedding provider "${provider.id}" does not support multimodal memory inputs.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const batchEmbeddings = hasStructuredInputs
|
||||||
|
? await this.embedBatchInputsWithRetry(inputs)
|
||||||
|
: await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
|
||||||
for (let i = 0; i < batch.length; i += 1) {
|
for (let i = 0; i < batch.length; i += 1) {
|
||||||
const item = missing[cursor + i];
|
const item = missing[cursor + i];
|
||||||
const embedding = batchEmbeddings[i] ?? [];
|
const embedding = batchEmbeddings[i] ?? [];
|
||||||
@@ -476,6 +495,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
source: MemorySource,
|
source: MemorySource,
|
||||||
): Promise<number[][]> {
|
): Promise<number[][]> {
|
||||||
const gemini = this.gemini;
|
const gemini = this.gemini;
|
||||||
|
if (chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
|
||||||
|
return await this.embedChunksInBatches(chunks);
|
||||||
|
}
|
||||||
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
|
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
|
||||||
chunks,
|
chunks,
|
||||||
entry,
|
entry,
|
||||||
@@ -483,9 +505,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
provider: "gemini",
|
provider: "gemini",
|
||||||
enabled: Boolean(gemini),
|
enabled: Boolean(gemini),
|
||||||
buildRequest: (chunk) => ({
|
buildRequest: (chunk) => ({
|
||||||
request: buildGeminiTextEmbeddingRequest({
|
request: buildGeminiEmbeddingRequest({
|
||||||
text: chunk.text,
|
input: chunk.embeddingInput ?? { text: chunk.text },
|
||||||
taskType: "RETRIEVAL_DOCUMENT",
|
taskType: "RETRIEVAL_DOCUMENT",
|
||||||
|
modelPath: this.gemini?.modelPath,
|
||||||
outputDimensionality: this.gemini?.outputDimensionality,
|
outputDimensionality: this.gemini?.outputDimensionality,
|
||||||
}),
|
}),
|
||||||
}),
|
}),
|
||||||
@@ -536,6 +559,45 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Embeds a batch of structured embedding inputs, retrying retryable failures
 * with exponential backoff.
 *
 * - An empty batch resolves to an empty array without touching the provider.
 * - When the active provider exposes no `embedBatchInputs`, only the `text` of
 *   each input is embedded through `embedBatchWithRetry`.
 * - Otherwise each attempt is bounded by the "batch" embedding timeout. A
 *   failure is retried only while `isRetryableEmbeddingError` accepts its
 *   message and fewer than `EMBEDDING_RETRY_MAX_ATTEMPTS` retries have been
 *   made; any other failure is rethrown unchanged.
 *
 * @param inputs Structured inputs to embed, one embedding per input.
 * @returns The embeddings produced by the provider for `inputs`.
 */
protected async embedBatchInputsWithRetry(inputs: EmbeddingInput[]): Promise<number[][]> {
  if (inputs.length === 0) {
    return [];
  }
  if (!this.provider?.embedBatchInputs) {
    // No structured entry point on this provider: embed the text parts only.
    const texts = inputs.map((input) => input.text);
    return await this.embedBatchWithRetry(texts);
  }

  let backoffMs = EMBEDDING_RETRY_BASE_DELAY_MS;
  for (let attempt = 0; ; attempt += 1) {
    try {
      const timeoutMs = this.resolveEmbeddingTimeout("batch");
      log.debug("memory embeddings: structured batch start", {
        provider: this.provider.id,
        items: inputs.length,
        timeoutMs,
      });
      const pending = this.provider.embedBatchInputs(inputs);
      return await this.withTimeout(
        pending,
        timeoutMs,
        `memory embeddings batch timed out after ${Math.round(timeoutMs / 1000)}s`,
      );
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const canRetry =
        this.isRetryableEmbeddingError(message) && attempt < EMBEDDING_RETRY_MAX_ATTEMPTS;
      if (!canRetry) {
        throw err;
      }
      // Up to 20% random jitter on top of the current backoff, capped at the max delay.
      const jitteredMs = Math.round(backoffMs * (1 + Math.random() * 0.2));
      const waitMs = Math.min(EMBEDDING_RETRY_MAX_DELAY_MS, jitteredMs);
      log.warn(`memory embeddings rate limited; retrying structured batch in ${waitMs}ms`);
      await new Promise((resolve) => setTimeout(resolve, waitMs));
      backoffMs *= 2;
    }
  }
}
|
||||||
|
|
||||||
private isRetryableEmbeddingError(message: string): boolean {
|
private isRetryableEmbeddingError(message: string): boolean {
|
||||||
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
|
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
|
||||||
message,
|
message,
|
||||||
@@ -708,16 +770,29 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
let chunks: MemoryChunk[];
|
||||||
const chunks = enforceEmbeddingMaxInputTokens(
|
if ("kind" in entry && entry.kind === "multimodal" && entry.embeddingInput) {
|
||||||
this.provider,
|
chunks = [
|
||||||
chunkMarkdown(content, this.settings.chunking).filter(
|
{
|
||||||
(chunk) => chunk.text.trim().length > 0,
|
startLine: 1,
|
||||||
),
|
endLine: 1,
|
||||||
EMBEDDING_BATCH_MAX_TOKENS,
|
text: entry.contentText ?? entry.embeddingInput.text,
|
||||||
);
|
hash: entry.hash,
|
||||||
if (options.source === "sessions" && "lineMap" in entry) {
|
embeddingInput: entry.embeddingInput,
|
||||||
remapChunkLines(chunks, entry.lineMap);
|
},
|
||||||
|
];
|
||||||
|
} else {
|
||||||
|
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
||||||
|
chunks = enforceEmbeddingMaxInputTokens(
|
||||||
|
this.provider,
|
||||||
|
chunkMarkdown(content, this.settings.chunking).filter(
|
||||||
|
(chunk) => chunk.text.trim().length > 0,
|
||||||
|
),
|
||||||
|
EMBEDDING_BATCH_MAX_TOKENS,
|
||||||
|
);
|
||||||
|
if (options.source === "sessions" && "lineMap" in entry) {
|
||||||
|
remapChunkLines(chunks, entry.lineMap);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const embeddings = this.batch.enabled
|
const embeddings = this.batch.enabled
|
||||||
? await this.embedChunksWithBatch(chunks, entry, options.source)
|
? await this.embedChunksWithBatch(chunks, entry, options.source)
|
||||||
|
|||||||
@@ -29,12 +29,14 @@ import { isFileMissingError } from "./fs-utils.js";
|
|||||||
import {
|
import {
|
||||||
buildFileEntry,
|
buildFileEntry,
|
||||||
ensureDir,
|
ensureDir,
|
||||||
|
hashText,
|
||||||
listMemoryFiles,
|
listMemoryFiles,
|
||||||
normalizeExtraMemoryPaths,
|
normalizeExtraMemoryPaths,
|
||||||
runWithConcurrency,
|
runWithConcurrency,
|
||||||
} from "./internal.js";
|
} from "./internal.js";
|
||||||
import { type MemoryFileEntry } from "./internal.js";
|
import { type MemoryFileEntry } from "./internal.js";
|
||||||
import { ensureMemoryIndexSchema } from "./memory-schema.js";
|
import { ensureMemoryIndexSchema } from "./memory-schema.js";
|
||||||
|
import { classifyMemoryMultimodalPath } from "./multimodal.js";
|
||||||
import type { SessionFileEntry } from "./session-files.js";
|
import type { SessionFileEntry } from "./session-files.js";
|
||||||
import {
|
import {
|
||||||
buildSessionEntry,
|
buildSessionEntry,
|
||||||
@@ -50,6 +52,7 @@ type MemoryIndexMeta = {
|
|||||||
provider: string;
|
provider: string;
|
||||||
providerKey?: string;
|
providerKey?: string;
|
||||||
sources?: MemorySource[];
|
sources?: MemorySource[];
|
||||||
|
scopeHash?: string;
|
||||||
chunkTokens: number;
|
chunkTokens: number;
|
||||||
chunkOverlap: number;
|
chunkOverlap: number;
|
||||||
vectorDims?: number;
|
vectorDims?: number;
|
||||||
@@ -383,9 +386,22 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
}
|
}
|
||||||
if (stat.isDirectory()) {
|
if (stat.isDirectory()) {
|
||||||
watchPaths.add(path.join(entry, "**", "*.md"));
|
watchPaths.add(path.join(entry, "**", "*.md"));
|
||||||
|
if (this.settings.multimodal.enabled) {
|
||||||
|
for (const modality of this.settings.multimodal.modalities) {
|
||||||
|
const pattern =
|
||||||
|
modality === "image"
|
||||||
|
? "*.{jpg,jpeg,png,webp,gif,heic,heif}"
|
||||||
|
: "*.{mp3,wav,ogg,opus,m4a,aac,flac}";
|
||||||
|
watchPaths.add(path.join(entry, "**", pattern));
|
||||||
|
}
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (stat.isFile() && entry.toLowerCase().endsWith(".md")) {
|
if (
|
||||||
|
stat.isFile() &&
|
||||||
|
(entry.toLowerCase().endsWith(".md") ||
|
||||||
|
classifyMemoryMultimodalPath(entry, this.settings.multimodal) !== null)
|
||||||
|
) {
|
||||||
watchPaths.add(entry);
|
watchPaths.add(entry);
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
@@ -649,9 +665,17 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
|
const files = await listMemoryFiles(
|
||||||
|
this.workspaceDir,
|
||||||
|
this.settings.extraPaths,
|
||||||
|
this.settings.multimodal,
|
||||||
|
);
|
||||||
const fileEntries = (
|
const fileEntries = (
|
||||||
await Promise.all(files.map(async (file) => buildFileEntry(file, this.workspaceDir)))
|
await Promise.all(
|
||||||
|
files.map(async (file) =>
|
||||||
|
buildFileEntry(file, this.workspaceDir, this.settings.multimodal),
|
||||||
|
),
|
||||||
|
)
|
||||||
).filter((entry): entry is MemoryFileEntry => entry !== null);
|
).filter((entry): entry is MemoryFileEntry => entry !== null);
|
||||||
log.debug("memory sync: indexing memory files", {
|
log.debug("memory sync: indexing memory files", {
|
||||||
files: fileEntries.length,
|
files: fileEntries.length,
|
||||||
@@ -868,6 +892,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
const vectorReady = await this.ensureVectorReady();
|
const vectorReady = await this.ensureVectorReady();
|
||||||
const meta = this.readMeta();
|
const meta = this.readMeta();
|
||||||
const configuredSources = this.resolveConfiguredSourcesForMeta();
|
const configuredSources = this.resolveConfiguredSourcesForMeta();
|
||||||
|
const configuredScopeHash = this.resolveConfiguredScopeHash();
|
||||||
const needsFullReindex =
|
const needsFullReindex =
|
||||||
params?.force ||
|
params?.force ||
|
||||||
!meta ||
|
!meta ||
|
||||||
@@ -875,6 +900,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
(this.provider && meta.provider !== this.provider.id) ||
|
(this.provider && meta.provider !== this.provider.id) ||
|
||||||
meta.providerKey !== this.providerKey ||
|
meta.providerKey !== this.providerKey ||
|
||||||
this.metaSourcesDiffer(meta, configuredSources) ||
|
this.metaSourcesDiffer(meta, configuredSources) ||
|
||||||
|
meta.scopeHash !== configuredScopeHash ||
|
||||||
meta.chunkTokens !== this.settings.chunking.tokens ||
|
meta.chunkTokens !== this.settings.chunking.tokens ||
|
||||||
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
||||||
(vectorReady && !meta?.vectorDims);
|
(vectorReady && !meta?.vectorDims);
|
||||||
@@ -1088,6 +1114,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
provider: this.provider?.id ?? "none",
|
provider: this.provider?.id ?? "none",
|
||||||
providerKey: this.providerKey!,
|
providerKey: this.providerKey!,
|
||||||
sources: this.resolveConfiguredSourcesForMeta(),
|
sources: this.resolveConfiguredSourcesForMeta(),
|
||||||
|
scopeHash: this.resolveConfiguredScopeHash(),
|
||||||
chunkTokens: this.settings.chunking.tokens,
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
chunkOverlap: this.settings.chunking.overlap,
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
};
|
};
|
||||||
@@ -1159,6 +1186,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
provider: this.provider?.id ?? "none",
|
provider: this.provider?.id ?? "none",
|
||||||
providerKey: this.providerKey!,
|
providerKey: this.providerKey!,
|
||||||
sources: this.resolveConfiguredSourcesForMeta(),
|
sources: this.resolveConfiguredSourcesForMeta(),
|
||||||
|
scopeHash: this.resolveConfiguredScopeHash(),
|
||||||
chunkTokens: this.settings.chunking.tokens,
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
chunkOverlap: this.settings.chunking.overlap,
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
};
|
};
|
||||||
@@ -1236,6 +1264,22 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
return normalized.length > 0 ? normalized : ["memory"];
|
return normalized.length > 0 ? normalized : ["memory"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Computes a hash of the configured indexing scope: the normalized extra
 * memory paths plus the multimodal settings.
 *
 * Paths are converted to forward slashes and sorted, and modalities are
 * sorted, so ordering differences in the configuration do not change the hash.
 *
 * @returns The `hashText` digest of the JSON-serialized scope description.
 */
private resolveConfiguredScopeHash(): string {
  const normalizedPaths = normalizeExtraMemoryPaths(this.workspaceDir, this.settings.extraPaths);
  const extraPaths = normalizedPaths.map((value) => value.replace(/\\/g, "/")).toSorted();
  const { multimodal } = this.settings;
  // Key order is part of the serialized payload; keep it stable.
  const scope = {
    extraPaths,
    multimodal: {
      enabled: multimodal.enabled,
      modalities: multimodal.modalities.toSorted(),
      maxFileBytes: multimodal.maxFileBytes,
    },
  };
  return hashText(JSON.stringify(scope));
}
|
||||||
|
|
||||||
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
|
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
|
||||||
const metaSources = this.normalizeMetaSources(meta);
|
const metaSources = this.normalizeMetaSources(meta);
|
||||||
if (metaSources.length !== configuredSources.length) {
|
if (metaSources.length !== configuredSources.length) {
|
||||||
|
|||||||
88
src/memory/multimodal.ts
Normal file
88
src/memory/multimodal.ts
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
/** Modalities that multimodal memory indexing understands. */
export const MEMORY_MULTIMODAL_MODALITIES = ["image", "audio"] as const;

/** A single supported modality (`"image"` or `"audio"`). */
export type MemoryMultimodalModality = (typeof MEMORY_MULTIMODAL_MODALITIES)[number];

/** A modality as written in configuration; `"all"` selects every supported modality. */
export type MemoryMultimodalSelection = MemoryMultimodalModality | "all";

/** Normalized multimodal settings, as produced by `normalizeMemoryMultimodalSettings`. */
export type MemoryMultimodalSettings = {
  enabled: boolean;
  modalities: MemoryMultimodalModality[];
  maxFileBytes: number;
};

/** Default `maxFileBytes` when none (or a non-finite value) is configured: 10 MiB. */
export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024;

const IMAGE_EXTENSIONS = new Set([".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"]);

const AUDIO_EXTENSIONS = new Set([".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"]);

/** The only embedding model for which multimodal memory is reported as supported. */
const MULTIMODAL_GEMINI_EMBEDDING_MODEL = "gemini-embedding-2-preview";

/**
 * Expands a raw modality selection into a de-duplicated list of modalities.
 *
 * @param raw Configured selection; may be missing or empty.
 * @returns Every supported modality when `raw` is missing, empty, or contains
 *   `"all"`; otherwise the recognized entries of `raw` in first-seen order
 *   (unrecognized entries are dropped, which can yield an empty list).
 */
export function normalizeMemoryMultimodalModalities(
  raw: MemoryMultimodalSelection[] | undefined,
): MemoryMultimodalModality[] {
  if (!raw?.length || raw.includes("all")) {
    return [...MEMORY_MULTIMODAL_MODALITIES];
  }
  const selected = new Set<MemoryMultimodalModality>();
  for (const value of raw) {
    if (value === "image" || value === "audio") {
      selected.add(value);
    }
  }
  return Array.from(selected);
}

/**
 * Normalizes raw multimodal configuration into `MemoryMultimodalSettings`.
 *
 * @param raw Partial configuration.
 * @returns Settings where `enabled` is true only for a literal `true`,
 *   `modalities` is empty whenever multimodal is disabled, and `maxFileBytes`
 *   is the floored configured value clamped to at least 1, or the default
 *   when the configured value is not a finite number.
 */
export function normalizeMemoryMultimodalSettings(raw: {
  enabled?: boolean;
  modalities?: MemoryMultimodalSelection[];
  maxFileBytes?: number;
}): MemoryMultimodalSettings {
  const enabled = raw.enabled === true;
  const maxFileBytes =
    typeof raw.maxFileBytes === "number" && Number.isFinite(raw.maxFileBytes)
      ? Math.max(1, Math.floor(raw.maxFileBytes))
      : DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES;
  return {
    enabled,
    modalities: enabled ? normalizeMemoryMultimodalModalities(raw.modalities) : [],
    maxFileBytes,
  };
}

/**
 * Reports whether multimodal indexing is effectively on.
 *
 * @returns True only when the settings are enabled and list at least one modality.
 */
export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean {
  return settings.enabled && settings.modalities.length > 0;
}

/**
 * Classifies a path by file extension against the enabled modalities.
 *
 * The comparison is case-insensitive and ignores surrounding whitespace.
 *
 * @param filePath Path (or bare file name) to classify.
 * @param settings Normalized multimodal settings.
 * @returns The first enabled modality whose extension set contains the path's
 *   extension, or `null` when multimodal is not effectively enabled or the
 *   extension is not supported.
 */
export function classifyMemoryMultimodalPath(
  filePath: string,
  settings: MemoryMultimodalSettings,
): MemoryMultimodalModality | null {
  if (!isMemoryMultimodalEnabled(settings)) {
    return null;
  }
  const lower = filePath.trim().toLowerCase();
  const dotIndex = lower.lastIndexOf(".");
  if (dotIndex < 0) {
    return null;
  }
  // No supported extension contains an inner dot, so the text from the last
  // "." onward is the only suffix that can match; look it up directly instead
  // of testing every extension with endsWith.
  const extension = lower.slice(dotIndex);
  for (const modality of settings.modalities) {
    const extensionSet = modality === "image" ? IMAGE_EXTENSIONS : AUDIO_EXTENSIONS;
    if (extensionSet.has(extension)) {
      return modality;
    }
  }
  return null;
}

/**
 * Strips a leading `models/` prefix and then a leading `gemini/` or `google/`
 * prefix from a model identifier.
 *
 * @param model Raw model identifier.
 * @returns The trimmed identifier without those prefixes, or `""` when the
 *   input is blank.
 */
export function normalizeGeminiEmbeddingModelForMemory(model: string): string {
  const trimmed = model.trim();
  if (!trimmed) {
    return "";
  }
  return trimmed.replace(/^models\//, "").replace(/^(gemini|google)\//, "");
}

/**
 * Reports whether a provider/model pair supports multimodal memory embeddings.
 *
 * @returns True only for provider `"gemini"` with a model that normalizes to
 *   `gemini-embedding-2-preview`.
 */
export function supportsMemoryMultimodalEmbeddings(params: {
  provider: string;
  model: string;
}): boolean {
  if (params.provider !== "gemini") {
    return false;
  }
  return normalizeGeminiEmbeddingModelForMemory(params.model) === MULTIMODAL_GEMINI_EMBEDDING_MODEL;
}
|
||||||
Reference in New Issue
Block a user