memory: add multimodal image and audio indexing

This commit is contained in:
Gustavo Madeira Santana
2026-03-11 20:45:55 +00:00
parent 20d097ac2f
commit 73c9e141a4
21 changed files with 924 additions and 86 deletions

View File

@@ -778,7 +778,15 @@ export const FIELD_HELP: Record<string, string> = {
"agents.defaults.memorySearch.sources":
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
"agents.defaults.memorySearch.extraPaths":
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; keep paths small and intentional to avoid noisy recall.",
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.",
"agents.defaults.memorySearch.multimodal":
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled.',
"agents.defaults.memorySearch.multimodal.enabled":
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, and disables memory-search fallback providers.",
"agents.defaults.memorySearch.multimodal.modalities":
'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.',
"agents.defaults.memorySearch.multimodal.maxFileBytes":
"Sets the maximum bytes allowed per multimodal file before it is skipped during memory indexing. Use this to cap upload cost and indexing latency, or raise it for short high-quality audio clips.",
"agents.defaults.memorySearch.experimental.sessionMemory":
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
"agents.defaults.memorySearch.provider":