memory-neo4j: fix Ollama embedding context overflow for token-dense inputs

This commit is contained in:
Tarun Sukhani
2026-02-07 23:36:11 +08:00
parent 27cb766209
commit d311438cb4
2 changed files with 8 additions and 6 deletions

View File

@@ -44,11 +44,13 @@ export class Embeddings {
/**
* Truncate text to fit within the model's context length.
* Uses a conservative ~4 chars/token estimate to leave headroom.
* Uses a conservative ~3 chars/token estimate to leave headroom —
* code, URLs, and punctuation-heavy text tokenize at 1–2 chars/token,
* so the classic ~4 estimate is too generous for mixed content.
* Truncates at a word boundary when possible.
*/
private truncateToContext(text: string): string {
const maxChars = this.contextLength * 4;
const maxChars = this.contextLength * 3;
if (text.length <= maxChars) {
return text;
}

View File

@@ -937,10 +937,10 @@ const memoryNeo4jPlugin = {
const agentId = ctx.agentId || "default";
// ~1500 chars is a safe ceiling for most embedding models (~500 tokens).
// Models with larger context (8k+) can handle more, but recall queries
// don't benefit from very long inputs — the embedding quality plateaus.
const MAX_QUERY_CHARS = 1500;
// ~1000 chars keeps us safely within even small embedding contexts
// (mxbai-embed-large = 512 tokens). Longer recall queries don't improve
// embedding quality — it plateaus well before this limit.
const MAX_QUERY_CHARS = 1000;
const query =
event.prompt.length > MAX_QUERY_CHARS
? event.prompt.slice(0, MAX_QUERY_CHARS)