memory-neo4j: fix Ollama embedding context overflow for token-dense inputs

2026-05-21 12:04:59 +00:00 · 2026-02-07 23:36:11 +08:00
parent 27cb766209
commit d311438cb4
2 changed files with 8 additions and 6 deletions
--- a/extensions/memory-neo4j/embeddings.ts
+++ b/extensions/memory-neo4j/embeddings.ts
@@ -44,11 +44,13 @@ export class Embeddings {

  /**
   * Truncate text to fit within the model's context length.
-   * Uses a conservative ~4 chars/token estimate to leave headroom.
+   * Budgets ~3 chars/token rather than the classic ~4, which is too generous
+   * for mixed content. Code, URLs, and punctuation-heavy text can tokenize at
+   * 1–2 chars/token, so very dense input may still exceed the context limit.
   * Truncates at a word boundary when possible.
   */
  private truncateToContext(text: string): string {
-    const maxChars = this.contextLength * 4;
+    const maxChars = this.contextLength * 3;
    if (text.length <= maxChars) {
      return text;
    }
--- a/extensions/memory-neo4j/index.ts
+++ b/extensions/memory-neo4j/index.ts
@@ -937,10 +937,10 @@ const memoryNeo4jPlugin = {

        const agentId = ctx.agentId || "default";

-        // ~1500 chars is a safe ceiling for most embedding models (~500 tokens).
-        // Models with larger context (8k+) can handle more, but recall queries
-        // don't benefit from very long inputs — the embedding quality plateaus.
-        const MAX_QUERY_CHARS = 1500;
+        // ~1000 chars (~330 tokens at ~3 chars/token) fits even small embedding
+        // contexts (mxbai-embed-large = 512 tokens). Longer recall queries don't
+        // help: embedding quality plateaus well before this limit.
+        const MAX_QUERY_CHARS = 1000;
        const query =
          event.prompt.length > MAX_QUERY_CHARS
            ? event.prompt.slice(0, MAX_QUERY_CHARS)