From fff48a146d3c187177d91ae5b17ace047615f60f Mon Sep 17 00:00:00 2001 From: Tarun Sukhani Date: Sat, 7 Feb 2026 22:00:39 +0800 Subject: [PATCH] memory-neo4j: add auto-recall filtering, assistant capture, importance scoring, conflict detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five high-impact improvements to the memory system: 1. Min RRF score threshold on auto-recall (default 0.25) — filters low-relevance results before injecting into context 2. Deduplicate auto-recall against core memories already present in context 3. Capture assistant messages (decisions, recommendations, synthesized facts) with stricter attention gating and "auto-capture-assistant" source type 4. LLM-judged importance scoring at capture time (0.1-1.0) with 5s timeout fallback to 0.5, replacing the flat 0.5 default 5. Conflict detection in sleep cycle (Phase 1b) — finds contradictory memories sharing entities, uses LLM to resolve, invalidates the loser Co-Authored-By: Claude Opus 4.6 --- extensions/memory-neo4j/config.test.ts | 66 +++ extensions/memory-neo4j/config.ts | 21 +- extensions/memory-neo4j/extractor.test.ts | 606 +++++++++++++++++++++- extensions/memory-neo4j/extractor.ts | 250 ++++++++- extensions/memory-neo4j/index.ts | 162 +++++- extensions/memory-neo4j/neo4j-client.ts | 69 +++ extensions/memory-neo4j/schema.test.ts | 24 + extensions/memory-neo4j/schema.ts | 7 +- 8 files changed, 1179 insertions(+), 26 deletions(-) diff --git a/extensions/memory-neo4j/config.test.ts b/extensions/memory-neo4j/config.test.ts index 101dd3295f6..a56bb22596e 100644 --- a/extensions/memory-neo4j/config.test.ts +++ b/extensions/memory-neo4j/config.test.ts @@ -384,6 +384,72 @@ describe("memoryNeo4jConfigSchema.parse", () => { }); }); + describe("autoRecallMinScore", () => { + it("should default autoRecallMinScore to 0.25 when not specified", () => { + const config = memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + }); + expect(config.autoRecallMinScore).toBe(0.25); + }); + + it("should accept an explicit autoRecallMinScore value", () => { + const config = memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + autoRecallMinScore: 0.5, + }); + expect(config.autoRecallMinScore).toBe(0.5); + }); + + it("should accept autoRecallMinScore of 0", () => { + const config = memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + autoRecallMinScore: 0, + }); + expect(config.autoRecallMinScore).toBe(0); + }); + + it("should accept autoRecallMinScore of 1", () => { + const config = memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + autoRecallMinScore: 1, + }); + expect(config.autoRecallMinScore).toBe(1); + }); + + it("should throw when autoRecallMinScore is negative", () => { + expect(() => + memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + autoRecallMinScore: -0.1, + }), + ).toThrow("autoRecallMinScore must be between 0 and 1"); + }); + + it("should throw when autoRecallMinScore is greater than 1", () => { + expect(() => + memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + autoRecallMinScore: 1.5, + }), + ).toThrow("autoRecallMinScore must be between 0 and 1"); + }); + + it("should default to 0.25 when autoRecallMinScore is a non-number type", () => { + const config = memoryNeo4jConfigSchema.parse({ + neo4j: { uri: "bolt://localhost:7687", password: "" }, + embedding: { provider: "ollama" }, + autoRecallMinScore: "0.5", + }); + expect(config.autoRecallMinScore).toBe(0.25); + }); + }); + describe("extraction config section", () => { it("should parse extraction config when provided", () => { process.env.EXTRACTION_DUMMY = ""; // avoid env var issues diff --git a/extensions/memory-neo4j/config.ts b/extensions/memory-neo4j/config.ts index 4072ebd8f9b..8d642be3a3a 100644 --- a/extensions/memory-neo4j/config.ts +++ b/extensions/memory-neo4j/config.ts @@ -26,6 +26,7 @@ export type MemoryNeo4jConfig = { }; autoCapture: boolean; autoRecall: boolean; + autoRecallMinScore: number; coreMemory: { enabled: boolean; maxEntries: number; @@ -169,6 +170,15 @@ function assertAllowedKeys(value: Record, allowed: string[], la } } +/** Parse autoRecallMinScore: must be a number between 0 and 1, default 0.25. */ +function parseAutoRecallMinScore(value: unknown): number { + if (typeof value !== "number") return 0.25; + if (value < 0 || value > 1) { + throw new Error(`autoRecallMinScore must be between 0 and 1, got: ${value}`); + } + return value; +} + /** * Config schema with parse method for runtime validation & transformation. * JSON Schema validation is handled by openclaw.plugin.json; this handles @@ -182,7 +192,15 @@ export const memoryNeo4jConfigSchema = { const cfg = value as Record; assertAllowedKeys( cfg, - ["embedding", "neo4j", "autoCapture", "autoRecall", "coreMemory", "extraction"], + [ + "embedding", + "neo4j", + "autoCapture", + "autoRecall", + "autoRecallMinScore", + "coreMemory", + "extraction", + ], "memory-neo4j config", ); @@ -313,6 +331,7 @@ export const memoryNeo4jConfigSchema = { extraction, autoCapture: cfg.autoCapture !== false, autoRecall: cfg.autoRecall !== false, + autoRecallMinScore: parseAutoRecallMinScore(cfg.autoRecallMinScore), coreMemory: { enabled: coreMemoryEnabled, maxEntries: coreMemoryMaxEntries, diff --git a/extensions/memory-neo4j/extractor.test.ts b/extensions/memory-neo4j/extractor.test.ts index 8e220f52ef6..b8c4c6d8b32 100644 --- a/extensions/memory-neo4j/extractor.test.ts +++ b/extensions/memory-neo4j/extractor.test.ts @@ -8,8 +8,16 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import type { ExtractionConfig } from "./config.js"; -import { extractUserMessages, extractEntities, runBackgroundExtraction } from "./extractor.js"; -import { passesAttentionGate } from "./index.js"; +import { + extractUserMessages, + extractAssistantMessages, + stripAssistantWrappers, + extractEntities, + runBackgroundExtraction, + rateImportance, + resolveConflict, +} from "./extractor.js"; +import { passesAttentionGate, passesAssistantAttentionGate } from "./index.js"; // ============================================================================ // passesAttentionGate() @@ -337,6 +345,30 @@ describe("extractUserMessages", () => { const result = extractUserMessages(messages); expect(result).toEqual(["I want 4k imax copy of Interstellar"]); }); + + it("should strip attachment blocks and keep surrounding user text", () => { + const messages = [ + { + role: "user", + content: + 'Can you summarize this? Long PDF content here that would normally be very large', + }, + ]; + const result = extractUserMessages(messages); + expect(result).toEqual(["Can you summarize this?"]); + }); + + it("should filter out messages that are only a block", () => { + const messages = [ + { + role: "user", + content: 'base64data', + }, + ]; + const result = extractUserMessages(messages); + // After stripping, nothing remains (< 10 chars) + expect(result).toEqual([]); + }); }); // ============================================================================ @@ -972,3 +1004,573 @@ describe("runBackgroundExtraction", () => { expect(mockLogger.info).toHaveBeenCalledWith(expect.stringContaining("extraction complete")); }); }); + +// ============================================================================ +// Auto-recall filtering logic (Feature 1 + Feature 2) +// +// These test the filtering patterns used in index.ts auto-recall hook: +// - Feature 1: results.filter(r => r.score >= minScore) +// - Feature 2: results.filter(r => !coreIds.has(r.id)) +// ============================================================================ + +describe("auto-recall score filtering", () => { + type FakeResult = { id: string; score: number; category: string; text: string }; + + function makeResult(id: string, score: number): FakeResult { + return { id, score, category: "fact", text: `Memory ${id}` }; + } + + it("should filter out results below the min score threshold", () => { + const results = [makeResult("a", 0.1), makeResult("b", 0.25), makeResult("c", 0.5)]; + const minScore = 0.25; + const filtered = results.filter((r) => r.score >= minScore); + expect(filtered).toHaveLength(2); + expect(filtered.map((r) => r.id)).toEqual(["b", "c"]); + }); + + it("should keep all results when min score is 0", () => { + const results = [makeResult("a", 0.01), makeResult("b", 0.5)]; + const filtered = results.filter((r) => r.score >= 0); + expect(filtered).toHaveLength(2); + }); + + it("should filter all results when min score is 1 and no perfect scores", () => { + const results = [makeResult("a", 0.99), makeResult("b", 0.5)]; + const filtered = results.filter((r) => r.score >= 1); + expect(filtered).toHaveLength(0); + }); + + it("should keep results exactly at the threshold", () => { + const results = [makeResult("a", 0.25)]; + const filtered = results.filter((r) => r.score >= 0.25); + expect(filtered).toHaveLength(1); + }); +}); + +describe("auto-recall core memory deduplication", () => { + type FakeResult = { id: string; score: number; category: string; text: string }; + + function makeResult(id: string, score: number): FakeResult { + return { id, score, category: "core", text: `Core memory ${id}` }; + } + + it("should filter out results whose IDs are in the core memory set", () => { + const results = [ + makeResult("core-1", 0.8), + makeResult("regular-1", 0.7), + makeResult("core-2", 0.6), + ]; + const coreIds = new Set(["core-1", "core-2"]); + const filtered = results.filter((r) => !coreIds.has(r.id)); + expect(filtered).toHaveLength(1); + expect(filtered[0].id).toBe("regular-1"); + }); + + it("should keep all results when core set is empty", () => { + const results = [makeResult("a", 0.8), makeResult("b", 0.7)]; + const coreIds = new Set(); + const filtered = results.filter((r) => !coreIds.has(r.id)); + expect(filtered).toHaveLength(2); + }); + + it("should keep all results when core set is undefined", () => { + const results = [makeResult("a", 0.8), makeResult("b", 0.7)]; + const coreIds: Set | undefined = undefined; + const filtered = coreIds ? results.filter((r) => !coreIds.has(r.id)) : results; + expect(filtered).toHaveLength(2); + }); + + it("should remove all results when all are in core set", () => { + const results = [makeResult("core-1", 0.8), makeResult("core-2", 0.7)]; + const coreIds = new Set(["core-1", "core-2"]); + const filtered = results.filter((r) => !coreIds.has(r.id)); + expect(filtered).toHaveLength(0); + }); + + it("should work correctly when both score and core dedup filters are applied", () => { + const results = [ + makeResult("core-1", 0.8), // core memory — should be deduped + makeResult("regular-1", 0.1), // low score — should be filtered by score + makeResult("regular-2", 0.5), // good score, not core — should survive + ]; + const minScore = 0.25; + const coreIds = new Set(["core-1"]); + + let filtered = results.filter((r) => r.score >= minScore); + filtered = filtered.filter((r) => !coreIds.has(r.id)); + + expect(filtered).toHaveLength(1); + expect(filtered[0].id).toBe("regular-2"); + }); +}); + +// ============================================================================ +// stripAssistantWrappers() +// ============================================================================ + +describe("stripAssistantWrappers", () => { + it("should strip blocks", () => { + const text = "Here is my analysis. some tool call And more text."; + expect(stripAssistantWrappers(text)).toBe("Here is my analysis. And more text."); + }); + + it("should strip blocks", () => { + const text = "result data The result shows X."; + expect(stripAssistantWrappers(text)).toBe("The result shows X."); + }); + + it("should strip blocks", () => { + const text = "Let me check. fn() Done."; + expect(stripAssistantWrappers(text)).toBe("Let me check. Done."); + }); + + it("should strip blocks", () => { + const text = "Let me think about this deeply... The answer is 42."; + expect(stripAssistantWrappers(text)).toBe("The answer is 42."); + }); + + it("should strip blocks", () => { + const text = "internal reasoning Here is the response."; + expect(stripAssistantWrappers(text)).toBe("Here is the response."); + }); + + it("should strip blocks", () => { + const text = "Running the script: stdout output It succeeded."; + expect(stripAssistantWrappers(text)).toBe("Running the script: It succeeded."); + }); + + it("should strip multiple wrapper types at once", () => { + const text = + "hmm I found that data the answer is clear."; + expect(stripAssistantWrappers(text)).toBe("I found that the answer is clear."); + }); + + it("should return empty string when only wrappers exist", () => { + const text = "just thinking"; + expect(stripAssistantWrappers(text)).toBe(""); + }); + + it("should pass through text with no wrappers", () => { + const text = "This is a normal assistant response with useful information."; + expect(stripAssistantWrappers(text)).toBe(text); + }); +}); + +// ============================================================================ +// extractAssistantMessages() +// ============================================================================ + +describe("extractAssistantMessages", () => { + it("should extract string content from assistant messages", () => { + const messages = [ + { role: "assistant", content: "I recommend using TypeScript for this project" }, + { role: "assistant", content: "The database migration completed successfully" }, + ]; + const result = extractAssistantMessages(messages); + expect(result).toEqual([ + "I recommend using TypeScript for this project", + "The database migration completed successfully", + ]); + }); + + it("should filter out user messages", () => { + const messages = [ + { role: "user", content: "This is a user message that should be skipped" }, + { role: "assistant", content: "This is an assistant message that should be kept" }, + ]; + const result = extractAssistantMessages(messages); + expect(result).toEqual(["This is an assistant message that should be kept"]); + }); + + it("should extract text from content block arrays", () => { + const messages = [ + { + role: "assistant", + content: [ + { type: "text", text: "Here is a content block response from assistant" }, + { type: "tool_use", id: "123" }, + { type: "text", text: "Another text block in the response" }, + ], + }, + ]; + const result = extractAssistantMessages(messages); + expect(result).toEqual([ + "Here is a content block response from assistant", + "Another text block in the response", + ]); + }); + + it("should strip thinking tags from assistant messages", () => { + const messages = [ + { + role: "assistant", + content: + "Let me think about this... The best approach is to use a factory pattern for this use case.", + }, + ]; + const result = extractAssistantMessages(messages); + expect(result).toEqual(["The best approach is to use a factory pattern for this use case."]); + }); + + it("should filter out messages shorter than 10 chars after stripping", () => { + const messages = [ + { role: "assistant", content: "long thinking block OK" }, + { role: "assistant", content: "Short" }, + ]; + const result = extractAssistantMessages(messages); + expect(result).toEqual([]); + }); + + it("should handle null and non-object messages gracefully", () => { + const messages = [ + null, + undefined, + 42, + { role: "assistant", content: "Valid assistant message with enough length" }, + ]; + const result = extractAssistantMessages(messages as unknown[]); + expect(result).toEqual(["Valid assistant message with enough length"]); + }); + + it("should return empty array for empty input", () => { + expect(extractAssistantMessages([])).toEqual([]); + }); +}); + +// ============================================================================ +// passesAssistantAttentionGate() +// ============================================================================ + +describe("passesAssistantAttentionGate", () => { + it("should reject short messages below min chars", () => { + expect(passesAssistantAttentionGate("Hi there")).toBe(false); + }); + + it("should reject messages with fewer than 10 words", () => { + // 9 words — just under the threshold + expect(passesAssistantAttentionGate("I think we should use this approach here.")).toBe(false); + }); + + it("should accept messages with 10+ words and substantive content", () => { + expect( + passesAssistantAttentionGate( + "Based on my analysis, the best approach would be to refactor the database layer to use connection pooling for better performance.", + ), + ).toBe(true); + }); + + it("should reject messages exceeding 1000 chars", () => { + const longMsg = "word ".repeat(250); // ~1250 chars + expect(passesAssistantAttentionGate(longMsg)).toBe(false); + }); + + it("should reject messages that are mostly code blocks", () => { + const msg = + "Here is the fix:\n```typescript\nconst x = 1;\nconst y = 2;\nconst z = x + y;\nconsole.log(z);\nfunction foo() { return bar; }\nclass Baz extends Qux {}\n```"; + expect(passesAssistantAttentionGate(msg)).toBe(false); + }); + + it("should accept messages with some code but mostly text", () => { + const msg = + "I recommend refactoring the authentication module to use JWT tokens instead of session-based auth. The key change would be in the middleware where we validate tokens. Here is a small example: ```const token = jwt.sign(payload, secret);``` This approach is more scalable."; + expect(passesAssistantAttentionGate(msg)).toBe(true); + }); + + it("should reject messages containing tool_result tags", () => { + const msg = + "The some output from executing a tool that returned data result shows that the system is working correctly and we should continue."; + expect(passesAssistantAttentionGate(msg)).toBe(false); + }); + + it("should reject messages containing tool_use tags", () => { + const msg = + "Let me check running some tool call right now and now we can see the output of the analysis clearly."; + expect(passesAssistantAttentionGate(msg)).toBe(false); + }); + + it("should reject messages with injected memory context", () => { + expect( + passesAssistantAttentionGate( + "some context here for the agent and here is a longer response with more than ten words to pass the word check.", + ), + ).toBe(false); + }); + + it("should reject noise patterns", () => { + expect(passesAssistantAttentionGate("ok")).toBe(false); + expect(passesAssistantAttentionGate("sounds good")).toBe(false); + }); +}); + +// ============================================================================ +// rateImportance() +// ============================================================================ + +describe("rateImportance", () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + const enabledConfig: ExtractionConfig = { + enabled: true, + apiKey: "test-key", + model: "test-model", + baseUrl: "https://test.ai/api/v1", + temperature: 0.0, + maxRetries: 0, + }; + + const disabledConfig: ExtractionConfig = { + ...enabledConfig, + enabled: false, + }; + + it("should return 0.5 when extraction is disabled", async () => { + const result = await rateImportance("some text", disabledConfig); + expect(result).toBe(0.5); + }); + + it("should return mapped score on happy path", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { message: { content: JSON.stringify({ score: 8, reason: "important decision" }) } }, + ], + }), + }); + + const result = await rateImportance("I decided to switch to Neo4j", enabledConfig); + expect(result).toBe(0.8); + }); + + it("should clamp score to 1-10 range", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { message: { content: JSON.stringify({ score: 15, reason: "very important" }) } }, + ], + }), + }); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(1.0); // 15 clamped to 10, mapped to 1.0 + }); + + it("should clamp low scores", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 0, reason: "trivial" }) } }], + }), + }); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(0.1); // 0 clamped to 1, mapped to 0.1 + }); + + it("should return 0.5 on fetch timeout", async () => { + globalThis.fetch = vi + .fn() + .mockRejectedValue(new DOMException("signal timed out", "TimeoutError")); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(0.5); + }); + + it("should return 0.5 on invalid JSON response", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: "not valid json" } }], + }), + }); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(0.5); + }); + + it("should return 0.5 when API returns error status", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + }); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(0.5); + }); + + it("should return 0.5 when response has no content", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: null } }], + }), + }); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(0.5); + }); + + it("should return 0.5 when score is not a number", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { message: { content: JSON.stringify({ score: "high", reason: "important" }) } }, + ], + }), + }); + + const result = await rateImportance("test", enabledConfig); + expect(result).toBe(0.5); + }); +}); + +// ============================================================================ +// resolveConflict() +// ============================================================================ + +describe("resolveConflict", () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + const enabledConfig: ExtractionConfig = { + enabled: true, + apiKey: "test-key", + model: "test-model", + baseUrl: "https://test.ai/api/v1", + temperature: 0.0, + maxRetries: 0, + }; + + const disabledConfig: ExtractionConfig = { + ...enabledConfig, + enabled: false, + }; + + it("should return 'skip' when config is disabled", async () => { + const result = await resolveConflict("mem A", "mem B", disabledConfig); + expect(result).toBe("skip"); + }); + + it("should return 'a' when LLM says keep a", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ keep: "a", reason: "more recent" }) } }], + }), + }); + + const result = await resolveConflict( + "user prefers dark mode", + "user prefers light mode", + enabledConfig, + ); + expect(result).toBe("a"); + }); + + it("should return 'b' when LLM says keep b", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { message: { content: JSON.stringify({ keep: "b", reason: "more specific" }) } }, + ], + }), + }); + + const result = await resolveConflict("old preference", "new preference", enabledConfig); + expect(result).toBe("b"); + }); + + it("should return 'both' when LLM says keep both", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { message: { content: JSON.stringify({ keep: "both", reason: "no conflict" }) } }, + ], + }), + }); + + const result = await resolveConflict("likes coffee", "works at Acme", enabledConfig); + expect(result).toBe("both"); + }); + + it("should return 'skip' on fetch timeout", async () => { + globalThis.fetch = vi + .fn() + .mockRejectedValue(new DOMException("signal timed out", "TimeoutError")); + + const result = await resolveConflict("mem A", "mem B", enabledConfig); + expect(result).toBe("skip"); + }); + + it("should return 'skip' on invalid JSON response", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: "not valid json" } }], + }), + }); + + const result = await resolveConflict("mem A", "mem B", enabledConfig); + expect(result).toBe("skip"); + }); + + it("should return 'skip' when API returns error status", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + text: () => Promise.resolve("Internal Server Error"), + }); + + const result = await resolveConflict("mem A", "mem B", enabledConfig); + expect(result).toBe("skip"); + }); + + it("should return 'skip' when response has no content", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: null } }], + }), + }); + + const result = await resolveConflict("mem A", "mem B", enabledConfig); + expect(result).toBe("skip"); + }); + + it("should return 'skip' when LLM returns unrecognized keep value", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { message: { content: JSON.stringify({ keep: "neither", reason: "confusing" }) } }, + ], + }), + }); + + const result = await resolveConflict("mem A", "mem B", enabledConfig); + expect(result).toBe("skip"); + }); +}); diff --git a/extensions/memory-neo4j/extractor.ts b/extensions/memory-neo4j/extractor.ts index d4184c5c2e2..78acaa73508 100644 --- a/extensions/memory-neo4j/extractor.ts +++ b/extensions/memory-neo4j/extractor.ts @@ -243,6 +243,63 @@ function validateExtractionResult(raw: Record): ExtractionResul }; } +// ============================================================================ +// Conflict Resolution +// ============================================================================ + +/** + * Use an LLM to determine whether two memories genuinely conflict. + * Returns which memory to keep, or "both" if they don't actually conflict. + * Returns "skip" on any failure (network, parse, disabled config). + */ +export async function resolveConflict( + memA: string, + memB: string, + config: ExtractionConfig, +): Promise<"a" | "b" | "both" | "skip"> { + if (!config.enabled) return "skip"; + + const prompt = `Two memories may conflict with each other. Determine which should be kept. + +Memory A: "${memA}" +Memory B: "${memB}" + +If they genuinely contradict each other, keep the one that is more current, specific, or accurate. +If they don't actually conflict (they cover different aspects or are both valid), keep both. + +Return JSON: {"keep": "a"|"b"|"both", "reason": "brief explanation"}`; + + try { + const response = await fetch(`${config.baseUrl}/chat/completions`, { + method: "POST", + headers: { + Authorization: `Bearer ${config.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: config.model, + messages: [{ role: "user", content: prompt }], + temperature: 0.0, + response_format: { type: "json_object" }, + }), + signal: AbortSignal.timeout(10_000), + }); + + if (!response.ok) return "skip"; + + const data = (await response.json()) as { choices?: Array<{ message?: { content?: string } }> }; + const content = data.choices?.[0]?.message?.content; + if (!content) return "skip"; + + const parsed = JSON.parse(content) as { keep?: string }; + const keep = parsed.keep; + if (keep === "a" || keep === "b" || keep === "both") return keep; + return "skip"; + } catch { + return "skip"; + } +} + // ============================================================================ // Background Extraction Pipeline // ============================================================================ @@ -394,6 +451,12 @@ export type SleepCycleResult = { clustersFound: number; memoriesMerged: number; }; + // Phase 1b: Conflict Detection + conflict: { + pairsFound: number; + resolved: number; + invalidated: number; + }; // Phase 2: Pareto Scoring & Threshold pareto: { totalMemories: number; @@ -455,7 +518,15 @@ export type SleepCycleOptions = { // Progress callback onPhaseStart?: ( - phase: "dedup" | "pareto" | "promotion" | "demotion" | "decay" | "extraction" | "cleanup", + phase: + | "dedup" + | "conflict" + | "pareto" + | "promotion" + | "demotion" + | "decay" + | "extraction" + | "cleanup", ) => void; onProgress?: (phase: string, message: string) => void; }; @@ -520,6 +591,7 @@ export async function runSleepCycle( const result: SleepCycleResult = { dedup: { clustersFound: 0, memoriesMerged: 0 }, + conflict: { pairsFound: 0, resolved: 0, invalidated: 0 }, pareto: { totalMemories: 0, coreMemories: 0, regularMemories: 0, threshold: 0 }, promotion: { candidatesFound: 0, promoted: 0 }, demotion: { candidatesFound: 0, demoted: 0 }, @@ -562,6 +634,47 @@ export async function runSleepCycle( } } + // -------------------------------------------------------------------------- + // Phase 1b: Conflict Detection + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted) { + onPhaseStart?.("conflict"); + logger.info("memory-neo4j: [sleep] Phase 1b: Conflict Detection"); + + try { + const pairs = await db.findConflictingMemories(agentId); + result.conflict.pairsFound = pairs.length; + + for (const pair of pairs) { + if (abortSignal?.aborted) break; + + const decision = await resolveConflict(pair.memoryA.text, pair.memoryB.text, config); + + if (decision === "a") { + await db.invalidateMemory(pair.memoryB.id); + result.conflict.invalidated++; + result.conflict.resolved++; + onProgress?.("conflict", `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`); + } else if (decision === "b") { + await db.invalidateMemory(pair.memoryA.id); + result.conflict.invalidated++; + result.conflict.resolved++; + onProgress?.("conflict", `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`); + } else if (decision === "both") { + result.conflict.resolved++; + onProgress?.("conflict", `Kept both: no real conflict`); + } + // "skip" = LLM unavailable, don't count as resolved + } + + logger.info( + `memory-neo4j: [sleep] Phase 1b complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 1b error: ${String(err)}`); + } + } + // -------------------------------------------------------------------------- // Phase 2: Pareto Scoring & Threshold Calculation // -------------------------------------------------------------------------- @@ -887,6 +1000,8 @@ export function stripMessageWrappers(text: string): string { s = s.replace(/[\s\S]*?<\/relevant-memories>\s*/g, ""); s = s.replace(/[\s\S]*?<\/core-memory-refresh>\s*/g, ""); s = s.replace(/[\s\S]*?<\/system>\s*/g, ""); + // File attachments (PDFs, images, etc. forwarded inline by channels) + s = s.replace(/]*>[\s\S]*?<\/file>\s*/g, ""); // Media attachment preamble (appears before Telegram wrapper) s = s.replace(/^\[media attached:[^\]]*\]\s*(?:To send an image[^\n]*\n?)*/i, ""); // System exec output blocks (may appear before Telegram wrapper) @@ -897,3 +1012,136 @@ export function stripMessageWrappers(text: string): string { s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, ""); return s.trim(); } + +// ============================================================================ +// Assistant Message Extraction +// ============================================================================ + +/** + * Strip tool-use, thinking, and code-output blocks from assistant messages + * so the attention gate sees only the substantive assistant text. + */ +export function stripAssistantWrappers(text: string): string { + let s = text; + // Tool-use / tool-result / function_call blocks + s = s.replace(/[\s\S]*?<\/tool_use>\s*/g, ""); + s = s.replace(/[\s\S]*?<\/tool_result>\s*/g, ""); + s = s.replace(/[\s\S]*?<\/function_call>\s*/g, ""); + // Thinking tags + s = s.replace(/[\s\S]*?<\/thinking>\s*/g, ""); + s = s.replace(/[\s\S]*?<\/antThinking>\s*/g, ""); + // Code execution output + s = s.replace(/[\s\S]*?<\/code_output>\s*/g, ""); + return s.trim(); +} + +/** + * Extract assistant message texts from the event.messages array. + * Handles both string content and content block arrays. + */ +export function extractAssistantMessages(messages: unknown[]): string[] { + const texts: string[] = []; + + for (const msg of messages) { + if (!msg || typeof msg !== "object") { + continue; + } + const msgObj = msg as Record; + + if (msgObj.role !== "assistant") { + continue; + } + + const content = msgObj.content; + if (typeof content === "string") { + texts.push(content); + continue; + } + + if (Array.isArray(content)) { + for (const block of content) { + if ( + block && + typeof block === "object" && + "type" in block && + (block as Record).type === "text" && + "text" in block && + typeof (block as Record).text === "string" + ) { + texts.push((block as Record).text as string); + } + } + } + } + + return texts.map(stripAssistantWrappers).filter((t) => t.length >= 10); +} + +// ============================================================================ +// LLM-Judged Importance Rating +// ============================================================================ + +const IMPORTANCE_RATING_PROMPT = `Rate the long-term importance of remembering this information on a scale of 1-10. +1-3: Trivial/transient (greetings, temporary status) +4-6: Moderately useful (general facts, minor preferences) +7-9: Very important (key decisions, strong preferences, critical facts) +10: Essential (identity-defining, safety-critical) + +Information: "{text}" + +Return JSON: {"score": N, "reason": "brief explanation"}`; + +/** Timeout for importance rating calls (much shorter than extraction) */ +const IMPORTANCE_TIMEOUT_MS = 5_000; + +/** + * Rate the long-term importance of a text using an LLM. + * Returns a value between 0.1 and 1.0, or 0.5 on any failure. + */ +export async function rateImportance(text: string, config: ExtractionConfig): Promise { + if (!config.enabled) { + return 0.5; + } + + const prompt = IMPORTANCE_RATING_PROMPT.replace("{text}", text); + + try { + const response = await fetch(`${config.baseUrl}/chat/completions`, { + method: "POST", + headers: { + Authorization: `Bearer ${config.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: config.model, + messages: [{ role: "user", content: prompt }], + temperature: config.temperature, + response_format: { type: "json_object" }, + }), + signal: AbortSignal.timeout(IMPORTANCE_TIMEOUT_MS), + }); + + if (!response.ok) { + return 0.5; + } + + const data = (await response.json()) as { + choices?: Array<{ message?: { content?: string } }>; + }; + const content = data.choices?.[0]?.message?.content; + if (!content) { + return 0.5; + } + + const parsed = JSON.parse(content) as { score?: unknown }; + const score = typeof parsed.score === "number" ? parsed.score : NaN; + if (Number.isNaN(score)) { + return 0.5; + } + + const clamped = Math.max(1, Math.min(10, score)); + return Math.max(0.1, Math.min(1.0, clamped / 10)); + } catch { + return 0.5; + } +} diff --git a/extensions/memory-neo4j/index.ts b/extensions/memory-neo4j/index.ts index 928857dcb3f..73576016aa4 100644 --- a/extensions/memory-neo4j/index.ts +++ b/extensions/memory-neo4j/index.ts @@ -26,7 +26,13 @@ import { vectorDimsForModel, } from "./config.js"; import { Embeddings } from "./embeddings.js"; -import { extractUserMessages, stripMessageWrappers, runSleepCycle } from "./extractor.js"; +import { + extractUserMessages, + extractAssistantMessages, + stripMessageWrappers, + runSleepCycle, + rateImportance, +} from "./extractor.js"; import { Neo4jMemoryClient } from "./neo4j-client.js"; import { hybridSearch } from "./search.js"; @@ -510,9 +516,10 @@ const memoryNeo4jPlugin = { console.log("\n🌙 Memory Sleep Cycle"); console.log("═════════════════════════════════════════════════════════════"); console.log("Seven-phase memory consolidation (Pareto-based):\n"); - console.log(" Phase 1: Deduplication — Merge near-duplicate memories"); + console.log(" Phase 1: Deduplication — Merge near-duplicate memories"); + console.log(" Phase 1b: Conflict Detection — Resolve contradictory memories"); console.log( - " Phase 2: Pareto Scoring — Calculate effective scores for all memories", + " Phase 2: Pareto Scoring — Calculate effective scores for all memories", ); console.log(" Phase 3: Core Promotion — Regular memories above threshold → core"); console.log(" Phase 4: Core Demotion — Core memories below threshold → regular"); @@ -584,8 +591,9 @@ const memoryNeo4jPlugin = { extractionBatchSize: batchSize, extractionDelayMs: delay, onPhaseStart: (phase) => { - const phaseNames = { + const phaseNames: Record = { dedup: "Phase 1: Deduplication", + conflict: "Phase 1b: Conflict Detection", pareto: "Phase 2: Pareto Scoring", promotion: "Phase 3: Core Promotion", demotion: "Phase 4: Core Demotion", @@ -607,6 +615,9 @@ const memoryNeo4jPlugin = { console.log( ` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`, ); + console.log( + ` Conflicts: ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`, + ); console.log( ` Pareto: ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`, ); @@ -786,6 +797,7 @@ const memoryNeo4jPlugin = { // hook below also checks for existing conversation history to avoid re-injecting core // memories after restarts. const bootstrappedSessions = new Set(); + const coreMemoryIdsBySession = new Map>(); // Track mid-session refresh: maps sessionKey → tokens at last refresh // Used to avoid refreshing too frequently (only refresh after significant context growth) @@ -813,6 +825,7 @@ const memoryNeo4jPlugin = { if (ts < cutoff) { bootstrappedSessions.delete(key); midSessionRefreshAt.delete(key); + coreMemoryIdsBySession.delete(key); sessionLastSeen.delete(key); } } @@ -830,6 +843,7 @@ const memoryNeo4jPlugin = { if (ctx.sessionKey) { bootstrappedSessions.delete(ctx.sessionKey); midSessionRefreshAt.delete(ctx.sessionKey); + coreMemoryIdsBySession.delete(ctx.sessionKey); sessionLastSeen.delete(ctx.sessionKey); api.logger.info?.( `memory-neo4j: cleared bootstrap/refresh flags for session ${ctx.sessionKey} after compaction`, @@ -846,6 +860,7 @@ const memoryNeo4jPlugin = { if (key) { bootstrappedSessions.delete(key); midSessionRefreshAt.delete(key); + coreMemoryIdsBySession.delete(key); sessionLastSeen.delete(key); api.logger.info?.( `memory-neo4j: cleared bootstrap/refresh flags for session=${key} (session_end)`, @@ -932,7 +947,7 @@ const memoryNeo4jPlugin = { : event.prompt; try { - const results = await hybridSearch( + let results = await hybridSearch( db, embeddings, query, @@ -941,6 +956,16 @@ const memoryNeo4jPlugin = { extractionConfig.enabled, ); + // Feature 1: Filter out low-relevance results below min RRF score + results = results.filter((r) => r.score >= cfg.autoRecallMinScore); + + // Feature 2: Deduplicate against core memories already in context + const sessionKey = ctx.sessionKey ?? ""; + const coreIds = coreMemoryIdsBySession.get(sessionKey); + if (coreIds) { + results = results.filter((r) => !coreIds.has(r.id)); + } + if (results.length === 0) { return; } @@ -1037,6 +1062,7 @@ const memoryNeo4jPlugin = { if (sessionKey) { bootstrappedSessions.add(sessionKey); + coreMemoryIdsBySession.set(sessionKey, new Set(coreMemories.map((m) => m.id))); touchSession(sessionKey); } // Log at info level when actually injecting, debug for skips @@ -1082,19 +1108,12 @@ const memoryNeo4jPlugin = { const sessionKey = ctx.sessionKey; try { - const userMessages = extractUserMessages(event.messages); - if (userMessages.length === 0) { - return; - } - - // Phase 1: Attention gating — fast heuristic filter - const retained = userMessages.filter((text) => passesAttentionGate(text)); - if (retained.length === 0) { - return; - } - - // Phase 2: Short-term retention — embed, dedup, store let stored = 0; + + // Process user messages + const userMessages = extractUserMessages(event.messages); + const retained = userMessages.filter((text) => passesAttentionGate(text)); + for (const text of retained) { try { const vector = await embeddings.embed(text); @@ -1105,11 +1124,13 @@ const memoryNeo4jPlugin = { continue; } + const importance = await rateImportance(text, extractionConfig); + await db.storeMemory({ id: randomUUID(), text, embedding: vector, - importance: 0.5, // neutral — sleep cycle scores via Pareto + importance, category: "other", // sleep cycle will categorize source: "auto-capture", extractionStatus: extractionConfig.enabled ? "pending" : "skipped", @@ -1122,11 +1143,47 @@ const memoryNeo4jPlugin = { } } + // Process assistant messages + const assistantMessages = extractAssistantMessages(event.messages); + const retainedAssistant = assistantMessages.filter((text) => + passesAssistantAttentionGate(text), + ); + + for (const text of retainedAssistant) { + try { + const vector = await embeddings.embed(text); + + const existing = await db.findSimilar(vector, 0.95, 1); + if (existing.length > 0) { + continue; + } + + const importance = await rateImportance(text, extractionConfig); + + await db.storeMemory({ + id: randomUUID(), + text, + embedding: vector, + importance: Math.min(importance, 0.4), // cap assistant importance slightly lower + category: "other", + source: "auto-capture-assistant", + extractionStatus: extractionConfig.enabled ? "pending" : "skipped", + agentId, + sessionKey, + }); + stored++; + } catch (err) { + api.logger.debug?.( + `memory-neo4j: assistant auto-capture item failed: ${String(err)}`, + ); + } + } + if (stored > 0) { api.logger.info(`memory-neo4j: auto-captured ${stored} memories (attention-gated)`); - } else { + } else if (userMessages.length > 0 || assistantMessages.length > 0) { api.logger.info( - `memory-neo4j: auto-capture ran (0 stored, ${userMessages.length} user msgs, ${retained.length} passed attention gate)`, + `memory-neo4j: auto-capture ran (0 stored, ${userMessages.length} user msgs, ${retained.length} passed gate, ${assistantMessages.length} assistant msgs, ${retainedAssistant.length} passed gate)`, ); } } catch (err) { @@ -1247,8 +1304,71 @@ function passesAttentionGate(text: string): boolean { return true; } +// ============================================================================ +// Assistant attention gate — stricter filter for assistant messages +// ============================================================================ + +/** Maximum assistant message length — shorter than user to avoid code dumps. */ +const MAX_ASSISTANT_CAPTURE_CHARS = 1000; + +/** Minimum word count for assistant messages — higher than user. */ +const MIN_ASSISTANT_WORD_COUNT = 10; + +function passesAssistantAttentionGate(text: string): boolean { + const trimmed = text.trim(); + + // Length bounds (stricter than user) + if (trimmed.length < MIN_CAPTURE_CHARS || trimmed.length > MAX_ASSISTANT_CAPTURE_CHARS) { + return false; + } + + // Word count — higher threshold than user messages + const wordCount = trimmed.split(/\s+/).length; + if (wordCount < MIN_ASSISTANT_WORD_COUNT) { + return false; + } + + // Reject messages that are mostly code (>50% inside triple-backtick fences) + const codeBlockRegex = /```[\s\S]*?```/g; + let codeChars = 0; + let match: RegExpExecArray | null; + while ((match = codeBlockRegex.exec(trimmed)) !== null) { + codeChars += match[0].length; + } + if (codeChars > trimmed.length * 0.5) { + return false; + } + + // Reject messages that are mostly tool output + if ( + trimmed.includes("") || + trimmed.includes("") || + trimmed.includes("") + ) { + return false; + } + + // Injected context from the memory system itself + if (trimmed.includes("") || trimmed.includes("")) { + return false; + } + + // Noise patterns (same as user gate) + if (NOISE_PATTERNS.some((r) => r.test(trimmed))) { + return false; + } + + // Excessive emoji (likely reaction, not substance) + const emojiCount = (trimmed.match(/[\u{1F300}-\u{1F9FF}]/gu) || []).length; + if (emojiCount > 3) { + return false; + } + + return true; +} + // Exported for testing -export { passesAttentionGate }; +export { passesAttentionGate, passesAssistantAttentionGate }; // ============================================================================ // Export diff --git a/extensions/memory-neo4j/neo4j-client.ts b/extensions/memory-neo4j/neo4j-client.ts index c64c7cea891..1fb03e813d7 100644 --- a/extensions/memory-neo4j/neo4j-client.ts +++ b/extensions/memory-neo4j/neo4j-client.ts @@ -1389,6 +1389,75 @@ export class Neo4jMemoryClient { } } + // -------------------------------------------------------------------------- + // Sleep Cycle: Conflict Detection + // -------------------------------------------------------------------------- + + /** + * Find memory pairs that share at least one entity (via MENTIONS relationships). + * These are candidates for conflict resolution — the LLM decides if they truly conflict. + * Excludes core memories (conflicts there are handled by promotion/demotion). + */ + async findConflictingMemories(agentId?: string): Promise< + Array<{ + memoryA: { id: string; text: string; importance: number; createdAt: string }; + memoryB: { id: string; text: string; importance: number; createdAt: string }; + }> + > { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const agentFilter = agentId ? "AND m1.agentId = $agentId AND m2.agentId = $agentId" : ""; + const result = await session.run( + `MATCH (m1:Memory)-[:MENTIONS]->(e:Entity)<-[:MENTIONS]-(m2:Memory) + WHERE m1.id < m2.id ${agentFilter} + AND m1.category <> 'core' AND m2.category <> 'core' + WITH m1, m2, count(e) AS sharedEntities + WHERE sharedEntities >= 1 + RETURN DISTINCT m1.id AS m1Id, m1.text AS m1Text, m1.importance AS m1Importance, m1.createdAt AS m1CreatedAt, + m2.id AS m2Id, m2.text AS m2Text, m2.importance AS m2Importance, m2.createdAt AS m2CreatedAt + LIMIT 50`, + agentId ? { agentId } : {}, + ); + + return result.records.map((r) => ({ + memoryA: { + id: r.get("m1Id"), + text: r.get("m1Text"), + importance: r.get("m1Importance"), + createdAt: String(r.get("m1CreatedAt") ?? ""), + }, + memoryB: { + id: r.get("m2Id"), + text: r.get("m2Text"), + importance: r.get("m2Importance"), + createdAt: String(r.get("m2CreatedAt") ?? ""), + }, + })); + } finally { + await session.close(); + } + } + + /** + * Invalidate a memory by setting its importance to near-zero. + * Used by conflict resolution to effectively retire the losing memory + * without deleting it (it will be pruned naturally by the decay phase). + */ + async invalidateMemory(id: string): Promise { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + await session.run( + `MATCH (m:Memory {id: $id}) + SET m.importance = 0.01, m.updatedAt = $now`, + { id, now: new Date().toISOString() }, + ); + } finally { + await session.close(); + } + } + // -------------------------------------------------------------------------- // Sleep Cycle: Core Memory Promotion // -------------------------------------------------------------------------- diff --git a/extensions/memory-neo4j/schema.test.ts b/extensions/memory-neo4j/schema.test.ts index d546c8e1ebf..4933a1d25c4 100644 --- a/extensions/memory-neo4j/schema.test.ts +++ b/extensions/memory-neo4j/schema.test.ts @@ -6,6 +6,7 @@ */ import { describe, it, expect } from "vitest"; +import type { MemorySource } from "./schema.js"; import { escapeLucene, validateRelationshipType, @@ -198,3 +199,26 @@ describe("exported constants", () => { expect(ALLOWED_RELATIONSHIP_TYPES.size).toBe(7); }); }); + +// ============================================================================ +// MemorySource Type +// ============================================================================ + +describe("MemorySource type", () => { + it("should accept 'auto-capture-assistant' as a valid MemorySource value", () => { + // Type-level check: this assignment should compile without error + const source: MemorySource = "auto-capture-assistant"; + expect(source).toBe("auto-capture-assistant"); + }); + + it("should accept all MemorySource values", () => { + const sources: MemorySource[] = [ + "user", + "auto-capture", + "auto-capture-assistant", + "memory-watcher", + "import", + ]; + expect(sources).toHaveLength(5); + }); +}); diff --git a/extensions/memory-neo4j/schema.ts b/extensions/memory-neo4j/schema.ts index cd0d7949d05..745239c653f 100644 --- a/extensions/memory-neo4j/schema.ts +++ b/extensions/memory-neo4j/schema.ts @@ -9,7 +9,12 @@ export type MemoryCategory = "core" | "preference" | "fact" | "decision" | "entity" | "other"; export type EntityType = "person" | "organization" | "location" | "event" | "concept"; export type ExtractionStatus = "pending" | "complete" | "failed" | "skipped"; -export type MemorySource = "user" | "auto-capture" | "memory-watcher" | "import"; +export type MemorySource = + | "user" + | "auto-capture" + | "auto-capture-assistant" + | "memory-watcher" + | "import"; export type MemoryNode = { id: string;