diff --git a/extensions/memory-neo4j/attention-gate.ts b/extensions/memory-neo4j/attention-gate.ts index 7f334fe25c6..c05f25e47ba 100644 --- a/extensions/memory-neo4j/attention-gate.ts +++ b/extensions/memory-neo4j/attention-gate.ts @@ -62,6 +62,30 @@ const MIN_CAPTURE_CHARS = 30; /** Minimum word count — short contextual phrases lack standalone meaning. */ const MIN_WORD_COUNT = 8; +/** Shared checks applied by both user and assistant attention gates. */ +function failsSharedGateChecks(trimmed: string): boolean { + // Injected context from the memory system itself + if (trimmed.includes("") || trimmed.includes("")) { + return true; + } + + // Noise patterns + if (NOISE_PATTERNS.some((r) => r.test(trimmed))) { + return true; + } + + // Excessive emoji (likely reaction, not substance) + const emojiCount = ( + trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) || + [] + ).length; + if (emojiCount > 3) { + return true; + } + + return false; +} + export function passesAttentionGate(text: string): boolean { const trimmed = text.trim(); @@ -76,22 +100,7 @@ export function passesAttentionGate(text: string): boolean { return false; } - // Injected context from the memory system itself - if (trimmed.includes("") || trimmed.includes("")) { - return false; - } - - // Noise patterns - if (NOISE_PATTERNS.some((r) => r.test(trimmed))) { - return false; - } - - // Excessive emoji (likely reaction, not substance) - const emojiCount = ( - trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) || - [] - ).length; - if (emojiCount > 3) { + if (failsSharedGateChecks(trimmed)) { return false; } @@ -183,13 +192,7 @@ export function passesAssistantAttentionGate(text: string): boolean { return false; } - // Injected context from the memory system itself - if (trimmed.includes("") || trimmed.includes("")) { - return false; - } - - // Noise patterns (same as user gate) - if (NOISE_PATTERNS.some((r) => 
r.test(trimmed))) { + if (failsSharedGateChecks(trimmed)) { return false; } @@ -198,14 +201,5 @@ export function passesAssistantAttentionGate(text: string): boolean { return false; } - // Excessive emoji (likely reaction, not substance) - const emojiCount = ( - trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) || - [] - ).length; - if (emojiCount > 3) { - return false; - } - return true; } diff --git a/extensions/memory-neo4j/auto-capture.test.ts b/extensions/memory-neo4j/auto-capture.test.ts new file mode 100644 index 00000000000..141dd0f5327 --- /dev/null +++ b/extensions/memory-neo4j/auto-capture.test.ts @@ -0,0 +1,573 @@ +/** + * Tests for the auto-capture pipeline: captureMessage and runAutoCapture. + * + * Tests the embed → dedup → rate → store pipeline including: + * - Pre-computed vector usage (batch embedding optimization) + * - Exact dedup (≥0.95 score band) + * - Semantic dedup (0.75-0.95 score band via LLM) + * - Importance pre-screening for assistant messages + * - Batch embedding in runAutoCapture + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { ExtractionConfig } from "./config.js"; +import type { Embeddings } from "./embeddings.js"; +import type { Neo4jMemoryClient } from "./neo4j-client.js"; +import { _captureMessage as captureMessage, _runAutoCapture as runAutoCapture } from "./index.js"; + +// ============================================================================ +// Mocks +// ============================================================================ + +const enabledConfig: ExtractionConfig = { + enabled: true, + apiKey: "test-key", + model: "test-model", + baseUrl: "https://test.ai/api/v1", + temperature: 0.0, + maxRetries: 0, +}; + +const disabledConfig: ExtractionConfig = { + ...enabledConfig, + enabled: false, +}; + +const mockLogger = { + info: vi.fn(), + warn: vi.fn(), + debug: vi.fn(), +}; + +function createMockDb(overrides?: Partial): 
Neo4jMemoryClient { + return { + findSimilar: vi.fn().mockResolvedValue([]), + storeMemory: vi.fn().mockResolvedValue(undefined), + ...overrides, + } as unknown as Neo4jMemoryClient; +} + +function createMockEmbeddings(overrides?: Partial): Embeddings { + return { + embed: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]), + embedBatch: vi.fn().mockResolvedValue([[0.1, 0.2, 0.3]]), + ...overrides, + } as unknown as Embeddings; +} + +// ============================================================================ +// captureMessage +// ============================================================================ + +describe("captureMessage", () => { + const originalFetch = globalThis.fetch; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it("should store a new memory when no duplicates exist", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + + // Mock rateImportance (LLM call via fetch) + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 7 }) } }], + }), + }); + + const result = await captureMessage( + "I prefer TypeScript over JavaScript", + "auto-capture", + 0.5, + 1.0, + "test-agent", + "session-1", + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(true); + expect(result.semanticDeduped).toBe(false); + expect(db.storeMemory).toHaveBeenCalledOnce(); + expect(embeddings.embed).toHaveBeenCalledWith("I prefer TypeScript over JavaScript"); + }); + + it("should use pre-computed vector when provided", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + const precomputedVector = [0.5, 0.6, 0.7]; + + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 7 }) } }], + }), + }); + + const result = await 
captureMessage( + "test text", + "auto-capture", + 0.5, + 1.0, + "test-agent", + undefined, + db, + embeddings, + enabledConfig, + mockLogger, + precomputedVector, + ); + + expect(result.stored).toBe(true); + // Should NOT call embed() since pre-computed vector was provided + expect(embeddings.embed).not.toHaveBeenCalled(); + // Should use the pre-computed vector for findSimilar + expect(db.findSimilar).toHaveBeenCalledWith(precomputedVector, 0.75, 3, "test-agent"); + }); + + it("should skip storage when exact duplicate found (score >= 0.95)", async () => { + const db = createMockDb({ + findSimilar: vi + .fn() + .mockResolvedValue([{ id: "existing-1", text: "duplicate text", score: 0.97 }]), + }); + const embeddings = createMockEmbeddings(); + + const result = await captureMessage( + "duplicate text", + "auto-capture", + 0.5, + 1.0, + "test-agent", + undefined, + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(false); + expect(result.semanticDeduped).toBe(false); + expect(db.storeMemory).not.toHaveBeenCalled(); + }); + + it("should semantic dedup when candidate in 0.75-0.95 band is LLM-confirmed duplicate", async () => { + const db = createMockDb({ + findSimilar: vi + .fn() + .mockResolvedValue([{ id: "candidate-1", text: "User prefers TypeScript", score: 0.88 }]), + }); + const embeddings = createMockEmbeddings(); + + // First call: rateImportance, second call: isSemanticDuplicate + let callCount = 0; + globalThis.fetch = vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + // rateImportance response + return Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 7 }) } }], + }), + }); + } + // isSemanticDuplicate response + return Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { + message: { + content: JSON.stringify({ + verdict: "duplicate", + reason: "same preference", + }), + }, + }, + ], + }), + }); + 
}); + + const result = await captureMessage( + "I like TypeScript", + "auto-capture", + 0.5, + 1.0, + "test-agent", + undefined, + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(false); + expect(result.semanticDeduped).toBe(true); + expect(db.storeMemory).not.toHaveBeenCalled(); + }); + + it("should skip importance check when extraction is disabled", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + + // With extraction disabled, rateImportance returns 0.5 fallback, + // so the threshold check is skipped entirely + const result = await captureMessage( + "some text to store", + "auto-capture", + 0.5, + 1.0, + "test-agent", + undefined, + db, + embeddings, + disabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(true); + expect(db.storeMemory).toHaveBeenCalledOnce(); + // Verify stored with fallback importance * discount + const storeCall = (db.storeMemory as ReturnType).mock.calls[0][0]; + expect(storeCall.importance).toBe(0.5); // 0.5 fallback * 1.0 discount + expect(storeCall.extractionStatus).toBe("skipped"); + }); + + it("should apply importance discount for assistant messages", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + + // For assistant messages, importance is rated first + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 8 }) } }], + }), + }); + + const result = await captureMessage( + "Here's what I know about Neo4j graph databases...", + "auto-capture-assistant", + 0.8, // higher threshold for assistant + 0.75, // 25% discount + "test-agent", + undefined, + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(true); + const storeCall = (db.storeMemory as ReturnType).mock.calls[0][0]; + // importance 0.8 (score 8/10) * 0.75 discount ≈ 0.6 + expect(storeCall.importance).toBeCloseTo(0.6); + 
expect(storeCall.source).toBe("auto-capture-assistant"); + }); + + it("should reject assistant messages below importance threshold", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + + // Low importance score + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 3 }) } }], + }), + }); + + const result = await captureMessage( + "Sure, I can help with that.", + "auto-capture-assistant", + 0.8, // threshold 0.8 + 0.75, + "test-agent", + undefined, + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(false); + // Should not even embed since importance pre-screen failed + expect(embeddings.embed).not.toHaveBeenCalled(); + expect(db.storeMemory).not.toHaveBeenCalled(); + }); + + it("should reject user messages below importance threshold", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + + // Low importance score + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 2 }) } }], + }), + }); + + const result = await captureMessage( + "okay thanks", + "auto-capture", + 0.5, // threshold 0.5 + 1.0, + "test-agent", + undefined, + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(result.stored).toBe(false); + expect(db.storeMemory).not.toHaveBeenCalled(); + }); +}); + +// ============================================================================ +// runAutoCapture +// ============================================================================ + +describe("runAutoCapture", () => { + const originalFetch = globalThis.fetch; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it("should batch-embed all retained messages at once", async () => { + const db = createMockDb(); + const 
embedBatchMock = vi.fn().mockResolvedValue([ + [0.1, 0.2], + [0.3, 0.4], + ]); + const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock }); + + // Mock rateImportance calls + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 7 }) } }], + }), + }); + + const messages = [ + { + role: "user", + content: "I prefer TypeScript over JavaScript for backend development", + }, + { + role: "assistant", + content: + "TypeScript is great for type safety and developer experience, especially with Node.js projects", + }, + ]; + + await runAutoCapture( + messages, + "test-agent", + "session-1", + db, + embeddings, + enabledConfig, + mockLogger, + ); + + // Should call embedBatch once with both texts + expect(embedBatchMock).toHaveBeenCalledOnce(); + const batchTexts = embedBatchMock.mock.calls[0][0]; + expect(batchTexts.length).toBe(2); + }); + + it("should not call embedBatch when no messages pass the gate", async () => { + const db = createMockDb(); + const embedBatchMock = vi.fn().mockResolvedValue([]); + const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock }); + + // Short messages that won't pass attention gate + const messages = [ + { role: "user", content: "ok" }, + { role: "assistant", content: "yes" }, + ]; + + await runAutoCapture( + messages, + "test-agent", + "session-1", + db, + embeddings, + enabledConfig, + mockLogger, + ); + + expect(embedBatchMock).not.toHaveBeenCalled(); + expect(db.storeMemory).not.toHaveBeenCalled(); + }); + + it("should handle empty messages array", async () => { + const db = createMockDb(); + const embeddings = createMockEmbeddings(); + + await runAutoCapture([], "test-agent", undefined, db, embeddings, enabledConfig, mockLogger); + + expect(db.storeMemory).not.toHaveBeenCalled(); + }); + + it("should continue processing if one message fails", async () => { + const db = createMockDb(); + // First embed call fails, 
second succeeds + let embedCallCount = 0; + const findSimilarMock = vi.fn().mockImplementation(() => { + embedCallCount++; + if (embedCallCount === 1) { + return Promise.reject(new Error("DB connection failed")); + } + return Promise.resolve([]); + }); + const embedBatchMock = vi.fn().mockResolvedValue([ + [0.1, 0.2], + [0.3, 0.4], + ]); + const dbWithError = createMockDb({ + findSimilar: findSimilarMock, + }); + const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock }); + + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 7 }) } }], + }), + }); + + const messages = [ + { + role: "user", + content: "First message that is long enough to pass the attention gate filter", + }, + { + role: "user", + content: "Second message that is also long enough to pass the attention gate", + }, + ]; + + // Should not throw — errors are caught per-message + await runAutoCapture( + messages, + "test-agent", + "session-1", + dbWithError, + embeddings, + enabledConfig, + mockLogger, + ); + + // The second message should still have been attempted + expect(findSimilarMock).toHaveBeenCalledTimes(2); + }); + + it("should use different thresholds for user vs assistant messages", async () => { + const db = createMockDb(); + const storeMemoryMock = vi.fn().mockResolvedValue(undefined); + const dbWithStore = createMockDb({ storeMemory: storeMemoryMock }); + const embedBatchMock = vi.fn().mockResolvedValue([ + [0.1, 0.2], + [0.3, 0.4], + ]); + const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock }); + + // Always return high importance so both pass + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: JSON.stringify({ score: 9 }) } }], + }), + }); + + const messages = [ + { + role: "user", + content: "I really love working with graph databases like Neo4j for my projects", + }, + { + role: 
"assistant", + content: + "Graph databases like Neo4j excel at modeling connected data and relationship queries", + }, + ]; + + await runAutoCapture( + messages, + "test-agent", + "session-1", + dbWithStore, + embeddings, + enabledConfig, + mockLogger, + ); + + // Both should be stored + const storeCalls = storeMemoryMock.mock.calls; + if (storeCalls.length === 2) { + // User message: importance * 1.0 discount + expect(storeCalls[0][0].source).toBe("auto-capture"); + // Assistant message: importance * 0.75 discount + expect(storeCalls[1][0].source).toBe("auto-capture-assistant"); + expect(storeCalls[1][0].importance).toBeLessThan(storeCalls[0][0].importance); + } + }); + + it("should log capture errors without throwing", async () => { + const embedBatchMock = vi.fn().mockRejectedValue(new Error("embedding service down")); + const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock }); + const db = createMockDb(); + + const messages = [ + { + role: "user", + content: "A long enough message to pass the attention gate for testing purposes", + }, + ]; + + // Should not throw + await runAutoCapture( + messages, + "test-agent", + "session-1", + db, + embeddings, + enabledConfig, + mockLogger, + ); + + // Should have logged the error + expect(mockLogger.warn).toHaveBeenCalled(); + }); +}); diff --git a/extensions/memory-neo4j/cli.ts b/extensions/memory-neo4j/cli.ts new file mode 100644 index 00000000000..e6271beea90 --- /dev/null +++ b/extensions/memory-neo4j/cli.ts @@ -0,0 +1,514 @@ +/** + * CLI command registration for memory-neo4j. 
+ * + * Registers the `openclaw memory neo4j` subcommand group with commands: + * - list: List memory counts by agent and category + * - search: Search memories via hybrid search + * - stats: Show memory statistics and configuration + * - sleep: Run sleep cycle (seven-phase memory consolidation) + * - promote: Manually promote a memory to core + * - index: Re-embed all memories after changing embedding model + * - cleanup: Retroactively apply attention gate to stored memories + */ + +import type { OpenClawPluginApi } from "openclaw/plugin-sdk"; +import type { ExtractionConfig, MemoryNeo4jConfig } from "./config.js"; +import type { Embeddings } from "./embeddings.js"; +import type { Neo4jMemoryClient } from "./neo4j-client.js"; +import { passesAttentionGate } from "./attention-gate.js"; +import { stripMessageWrappers } from "./message-utils.js"; +import { hybridSearch } from "./search.js"; +import { runSleepCycle } from "./sleep-cycle.js"; + +export type CliDeps = { + db: Neo4jMemoryClient; + embeddings: Embeddings; + cfg: MemoryNeo4jConfig; + extractionConfig: ExtractionConfig; + vectorDim: number; +}; + +/** + * Register the `openclaw memory neo4j` CLI subcommand group. 
+ */ +export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void { + const { db, embeddings, cfg, extractionConfig, vectorDim } = deps; + + api.registerCli( + ({ program }) => { + // Find existing memory command or create fallback + let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory"); + if (!memoryCmd) { + // Fallback if core memory CLI not registered yet + memoryCmd = program.command("memory").description("Memory commands"); + } + + // Add neo4j memory subcommand group + const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands"); + + memory + .command("list") + .description("List memory counts by agent and category") + .option("--json", "Output as JSON") + .action(async (opts: { json?: boolean }) => { + try { + await db.ensureInitialized(); + const stats = await db.getMemoryStats(); + + if (opts.json) { + console.log(JSON.stringify(stats, null, 2)); + return; + } + + if (stats.length === 0) { + console.log("No memories stored."); + return; + } + + // Group by agentId + const byAgent = new Map< + string, + Array<{ category: string; count: number; avgImportance: number }> + >(); + for (const row of stats) { + const list = byAgent.get(row.agentId) || []; + list.push({ + category: row.category, + count: row.count, + avgImportance: row.avgImportance, + }); + byAgent.set(row.agentId, list); + } + + // Print table for each agent + for (const [agentId, categories] of byAgent) { + const total = categories.reduce((sum, c) => sum + c.count, 0); + console.log(`\n┌─ ${agentId} (${total} total)`); + console.log("│"); + console.log("│ Category Count Avg Importance"); + console.log("│ ─────────────────────────────────────"); + for (const { category, count, avgImportance } of categories) { + const cat = category.padEnd(12); + const cnt = String(count).padStart(5); + const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%"; + console.log(`│ ${cat} ${cnt} ${imp}`); + } + console.log("└"); + } + console.log(""); + } 
catch (err) { + console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); + process.exitCode = 1; + } + }); + + memory + .command("search") + .description("Search memories") + .argument("", "Search query") + .option("--limit ", "Max results", "5") + .option("--agent ", "Agent id (default: default)") + .action(async (query: string, opts: { limit: string; agent?: string }) => { + // Validate --limit up front (same pattern as the numeric sleep/index options) so a non-numeric or non-positive value is reported instead of reaching hybridSearch as NaN + const limit = parseInt(opts.limit, 10); + if (Number.isNaN(limit) || limit <= 0) { + console.error("Error: --limit must be greater than 0"); + process.exitCode = 1; + return; + } + + try { + const results = await hybridSearch( + db, + embeddings, + query, + limit, + opts.agent ?? "default", + extractionConfig.enabled, + { graphSearchDepth: cfg.graphSearchDepth }, + ); + const output = results.map((r) => ({ + id: r.id, + text: r.text, + category: r.category, + importance: r.importance, + score: r.score, + })); + console.log(JSON.stringify(output, null, 2)); + } catch (err) { + console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); + process.exitCode = 1; + } + }); + + memory + .command("stats") + .description("Show memory statistics and configuration") + .action(async () => { + try { + await db.ensureInitialized(); + const stats = await db.getMemoryStats(); + const total = stats.reduce((sum, s) => sum + s.count, 0); + + console.log("\nMemory (Neo4j) Statistics"); + console.log("─────────────────────────"); + console.log(`Total memories: ${total}`); + console.log(`Neo4j URI: ${cfg.neo4j.uri}`); + console.log(`Embedding: ${cfg.embedding.provider}/${cfg.embedding.model}`); + console.log( + `Extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`, + ); + console.log(`Auto-capture: ${cfg.autoCapture ? "enabled" : "disabled"}`); + console.log(`Auto-recall: ${cfg.autoRecall ? "enabled" : "disabled"}`); + console.log(`Core memory: ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`); + + if (stats.length > 0) { + // Group by category across all agents + const byCategory = new Map(); + for (const row of stats) { + byCategory.set(row.category, (byCategory.get(row.category) ?? 
0) + row.count); + } + console.log("\nBy Category:"); + for (const [category, count] of byCategory) { + console.log(` ${category.padEnd(12)} ${count}`); + } + + // Show agent count + const agents = new Set(stats.map((s) => s.agentId)); + console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`); + } + console.log(""); + } catch (err) { + console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); + process.exitCode = 1; + } + }); + + memory + .command("sleep") + .description("Run sleep cycle — consolidate memories with Pareto-based promotion") + .option("--agent ", "Agent id (default: all agents)") + .option("--dedup-threshold ", "Vector similarity threshold for dedup (default: 0.95)") + .option("--pareto ", "Top N% for core memory (default: 0.2 = top 20%)") + .option("--promotion-min-age ", "Min age in days before promotion (default: 7)") + .option("--decay-threshold ", "Decay score threshold for pruning (default: 0.1)") + .option("--decay-half-life ", "Base half-life in days (default: 30)") + .option("--batch-size ", "Extraction batch size (default: 50)") + .option("--delay ", "Delay between extraction batches in ms (default: 1000)") + .option("--max-semantic-pairs ", "Max LLM-checked semantic dedup pairs (default: 500)") + .option("--concurrency ", "Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)") + .option( + "--skip-semantic", + "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)", + ) + .action( + async (opts: { + agent?: string; + dedupThreshold?: string; + pareto?: string; + promotionMinAge?: string; + decayThreshold?: string; + decayHalfLife?: string; + batchSize?: string; + delay?: string; + maxSemanticPairs?: string; + concurrency?: string; + skipSemantic?: boolean; + }) => { + console.log("\n🌙 Memory Sleep Cycle"); + console.log("═════════════════════════════════════════════════════════════"); + console.log("Seven-phase memory consolidation (Pareto-based):\n"); + console.log(" Phase 1: 
Deduplication — Merge near-duplicate memories"); + console.log( + " Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.75–0.95 band)", + ); + console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories"); + console.log( + " Phase 2: Pareto Scoring — Calculate effective scores for all memories", + ); + console.log(" Phase 3: Core Promotion — Regular memories above threshold → core"); + console.log(" Phase 4: Core Demotion — Core memories below threshold → regular"); + console.log(" Phase 5: Extraction — Extract entities and categorize"); + console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories"); + console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n"); + + try { + // Validate sleep cycle CLI parameters before running + const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : undefined; + const delay = opts.delay ? parseInt(opts.delay, 10) : undefined; + const decayHalfLife = opts.decayHalfLife + ? parseInt(opts.decayHalfLife, 10) + : undefined; + const decayThreshold = opts.decayThreshold + ? parseFloat(opts.decayThreshold) + : undefined; + const pareto = opts.pareto ? parseFloat(opts.pareto) : undefined; + const promotionMinAge = opts.promotionMinAge + ? 
parseInt(opts.promotionMinAge, 10) + : undefined; + + if (batchSize != null && (Number.isNaN(batchSize) || batchSize <= 0)) { + console.error("Error: --batch-size must be greater than 0"); + process.exitCode = 1; + return; + } + if (delay != null && (Number.isNaN(delay) || delay < 0)) { + console.error("Error: --delay must be >= 0"); + process.exitCode = 1; + return; + } + if (decayHalfLife != null && (Number.isNaN(decayHalfLife) || decayHalfLife <= 0)) { + console.error("Error: --decay-half-life must be greater than 0"); + process.exitCode = 1; + return; + } + if ( + decayThreshold != null && + (Number.isNaN(decayThreshold) || decayThreshold < 0 || decayThreshold > 1) + ) { + console.error("Error: --decay-threshold must be between 0 and 1"); + process.exitCode = 1; + return; + } + if (pareto != null && (Number.isNaN(pareto) || pareto < 0 || pareto > 1)) { + console.error("Error: --pareto must be between 0 and 1"); + process.exitCode = 1; + return; + } + if ( + promotionMinAge != null && + (Number.isNaN(promotionMinAge) || promotionMinAge < 0) + ) { + console.error("Error: --promotion-min-age must be >= 0"); + process.exitCode = 1; + return; + } + + const maxSemanticPairs = opts.maxSemanticPairs + ? parseInt(opts.maxSemanticPairs, 10) + : undefined; + if ( + maxSemanticPairs != null && + (Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0) + ) { + console.error("Error: --max-semantic-pairs must be greater than 0"); + process.exitCode = 1; + return; + } + + const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined; + if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) { + console.error("Error: --concurrency must be greater than 0"); + process.exitCode = 1; + return; + } + + // --dedup-threshold is a vector similarity threshold: reject NaN and out-of-range input like the other thresholds instead of forwarding it unchecked + const dedupThreshold = opts.dedupThreshold + ? parseFloat(opts.dedupThreshold) + : undefined; + if ( + dedupThreshold != null && + (Number.isNaN(dedupThreshold) || dedupThreshold < 0 || dedupThreshold > 1) + ) { + console.error("Error: --dedup-threshold must be between 0 and 1"); + process.exitCode = 1; + return; + } + + await db.ensureInitialized(); + + const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, { + agentId: opts.agent, + dedupThreshold,
+ skipSemanticDedup: opts.skipSemantic === true, + maxSemanticDedupPairs: maxSemanticPairs, + llmConcurrency: concurrency, + paretoPercentile: pareto, + promotionMinAgeDays: promotionMinAge, + decayRetentionThreshold: decayThreshold, + decayBaseHalfLifeDays: decayHalfLife, + decayCurves: Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined, + extractionBatchSize: batchSize, + extractionDelayMs: delay, + onPhaseStart: (phase) => { + // NOTE(review): the banner printed at the start of this command lists seven phases (incl. "Phase 4: Core Demotion") while these labels stop at Phase 6 — confirm which numbering is current + const phaseNames: Record = { + dedup: "Phase 1: Deduplication", + semanticDedup: "Phase 1b: Semantic Deduplication", + conflict: "Phase 1c: Conflict Detection", + pareto: "Phase 2: Pareto Scoring", + promotion: "Phase 3: Core Promotion", + extraction: "Phase 4: Extraction", + decay: "Phase 5: Decay & Pruning", + cleanup: "Phase 6: Orphan Cleanup", + }; + // Fall back to the raw phase key so an unmapped phase never prints "undefined" + console.log(`\n▶ ${phaseNames[phase] ?? phase}`); + console.log("─────────────────────────────────────────────────────────────"); + }, + onProgress: (_phase, message) => { + console.log(` ${message}`); + }, + }); + + console.log("\n═════════════════════════════════════════════════════════════"); + console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`); + console.log("─────────────────────────────────────────────────────────────"); + console.log( + ` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`, + ); + console.log( + ` Conflicts: ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`, + ); + console.log( + ` Semantic Dedup: ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`, + ); + console.log( + ` Pareto: ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`, + ); + // Report the percentile actually in effect; 0.2 mirrors the default documented on the --pareto option + console.log( + ` Threshold: ${result.pareto.threshold.toFixed(4)} (top ${((pareto ?? 0.2) * 100).toFixed(0)}%)`, + ); + console.log( + ` Promotion: 
${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`, + ); + console.log(` Decay/Pruning: ${result.decay.memoriesPruned} memories pruned`); + console.log( + ` Extraction: ${result.extraction.succeeded}/${result.extraction.total} extracted` + + (result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""), + ); + console.log( + ` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`, + ); + if (result.aborted) { + console.log("\n⚠️ Sleep cycle was aborted before completion."); + } + console.log(""); + } catch (err) { + console.error( + `\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`, + ); + process.exitCode = 1; + } + }, + ); + + memory + .command("promote") + .description("Manually promote a memory to core status") + .argument("", "Memory ID to promote") + .action(async (id: string) => { + try { + await db.ensureInitialized(); + const promoted = await db.promoteToCore([id]); + if (promoted > 0) { + console.log(`✅ Memory ${id} promoted to core.`); + } else { + console.log(`❌ Memory ${id} not found.`); + process.exitCode = 1; + } + } catch (err) { + console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); + process.exitCode = 1; + } + }); + + memory + .command("index") + .description( + "Re-embed all memories and entities — use after changing embedding model/provider", + ) + .option("--batch-size ", "Embedding batch size (default: 50)") + .action(async (opts: { batchSize?: string }) => { + const batchSize = opts.batchSize ? 
parseInt(opts.batchSize, 10) : 50; + if (Number.isNaN(batchSize) || batchSize <= 0) { + console.error("Error: --batch-size must be greater than 0"); + process.exitCode = 1; + return; + } + + console.log("\nMemory Neo4j — Reindex Embeddings"); + console.log("═════════════════════════════════════════════════════════════"); + console.log(`Model: ${cfg.embedding.provider}/${cfg.embedding.model}`); + console.log(`Dimensions: ${vectorDim}`); + console.log(`Batch size: ${batchSize}\n`); + + try { + const startedAt = Date.now(); + const result = await db.reindex((texts) => embeddings.embedBatch(texts), { + batchSize, + onProgress: (phase, done, total) => { + if (phase === "drop-indexes" && done === 0) { + console.log("▶ Dropping old vector index…"); + } else if (phase === "memories") { + console.log(` Memories: ${done}/${total}`); + } else if (phase === "create-indexes" && done === 0) { + console.log("▶ Recreating vector index…"); + } + }, + }); + + const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1); + console.log("\n═════════════════════════════════════════════════════════════"); + console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`); + console.log(""); + } catch (err) { + console.error( + `\n❌ Reindex failed: ${err instanceof Error ? 
err.message : String(err)}`, + ); + process.exitCode = 1; + } + }); + + memory + .command("cleanup") + .description( + "Retroactively apply the attention gate — find and remove low-substance memories", + ) + .option("--execute", "Actually delete (default: dry-run preview)") + .option("--all", "Include explicitly-stored memories (default: auto-capture only)") + .option("--agent ", "Only clean up memories for a specific agent") + .action(async (opts: { execute?: boolean; all?: boolean; agent?: string }) => { + try { + await db.ensureInitialized(); + + // Fetch memories — by default only auto-capture (explicit stores are trusted) + const conditions: string[] = []; + if (!opts.all) { + conditions.push("m.source = 'auto-capture'"); + } + if (opts.agent) { + conditions.push("m.agentId = $agentId"); + } + const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : ""; + const allMemories = await db.runQuery<{ + id: string; + text: string; + source: string; + }>( + `MATCH (m:Memory) ${where} + RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source + ORDER BY m.createdAt ASC`, + opts.agent ? { agentId: opts.agent } : {}, + ); + + // Strip channel metadata wrappers (same as the real pipeline) then gate + const noise: Array<{ id: string; text: string; source: string }> = []; + for (const mem of allMemories) { + const stripped = stripMessageWrappers(mem.text); + if (!passesAttentionGate(stripped)) { + noise.push(mem); + } + } + + if (noise.length === 0) { + console.log("\nNo low-substance memories found. Everything passes the gate."); + return; + } + + console.log( + `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`, + ); + + for (const mem of noise) { + const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text; + console.log(` [${mem.source}] "${preview}"`); + } + + if (!opts.execute) { + console.log( + `\nDry run — ${noise.length} memories would be removed. 
Re-run with --execute to delete.\n`, + ); + return; + } + + // Delete in batch + const deleted = await db.pruneMemories(noise.map((m) => m.id)); + console.log(`\nDeleted ${deleted} low-substance memories.\n`); + } catch (err) { + console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); + process.exitCode = 1; + } + }); + }, + { commands: [] }, // Adds subcommands to existing "memory" command, no conflict + ); +} diff --git a/extensions/memory-neo4j/config.ts b/extensions/memory-neo4j/config.ts index 50dcdc1827c..d6bb3665087 100644 --- a/extensions/memory-neo4j/config.ts +++ b/extensions/memory-neo4j/config.ts @@ -92,24 +92,27 @@ export const EMBEDDING_DIMENSIONS: Record = { // Default dimension for unknown models (Ollama models vary) export const DEFAULT_EMBEDDING_DIMS = 1024; -export function vectorDimsForModel(model: string): number { - // Check exact match first - if (EMBEDDING_DIMENSIONS[model]) { - return EMBEDDING_DIMENSIONS[model]; +/** + * Lookup a value by exact key or longest matching prefix. + * Returns undefined if no match found. + */ +function lookupByPrefix(table: Record, key: string): T | undefined { + if (table[key] !== undefined) { + return table[key]; } - // Prefer longest matching prefix (e.g. 
"mxbai-embed-large-2k" over "mxbai-embed-large") - let best: { dims: number; keyLen: number } | undefined; - for (const [known, dims] of Object.entries(EMBEDDING_DIMENSIONS)) { - if (model.startsWith(known) && (!best || known.length > best.keyLen)) { - best = { dims, keyLen: known.length }; + let best: { value: T; keyLen: number } | undefined; + for (const [known, value] of Object.entries(table)) { + if (key.startsWith(known) && (!best || known.length > best.keyLen)) { + best = { value, keyLen: known.length }; } } - if (best) { - return best.dims; - } + return best?.value; +} + +export function vectorDimsForModel(model: string): number { // Return default for unknown models — callers should warn when this path is taken, // as the default 1024 dimensions may not match the actual model's output. - return DEFAULT_EMBEDDING_DIMS; + return lookupByPrefix(EMBEDDING_DIMENSIONS, model) ?? DEFAULT_EMBEDDING_DIMS; } /** Max input token lengths for known embedding models. */ @@ -129,17 +132,7 @@ export const EMBEDDING_CONTEXT_LENGTHS: Record = { export const DEFAULT_EMBEDDING_CONTEXT_LENGTH = 512; export function contextLengthForModel(model: string): number { - if (EMBEDDING_CONTEXT_LENGTHS[model]) { - return EMBEDDING_CONTEXT_LENGTHS[model]; - } - // Prefer longest matching prefix (e.g. "mxbai-embed-large-8k" over "mxbai-embed-large") - let best: { len: number; keyLen: number } | undefined; - for (const [known, len] of Object.entries(EMBEDDING_CONTEXT_LENGTHS)) { - if (model.startsWith(known) && (!best || known.length > best.keyLen)) { - best = { len, keyLen: known.length }; - } - } - return best?.len ?? DEFAULT_EMBEDDING_CONTEXT_LENGTH; + return lookupByPrefix(EMBEDDING_CONTEXT_LENGTHS, model) ?? 
DEFAULT_EMBEDDING_CONTEXT_LENGTH; } /** diff --git a/extensions/memory-neo4j/embeddings.ts b/extensions/memory-neo4j/embeddings.ts index 44b101175c9..931eb97a24d 100644 --- a/extensions/memory-neo4j/embeddings.ts +++ b/extensions/memory-neo4j/embeddings.ts @@ -8,15 +8,9 @@ import { createHash } from "node:crypto"; import OpenAI from "openai"; import type { EmbeddingProvider } from "./config.js"; +import type { Logger } from "./schema.js"; import { contextLengthForModel } from "./config.js"; -type Logger = { - info: (msg: string) => void; - warn: (msg: string) => void; - error: (msg: string) => void; - debug?: (msg: string) => void; -}; - /** * Simple LRU cache for embedding vectors. * Keyed by SHA-256 hash of the input text to avoid storing large strings. diff --git a/extensions/memory-neo4j/extractor.test.ts b/extensions/memory-neo4j/extractor.test.ts index 7bb41012826..9b277154aea 100644 --- a/extensions/memory-neo4j/extractor.test.ts +++ b/extensions/memory-neo4j/extractor.test.ts @@ -8,19 +8,22 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import type { ExtractionConfig } from "./config.js"; +import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js"; import { - extractUserMessages, - extractAssistantMessages, - stripAssistantWrappers, extractEntities, runBackgroundExtraction, rateImportance, resolveConflict, isSemanticDuplicate, - isTransientError, - runSleepCycle, + SEMANTIC_DEDUP_VECTOR_THRESHOLD, } from "./extractor.js"; -import { passesAttentionGate, passesAssistantAttentionGate } from "./index.js"; +import { isTransientError } from "./llm-client.js"; +import { + extractUserMessages, + extractAssistantMessages, + stripAssistantWrappers, +} from "./message-utils.js"; +import { runSleepCycle } from "./sleep-cycle.js"; // ============================================================================ // passesAttentionGate() @@ -1756,7 +1759,6 @@ describe("runSleepCycle", () => { 
calculateAllEffectiveScores: vi.fn().mockResolvedValue([]), calculateParetoThreshold: vi.fn().mockReturnValue(0.5), promoteToCore: vi.fn().mockResolvedValue(0), - demoteFromCore: vi.fn().mockResolvedValue(0), findDecayedMemories: vi.fn().mockResolvedValue([]), pruneMemories: vi.fn().mockResolvedValue(0), countByExtractionStatus: vi @@ -1768,11 +1770,6 @@ describe("runSleepCycle", () => { findOrphanTags: vi.fn().mockResolvedValue([]), deleteOrphanTags: vi.fn().mockResolvedValue(0), updateExtractionStatus: vi.fn().mockResolvedValue(undefined), - mergeEntity: vi.fn().mockResolvedValue({ id: "e1", name: "test" }), - createMentions: vi.fn().mockResolvedValue(undefined), - createEntityRelationship: vi.fn().mockResolvedValue(undefined), - tagMemory: vi.fn().mockResolvedValue(undefined), - updateMemoryCategory: vi.fn().mockResolvedValue(undefined), }; }); @@ -2252,64 +2249,7 @@ describe("runSleepCycle", () => { }); }); - // Phase 4: Demotion - describe("Phase 4: Core Demotion", () => { - it("should demote core memories below threshold", async () => { - const scores = [ - { - id: "m1", - text: "test", - category: "core", - importance: 0.3, - retrievalCount: 1, - ageDays: 30, - effectiveScore: 0.3, - }, - { - id: "m2", - text: "test", - category: "core", - importance: 0.9, - retrievalCount: 10, - ageDays: 5, - effectiveScore: 0.95, - }, - ]; - mockDb.calculateAllEffectiveScores.mockResolvedValue(scores); - mockDb.calculateParetoThreshold.mockReturnValue(0.7); - mockDb.demoteFromCore.mockResolvedValue(1); - - const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger); - - // m1 should be demoted (category=core, score=0.30 < 0.70) - expect(mockDb.demoteFromCore).toHaveBeenCalledWith(["m1"]); - expect(result.demotion.candidatesFound).toBe(1); - expect(result.demotion.demoted).toBe(1); - }); - - it("should not demote regular memories", async () => { - const scores = [ - { - id: "m1", - text: "test", - category: "fact", - importance: 0.2, - retrievalCount: 
0, - ageDays: 50, - effectiveScore: 0.1, - }, - ]; - mockDb.calculateAllEffectiveScores.mockResolvedValue(scores); - mockDb.calculateParetoThreshold.mockReturnValue(0.7); - - const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger); - - expect(result.demotion.candidatesFound).toBe(0); - expect(mockDb.demoteFromCore).not.toHaveBeenCalled(); - }); - }); - - // Phase 5: Extraction + // Phase 4: Extraction describe("Phase 5: Entity Extraction", () => { it("should process pending extractions in batches", async () => { mockDb.countByExtractionStatus.mockResolvedValue({ @@ -2606,7 +2546,6 @@ describe("runSleepCycle", () => { expect(onPhaseStart).toHaveBeenCalledWith("semanticDedup"); expect(onPhaseStart).toHaveBeenCalledWith("pareto"); expect(onPhaseStart).toHaveBeenCalledWith("promotion"); - expect(onPhaseStart).toHaveBeenCalledWith("demotion"); expect(onPhaseStart).toHaveBeenCalledWith("extraction"); expect(onPhaseStart).toHaveBeenCalledWith("decay"); expect(onPhaseStart).toHaveBeenCalledWith("cleanup"); @@ -2642,7 +2581,6 @@ describe("runSleepCycle", () => { expect(result).toHaveProperty("semanticDedup"); expect(result).toHaveProperty("pareto"); expect(result).toHaveProperty("promotion"); - expect(result).toHaveProperty("demotion"); expect(result).toHaveProperty("decay"); expect(result).toHaveProperty("extraction"); expect(result).toHaveProperty("cleanup"); @@ -2669,6 +2607,208 @@ describe("runSleepCycle", () => { // isTransientError() // ============================================================================ +// ============================================================================ +// isSemanticDuplicate +// ============================================================================ + +describe("isSemanticDuplicate", () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + const enabledConfig: ExtractionConfig = { + enabled: true, + apiKey: "test-key", + model: 
"test-model", + baseUrl: "https://test.ai/api/v1", + temperature: 0.0, + maxRetries: 0, + }; + + const disabledConfig: ExtractionConfig = { + ...enabledConfig, + enabled: false, + }; + + it("should return false when extraction is disabled", async () => { + const result = await isSemanticDuplicate("new text", "existing text", disabledConfig); + expect(result).toBe(false); + }); + + it("should return true when LLM says duplicate", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { + message: { + content: JSON.stringify({ verdict: "duplicate", reason: "same fact" }), + }, + }, + ], + }), + }); + + const result = await isSemanticDuplicate("I like Neo4j", "User prefers Neo4j", enabledConfig); + expect(result).toBe(true); + }); + + it("should return false when LLM says unique", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { + message: { + content: JSON.stringify({ verdict: "unique", reason: "different topic" }), + }, + }, + ], + }), + }); + + const result = await isSemanticDuplicate("I like coffee", "User lives in NYC", enabledConfig); + expect(result).toBe(false); + }); + + it("should skip LLM call when vector similarity is below threshold", async () => { + const fetchSpy = vi.fn(); + globalThis.fetch = fetchSpy; + + const result = await isSemanticDuplicate( + "text a", + "text b", + enabledConfig, + SEMANTIC_DEDUP_VECTOR_THRESHOLD - 0.01, + ); + expect(result).toBe(false); + expect(fetchSpy).not.toHaveBeenCalled(); + }); + + it("should call LLM when vector similarity is at or above threshold", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { + message: { + content: JSON.stringify({ verdict: "duplicate", reason: "same" }), + }, + }, + ], + }), + }); + + const result = await isSemanticDuplicate( + "text a", + "text b", + enabledConfig, + 
SEMANTIC_DEDUP_VECTOR_THRESHOLD, + ); + expect(result).toBe(true); + expect(globalThis.fetch).toHaveBeenCalled(); + }); + + it("should call LLM when no vector similarity is provided", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { + message: { + content: JSON.stringify({ verdict: "unique", reason: "different" }), + }, + }, + ], + }), + }); + + const result = await isSemanticDuplicate("text a", "text b", enabledConfig); + expect(result).toBe(false); + expect(globalThis.fetch).toHaveBeenCalled(); + }); + + it("should return false on fetch error (fail-open)", async () => { + globalThis.fetch = vi + .fn() + .mockRejectedValue(new DOMException("signal timed out", "TimeoutError")); + + const result = await isSemanticDuplicate("text a", "text b", enabledConfig); + expect(result).toBe(false); + }); + + it("should return false on invalid JSON response", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: "not valid json" } }], + }), + }); + + const result = await isSemanticDuplicate("text a", "text b", enabledConfig); + expect(result).toBe(false); + }); + + it("should return false when verdict is missing from response", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [ + { + message: { + content: JSON.stringify({ reason: "no verdict field" }), + }, + }, + ], + }), + }); + + const result = await isSemanticDuplicate("text a", "text b", enabledConfig); + expect(result).toBe(false); + }); + + it("should return false when LLM returns null content", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: null } }], + }), + }); + + const result = await isSemanticDuplicate("text a", "text b", enabledConfig); + expect(result).toBe(false); + }); + + it("should 
respect abort signal", async () => { + const controller = new AbortController(); + controller.abort(); + + globalThis.fetch = vi.fn().mockRejectedValue(new DOMException("signal aborted", "AbortError")); + + const result = await isSemanticDuplicate( + "text a", + "text b", + enabledConfig, + undefined, + controller.signal, + ); + expect(result).toBe(false); + }); +}); + +// ============================================================================ +// isTransientError +// ============================================================================ + describe("isTransientError", () => { it("should return false for non-Error values", () => { expect(isTransientError("string error")).toBe(false); diff --git a/extensions/memory-neo4j/extractor.ts b/extensions/memory-neo4j/extractor.ts index 3b9c29d334f..c7d37f724cd 100644 --- a/extensions/memory-neo4j/extractor.ts +++ b/extensions/memory-neo4j/extractor.ts @@ -1,9 +1,12 @@ /** - * LLM-based entity extraction and sleep cycle for memory-neo4j. + * LLM-based entity extraction and memory operations for memory-neo4j. * * Extraction uses a configurable OpenAI-compatible LLM (OpenRouter, Ollama, etc.) to: * - Extract entities, relationships, and tags from stored memories * - Classify memories into categories (preference, fact, decision, etc.) + * - Rate memory importance on a 1-10 scale + * - Detect semantic duplicates via LLM comparison + * - Resolve conflicting memories * * Runs as background fire-and-forget operations with graceful degradation. 
*/ @@ -12,20 +15,10 @@ import { randomUUID } from "node:crypto"; import type { ExtractionConfig } from "./config.js"; import type { Embeddings } from "./embeddings.js"; import type { Neo4jMemoryClient } from "./neo4j-client.js"; -import type { EntityType, ExtractionResult, MemoryCategory } from "./schema.js"; +import type { EntityType, ExtractionResult, Logger, MemoryCategory } from "./schema.js"; +import { callOpenRouter, callOpenRouterStream, isTransientError } from "./llm-client.js"; import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES, MEMORY_CATEGORIES } from "./schema.js"; -// ============================================================================ -// Types -// ============================================================================ - -type Logger = { - info: (msg: string) => void; - warn: (msg: string) => void; - error: (msg: string) => void; - debug?: (msg: string) => void; -}; - // ============================================================================ // Extraction Prompt // ============================================================================ @@ -58,161 +51,6 @@ Rules: - Keep entity descriptions brief (1 sentence max) - Category: "preference" for opinions/preferences, "fact" for factual info, "decision" for choices made, "entity" for entity-focused, "other" for miscellaneous`; -// ============================================================================ -// OpenRouter API Client -// ============================================================================ - -// Timeout for LLM and embedding fetch calls to prevent hanging indefinitely -const FETCH_TIMEOUT_MS = 30_000; - -async function callOpenRouter( - config: ExtractionConfig, - prompt: string | Array<{ role: string; content: string }>, - abortSignal?: AbortSignal, -): Promise { - const messages = typeof prompt === "string" ? 
[{ role: "user", content: prompt }] : prompt; - - for (let attempt = 0; attempt <= config.maxRetries; attempt++) { - try { - // Combine the caller's abort signal with a per-request timeout - const signal = abortSignal - ? AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)]) - : AbortSignal.timeout(FETCH_TIMEOUT_MS); - - const response = await fetch(`${config.baseUrl}/chat/completions`, { - method: "POST", - headers: { - Authorization: `Bearer ${config.apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: config.model, - messages, - temperature: config.temperature, - response_format: { type: "json_object" }, - }), - signal, - }); - - if (!response.ok) { - const body = await response.text().catch(() => ""); - throw new Error(`OpenRouter API error ${response.status}: ${body}`); - } - - const data = (await response.json()) as { - choices?: Array<{ message?: { content?: string } }>; - }; - return data.choices?.[0]?.message?.content ?? null; - } catch (err) { - if (attempt >= config.maxRetries) { - throw err; - } - // Exponential backoff - await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt))); - } - } - return null; -} - -/** - * Streaming variant of callOpenRouter. Uses the streaming API to receive chunks - * incrementally, allowing earlier cancellation via abort signal and better - * latency characteristics for long responses. - * - * Accumulates all chunks into a single response string since extraction - * uses JSON mode (which requires the complete object to parse). - */ -async function callOpenRouterStream( - config: ExtractionConfig, - prompt: string | Array<{ role: string; content: string }>, - abortSignal?: AbortSignal, -): Promise { - const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt; - - for (let attempt = 0; attempt <= config.maxRetries; attempt++) { - try { - const signal = abortSignal - ? 
AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)]) - : AbortSignal.timeout(FETCH_TIMEOUT_MS); - - const response = await fetch(`${config.baseUrl}/chat/completions`, { - method: "POST", - headers: { - Authorization: `Bearer ${config.apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: config.model, - messages, - temperature: config.temperature, - response_format: { type: "json_object" }, - stream: true, - }), - signal, - }); - - if (!response.ok) { - const body = await response.text().catch(() => ""); - throw new Error(`OpenRouter API error ${response.status}: ${body}`); - } - - if (!response.body) { - throw new Error("No response body for streaming request"); - } - - // Read SSE stream and accumulate content chunks - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let accumulated = ""; - let buffer = ""; - - for (;;) { - // Check abort between chunks for responsive cancellation - if (abortSignal?.aborted) { - reader.cancel().catch(() => {}); - return null; - } - - const { done, value } = await reader.read(); - if (done) break; - - buffer += decoder.decode(value, { stream: true }); - - // Parse SSE lines - const lines = buffer.split("\n"); - buffer = lines.pop() ?? 
""; - - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed.startsWith("data: ")) continue; - const data = trimmed.slice(6); - if (data === "[DONE]") continue; - - try { - const parsed = JSON.parse(data) as { - choices?: Array<{ delta?: { content?: string } }>; - }; - const chunk = parsed.choices?.[0]?.delta?.content; - if (chunk) { - accumulated += chunk; - } - } catch { - // Skip malformed SSE chunks - } - } - } - - return accumulated || null; - } catch (err) { - if (attempt >= config.maxRetries) { - throw err; - } - await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt))); - } - } - return null; -} - // ============================================================================ // Entity Extraction // ============================================================================ @@ -227,32 +65,6 @@ async function callOpenRouterStream( */ const MAX_EXTRACTION_RETRIES = 3; -/** - * Check if an error is transient (network/timeout) vs permanent (JSON parse, etc.) - */ -export function isTransientError(err: unknown): boolean { - if (!(err instanceof Error)) { - return false; - } - const msg = err.message.toLowerCase(); - return ( - err.name === "AbortError" || - err.name === "TimeoutError" || - msg.includes("timeout") || - msg.includes("econnrefused") || - msg.includes("econnreset") || - msg.includes("etimedout") || - msg.includes("enotfound") || - msg.includes("network") || - msg.includes("fetch failed") || - msg.includes("socket hang up") || - msg.includes("api error 429") || - msg.includes("api error 502") || - msg.includes("api error 503") || - msg.includes("api error 504") - ); -} - /** * Extract entities and relationships from a memory text using LLM. 
* @@ -526,712 +338,6 @@ export async function runBackgroundExtraction( } } -// ============================================================================ -// Sleep Cycle - Seven Phase Memory Consolidation -// ============================================================================ - -/** - * Sleep Cycle Result - aggregated stats from all phases. - */ -export type SleepCycleResult = { - // Phase 1: Deduplication - dedup: { - clustersFound: number; - memoriesMerged: number; - }; - // Phase 1b: Conflict Detection - conflict: { - pairsFound: number; - resolved: number; - invalidated: number; - }; - // Phase 1c: Semantic Deduplication - semanticDedup: { - pairsChecked: number; - duplicatesMerged: number; - }; - // Phase 2: Pareto Scoring & Threshold - pareto: { - totalMemories: number; - coreMemories: number; - regularMemories: number; - threshold: number; // The 80th percentile effective score - }; - // Phase 3: Core Promotion - promotion: { - candidatesFound: number; - promoted: number; - }; - // Phase 4: Core Demotion - demotion: { - candidatesFound: number; - demoted: number; - }; - // Phase 6: Decay & Pruning - decay: { - memoriesPruned: number; - }; - // Phase 5: Entity Extraction - extraction: { - total: number; - processed: number; - succeeded: number; - failed: number; - }; - // Phase 7: Orphan Cleanup - cleanup: { - entitiesRemoved: number; - tagsRemoved: number; - }; - // Overall - durationMs: number; - aborted: boolean; -}; - -export type SleepCycleOptions = { - // Common - agentId?: string; - abortSignal?: AbortSignal; - - // Phase 1: Deduplication - dedupThreshold?: number; // Vector similarity threshold (default: 0.95) - skipSemanticDedup?: boolean; // Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c) - - // Phase 2-4: Pareto-based Promotion/Demotion - paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%) - promotionMinAgeDays?: number; // Min age before promotion (default: 7) - - // Phase 1b: Semantic Dedup 
- maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500) - - // Concurrency - llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL) - - // Phase 5: Extraction - extractionBatchSize?: number; // Memories per batch (default: 50) - extractionDelayMs?: number; // Delay between batches (default: 1000) - - // Phase 6: Decay - decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1) - decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30) - decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2) - decayCurves?: Record; // Per-category decay curve overrides - - // Progress callback - onPhaseStart?: ( - phase: - | "dedup" - | "conflict" - | "semanticDedup" - | "pareto" - | "promotion" - | "demotion" - | "decay" - | "extraction" - | "cleanup", - ) => void; - onProgress?: (phase: string, message: string) => void; -}; - -/** - * Run the full sleep cycle - seven phases of memory consolidation. - * - * This implements a Pareto-based memory ecosystem where core memory - * is bounded to the top 20% of memories by effective score. - * - * Phases: - * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy) - * 2. PARETO SCORING - Calculate effective scores for all memories - * 3. CORE PROMOTION - Regular memories above threshold → core - * 4. CORE DEMOTION - Core memories below threshold → regular - * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve) - * 6. EXTRACTION - Form entity relationships (strengthen connections) - * 7. 
CLEANUP - Remove orphaned entities/tags (garbage collection) - * - * Effective Score Formulas: - * - Regular memories: importance × freq_boost × recency - * - Core memories: importance × freq_boost × recency (same for threshold comparison) - * - Core memory retrieval ranking: freq_boost × recency (pure usage-based) - * - * Where: - * - freq_boost = 1 + log(1 + retrievalCount) × 0.3 - * - recency = 2^(-days_since_last / 14) - * - * Benefits: - * - Self-regulating core memory size (Pareto distribution) - * - Memories can be promoted AND demoted based on usage - * - Simulates human memory consolidation during sleep - * - * Research basis: - * - Pareto principle (20/80 rule) for memory tiering - * - ACT-R memory model for retrieval-based importance - * - Ebbinghaus forgetting curve for decay - * - MemGPT/Letta for tiered memory architecture - */ -export async function runSleepCycle( - db: Neo4jMemoryClient, - embeddings: Embeddings, - config: ExtractionConfig, - logger: Logger, - options: SleepCycleOptions = {}, -): Promise { - const startTime = Date.now(); - const { - agentId, - abortSignal, - dedupThreshold = 0.95, - skipSemanticDedup = false, - maxSemanticDedupPairs = 500, - llmConcurrency = 8, - paretoPercentile = 0.2, - promotionMinAgeDays = 7, - decayRetentionThreshold = 0.1, - decayBaseHalfLifeDays = 30, - decayImportanceMultiplier = 2, - decayCurves, - extractionBatchSize = 50, - extractionDelayMs = 1000, - onPhaseStart, - onProgress, - } = options; - - const result: SleepCycleResult = { - dedup: { clustersFound: 0, memoriesMerged: 0 }, - conflict: { pairsFound: 0, resolved: 0, invalidated: 0 }, - semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 }, - pareto: { totalMemories: 0, coreMemories: 0, regularMemories: 0, threshold: 0 }, - promotion: { candidatesFound: 0, promoted: 0 }, - demotion: { candidatesFound: 0, demoted: 0 }, - decay: { memoriesPruned: 0 }, - extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 }, - cleanup: { entitiesRemoved: 0, 
tagsRemoved: 0 }, - durationMs: 0, - aborted: false, - }; - - const LLM_CONCURRENCY = llmConcurrency; - - // -------------------------------------------------------------------------- - // Phase 1: Deduplication (Optimized - combined vector + semantic dedup) - // Call findDuplicateClusters ONCE at 0.75 threshold, then split by similarity band: - // - ≥0.95: vector merge (high-confidence duplicates) - // - 0.75-0.95: semantic dedup via LLM (paraphrases) - // -------------------------------------------------------------------------- - if (!abortSignal?.aborted) { - onPhaseStart?.("dedup"); - logger.info("memory-neo4j: [sleep] Phase 1: Deduplication (vector + semantic)"); - - try { - // Fetch clusters at 0.75 threshold with similarity scores - const allClusters = await db.findDuplicateClusters(0.75, agentId, true); - - // Helper to create canonical pair key (sorted) - const makePairKey = (a: string, b: string): string => { - return a < b ? `${a}:${b}` : `${b}:${a}`; - }; - - // Separate clusters into high-similarity (≥0.95) and medium-similarity (0.75-0.95) - const highSimClusters: typeof allClusters = []; - const mediumSimClusters: typeof allClusters = []; - - for (const cluster of allClusters) { - if (abortSignal?.aborted) break; - if (!cluster.similarities || cluster.memoryIds.length < 2) continue; - - // Check if ANY pair in this cluster has similarity ≥ dedupThreshold - let hasHighSim = false; - for (const [pairKey, score] of cluster.similarities.entries()) { - if (score >= dedupThreshold) { - hasHighSim = true; - break; - } - } - - if (hasHighSim) { - // Split this cluster into high-sim and medium-sim sub-clusters - // For simplicity, if a cluster has ANY high-sim pair, treat the whole cluster as high-sim - // (This matches the old behavior where Phase 1 would merge them all) - highSimClusters.push(cluster); - } else { - mediumSimClusters.push(cluster); - } - } - - // Part 1a: Vector merge for high-similarity clusters (≥0.95) - result.dedup.clustersFound = 
highSimClusters.length; - - for (const cluster of highSimClusters) { - if (abortSignal?.aborted) break; - - const { deletedCount } = await db.mergeMemoryCluster( - cluster.memoryIds, - cluster.importances, - ); - result.dedup.memoriesMerged += deletedCount; - onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} → 1 (vector)`); - } - - logger.info( - `memory-neo4j: [sleep] Phase 1a (vector) complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`, - ); - - // Part 1b: Semantic dedup for medium-similarity clusters (0.75-0.95) - if (skipSemanticDedup) { - onPhaseStart?.("semanticDedup"); - logger.info("memory-neo4j: [sleep] Phase 1b: Skipped (--skip-semantic)"); - onProgress?.("semanticDedup", "Skipped — semantic dedup disabled"); - } else { - onPhaseStart?.("semanticDedup"); - logger.info("memory-neo4j: [sleep] Phase 1b: Semantic Deduplication (0.75-0.95 band)"); - - // Collect all candidate pairs upfront (with pairwise similarity for pre-screening) - type DedupPair = { - textA: string; - textB: string; - idA: string; - idB: string; - importanceA: number; - importanceB: number; - similarity?: number; - }; - const allPairs: DedupPair[] = []; - - for (const cluster of mediumSimClusters) { - if (cluster.memoryIds.length < 2) continue; - for (let i = 0; i < cluster.memoryIds.length - 1; i++) { - for (let j = i + 1; j < cluster.memoryIds.length; j++) { - const pairKey = makePairKey(cluster.memoryIds[i], cluster.memoryIds[j]); - allPairs.push({ - textA: cluster.texts[i], - textB: cluster.texts[j], - idA: cluster.memoryIds[i], - idB: cluster.memoryIds[j], - importanceA: cluster.importances[i], - importanceB: cluster.importances[j], - similarity: cluster.similarities?.get(pairKey), - }); - } - } - } - - // Cap the number of LLM-checked pairs to prevent sleep cycle timeouts. - // Sort by similarity descending so higher-similarity pairs (more likely - // to be duplicates) are checked first. 
- if (allPairs.length > maxSemanticDedupPairs) { - allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0)); - const skipped = allPairs.length - maxSemanticDedupPairs; - allPairs.length = maxSemanticDedupPairs; - onProgress?.( - "semanticDedup", - `Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`, - ); - logger.info( - `memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`, - ); - } - - // Process pairs in concurrent batches - const invalidatedIds = new Set(); - - for (let i = 0; i < allPairs.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) { - const batch = allPairs.slice(i, i + LLM_CONCURRENCY); - - // Filter out pairs where one side was already invalidated - const activeBatch = batch.filter( - (p) => !invalidatedIds.has(p.idA) && !invalidatedIds.has(p.idB), - ); - - if (activeBatch.length === 0) continue; - - const outcomes = await Promise.allSettled( - activeBatch.map((p) => - isSemanticDuplicate(p.textA, p.textB, config, p.similarity, abortSignal), - ), - ); - - for (let k = 0; k < outcomes.length; k++) { - const pair = activeBatch[k]; - result.semanticDedup.pairsChecked++; - - if ( - outcomes[k].status === "fulfilled" && - (outcomes[k] as PromiseFulfilledResult).value - ) { - // Skip if either side was invalidated by an earlier result in this batch - if (invalidatedIds.has(pair.idA) || invalidatedIds.has(pair.idB)) continue; - - const keepId = pair.importanceA >= pair.importanceB ? pair.idA : pair.idB; - const removeId = keepId === pair.idA ? pair.idB : pair.idA; - const keepText = keepId === pair.idA ? pair.textA : pair.textB; - const removeText = removeId === pair.idA ? pair.textA : pair.textB; - - await db.invalidateMemory(removeId); - invalidatedIds.add(removeId); - result.semanticDedup.duplicatesMerged++; - - onProgress?.( - "semanticDedup", - `Merged: "${removeText.slice(0, 50)}..." 
→ kept "${keepText.slice(0, 50)}..."`, - ); - } - } - } - - logger.info( - `memory-neo4j: [sleep] Phase 1b (semantic) complete — ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`, - ); - } // close skipSemanticDedup else - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`); - } - } - - // -------------------------------------------------------------------------- - // Phase 1c: Conflict Detection (formerly Phase 1b) - // -------------------------------------------------------------------------- - if (!abortSignal?.aborted && !skipSemanticDedup) { - onPhaseStart?.("conflict"); - logger.info("memory-neo4j: [sleep] Phase 1c: Conflict Detection"); - - try { - const pairs = await db.findConflictingMemories(agentId); - result.conflict.pairsFound = pairs.length; - - // Process conflict pairs in parallel chunks of LLM_CONCURRENCY - for (let i = 0; i < pairs.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) { - const chunk = pairs.slice(i, i + LLM_CONCURRENCY); - const outcomes = await Promise.allSettled( - chunk.map((pair) => - resolveConflict(pair.memoryA.text, pair.memoryB.text, config, abortSignal), - ), - ); - - for (let k = 0; k < outcomes.length; k++) { - if (abortSignal?.aborted) break; - const pair = chunk[k]; - const outcome = outcomes[k]; - if (outcome.status !== "fulfilled") continue; - - const decision = outcome.value; - if (decision === "a") { - await db.invalidateMemory(pair.memoryB.id); - result.conflict.invalidated++; - result.conflict.resolved++; - onProgress?.( - "conflict", - `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`, - ); - } else if (decision === "b") { - await db.invalidateMemory(pair.memoryA.id); - result.conflict.invalidated++; - result.conflict.resolved++; - onProgress?.( - "conflict", - `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`, - ); - } else if (decision === "both") { - result.conflict.resolved++; - 
onProgress?.("conflict", `Kept both: no real conflict`); - } - // "skip" = LLM unavailable, don't count as resolved - } - } - - logger.info( - `memory-neo4j: [sleep] Phase 1c complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 1c error: ${String(err)}`); - } - } - - // -------------------------------------------------------------------------- - // Phase 2: Pareto Scoring & Threshold Calculation - // -------------------------------------------------------------------------- - let paretoThreshold = 0; - let allScores: Awaited> = []; - if (!abortSignal?.aborted) { - onPhaseStart?.("pareto"); - logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring"); - - try { - allScores = await db.calculateAllEffectiveScores(agentId); - result.pareto.totalMemories = allScores.length; - result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length; - result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length; - - // Calculate the threshold for top N% (default: top 20%) - paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile); - result.pareto.threshold = paretoThreshold; - - onProgress?.( - "pareto", - `Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`, - ); - onProgress?.( - "pareto", - `Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`, - ); - - logger.info( - `memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`); - } - } - - // -------------------------------------------------------------------------- - // Phase 3: Core Promotion (using pre-computed scores from Phase 2) - // - // Design note on staleness: The 
effective scores and Pareto threshold were - // computed in Phase 2 and may be slightly stale by the time Phases 3/4 run. - // This is acceptable because: (a) the sleep cycle is a background maintenance - // task that runs infrequently (not concurrent with itself), (b) the scoring - // formula is deterministic based on stored properties that change slowly, and - // (c) promotion/demotion are reversible in the next cycle. The alternative - // (re-querying scores per phase) adds latency without meaningful accuracy gain. - // -------------------------------------------------------------------------- - if (!abortSignal?.aborted && paretoThreshold > 0) { - onPhaseStart?.("promotion"); - logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion"); - - try { - const candidates = allScores.filter( - (s) => - s.category !== "core" && - s.effectiveScore >= paretoThreshold && - s.ageDays >= promotionMinAgeDays, - ); - result.promotion.candidatesFound = candidates.length; - - if (candidates.length > 0) { - const ids = candidates.map((m) => m.id); - result.promotion.promoted = await db.promoteToCore(ids); - for (const c of candidates) { - onProgress?.( - "promotion", - `Promoted "${c.text.slice(0, 40)}..." 
(score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`, - ); - } - } - - logger.info( - `memory-neo4j: [sleep] Phase 3 complete — ${result.promotion.promoted} memories promoted to core`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`); - } - } - - // -------------------------------------------------------------------------- - // Phase 4: Core Demotion (using pre-computed scores from Phase 2) - // -------------------------------------------------------------------------- - if (!abortSignal?.aborted && paretoThreshold > 0) { - onPhaseStart?.("demotion"); - logger.info("memory-neo4j: [sleep] Phase 4: Core Demotion"); - - try { - const candidates = allScores.filter( - (s) => s.category === "core" && s.effectiveScore < paretoThreshold, - ); - result.demotion.candidatesFound = candidates.length; - - if (candidates.length > 0) { - const ids = candidates.map((m) => m.id); - result.demotion.demoted = await db.demoteFromCore(ids); - for (const c of candidates) { - onProgress?.( - "demotion", - `Demoted "${c.text.slice(0, 40)}..." 
(score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`, - ); - } - } - - logger.info( - `memory-neo4j: [sleep] Phase 4 complete — ${result.demotion.demoted} memories demoted from core`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`); - } - } - - // -------------------------------------------------------------------------- - // Phase 5: Entity Extraction (moved before decay so new memories get - // extracted before pruning can remove them) - // -------------------------------------------------------------------------- - // Extraction uses LLM_CONCURRENCY (defined above, matches OLLAMA_NUM_PARALLEL) - if (!abortSignal?.aborted && config.enabled) { - onPhaseStart?.("extraction"); - logger.info("memory-neo4j: [sleep] Phase 5: Entity Extraction"); - - try { - // Get initial count - const counts = await db.countByExtractionStatus(agentId); - result.extraction.total = counts.pending; - - if (result.extraction.total > 0) { - let hasMore = true; - while (hasMore && !abortSignal?.aborted) { - const pending = await db.listPendingExtractions(extractionBatchSize, agentId); - - if (pending.length === 0) { - hasMore = false; - break; - } - - // Process in parallel chunks of LLM_CONCURRENCY - for (let i = 0; i < pending.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) { - const chunk = pending.slice(i, i + LLM_CONCURRENCY); - const outcomes = await Promise.allSettled( - chunk.map((memory) => - runBackgroundExtraction( - memory.id, - memory.text, - db, - embeddings, - config, - logger, - memory.extractionRetries, - abortSignal, - ), - ), - ); - - for (const outcome of outcomes) { - result.extraction.processed++; - if (outcome.status === "fulfilled" && outcome.value.success) { - result.extraction.succeeded++; - } else { - result.extraction.failed++; - } - } - - if (result.extraction.processed % 10 === 0 || i + LLM_CONCURRENCY >= pending.length) { - onProgress?.( - "extraction", - 
`${result.extraction.processed}/${result.extraction.total} processed`, - ); - } - } - - // Delay between batches (abort-aware) - if (hasMore && !abortSignal?.aborted) { - await new Promise((resolve) => { - const timer = setTimeout(resolve, extractionDelayMs); - // If abort fires during delay, resolve immediately - abortSignal?.addEventListener( - "abort", - () => { - clearTimeout(timer); - resolve(); - }, - { once: true }, - ); - }); - } - } - } - - logger.info( - `memory-neo4j: [sleep] Phase 5 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`); - } - } else if (!config.enabled) { - logger.info("memory-neo4j: [sleep] Phase 5 skipped — extraction not enabled"); - } - - // -------------------------------------------------------------------------- - // Phase 6: Decay & Pruning (after extraction so freshly extracted memories - // aren't pruned before they build entity connections) - // -------------------------------------------------------------------------- - if (!abortSignal?.aborted) { - onPhaseStart?.("decay"); - logger.info("memory-neo4j: [sleep] Phase 6: Decay & Pruning"); - - try { - const decayed = await db.findDecayedMemories({ - retentionThreshold: decayRetentionThreshold, - baseHalfLifeDays: decayBaseHalfLifeDays, - importanceMultiplier: decayImportanceMultiplier, - decayCurves, - agentId, - }); - - if (decayed.length > 0) { - const ids = decayed.map((m) => m.id); - result.decay.memoriesPruned = await db.pruneMemories(ids); - onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`); - } - - logger.info( - `memory-neo4j: [sleep] Phase 6 complete — ${result.decay.memoriesPruned} memories pruned`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`); - } - } - - // -------------------------------------------------------------------------- - // Phase 7: Orphan Cleanup - 
// -------------------------------------------------------------------------- - if (!abortSignal?.aborted) { - onPhaseStart?.("cleanup"); - logger.info("memory-neo4j: [sleep] Phase 7: Orphan Cleanup"); - - try { - // Clean up orphan entities - if (!abortSignal?.aborted) { - const orphanEntities = await db.findOrphanEntities(); - if (orphanEntities.length > 0) { - result.cleanup.entitiesRemoved = await db.deleteOrphanEntities( - orphanEntities.map((e) => e.id), - ); - onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`); - } - } - - // Clean up orphan tags - if (!abortSignal?.aborted) { - const orphanTags = await db.findOrphanTags(); - if (orphanTags.length > 0) { - result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id)); - onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`); - } - } - - logger.info( - `memory-neo4j: [sleep] Phase 7 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`, - ); - } catch (err) { - logger.warn(`memory-neo4j: [sleep] Phase 7 error: ${String(err)}`); - } - } - - result.durationMs = Date.now() - startTime; - result.aborted = abortSignal?.aborted ?? false; - - logger.info( - `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` + - (result.aborted ? 
" (aborted)" : ""), - ); - - return result; -} - -// ============================================================================ -// Message Extraction (re-exported from message-utils.ts) -// ============================================================================ - -export { - extractUserMessages, - extractAssistantMessages, - stripMessageWrappers, - stripAssistantWrappers, -} from "./message-utils.js"; - // ============================================================================ // LLM-Judged Importance Rating // ============================================================================ diff --git a/extensions/memory-neo4j/index.ts b/extensions/memory-neo4j/index.ts index 5d533a068b9..c89c2e084db 100644 --- a/extensions/memory-neo4j/index.ts +++ b/extensions/memory-neo4j/index.ts @@ -16,8 +16,9 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk"; import { Type } from "@sinclair/typebox"; import { randomUUID } from "node:crypto"; import { stringEnum } from "openclaw/plugin-sdk"; -import type { MemoryCategory, MemorySource } from "./schema.js"; +import type { Logger, MemoryCategory, MemorySource } from "./schema.js"; import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js"; +import { registerCli } from "./cli.js"; import { DEFAULT_EMBEDDING_DIMS, EMBEDDING_DIMENSIONS, @@ -27,14 +28,8 @@ import { vectorDimsForModel, } from "./config.js"; import { Embeddings } from "./embeddings.js"; -import { - extractUserMessages, - extractAssistantMessages, - stripMessageWrappers, - runSleepCycle, - isSemanticDuplicate, - rateImportance, -} from "./extractor.js"; +import { isSemanticDuplicate, rateImportance } from "./extractor.js"; +import { extractUserMessages, extractAssistantMessages } from "./message-utils.js"; import { Neo4jMemoryClient } from "./neo4j-client.js"; import { hybridSearch } from "./search.js"; @@ -127,7 +122,7 @@ const memoryNeo4jPlugin = { limit, agentId, extractionConfig.enabled, - { graphSearchDepth: 
cfg.graphSearchDepth }, + { graphSearchDepth: cfg.graphSearchDepth, logger: api.logger }, ); if (results.length === 0) { @@ -216,17 +211,21 @@ const memoryNeo4jPlugin = { } // 3. Store memory immediately (fast path) + // User-stored core memories get pinned: importance locked at 1.0, + // immune from decay, scoring recalculation, and pruning. + const isUserPinnedCore = category === "core"; const memoryId = randomUUID(); await db.storeMemory({ id: memoryId, text, embedding: vector, - importance: Math.min(1, Math.max(0, importance)), + importance: isUserPinnedCore ? 1.0 : Math.min(1, Math.max(0, importance)), category, source: "user" as MemorySource, extractionStatus: extractionConfig.enabled ? "pending" : "skipped", agentId, sessionKey, + userPinned: isUserPinnedCore, }); // 4. Extraction is deferred to sleep cycle (like human memory consolidation) @@ -352,492 +351,10 @@ const memoryNeo4jPlugin = { ); // ======================================================================== - // CLI Commands + // CLI Commands (delegated to cli.ts) // ======================================================================== - api.registerCli( - ({ program }) => { - // Find existing memory command or create fallback - let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory"); - if (!memoryCmd) { - // Fallback if core memory CLI not registered yet - memoryCmd = program.command("memory").description("Memory commands"); - } - - // Add neo4j memory subcommand group - const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands"); - - memory - .command("list") - .description("List memory counts by agent and category") - .option("--json", "Output as JSON") - .action(async (opts: { json?: boolean }) => { - try { - await db.ensureInitialized(); - const stats = await db.getMemoryStats(); - - if (opts.json) { - console.log(JSON.stringify(stats, null, 2)); - return; - } - - if (stats.length === 0) { - console.log("No memories stored."); - return; - } - - // 
Group by agentId - const byAgent = new Map< - string, - Array<{ category: string; count: number; avgImportance: number }> - >(); - for (const row of stats) { - const list = byAgent.get(row.agentId) || []; - list.push({ - category: row.category, - count: row.count, - avgImportance: row.avgImportance, - }); - byAgent.set(row.agentId, list); - } - - // Print table for each agent - for (const [agentId, categories] of byAgent) { - const total = categories.reduce((sum, c) => sum + c.count, 0); - console.log(`\n┌─ ${agentId} (${total} total)`); - console.log("│"); - console.log("│ Category Count Avg Importance"); - console.log("│ ─────────────────────────────────────"); - for (const { category, count, avgImportance } of categories) { - const cat = category.padEnd(12); - const cnt = String(count).padStart(5); - const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%"; - console.log(`│ ${cat} ${cnt} ${imp}`); - } - console.log("└"); - } - console.log(""); - } catch (err) { - console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); - process.exitCode = 1; - } - }); - - memory - .command("search") - .description("Search memories") - .argument("", "Search query") - .option("--limit ", "Max results", "5") - .option("--agent ", "Agent id (default: default)") - .action(async (query: string, opts: { limit: string; agent?: string }) => { - try { - const results = await hybridSearch( - db, - embeddings, - query, - parseInt(opts.limit, 10), - opts.agent ?? "default", - extractionConfig.enabled, - { graphSearchDepth: cfg.graphSearchDepth }, - ); - const output = results.map((r) => ({ - id: r.id, - text: r.text, - category: r.category, - importance: r.importance, - score: r.score, - })); - console.log(JSON.stringify(output, null, 2)); - } catch (err) { - console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`); - process.exitCode = 1; - } - }); - - memory - .command("stats") - .description("Show memory statistics and configuration") - .action(async () => { - try { - await db.ensureInitialized(); - const stats = await db.getMemoryStats(); - const total = stats.reduce((sum, s) => sum + s.count, 0); - - console.log("\nMemory (Neo4j) Statistics"); - console.log("─────────────────────────"); - console.log(`Total memories: ${total}`); - console.log(`Neo4j URI: ${cfg.neo4j.uri}`); - console.log(`Embedding: ${cfg.embedding.provider}/${cfg.embedding.model}`); - console.log( - `Extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`, - ); - console.log(`Auto-capture: ${cfg.autoCapture ? "enabled" : "disabled"}`); - console.log(`Auto-recall: ${cfg.autoRecall ? "enabled" : "disabled"}`); - console.log(`Core memory: ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`); - - if (stats.length > 0) { - // Group by category across all agents - const byCategory = new Map(); - for (const row of stats) { - byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count); - } - console.log("\nBy Category:"); - for (const [category, count] of byCategory) { - console.log(` ${category.padEnd(12)} ${count}`); - } - - // Show agent count - const agents = new Set(stats.map((s) => s.agentId)); - console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`); - } - console.log(""); - } catch (err) { - console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`); - process.exitCode = 1; - } - }); - - memory - .command("sleep") - .description( - "Run sleep cycle — consolidate memories with Pareto-based promotion/demotion", - ) - .option("--agent ", "Agent id (default: all agents)") - .option("--dedup-threshold ", "Vector similarity threshold for dedup (default: 0.95)") - .option("--pareto ", "Top N% for core memory (default: 0.2 = top 20%)") - .option("--promotion-min-age ", "Min age in days before promotion (default: 7)") - .option("--decay-threshold ", "Decay score threshold for pruning (default: 0.1)") - .option("--decay-half-life ", "Base half-life in days (default: 30)") - .option("--batch-size ", "Extraction batch size (default: 50)") - .option("--delay ", "Delay between extraction batches in ms (default: 1000)") - .option("--max-semantic-pairs ", "Max LLM-checked semantic dedup pairs (default: 500)") - .option( - "--concurrency ", - "Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)", - ) - .option( - "--skip-semantic", - "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)", - ) - .action( - async (opts: { - agent?: string; - dedupThreshold?: string; - pareto?: string; - promotionMinAge?: string; - decayThreshold?: string; - decayHalfLife?: string; - batchSize?: string; - delay?: string; - maxSemanticPairs?: string; - concurrency?: string; - skipSemantic?: boolean; - }) => { - console.log("\n🌙 Memory Sleep Cycle"); - console.log("═════════════════════════════════════════════════════════════"); - console.log("Seven-phase memory consolidation (Pareto-based):\n"); - console.log(" Phase 1: Deduplication — Merge near-duplicate memories"); - console.log( - " Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.75–0.95 band)", - ); - console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories"); - console.log( - " Phase 2: Pareto Scoring — Calculate effective scores for all memories", - ); - console.log(" Phase 3: Core Promotion — Regular 
memories above threshold → core"); - console.log(" Phase 4: Core Demotion — Core memories below threshold → regular"); - console.log(" Phase 5: Extraction — Extract entities and categorize"); - console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories"); - console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n"); - - try { - // Validate sleep cycle CLI parameters before running - const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : undefined; - const delay = opts.delay ? parseInt(opts.delay, 10) : undefined; - const decayHalfLife = opts.decayHalfLife - ? parseInt(opts.decayHalfLife, 10) - : undefined; - const decayThreshold = opts.decayThreshold - ? parseFloat(opts.decayThreshold) - : undefined; - const pareto = opts.pareto ? parseFloat(opts.pareto) : undefined; - const promotionMinAge = opts.promotionMinAge - ? parseInt(opts.promotionMinAge, 10) - : undefined; - - if (batchSize != null && (Number.isNaN(batchSize) || batchSize <= 0)) { - console.error("Error: --batch-size must be greater than 0"); - process.exitCode = 1; - return; - } - if (delay != null && (Number.isNaN(delay) || delay < 0)) { - console.error("Error: --delay must be >= 0"); - process.exitCode = 1; - return; - } - if (decayHalfLife != null && (Number.isNaN(decayHalfLife) || decayHalfLife <= 0)) { - console.error("Error: --decay-half-life must be greater than 0"); - process.exitCode = 1; - return; - } - if ( - decayThreshold != null && - (Number.isNaN(decayThreshold) || decayThreshold < 0 || decayThreshold > 1) - ) { - console.error("Error: --decay-threshold must be between 0 and 1"); - process.exitCode = 1; - return; - } - if (pareto != null && (Number.isNaN(pareto) || pareto < 0 || pareto > 1)) { - console.error("Error: --pareto must be between 0 and 1"); - process.exitCode = 1; - return; - } - if ( - promotionMinAge != null && - (Number.isNaN(promotionMinAge) || promotionMinAge < 0) - ) { - console.error("Error: --promotion-min-age must be >= 0"); - 
process.exitCode = 1; - return; - } - - const maxSemanticPairs = opts.maxSemanticPairs - ? parseInt(opts.maxSemanticPairs, 10) - : undefined; - if ( - maxSemanticPairs != null && - (Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0) - ) { - console.error("Error: --max-semantic-pairs must be greater than 0"); - process.exitCode = 1; - return; - } - - const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined; - if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) { - console.error("Error: --concurrency must be greater than 0"); - process.exitCode = 1; - return; - } - - await db.ensureInitialized(); - - const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, { - agentId: opts.agent, - dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined, - skipSemanticDedup: opts.skipSemantic === true, - maxSemanticDedupPairs: maxSemanticPairs, - llmConcurrency: concurrency, - paretoPercentile: pareto, - promotionMinAgeDays: promotionMinAge, - decayRetentionThreshold: decayThreshold, - decayBaseHalfLifeDays: decayHalfLife, - decayCurves: - Object.keys(cfg.decayCurves).length > 0 ? 
cfg.decayCurves : undefined, - extractionBatchSize: batchSize, - extractionDelayMs: delay, - onPhaseStart: (phase) => { - const phaseNames: Record = { - dedup: "Phase 1: Deduplication", - semanticDedup: "Phase 1b: Semantic Deduplication", - conflict: "Phase 1c: Conflict Detection", - pareto: "Phase 2: Pareto Scoring", - promotion: "Phase 3: Core Promotion", - demotion: "Phase 4: Core Demotion", - extraction: "Phase 5: Extraction", - decay: "Phase 6: Decay & Pruning", - cleanup: "Phase 7: Orphan Cleanup", - }; - console.log(`\n▶ ${phaseNames[phase]}`); - console.log("─────────────────────────────────────────────────────────────"); - }, - onProgress: (_phase, message) => { - console.log(` ${message}`); - }, - }); - - console.log("\n═════════════════════════════════════════════════════════════"); - console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`); - console.log("─────────────────────────────────────────────────────────────"); - console.log( - ` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`, - ); - console.log( - ` Conflicts: ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`, - ); - console.log( - ` Semantic Dedup: ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`, - ); - console.log( - ` Pareto: ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`, - ); - console.log( - ` Threshold: ${result.pareto.threshold.toFixed(4)} (top 20%)`, - ); - console.log( - ` Promotion: ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`, - ); - console.log( - ` Demotion: ${result.demotion.demoted}/${result.demotion.candidatesFound} demoted from core`, - ); - console.log(` Decay/Pruning: ${result.decay.memoriesPruned} memories pruned`); - console.log( - ` Extraction: 
${result.extraction.succeeded}/${result.extraction.total} extracted` + - (result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""), - ); - console.log( - ` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`, - ); - if (result.aborted) { - console.log("\n⚠️ Sleep cycle was aborted before completion."); - } - console.log(""); - } catch (err) { - console.error( - `\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`, - ); - process.exitCode = 1; - } - }, - ); - - memory - .command("promote") - .description("Manually promote a memory to core status") - .argument("", "Memory ID to promote") - .action(async (id: string) => { - try { - await db.ensureInitialized(); - const promoted = await db.promoteToCore([id]); - if (promoted > 0) { - console.log(`✅ Memory ${id} promoted to core.`); - } else { - console.log(`❌ Memory ${id} not found.`); - process.exitCode = 1; - } - } catch (err) { - console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); - process.exitCode = 1; - } - }); - - memory - .command("index") - .description( - "Re-embed all memories and entities — use after changing embedding model/provider", - ) - .option("--batch-size ", "Embedding batch size (default: 50)") - .action(async (opts: { batchSize?: string }) => { - const batchSize = opts.batchSize ? 
parseInt(opts.batchSize, 10) : 50; - if (Number.isNaN(batchSize) || batchSize <= 0) { - console.error("Error: --batch-size must be greater than 0"); - process.exitCode = 1; - return; - } - - console.log("\nMemory Neo4j — Reindex Embeddings"); - console.log("═════════════════════════════════════════════════════════════"); - console.log(`Model: ${cfg.embedding.provider}/${cfg.embedding.model}`); - console.log(`Dimensions: ${vectorDim}`); - console.log(`Batch size: ${batchSize}\n`); - - try { - const startedAt = Date.now(); - const result = await db.reindex((texts) => embeddings.embedBatch(texts), { - batchSize, - onProgress: (phase, done, total) => { - if (phase === "drop-indexes" && done === 0) { - console.log("▶ Dropping old vector index…"); - } else if (phase === "memories") { - console.log(` Memories: ${done}/${total}`); - } else if (phase === "create-indexes" && done === 0) { - console.log("▶ Recreating vector index…"); - } - }, - }); - - const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1); - console.log("\n═════════════════════════════════════════════════════════════"); - console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`); - console.log(""); - } catch (err) { - console.error( - `\n❌ Reindex failed: ${err instanceof Error ? 
err.message : String(err)}`, - ); - process.exitCode = 1; - } - }); - - memory - .command("cleanup") - .description( - "Retroactively apply the attention gate — find and remove low-substance memories", - ) - .option("--execute", "Actually delete (default: dry-run preview)") - .option("--all", "Include explicitly-stored memories (default: auto-capture only)") - .option("--agent ", "Only clean up memories for a specific agent") - .action(async (opts: { execute?: boolean; all?: boolean; agent?: string }) => { - try { - await db.ensureInitialized(); - - // Fetch memories — by default only auto-capture (explicit stores are trusted) - const conditions: string[] = []; - if (!opts.all) { - conditions.push("m.source = 'auto-capture'"); - } - if (opts.agent) { - conditions.push("m.agentId = $agentId"); - } - const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : ""; - const allMemories = await db.runQuery<{ id: string; text: string; source: string }>( - `MATCH (m:Memory) ${where} - RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source - ORDER BY m.createdAt ASC`, - opts.agent ? { agentId: opts.agent } : {}, - ); - - // Strip channel metadata wrappers (same as the real pipeline) then gate - const noise: Array<{ id: string; text: string; source: string }> = []; - for (const mem of allMemories) { - const stripped = stripMessageWrappers(mem.text); - if (!passesAttentionGate(stripped)) { - noise.push(mem); - } - } - - if (noise.length === 0) { - console.log("\nNo low-substance memories found. Everything passes the gate."); - return; - } - - console.log( - `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`, - ); - - for (const mem of noise) { - const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text; - console.log(` [${mem.source}] "${preview}"`); - } - - if (!opts.execute) { - console.log( - `\nDry run — ${noise.length} memories would be removed. 
Re-run with --execute to delete.\n`, - ); - return; - } - - // Delete in batch - const deleted = await db.pruneMemories(noise.map((m) => m.id)); - console.log(`\nDeleted ${deleted} low-substance memories.\n`); - } catch (err) { - console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); - process.exitCode = 1; - } - }); - }, - { commands: [] }, // Adds subcommands to existing "memory" command, no conflict - ); + registerCli(api, { db, embeddings, cfg, extractionConfig, vectorDim }); // ======================================================================== // Lifecycle Hooks @@ -952,8 +469,9 @@ const memoryNeo4jPlugin = { } try { + const t0 = performance.now(); const maxEntries = cfg.coreMemory.maxEntries; - const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId); + const coreMemories = await db.listCoreForInjection(maxEntries, agentId); if (coreMemories.length === 0) { return; @@ -964,8 +482,9 @@ const memoryNeo4jPlugin = { touchSession(sessionKey); const content = coreMemories.map((m) => `- ${m.text}`).join("\n"); + const totalMs = performance.now() - t0; api.logger.info?.( - `memory-neo4j: mid-session core refresh at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`, + `memory-neo4j: [bench] core-refresh ${totalMs.toFixed(0)}ms at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`, ); return { @@ -1009,6 +528,7 @@ const memoryNeo4jPlugin = { : event.prompt; try { + const t0 = performance.now(); let results = await hybridSearch( db, embeddings, @@ -1016,8 +536,9 @@ const memoryNeo4jPlugin = { 3, agentId, extractionConfig.enabled, - { graphSearchDepth: cfg.graphSearchDepth }, + { graphSearchDepth: cfg.graphSearchDepth, logger: api.logger }, ); + const tSearch = performance.now(); // Feature 1: Filter out low-relevance results below min RRF score results = results.filter((r) => r.score >= cfg.autoRecallMinScore); @@ -1029,13 +550,17 @@ const memoryNeo4jPlugin = { results = 
results.filter((r) => !coreIds.has(r.id)); } + const totalMs = performance.now() - t0; + api.logger.info?.( + `memory-neo4j: [bench] auto-recall ${totalMs.toFixed(0)}ms total (search=${(tSearch - t0).toFixed(0)}ms), ${results.length} results`, + ); + if (results.length === 0) { return; } const memoryContext = results.map((r) => `- [${r.category}] ${r.text}`).join("\n"); - api.logger.info?.(`memory-neo4j: injecting ${results.length} memories into context`); api.logger.debug?.( `memory-neo4j: auto-recall memories: ${JSON.stringify(results.map((r) => ({ id: r.id, text: r.text.slice(0, 80), category: r.category, score: r.score })))}`, ); @@ -1075,23 +600,25 @@ const memoryNeo4jPlugin = { } try { + const t0 = performance.now(); const agentId = ctx.agentId || "default"; const maxEntries = cfg.coreMemory.maxEntries; api.logger.debug?.( `memory-neo4j: loading core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`, ); - // Core memories are always included (no importance filter) - if marked as core, it's important - // Results are ordered by importance desc, so most important come first up to maxEntries - const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId); + // All user-pinned core memories are always included (no limit). + // Non-pinned core memories fill remaining slots up to maxEntries, ordered by importance. 
+ const coreMemories = await db.listCoreForInjection(maxEntries, agentId); + const tQuery = performance.now(); if (coreMemories.length === 0) { if (sessionKey) { bootstrappedSessions.add(sessionKey); touchSession(sessionKey); } - api.logger.debug?.( - `memory-neo4j: no core memories found for agent=${agentId}, marking session as bootstrapped`, + api.logger.info?.( + `memory-neo4j: [bench] core-inject ${(tQuery - t0).toFixed(0)}ms (0 memories, skipped)`, ); return; } @@ -1128,9 +655,10 @@ const memoryNeo4jPlugin = { coreMemoryIdsBySession.set(sessionKey, new Set(coreMemories.map((m) => m.id))); touchSession(sessionKey); } - // Log at info level when actually injecting, debug for skips + + const totalMs = performance.now() - t0; api.logger.info?.( - `memory-neo4j: ${action} MEMORY.md with ${coreMemories.length} core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`, + `memory-neo4j: [bench] core-inject ${totalMs.toFixed(0)}ms (query=${(tQuery - t0).toFixed(0)}ms), ${action} MEMORY.md with ${coreMemories.length} memories`, ); return { files }; @@ -1152,7 +680,7 @@ const memoryNeo4jPlugin = { // // Phase 3 — Sleep consolidation (deferred to `openclaw memory neo4j sleep`): // The sleep cycle handles entity extraction, categorization, Pareto - // scoring, promotion/demotion, and decay — mirroring hippocampal replay. + // scoring, promotion, and decay — mirroring hippocampal replay. api.logger.debug?.( `memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`, ); @@ -1228,12 +756,6 @@ const memoryNeo4jPlugin = { // Auto-capture pipeline (fire-and-forget from agent_end hook) // ============================================================================ -type AutoCaptureLogger = { - info: (msg: string) => void; - warn: (msg: string) => void; - debug?: (msg: string) => void; -}; - /** * Shared capture logic for both user and assistant messages. * Extracts the common embed → dedup → rate → store pipeline. 
@@ -1248,7 +770,8 @@ async function captureMessage( db: import("./neo4j-client.js").Neo4jMemoryClient, embeddings: import("./embeddings.js").Embeddings, extractionConfig: import("./config.js").ExtractionConfig, - logger: AutoCaptureLogger, + logger: Logger, + precomputedVector?: number[], ): Promise<{ stored: boolean; semanticDeduped: boolean }> { // For assistant messages, rate importance first (before embedding) to skip early. // When extraction is disabled, rateImportance returns 0.5 (the fallback), so we @@ -1263,11 +786,14 @@ async function captureMessage( } } - const vector = await embeddings.embed(text); + const vector = precomputedVector ?? (await embeddings.embed(text)); - // Quick dedup (same content already stored — cosine >= 0.95) - const existing = await db.findSimilar(vector, 0.95, 1, agentId); - if (existing.length > 0) { + // Single vector search at lower threshold, split by score band + const candidates = await db.findSimilar(vector, 0.75, 3, agentId); + + // Exact dedup: any candidate with score >= 0.95 means it's a duplicate + const exactDup = candidates.find((c) => c.score >= 0.95); + if (exactDup) { return { stored: false, semanticDeduped: false }; } @@ -1281,10 +807,9 @@ async function captureMessage( } } - // Semantic dedup: check moderate-similarity memories (0.75-0.95) + // Semantic dedup: remaining candidates in 0.75-0.95 band // Pass the vector similarity score as a pre-screen to skip LLM calls // for pairs below SEMANTIC_DEDUP_VECTOR_THRESHOLD. 
- const candidates = await db.findSimilar(vector, 0.75, 3, agentId); if (candidates.length > 0) { for (const candidate of candidates) { if (await isSemanticDuplicate(text, candidate.text, extractionConfig, candidate.score)) { @@ -1321,9 +846,10 @@ async function runAutoCapture( db: import("./neo4j-client.js").Neo4jMemoryClient, embeddings: import("./embeddings.js").Embeddings, extractionConfig: import("./config.js").ExtractionConfig, - logger: AutoCaptureLogger, + logger: Logger, ): Promise { try { + const t0 = performance.now(); let stored = 0; let semanticDeduped = 0; @@ -1331,19 +857,51 @@ async function runAutoCapture( const userMessages = extractUserMessages(messages); const retained = userMessages.filter((text) => passesAttentionGate(text)); + // Process assistant messages + const assistantMessages = extractAssistantMessages(messages); + const retainedAssistant = assistantMessages.filter((text) => + passesAssistantAttentionGate(text), + ); + const tGate = performance.now(); + + // Collect all texts to embed in a single batch + const allTexts: string[] = []; + const allMeta: Array<{ + text: string; + source: "auto-capture" | "auto-capture-assistant"; + threshold: number; + discount: number; + }> = []; + for (const text of retained) { + allTexts.push(text); + allMeta.push({ text, source: "auto-capture", threshold: 0.5, discount: 1.0 }); + } + for (const text of retainedAssistant) { + allTexts.push(text); + allMeta.push({ text, source: "auto-capture-assistant", threshold: 0.8, discount: 0.75 }); + } + + // Batch embed all at once + const vectors = allTexts.length > 0 ? 
await embeddings.embedBatch(allTexts) : []; + const tEmbed = performance.now(); + + // Process each with pre-computed vector + for (let i = 0; i < allMeta.length; i++) { try { + const meta = allMeta[i]; const result = await captureMessage( - text, - "auto-capture", - 0.5, - 1.0, + meta.text, + meta.source, + meta.threshold, + meta.discount, agentId, sessionKey, db, embeddings, extractionConfig, logger, + vectors[i], ); if (result.stored) stored++; if (result.semanticDeduped) semanticDeduped++; @@ -1351,50 +909,23 @@ async function runAutoCapture( logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`); } } + const tProcess = performance.now(); - // Process assistant messages - const assistantMessages = extractAssistantMessages(messages); - const retainedAssistant = assistantMessages.filter((text) => - passesAssistantAttentionGate(text), + const totalMs = tProcess - t0; + const gateMs = tGate - t0; + const embedMs = tEmbed - tGate; + const processMs = tProcess - tEmbed; + logger.info( + `memory-neo4j: [bench] auto-capture ${totalMs.toFixed(0)}ms total (gate=${gateMs.toFixed(0)}ms, embed=${embedMs.toFixed(0)}ms, process=${processMs.toFixed(0)}ms), ` + + `${retained.length}+${retainedAssistant.length} gated, ${stored} stored, ${semanticDeduped} deduped`, ); - - for (const text of retainedAssistant) { - try { - const result = await captureMessage( - text, - "auto-capture-assistant", - 0.8, - 0.75, - agentId, - sessionKey, - db, - embeddings, - extractionConfig, - logger, - ); - if (result.stored) stored++; - if (result.semanticDeduped) semanticDeduped++; - } catch (err) { - logger.debug?.(`memory-neo4j: assistant auto-capture item failed: ${String(err)}`); - } - } - - if (stored > 0 || semanticDeduped > 0) { - logger.info( - `memory-neo4j: auto-captured ${stored} memories (attention-gated)${semanticDeduped > 0 ? 
`, ${semanticDeduped} semantic dupes skipped` : ""}`, - ); - } else if (userMessages.length > 0 || assistantMessages.length > 0) { - logger.info( - `memory-neo4j: auto-capture ran (0 stored, ${userMessages.length} user msgs, ${retained.length} passed gate, ${assistantMessages.length} assistant msgs, ${retainedAssistant.length} passed gate)`, - ); - } } catch (err) { logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`); } } -// Re-export attention gate for backwards compatibility (tests import from here) -export { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js"; +// Export auto-capture internals for testing +export { captureMessage as _captureMessage, runAutoCapture as _runAutoCapture }; // ============================================================================ // Export diff --git a/extensions/memory-neo4j/llm-client.ts b/extensions/memory-neo4j/llm-client.ts new file mode 100644 index 00000000000..3698c96ea6e --- /dev/null +++ b/extensions/memory-neo4j/llm-client.ts @@ -0,0 +1,188 @@ +/** + * OpenRouter/OpenAI-compatible LLM API client for memory-neo4j. + * + * Handles non-streaming and streaming chat completion requests with + * retry logic, timeout handling, and abort signal support. + */ + +import type { ExtractionConfig } from "./config.js"; + +// Timeout for LLM and embedding fetch calls to prevent hanging indefinitely +export const FETCH_TIMEOUT_MS = 30_000; + +/** + * Build a combined abort signal from the caller's signal and a per-request timeout. + */ +function buildSignal(abortSignal?: AbortSignal): AbortSignal { + return abortSignal + ? AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)]) + : AbortSignal.timeout(FETCH_TIMEOUT_MS); +} + +/** + * Shared request/retry logic for OpenRouter API calls. + * Handles signal composition, request building, error handling, and exponential backoff. + * The `parseFn` callback processes the Response differently for streaming vs non-streaming. 
+ */ +async function openRouterRequest( + config: ExtractionConfig, + messages: Array<{ role: string; content: string }>, + abortSignal: AbortSignal | undefined, + stream: boolean, + parseFn: (response: Response, abortSignal?: AbortSignal) => Promise, +): Promise { + for (let attempt = 0; attempt <= config.maxRetries; attempt++) { + try { + const signal = buildSignal(abortSignal); + + const response = await fetch(`${config.baseUrl}/chat/completions`, { + method: "POST", + headers: { + Authorization: `Bearer ${config.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: config.model, + messages, + temperature: config.temperature, + response_format: { type: "json_object" }, + ...(stream ? { stream: true } : {}), + }), + signal, + }); + + if (!response.ok) { + const body = await response.text().catch(() => ""); + throw new Error(`OpenRouter API error ${response.status}: ${body}`); + } + + return await parseFn(response, abortSignal); + } catch (err) { + if (attempt >= config.maxRetries) { + throw err; + } + // Exponential backoff + await new Promise((resolve) => setTimeout(resolve, 500 * 2 ** attempt)); + } + } + return null; +} + +/** + * Parse a non-streaming JSON response. + */ +function parseNonStreaming(response: Response): Promise { + return response.json().then((data: unknown) => { + const typed = data as { + choices?: Array<{ message?: { content?: string } }>; + }; + return typed.choices?.[0]?.message?.content ?? null; + }); +} + +/** + * Parse a streaming SSE response, accumulating chunks into a single string. 
+ */ +async function parseStreaming( + response: Response, + abortSignal?: AbortSignal, +): Promise { + if (!response.body) { + throw new Error("No response body for streaming request"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let accumulated = ""; + let buffer = ""; + + for (;;) { + // Check abort between chunks for responsive cancellation + if (abortSignal?.aborted) { + reader.cancel().catch(() => {}); + return null; + } + + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + + // Parse SSE lines + const lines = buffer.split("\n"); + buffer = lines.pop() ?? ""; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.startsWith("data: ")) continue; + const data = trimmed.slice(6); + if (data === "[DONE]") continue; + + try { + const parsed = JSON.parse(data) as { + choices?: Array<{ delta?: { content?: string } }>; + }; + const chunk = parsed.choices?.[0]?.delta?.content; + if (chunk) { + accumulated += chunk; + } + } catch { + // Skip malformed SSE chunks + } + } + } + + return accumulated || null; +} + +export async function callOpenRouter( + config: ExtractionConfig, + prompt: string | Array<{ role: string; content: string }>, + abortSignal?: AbortSignal, +): Promise { + const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt; + return openRouterRequest(config, messages, abortSignal, false, parseNonStreaming); +} + +/** + * Streaming variant of callOpenRouter. Uses the streaming API to receive chunks + * incrementally, allowing earlier cancellation via abort signal and better + * latency characteristics for long responses. + * + * Accumulates all chunks into a single response string since extraction + * uses JSON mode (which requires the complete object to parse). 
+ */ +export async function callOpenRouterStream( + config: ExtractionConfig, + prompt: string | Array<{ role: string; content: string }>, + abortSignal?: AbortSignal, +): Promise { + const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt; + return openRouterRequest(config, messages, abortSignal, true, parseStreaming); +} + +/** + * Check if an error is transient (network/timeout) vs permanent (JSON parse, etc.) + */ +export function isTransientError(err: unknown): boolean { + if (!(err instanceof Error)) { + return false; + } + const msg = err.message.toLowerCase(); + return ( + err.name === "AbortError" || + err.name === "TimeoutError" || + msg.includes("timeout") || + msg.includes("econnrefused") || + msg.includes("econnreset") || + msg.includes("etimedout") || + msg.includes("enotfound") || + msg.includes("network") || + msg.includes("fetch failed") || + msg.includes("socket hang up") || + msg.includes("api error 429") || + msg.includes("api error 502") || + msg.includes("api error 503") || + msg.includes("api error 504") + ); +} diff --git a/extensions/memory-neo4j/message-utils.ts b/extensions/memory-neo4j/message-utils.ts index 8eac03368a4..7d60693c97f 100644 --- a/extensions/memory-neo4j/message-utils.ts +++ b/extensions/memory-neo4j/message-utils.ts @@ -8,14 +8,18 @@ */ // ============================================================================ -// User Message Extraction +// Core Extraction // ============================================================================ /** - * Extract user message texts from the event.messages array. - * Handles both string content and content block arrays. + * Extract text blocks from messages with a given role, apply a strip function, + * and filter out short results. Handles both string content and content block arrays. 
*/ -export function extractUserMessages(messages: unknown[]): string[] { +function extractMessagesByRole( + messages: unknown[], + role: string, + stripFn: (text: string) => string, +): string[] { const texts: string[] = []; for (const msg of messages) { @@ -24,8 +28,7 @@ export function extractUserMessages(messages: unknown[]): string[] { } const msgObj = msg as Record; - // Only process user messages for auto-capture - if (msgObj.role !== "user") { + if (msgObj.role !== role) { continue; } @@ -51,8 +54,18 @@ export function extractUserMessages(messages: unknown[]): string[] { } } - // Strip wrappers then filter by length - return texts.map(stripMessageWrappers).filter((t) => t.length >= 10); + return texts.map(stripFn).filter((t) => t.length >= 10); +} + +// ============================================================================ +// User Message Extraction +// ============================================================================ + +/** + * Extract user message texts from the event.messages array. + */ +export function extractUserMessages(messages: unknown[]): string[] { + return extractMessagesByRole(messages, "user", stripMessageWrappers); } /** @@ -84,9 +97,7 @@ export function stripMessageWrappers(text: string): string { s = s.replace(/---\s*Queued #\d+\s*/g, ""); // Telegram wrapper — may now be at start after previous strips s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, ""); - // "[message_id: NNN]" suffix (Telegram) - s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, ""); - // "[message_id: UUID]" suffix (non-numeric Telegram/channel IDs) + // "[message_id: ...]" suffix (Telegram and other channel IDs) s = s.replace(/\n?\[message_id:\s*[^\]]+\]\s*$/i, ""); // Slack wrapper — "[Slack #channel @user] MESSAGE [slack message id: ...]" s = s.replace(/^\s*\[Slack\s[^\]]+\]\s*/i, ""); @@ -118,42 +129,7 @@ export function stripAssistantWrappers(text: string): string { /** * Extract assistant message texts from the event.messages array. 
- * Handles both string content and content block arrays. */ export function extractAssistantMessages(messages: unknown[]): string[] { - const texts: string[] = []; - - for (const msg of messages) { - if (!msg || typeof msg !== "object") { - continue; - } - const msgObj = msg as Record; - - if (msgObj.role !== "assistant") { - continue; - } - - const content = msgObj.content; - if (typeof content === "string") { - texts.push(content); - continue; - } - - if (Array.isArray(content)) { - for (const block of content) { - if ( - block && - typeof block === "object" && - "type" in block && - (block as Record).type === "text" && - "text" in block && - typeof (block as Record).text === "string" - ) { - texts.push((block as Record).text as string); - } - } - } - } - - return texts.map(stripAssistantWrappers).filter((t) => t.length >= 10); + return extractMessagesByRole(messages, "assistant", stripAssistantWrappers); } diff --git a/extensions/memory-neo4j/neo4j-client.test.ts b/extensions/memory-neo4j/neo4j-client.test.ts index 08eb426e378..9678b8227c5 100644 --- a/extensions/memory-neo4j/neo4j-client.test.ts +++ b/extensions/memory-neo4j/neo4j-client.test.ts @@ -7,7 +7,7 @@ import type { Driver } from "neo4j-driver"; import { describe, it, expect, vi, beforeEach } from "vitest"; -import type { StoreMemoryInput, MergeEntityInput } from "./schema.js"; +import type { StoreMemoryInput } from "./schema.js"; import { Neo4jMemoryClient } from "./neo4j-client.js"; // ============================================================================ @@ -867,10 +867,10 @@ describe("Neo4jMemoryClient", () => { }); // ------------------------------------------------------------------------ - // promoteToCore() / demoteFromCore() + // promoteToCore() // ------------------------------------------------------------------------ - describe("Core promotion/demotion", () => { + describe("Core promotion", () => { it("should promote memories to core category", async () => { 
mockSession.run.mockResolvedValue({ records: [{ get: vi.fn().mockReturnValue(2) }], @@ -885,26 +885,10 @@ describe("Neo4jMemoryClient", () => { ); }); - it("should demote memories from core category", async () => { - mockSession.run.mockResolvedValue({ - records: [{ get: vi.fn().mockReturnValue(1) }], - }); - - const result = await client.demoteFromCore(["m1"]); - - expect(result).toBe(1); - expect(mockSession.run).toHaveBeenCalledWith( - expect.stringContaining("category = 'fact'"), - expect.objectContaining({ ids: ["m1"] }), - ); - }); - it("should handle empty ID arrays", async () => { const promoteResult = await client.promoteToCore([]); - const demoteResult = await client.demoteFromCore([]); expect(promoteResult).toBe(0); - expect(demoteResult).toBe(0); }); }); @@ -1157,115 +1141,6 @@ describe("Neo4jMemoryClient", () => { }); }); - // ------------------------------------------------------------------------ - // Entity and Tag operations - // ------------------------------------------------------------------------ - - describe("Entity operations", () => { - it("should merge entity idempotently", async () => { - mockSession.run.mockResolvedValue({ - records: [ - { - get: vi.fn((key) => { - const data: Record = { id: "e1", name: "tarun" }; - return data[key]; - }), - }, - ], - }); - - const input: MergeEntityInput = { - id: "e1", - name: "Tarun", - type: "person", - aliases: ["boss"], - description: "CEO", - }; - - const result = await client.mergeEntity(input); - - expect(result).toEqual({ id: "e1", name: "tarun" }); - expect(mockSession.run).toHaveBeenCalledWith( - expect.stringContaining("MERGE (e:Entity {name: $name})"), - expect.objectContaining({ - name: "tarun", // normalized - }), - ); - }); - - it("should create MENTIONS relationship", async () => { - mockSession.run.mockResolvedValue({ records: [] }); - - await client.createMentions("mem-1", "Tarun", "context", 0.95); - - expect(mockSession.run).toHaveBeenCalledWith( - expect.stringContaining("MERGE 
(m)-[r:MENTIONS]->(e)"), - expect.objectContaining({ - memoryId: "mem-1", - entityName: "tarun", // normalized - role: "context", - confidence: 0.95, - }), - ); - }); - - it("should create entity relationships with validated type", async () => { - mockSession.run.mockResolvedValue({ records: [] }); - - await client.createEntityRelationship("Alice", "Acme", "WORKS_AT", 0.9); - - expect(mockSession.run).toHaveBeenCalledWith( - expect.stringContaining("MERGE (e1)-[r:WORKS_AT]->(e2)"), - expect.objectContaining({ - sourceName: "alice", - targetName: "acme", - confidence: 0.9, - }), - ); - }); - - it("should reject invalid relationship types", async () => { - await client.createEntityRelationship("a", "b", "INVALID_TYPE", 0.9); - - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining("rejected invalid relationship type"), - ); - expect(mockSession.run).not.toHaveBeenCalled(); - }); - }); - - describe("Tag operations", () => { - it("should tag memory with normalized tag name", async () => { - mockSession.run.mockResolvedValue({ records: [] }); - - await client.tagMemory("mem-1", "Neo4j", "technology", 0.95); - - expect(mockSession.run).toHaveBeenCalledWith( - expect.stringContaining("MERGE (t:Tag {name: $tagName})"), - expect.objectContaining({ - memoryId: "mem-1", - tagName: "neo4j", // normalized - tagCategory: "technology", - confidence: 0.95, - }), - ); - }); - - it("should update memory category only when current is 'other'", async () => { - mockSession.run.mockResolvedValue({ records: [] }); - - await client.updateMemoryCategory("mem-1", "fact"); - - expect(mockSession.run).toHaveBeenCalledWith( - expect.stringContaining("WHERE m.category = 'other'"), - expect.objectContaining({ - id: "mem-1", - category: "fact", - }), - ); - }); - }); - // ------------------------------------------------------------------------ // Extraction status tracking // ------------------------------------------------------------------------ @@ -1296,16 +1171,6 @@ 
describe("Neo4jMemoryClient", () => { ); }); - it("should get extraction retry count", async () => { - mockSession.run.mockResolvedValue({ - records: [{ get: vi.fn().mockReturnValue(3) }], - }); - - const result = await client.getExtractionRetries("mem-1"); - - expect(result).toBe(3); - }); - it("should count memories by extraction status", async () => { mockSession.run.mockResolvedValue({ records: [ diff --git a/extensions/memory-neo4j/neo4j-client.ts b/extensions/memory-neo4j/neo4j-client.ts index 2443221e41d..8e0ee44c712 100644 --- a/extensions/memory-neo4j/neo4j-client.ts +++ b/extensions/memory-neo4j/neo4j-client.ts @@ -10,13 +10,13 @@ import neo4j, { type Driver } from "neo4j-driver"; import { randomUUID } from "node:crypto"; -import type { - ExtractionStatus, - MergeEntityInput, - SearchSignalResult, - StoreMemoryInput, +import type { ExtractionStatus, Logger, SearchSignalResult, StoreMemoryInput } from "./schema.js"; +import { + ALLOWED_RELATIONSHIP_TYPES, + escapeLucene, + makePairKey, + validateRelationshipType, } from "./schema.js"; -import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } from "./schema.js"; // SAFETY: This pattern is built from the hardcoded ALLOWED_RELATIONSHIP_TYPES constant, // not from user input. It's used in Cypher variable-length path patterns like @@ -24,17 +24,6 @@ import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } fr // constant, there is no injection risk. const RELATIONSHIP_TYPE_PATTERN = [...ALLOWED_RELATIONSHIP_TYPES].join("|"); -// ============================================================================ -// Types -// ============================================================================ - -type Logger = { - info: (msg: string) => void; - warn: (msg: string) => void; - error: (msg: string) => void; - debug?: (msg: string) => void; -}; - // Retry configuration for transient Neo4j errors (deadlocks, etc.) 
const TRANSIENT_RETRY_ATTEMPTS = 3; const TRANSIENT_RETRY_BASE_DELAY_MS = 500; @@ -159,7 +148,7 @@ export class Neo4jMemoryClient { "CREATE INDEX entity_name_index IF NOT EXISTS FOR (e:Entity) ON (e.name)", ); // Composite index for queries that filter by both agentId and category - // (e.g. listByCategory, promotion/demotion filtering in sleep cycle) + // (e.g. listByCategory, promotion filtering in sleep cycle) await this.runSafe( session, "CREATE INDEX memory_agent_category_index IF NOT EXISTS FOR (m:Memory) ON (m.agentId, m.category)", @@ -256,12 +245,14 @@ export class Neo4jMemoryClient { agentId: $agentId, sessionKey: $sessionKey, createdAt: $createdAt, updatedAt: $updatedAt, retrievalCount: $retrievalCount, lastRetrievedAt: $lastRetrievedAt, - extractionRetries: $extractionRetries + extractionRetries: $extractionRetries, + userPinned: $userPinned }) RETURN m.id AS id`, { ...input, sessionKey: input.sessionKey ?? null, + userPinned: input.userPinned ?? false, createdAt: now, updatedAt: now, retrievalCount: 0, @@ -397,6 +388,47 @@ export class Neo4jMemoryClient { } } + /** + * Load core memories for injection: ALL user-pinned core memories (no limit) + * plus up to maxRegular non-pinned core memories ordered by importance. + * + * Total returned = (all userPinned core) + (top maxRegular non-pinned core). + */ + async listCoreForInjection( + maxRegular: number, + agentId?: string, + ): Promise<{ id: string; text: string; category: string; importance: number }[]> { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const agentFilter = agentId ? 
"AND m.agentId = $agentId" : ""; + const result = await session.run( + `MATCH (m:Memory) + WHERE m.category = 'core' ${agentFilter} + WITH m, coalesce(m.userPinned, false) AS pinned + ORDER BY m.importance DESC + WITH collect({id: m.id, text: m.text, category: m.category, importance: m.importance, pinned: pinned}) AS all + WITH [x IN all WHERE x.pinned] AS pinnedList, + [x IN all WHERE NOT x.pinned][0..$maxRegular] AS regularList + UNWIND (pinnedList + regularList) AS mem + RETURN mem.id AS id, mem.text AS text, mem.category AS category, mem.importance AS importance`, + { + maxRegular: neo4j.int(Math.floor(maxRegular)), + ...(agentId ? { agentId } : {}), + }, + ); + + return result.records.map((r) => ({ + id: r.get("id") as string, + text: r.get("text") as string, + category: r.get("category") as string, + importance: r.get("importance") as number, + })); + } finally { + await session.close(); + } + } + // -------------------------------------------------------------------------- // Search Signals // -------------------------------------------------------------------------- @@ -549,7 +581,7 @@ export class Neo4jMemoryClient { // Variable-length relationship pattern: 1..maxHops hops through entity relationships const hopRange = `1..${Math.max(1, Math.min(3, maxHops))}`; const result = await session.run( - `// Find matching entities via fulltext index + `// Find matching entities via fulltext index (SINGLE lookup) CALL db.index.fulltext.queryNodes('entity_fulltext_index', $query) YIELD node AS entity, score WHERE score >= 0.5 @@ -557,37 +589,32 @@ export class Neo4jMemoryClient { ORDER BY score DESC LIMIT 5 - // Direct: Entity ← MENTIONS ← Memory + // Collect direct mentions OPTIONAL MATCH (entity)<-[rm:MENTIONS]-(m:Memory) WHERE m IS NOT NULL ${agentFilter} - WITH m, coalesce(rm.confidence, 1.0) AS directScore, entity - WHERE m IS NOT NULL + WITH entity, collect({ + id: m.id, text: m.text, category: m.category, + importance: m.importance, createdAt: m.createdAt, + 
score: coalesce(rm.confidence, 1.0) + }) AS directResults - RETURN m.id AS id, m.text AS text, m.category AS category, - m.importance AS importance, m.createdAt AS createdAt, - max(directScore) AS graphScore - - UNION - - // Find matching entities via fulltext index (repeated for UNION) - CALL db.index.fulltext.queryNodes('entity_fulltext_index', $query) - YIELD node AS entity, score - WHERE score >= 0.5 - WITH entity - ORDER BY score DESC - LIMIT 5 - - // N-hop: Entity -[rels*1..N]-> Entity ← MENTIONS ← Memory + // N-hop spreading activation OPTIONAL MATCH (entity)-[rels:${RELATIONSHIP_TYPE_PATTERN}*${hopRange}]-(e2:Entity) WHERE ALL(r IN rels WHERE coalesce(r.confidence, 0.7) >= $firingThreshold) - OPTIONAL MATCH (e2)<-[rm:MENTIONS]-(m:Memory) - WHERE m IS NOT NULL ${agentFilter} - WITH m, reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm.confidence, 1.0) AS hopScore - WHERE m IS NOT NULL + OPTIONAL MATCH (e2)<-[rm2:MENTIONS]-(m2:Memory) + WHERE m2 IS NOT NULL ${agentFilter} + WITH directResults, collect({ + id: m2.id, text: m2.text, category: m2.category, + importance: m2.importance, createdAt: m2.createdAt, + score: reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm2.confidence, 1.0) + }) AS hopResults - RETURN m.id AS id, m.text AS text, m.category AS category, - m.importance AS importance, m.createdAt AS createdAt, - max(hopScore) AS graphScore`, + // Combine and return + UNWIND (directResults + hopResults) AS row + WITH row WHERE row.id IS NOT NULL + RETURN row.id AS id, row.text AS text, row.category AS category, + row.importance AS importance, row.createdAt AS createdAt, + max(row.score) AS graphScore`, { query: escaped, firingThreshold, ...(agentId ? 
{ agentId } : {}) }, ); @@ -613,7 +640,6 @@ export class Neo4jMemoryClient { } return Array.from(byId.values()) - .slice() .sort((a, b) => b.score - a.score) .slice(0, limit); } finally { @@ -713,159 +739,6 @@ export class Neo4jMemoryClient { // Entity & Relationship Operations // -------------------------------------------------------------------------- - /** - * Merge (upsert) an Entity node using MERGE pattern. - * Idempotent — safe to call multiple times for the same entity name. - */ - async mergeEntity(input: MergeEntityInput): Promise<{ id: string; name: string }> { - await this.ensureInitialized(); - return this.retryOnTransient(async () => { - const session = this.driver!.session(); - try { - const result = await session.run( - `MERGE (e:Entity {name: $name}) - ON CREATE SET - e.id = $id, e.type = $type, e.aliases = $aliases, - e.description = $description, - e.firstSeen = $now, e.lastSeen = $now, e.mentionCount = 1 - ON MATCH SET - e.type = COALESCE($type, e.type), - e.description = COALESCE($description, e.description), - e.lastSeen = $now, - e.mentionCount = e.mentionCount + 1 - RETURN e.id AS id, e.name AS name`, - { - id: input.id, - name: input.name.trim().toLowerCase(), - type: input.type, - aliases: input.aliases ?? [], - description: input.description ?? null, - now: new Date().toISOString(), - }, - ); - const record = result.records[0]; - return { - id: record.get("id") as string, - name: record.get("name") as string, - }; - } finally { - await session.close(); - } - }); - } - - /** - * Create a MENTIONS relationship between a Memory and an Entity. 
- */ - async createMentions( - memoryId: string, - entityName: string, - role: string = "context", - confidence: number = 1.0, - ): Promise { - await this.ensureInitialized(); - const session = this.driver!.session(); - try { - await session.run( - `MATCH (m:Memory {id: $memoryId}) - MATCH (e:Entity {name: $entityName}) - MERGE (m)-[r:MENTIONS]->(e) - ON CREATE SET r.role = $role, r.confidence = $confidence`, - { memoryId, entityName: entityName.trim().toLowerCase(), role, confidence }, - ); - } finally { - await session.close(); - } - } - - /** - * Create a typed relationship between two Entity nodes. - * The relationship type is validated against an allowlist before injection. - */ - async createEntityRelationship( - sourceName: string, - targetName: string, - relType: string, - confidence: number = 1.0, - ): Promise { - if (!validateRelationshipType(relType)) { - this.logger.warn(`memory-neo4j: rejected invalid relationship type: ${relType}`); - return; - } - - await this.ensureInitialized(); - const session = this.driver!.session(); - try { - await session.run( - `MATCH (e1:Entity {name: $sourceName}) - MATCH (e2:Entity {name: $targetName}) - MERGE (e1)-[r:${relType}]->(e2) - ON CREATE SET r.confidence = $confidence, r.createdAt = $now - ON MATCH SET r.confidence = CASE WHEN $confidence > r.confidence THEN $confidence ELSE r.confidence END`, - { - sourceName: sourceName.trim().toLowerCase(), - targetName: targetName.trim().toLowerCase(), - confidence, - now: new Date().toISOString(), - }, - ); - } finally { - await session.close(); - } - } - - /** - * Merge a Tag node and link it to a Memory. 
- */ - async tagMemory( - memoryId: string, - tagName: string, - tagCategory: string, - confidence: number = 1.0, - ): Promise { - await this.ensureInitialized(); - const session = this.driver!.session(); - try { - await session.run( - `MERGE (t:Tag {name: $tagName}) - ON CREATE SET t.id = $tagId, t.category = $tagCategory, t.createdAt = $now - WITH t - MATCH (m:Memory {id: $memoryId}) - MERGE (m)-[r:TAGGED]->(t) - ON CREATE SET r.confidence = $confidence`, - { - memoryId, - tagName: tagName.trim().toLowerCase(), - tagId: randomUUID(), - tagCategory, - confidence, - now: new Date().toISOString(), - }, - ); - } finally { - await session.close(); - } - } - - /** - * Update a memory's category. Only updates if current category is 'other' - * (auto-assigned) to avoid overriding user-explicit categorization. - */ - async updateMemoryCategory(id: string, category: string): Promise { - await this.ensureInitialized(); - const session = this.driver!.session(); - try { - await session.run( - `MATCH (m:Memory {id: $id}) - WHERE m.category = 'other' - SET m.category = $category, m.updatedAt = $now`, - { id, category, now: new Date().toISOString() }, - ); - } finally { - await session.close(); - } - } - /** * Update the extraction status of a Memory node. * Optionally increments the extractionRetries counter (for transient failure tracking). @@ -891,24 +764,6 @@ export class Neo4jMemoryClient { } } - /** - * Get the current extraction retry count for a memory. - */ - async getExtractionRetries(id: string): Promise { - await this.ensureInitialized(); - const session = this.driver!.session(); - try { - const result = await session.run( - `MATCH (m:Memory {id: $id}) - RETURN coalesce(m.extractionRetries, 0) AS retries`, - { id }, - ); - return (result.records[0]?.get("retries") as number) ?? 0; - } finally { - await session.close(); - } - } - /** * Batch all entity operations from an extraction result into a single managed * transaction. 
Replaces the previous pattern of N individual session-per-call @@ -1154,21 +1009,20 @@ export class Neo4jMemoryClient { > { await this.ensureInitialized(); - // Step 1: Fetch all memory metadata in a short-lived session - const memoryData = new Map(); + // Step 1: Fetch only IDs and importance (not text) to reduce data transfer + const memoryMeta = new Map(); { const session = this.driver!.session(); try { const agentFilter = agentId ? "WHERE m.agentId = $agentId" : ""; const allResult = await session.run( `MATCH (m:Memory) ${agentFilter} - RETURN m.id AS id, m.text AS text, m.importance AS importance`, + RETURN m.id AS id, m.importance AS importance`, agentId ? { agentId } : {}, ); for (const r of allResult.records) { - memoryData.set(r.get("id") as string, { - text: r.get("text") as string, + memoryMeta.set(r.get("id") as string, { importance: r.get("importance") as number, }); } @@ -1177,7 +1031,7 @@ export class Neo4jMemoryClient { } } - if (memoryData.size < 2) { + if (memoryMeta.size < 2) { return []; } @@ -1207,16 +1061,11 @@ export class Neo4jMemoryClient { } }; - // Helper to create a canonical pair key (sorted) - const makePairKey = (a: string, b: string): string => { - return a < b ? `${a}:${b}` : `${b}:${a}`; - }; - // Process vector queries in concurrent batches to avoid overwhelming Neo4j // while still being much faster than fully sequential execution. 
const DEDUP_CONCURRENCY = 8; let pairsFound = 0; - const allIds = [...memoryData.keys()]; + const allIds = [...memoryMeta.keys()]; for (let batchStart = 0; batchStart < allIds.length; batchStart += DEDUP_CONCURRENCY) { if (pairsFound > 500) { @@ -1253,7 +1102,7 @@ export class Neo4jMemoryClient { for (const r of similar.records) { const matchId = r.get("matchId") as string; - if (memoryData.has(matchId)) { + if (memoryMeta.has(matchId)) { union(id, matchId); pairsFound++; @@ -1274,7 +1123,7 @@ export class Neo4jMemoryClient { // Step 3: Group by root const clusters = new Map(); - for (const id of memoryData.keys()) { + for (const id of memoryMeta.keys()) { if (!parent.has(id)) { continue; } @@ -1285,38 +1134,61 @@ export class Neo4jMemoryClient { clusters.get(root)!.push(id); } - // Return clusters with 2+ members - return Array.from(clusters.values()) - .filter((ids) => ids.length >= 2) - .map((ids) => { - const cluster: { - memoryIds: string[]; - texts: string[]; - importances: number[]; - similarities?: Map; - } = { - memoryIds: ids, - texts: ids.map((id) => memoryData.get(id)!.text), - importances: ids.map((id) => memoryData.get(id)!.importance), - }; + // Step 4: Fetch text only for memories that are in clusters (not all memories) + const duplicateClusters = Array.from(clusters.values()).filter((ids) => ids.length >= 2); + const clusteredIds = new Set(); + for (const ids of duplicateClusters) { + for (const id of ids) clusteredIds.add(id); + } - // Include similarities for this cluster if requested - if (pairwiseSimilarities) { - const clusterSims = new Map(); - for (let i = 0; i < ids.length - 1; i++) { - for (let j = i + 1; j < ids.length; j++) { - const pairKey = makePairKey(ids[i], ids[j]); - const score = pairwiseSimilarities.get(pairKey); - if (score !== undefined) { - clusterSims.set(pairKey, score); - } + const textMap = new Map(); + if (clusteredIds.size > 0) { + const session = this.driver!.session(); + try { + const result = await session.run( + 
`UNWIND $ids AS memId + MATCH (m:Memory {id: memId}) + RETURN m.id AS id, m.text AS text`, + { ids: [...clusteredIds] }, + ); + for (const r of result.records) { + textMap.set(r.get("id") as string, r.get("text") as string); + } + } finally { + await session.close(); + } + } + + // Return clusters with 2+ members + return duplicateClusters.map((ids) => { + const cluster: { + memoryIds: string[]; + texts: string[]; + importances: number[]; + similarities?: Map; + } = { + memoryIds: ids, + texts: ids.map((id) => textMap.get(id) ?? ""), + importances: ids.map((id) => memoryMeta.get(id)!.importance), + }; + + // Include similarities for this cluster if requested + if (pairwiseSimilarities) { + const clusterSims = new Map(); + for (let i = 0; i < ids.length - 1; i++) { + for (let j = i + 1; j < ids.length; j++) { + const pairKey = makePairKey(ids[i], ids[j]); + const score = pairwiseSimilarities.get(pairKey); + if (score !== undefined) { + clusterSims.set(pairKey, score); } } - cluster.similarities = clusterSims; } + cluster.similarities = clusterSims; + } - return cluster; - }); + return cluster; + }); } /** @@ -1420,8 +1292,8 @@ export class Neo4jMemoryClient { * * A memory with importance=1.0 decays slower than one with importance=0.3. * - * IMPORTANT: Core memories (category='core') are EXEMPT from decay. - * They persist indefinitely regardless of age. + * IMPORTANT: Core memories (category='core') and user-pinned memories + * are EXEMPT from decay. They persist indefinitely regardless of age. */ async findDecayedMemories( options: { @@ -1473,6 +1345,7 @@ export class Neo4jMemoryClient { `MATCH (m:Memory) WHERE m.createdAt IS NOT NULL AND m.category <> 'core' + AND coalesce(m.userPinned, false) = false ${agentFilter} WITH m, duration.between(datetime(m.createdAt), datetime()).days AS ageDays, @@ -1659,7 +1532,7 @@ export class Neo4jMemoryClient { /** * Find memory pairs that share at least one entity (via MENTIONS relationships). 
* These are candidates for conflict resolution — the LLM decides if they truly conflict. - * Excludes core memories (conflicts there are handled by promotion/demotion). + * Excludes core memories (conflicts there are handled by promotion). */ async findConflictingMemories(agentId?: string): Promise< Array<{ @@ -1729,8 +1602,8 @@ export class Neo4jMemoryClient { * Calculate effective scores for all memories to determine Pareto threshold. * * Uses: importance × freq_boost × recency for ALL memories (including core). - * This gives core memories a slight disadvantage (they need strong retrieval - * patterns to stay in top 20%), creating healthy churn. + * User-pinned core memories are excluded — they have fixed importance=1.0 + * and should not influence the Pareto threshold calculation. */ async calculateAllEffectiveScores(agentId?: string): Promise< Array<{ @@ -1747,8 +1620,8 @@ export class Neo4jMemoryClient { const session = this.driver!.session(); try { const agentFilter = agentId - ? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL" - : "WHERE m.createdAt IS NOT NULL"; + ? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL AND coalesce(m.userPinned, false) = false" + : "WHERE m.createdAt IS NOT NULL AND coalesce(m.userPinned, false) = false"; const result = await session.run( `MATCH (m:Memory) ${agentFilter} @@ -1761,7 +1634,7 @@ export class Neo4jMemoryClient { END AS daysSinceRetrieval WITH m, retrievalCount, ageDays, daysSinceRetrieval, // Effective score: importance × freq_boost × recency - // This is used for global ranking (promotion/demotion threshold) + // This is used for global ranking (promotion threshold) m.importance * (1 + log(1 + retrievalCount) * 0.3) * CASE WHEN daysSinceRetrieval IS NULL THEN 0.1 @@ -1788,7 +1661,7 @@ export class Neo4jMemoryClient { } /** - * Calculate the Pareto threshold (80th percentile) for promotion/demotion. + * Calculate the Pareto threshold (80th percentile) for promotion. 
* Returns the effective score that separates top 20% from bottom 80%. */ calculateParetoThreshold( @@ -1836,33 +1709,6 @@ export class Neo4jMemoryClient { } } - /** - * Demote memories from core back to their original category. - * Uses 'fact' as default since we don't track original category. - */ - async demoteFromCore(memoryIds: string[]): Promise { - if (memoryIds.length === 0) { - return 0; - } - - await this.ensureInitialized(); - const session = this.driver!.session(); - try { - const result = await session.run( - `UNWIND $ids AS memId - MATCH (m:Memory {id: memId}) - WHERE m.category = 'core' - SET m.category = 'fact', m.demotedAt = $now, m.updatedAt = $now - RETURN count(*) AS demoted`, - { ids: memoryIds, now: new Date().toISOString() }, - ); - - return (result.records[0]?.get("demoted") as number) ?? 0; - } finally { - await session.close(); - } - } - // -------------------------------------------------------------------------- // Reindex: re-embed all Memory and Entity nodes // -------------------------------------------------------------------------- diff --git a/extensions/memory-neo4j/schema.ts b/extensions/memory-neo4j/schema.ts index 745239c653f..bd4a60589da 100644 --- a/extensions/memory-neo4j/schema.ts +++ b/extensions/memory-neo4j/schema.ts @@ -2,6 +2,17 @@ * Graph schema types, Cypher query templates, and constants for memory-neo4j. 
*/ +// ============================================================================ +// Shared Types +// ============================================================================ + +export type Logger = { + info: (msg: string) => void; + warn: (msg: string) => void; + error: (msg: string) => void; + debug?: (msg: string) => void; +}; + // ============================================================================ // Node Types // ============================================================================ @@ -32,7 +43,7 @@ export type MemoryNode = { retrievalCount: number; lastRetrievedAt?: string; promotedAt?: string; - demotedAt?: string; + userPinned?: boolean; }; export type EntityNode = { @@ -119,6 +130,7 @@ export type StoreMemoryInput = { extractionStatus: ExtractionStatus; agentId: string; sessionKey?: string; + userPinned?: boolean; }; export type MergeEntityInput = { @@ -174,3 +186,10 @@ export function escapeLucene(query: string): string { export function validateRelationshipType(type: string): boolean { return ALLOWED_RELATIONSHIP_TYPES.has(type); } + +/** + * Create a canonical key for a pair of IDs (sorted for order-independence). + */ +export function makePairKey(a: string, b: string): string { + return a < b ? 
`${a}:${b}` : `${b}:${a}`; +} diff --git a/extensions/memory-neo4j/search.ts b/extensions/memory-neo4j/search.ts index 60a787e747e..9ced74784fe 100644 --- a/extensions/memory-neo4j/search.ts +++ b/extensions/memory-neo4j/search.ts @@ -14,7 +14,7 @@ import type { Embeddings } from "./embeddings.js"; import type { Neo4jMemoryClient } from "./neo4j-client.js"; -import type { HybridSearchResult, SearchSignalResult } from "./schema.js"; +import type { HybridSearchResult, Logger, SearchSignalResult } from "./schema.js"; // ============================================================================ // Query Classification @@ -214,6 +214,7 @@ export async function hybridSearch( candidateMultiplier?: number; graphFiringThreshold?: number; graphSearchDepth?: number; + logger?: Logger; } = {}, ): Promise { // Guard against empty queries @@ -226,12 +227,15 @@ export async function hybridSearch( candidateMultiplier = 4, graphFiringThreshold = 0.3, graphSearchDepth = 1, + logger, } = options; const candidateLimit = Math.floor(Math.min(200, Math.max(1, limit * candidateMultiplier))); // 1. Generate query embedding + const t0 = performance.now(); const queryEmbedding = await embeddings.embed(query); + const tEmbed = performance.now(); // 2. Classify query and get adaptive weights const queryType = classifyQuery(query); @@ -245,9 +249,11 @@ export async function hybridSearch( ? db.graphSearch(query, candidateLimit, graphFiringThreshold, agentId, graphSearchDepth) : Promise.resolve([] as SearchSignalResult[]), ]); + const tSignals = performance.now(); // 4. Fuse with confidence-weighted RRF const fused = fuseWithConfidenceRRF([vectorResults, bm25Results, graphResults], rrfK, weights); + const tFuse = performance.now(); // 5. Return top results, normalized to 0-100% display scores. 
// Only normalize when maxRrf is above a minimum threshold to avoid @@ -275,5 +281,11 @@ export async function hybridSearch( }); } + // Log search timing breakdown + logger?.info?.( + `memory-neo4j: [bench] hybridSearch ${(tFuse - t0).toFixed(0)}ms (embed=${(tEmbed - t0).toFixed(0)}ms, signals=${(tSignals - tEmbed).toFixed(0)}ms, fuse=${(tFuse - tSignals).toFixed(0)}ms) ` + + `type=${queryType} vec=${vectorResults.length} bm25=${bm25Results.length} graph=${graphResults.length} → ${results.length} results`, + ); + return results; } diff --git a/extensions/memory-neo4j/sleep-cycle.ts b/extensions/memory-neo4j/sleep-cycle.ts new file mode 100644 index 00000000000..62b680f695f --- /dev/null +++ b/extensions/memory-neo4j/sleep-cycle.ts @@ -0,0 +1,663 @@ +/** + * Seven-phase sleep cycle for memory consolidation. + * + * Implements a Pareto-based memory ecosystem where core memory + * is bounded to the top 20% of memories by effective score. + * + * Phases: + * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy) + * 2. PARETO SCORING - Calculate effective scores for all memories + * 3. CORE PROMOTION - Regular memories above threshold -> core + * 4. CORE DEMOTION - Core memories below threshold -> regular + * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve) + * 6. EXTRACTION - Form entity relationships (strengthen connections) + * 7. 
CLEANUP - Remove orphaned entities/tags (garbage collection) + * + * Research basis: + * - Pareto principle (20/80 rule) for memory tiering + * - ACT-R memory model for retrieval-based importance + * - Ebbinghaus forgetting curve for decay + * - MemGPT/Letta for tiered memory architecture + */ + +import type { ExtractionConfig } from "./config.js"; +import type { Embeddings } from "./embeddings.js"; +import type { Neo4jMemoryClient } from "./neo4j-client.js"; +import type { Logger } from "./schema.js"; +import { isSemanticDuplicate, resolveConflict, runBackgroundExtraction } from "./extractor.js"; +import { makePairKey } from "./schema.js"; + +/** + * Sleep Cycle Result - aggregated stats from all phases. + */ +export type SleepCycleResult = { + // Phase 1: Deduplication + dedup: { + clustersFound: number; + memoriesMerged: number; + }; + // Phase 1b: Conflict Detection + conflict: { + pairsFound: number; + resolved: number; + invalidated: number; + }; + // Phase 1c: Semantic Deduplication + semanticDedup: { + pairsChecked: number; + duplicatesMerged: number; + }; + // Phase 2: Pareto Scoring & Threshold + pareto: { + totalMemories: number; + coreMemories: number; + regularMemories: number; + threshold: number; // The 80th percentile effective score + }; + // Phase 3: Core Promotion + promotion: { + candidatesFound: number; + promoted: number; + }; + // Phase 4: Entity Extraction + extraction: { + total: number; + processed: number; + succeeded: number; + failed: number; + }; + // Phase 4: Decay & Pruning + decay: { + memoriesPruned: number; + }; + // Phase 5: Orphan Cleanup + cleanup: { + entitiesRemoved: number; + tagsRemoved: number; + }; + // Overall + durationMs: number; + aborted: boolean; +}; + +export type SleepCycleOptions = { + // Common + agentId?: string; + abortSignal?: AbortSignal; + + // Phase 1: Deduplication + dedupThreshold?: number; // Vector similarity threshold (default: 0.95) + skipSemanticDedup?: boolean; // Skip LLM-based semantic dedup 
(Phase 1b) and conflict detection (Phase 1c) + + // Phase 2-3: Pareto-based Promotion + paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%) + promotionMinAgeDays?: number; // Min age before promotion (default: 7) + + // Phase 1b: Semantic Dedup + maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500) + + // Concurrency + llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL) + + // Phase 4: Extraction + extractionBatchSize?: number; // Memories per batch (default: 50) + extractionDelayMs?: number; // Delay between batches (default: 1000) + + // Phase 4: Decay + decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1) + decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30) + decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2) + decayCurves?: Record; // Per-category decay curve overrides + + // Progress callback + onPhaseStart?: ( + phase: + | "dedup" + | "conflict" + | "semanticDedup" + | "pareto" + | "promotion" + | "decay" + | "extraction" + | "cleanup", + ) => void; + onProgress?: (phase: string, message: string) => void; +}; + +// ============================================================================ +// Sleep Cycle Implementation +// ============================================================================ + +/** + * Run the full sleep cycle - seven phases of memory consolidation. + * + * This implements a Pareto-based memory ecosystem where core memory + * is bounded to the top 20% of memories by effective score. 
+ * + * Effective Score Formulas: + * - Regular memories: importance x freq_boost x recency + * - Core memories: importance x freq_boost x recency (same for threshold comparison) + * - Core memory retrieval ranking: freq_boost x recency (pure usage-based) + * + * Where: + * - freq_boost = 1 + log(1 + retrievalCount) x 0.3 + * - recency = 2^(-days_since_last / 14) + */ +export async function runSleepCycle( + db: Neo4jMemoryClient, + embeddings: Embeddings, + config: ExtractionConfig, + logger: Logger, + options: SleepCycleOptions = {}, +): Promise { + const startTime = Date.now(); + const { + agentId, + abortSignal, + dedupThreshold = 0.95, + skipSemanticDedup = false, + maxSemanticDedupPairs = 500, + llmConcurrency = 8, + paretoPercentile = 0.2, + promotionMinAgeDays = 7, + decayRetentionThreshold = 0.1, + decayBaseHalfLifeDays = 30, + decayImportanceMultiplier = 2, + decayCurves, + extractionBatchSize = 50, + extractionDelayMs = 1000, + onPhaseStart, + onProgress, + } = options; + + const result: SleepCycleResult = { + dedup: { clustersFound: 0, memoriesMerged: 0 }, + conflict: { pairsFound: 0, resolved: 0, invalidated: 0 }, + semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 }, + pareto: { + totalMemories: 0, + coreMemories: 0, + regularMemories: 0, + threshold: 0, + }, + promotion: { candidatesFound: 0, promoted: 0 }, + decay: { memoriesPruned: 0 }, + extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 }, + cleanup: { entitiesRemoved: 0, tagsRemoved: 0 }, + durationMs: 0, + aborted: false, + }; + + // -------------------------------------------------------------------------- + // Phase 1: Deduplication (Optimized - combined vector + semantic dedup) + // Call findDuplicateClusters ONCE at 0.75 threshold, then split by similarity band: + // - >=0.95: vector merge (high-confidence duplicates) + // - 0.75-0.95: semantic dedup via LLM (paraphrases) + // -------------------------------------------------------------------------- + if 
(!abortSignal?.aborted) { + onPhaseStart?.("dedup"); + logger.info("memory-neo4j: [sleep] Phase 1: Deduplication (vector + semantic)"); + + try { + // Fetch clusters at 0.75 threshold with similarity scores + const allClusters = await db.findDuplicateClusters(0.75, agentId, true); + + // Separate clusters into high-similarity (>=0.95) and medium-similarity (0.75-0.95) + const highSimClusters: typeof allClusters = []; + const mediumSimClusters: typeof allClusters = []; + + for (const cluster of allClusters) { + if (abortSignal?.aborted) break; + if (!cluster.similarities || cluster.memoryIds.length < 2) continue; + + // Check if ANY pair in this cluster has similarity >= dedupThreshold + let hasHighSim = false; + for (const [pairKey, score] of cluster.similarities.entries()) { + if (score >= dedupThreshold) { + hasHighSim = true; + break; + } + } + + if (hasHighSim) { + // Split this cluster into high-sim and medium-sim sub-clusters + // For simplicity, if a cluster has ANY high-sim pair, treat the whole cluster as high-sim + // (This matches the old behavior where Phase 1 would merge them all) + highSimClusters.push(cluster); + } else { + mediumSimClusters.push(cluster); + } + } + + // Part 1a: Vector merge for high-similarity clusters (>=0.95) + result.dedup.clustersFound = highSimClusters.length; + + for (const cluster of highSimClusters) { + if (abortSignal?.aborted) break; + + const { deletedCount } = await db.mergeMemoryCluster( + cluster.memoryIds, + cluster.importances, + ); + result.dedup.memoriesMerged += deletedCount; + onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} -> 1 (vector)`); + } + + logger.info( + `memory-neo4j: [sleep] Phase 1a (vector) complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`, + ); + + // Part 1b: Semantic dedup for medium-similarity clusters (0.75-0.95) + if (skipSemanticDedup) { + onPhaseStart?.("semanticDedup"); + logger.info("memory-neo4j: [sleep] Phase 1b: Skipped 
(--skip-semantic)"); + onProgress?.("semanticDedup", "Skipped — semantic dedup disabled"); + } else { + onPhaseStart?.("semanticDedup"); + logger.info("memory-neo4j: [sleep] Phase 1b: Semantic Deduplication (0.75-0.95 band)"); + + // Collect all candidate pairs upfront (with pairwise similarity for pre-screening) + type DedupPair = { + textA: string; + textB: string; + idA: string; + idB: string; + importanceA: number; + importanceB: number; + similarity?: number; + }; + const allPairs: DedupPair[] = []; + + for (const cluster of mediumSimClusters) { + if (cluster.memoryIds.length < 2) continue; + for (let i = 0; i < cluster.memoryIds.length - 1; i++) { + for (let j = i + 1; j < cluster.memoryIds.length; j++) { + const pairKey = makePairKey(cluster.memoryIds[i], cluster.memoryIds[j]); + allPairs.push({ + textA: cluster.texts[i], + textB: cluster.texts[j], + idA: cluster.memoryIds[i], + idB: cluster.memoryIds[j], + importanceA: cluster.importances[i], + importanceB: cluster.importances[j], + similarity: cluster.similarities?.get(pairKey), + }); + } + } + } + + // Cap the number of LLM-checked pairs to prevent sleep cycle timeouts. + // Sort by similarity descending so higher-similarity pairs (more likely + // to be duplicates) are checked first. + if (allPairs.length > maxSemanticDedupPairs) { + allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 
0)); + const skipped = allPairs.length - maxSemanticDedupPairs; + allPairs.length = maxSemanticDedupPairs; + onProgress?.( + "semanticDedup", + `Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`, + ); + logger.info( + `memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`, + ); + } + + // Process pairs in concurrent batches + const invalidatedIds = new Set(); + + for (let i = 0; i < allPairs.length && !abortSignal?.aborted; i += llmConcurrency) { + const batch = allPairs.slice(i, i + llmConcurrency); + + // Filter out pairs where one side was already invalidated + const activeBatch = batch.filter( + (p) => !invalidatedIds.has(p.idA) && !invalidatedIds.has(p.idB), + ); + + if (activeBatch.length === 0) continue; + + const outcomes = await Promise.allSettled( + activeBatch.map((p) => + isSemanticDuplicate(p.textA, p.textB, config, p.similarity, abortSignal), + ), + ); + + for (let k = 0; k < outcomes.length; k++) { + const pair = activeBatch[k]; + result.semanticDedup.pairsChecked++; + + if ( + outcomes[k].status === "fulfilled" && + (outcomes[k] as PromiseFulfilledResult).value + ) { + // Skip if either side was invalidated by an earlier result in this batch + if (invalidatedIds.has(pair.idA) || invalidatedIds.has(pair.idB)) continue; + + const keepId = pair.importanceA >= pair.importanceB ? pair.idA : pair.idB; + const removeId = keepId === pair.idA ? pair.idB : pair.idA; + const keepText = keepId === pair.idA ? pair.textA : pair.textB; + const removeText = removeId === pair.idA ? pair.textA : pair.textB; + + await db.invalidateMemory(removeId); + invalidatedIds.add(removeId); + result.semanticDedup.duplicatesMerged++; + + onProgress?.( + "semanticDedup", + `Merged: "${removeText.slice(0, 50)}..." 
-> kept "${keepText.slice(0, 50)}..."`, + ); + } + } + } + + logger.info( + `memory-neo4j: [sleep] Phase 1b (semantic) complete — ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`, + ); + } // close skipSemanticDedup else + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`); + } + } + + // -------------------------------------------------------------------------- + // Phase 1c: Conflict Detection (formerly Phase 1b) + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted && !skipSemanticDedup) { + onPhaseStart?.("conflict"); + logger.info("memory-neo4j: [sleep] Phase 1c: Conflict Detection"); + + try { + const pairs = await db.findConflictingMemories(agentId); + result.conflict.pairsFound = pairs.length; + + // Process conflict pairs in parallel chunks of llmConcurrency + for (let i = 0; i < pairs.length && !abortSignal?.aborted; i += llmConcurrency) { + const chunk = pairs.slice(i, i + llmConcurrency); + const outcomes = await Promise.allSettled( + chunk.map((pair) => + resolveConflict(pair.memoryA.text, pair.memoryB.text, config, abortSignal), + ), + ); + + for (let k = 0; k < outcomes.length; k++) { + if (abortSignal?.aborted) break; + const pair = chunk[k]; + const outcome = outcomes[k]; + if (outcome.status !== "fulfilled") continue; + + const decision = outcome.value; + if (decision === "a") { + await db.invalidateMemory(pair.memoryB.id); + result.conflict.invalidated++; + result.conflict.resolved++; + onProgress?.( + "conflict", + `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`, + ); + } else if (decision === "b") { + await db.invalidateMemory(pair.memoryA.id); + result.conflict.invalidated++; + result.conflict.resolved++; + onProgress?.( + "conflict", + `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`, + ); + } else if (decision === "both") { + result.conflict.resolved++; + 
onProgress?.("conflict", `Kept both: no real conflict`); + } + // "skip" = LLM unavailable, don't count as resolved + } + } + + logger.info( + `memory-neo4j: [sleep] Phase 1c complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 1c error: ${String(err)}`); + } + } + + // -------------------------------------------------------------------------- + // Phase 2: Pareto Scoring & Threshold Calculation + // -------------------------------------------------------------------------- + let paretoThreshold = 0; + let allScores: Awaited> = []; + if (!abortSignal?.aborted) { + onPhaseStart?.("pareto"); + logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring"); + + try { + allScores = await db.calculateAllEffectiveScores(agentId); + result.pareto.totalMemories = allScores.length; + result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length; + result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length; + + // Calculate the threshold for top N% (default: top 20%) + paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile); + result.pareto.threshold = paretoThreshold; + + onProgress?.( + "pareto", + `Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`, + ); + onProgress?.( + "pareto", + `Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`, + ); + + logger.info( + `memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`); + } + } + + // -------------------------------------------------------------------------- + // Phase 3: Core Promotion (using pre-computed scores from Phase 2) + // + // Design note on staleness: The 
effective scores and Pareto threshold were + // computed in Phase 2 and may be slightly stale by the time Phases 3/4 run. + // This is acceptable because: (a) the sleep cycle is a background maintenance + // task that runs infrequently (not concurrent with itself), (b) the scoring + // formula is deterministic based on stored properties that change slowly, and + // (c) promotion is a one-way operation (core memories are never auto-demoted; + // bad core memories are handled manually via memory_forget). The alternative + // (re-querying scores per phase) adds latency without meaningful accuracy gain. + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted && paretoThreshold > 0) { + onPhaseStart?.("promotion"); + logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion"); + + try { + const candidates = allScores.filter( + (s) => + s.category !== "core" && + s.effectiveScore >= paretoThreshold && + s.ageDays >= promotionMinAgeDays, + ); + result.promotion.candidatesFound = candidates.length; + + if (candidates.length > 0) { + const ids = candidates.map((m) => m.id); + result.promotion.promoted = await db.promoteToCore(ids); + for (const c of candidates) { + onProgress?.( + "promotion", + `Promoted "${c.text.slice(0, 40)}..." 
(score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`, + ); + } + } + + logger.info( + `memory-neo4j: [sleep] Phase 3 complete — ${result.promotion.promoted} memories promoted to core`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`); + } + } + + // -------------------------------------------------------------------------- + // Phase 4: Entity Extraction (moved before decay so new memories get + // extracted before pruning can remove them) + // -------------------------------------------------------------------------- + // Extraction uses llmConcurrency (defined above, matches OLLAMA_NUM_PARALLEL) + if (!abortSignal?.aborted && config.enabled) { + onPhaseStart?.("extraction"); + logger.info("memory-neo4j: [sleep] Phase 4: Entity Extraction"); + + try { + // Get initial count + const counts = await db.countByExtractionStatus(agentId); + result.extraction.total = counts.pending; + + if (result.extraction.total > 0) { + let hasMore = true; + while (hasMore && !abortSignal?.aborted) { + const pending = await db.listPendingExtractions(extractionBatchSize, agentId); + + if (pending.length === 0) { + hasMore = false; + break; + } + + // Process in parallel chunks of llmConcurrency + for (let i = 0; i < pending.length && !abortSignal?.aborted; i += llmConcurrency) { + const chunk = pending.slice(i, i + llmConcurrency); + const outcomes = await Promise.allSettled( + chunk.map((memory) => + runBackgroundExtraction( + memory.id, + memory.text, + db, + embeddings, + config, + logger, + memory.extractionRetries, + abortSignal, + ), + ), + ); + + for (const outcome of outcomes) { + result.extraction.processed++; + if (outcome.status === "fulfilled" && outcome.value.success) { + result.extraction.succeeded++; + } else { + result.extraction.failed++; + } + } + + if (result.extraction.processed % 10 === 0 || i + llmConcurrency >= pending.length) { + onProgress?.( + "extraction", + 
`${result.extraction.processed}/${result.extraction.total} processed`, + ); + } + } + + // Delay between batches (abort-aware) + if (hasMore && !abortSignal?.aborted) { + await new Promise((resolve) => { + const timer = setTimeout(resolve, extractionDelayMs); + // If abort fires during delay, resolve immediately + abortSignal?.addEventListener( + "abort", + () => { + clearTimeout(timer); + resolve(); + }, + { once: true }, + ); + }); + } + } + } + + logger.info( + `memory-neo4j: [sleep] Phase 4 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`); + } + } else if (!config.enabled) { + logger.info("memory-neo4j: [sleep] Phase 4 skipped — extraction not enabled"); + } + + // -------------------------------------------------------------------------- + // Phase 5: Decay & Pruning (after extraction so freshly extracted memories + // aren't pruned before they build entity connections) + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted) { + onPhaseStart?.("decay"); + logger.info("memory-neo4j: [sleep] Phase 5: Decay & Pruning"); + + try { + const decayed = await db.findDecayedMemories({ + retentionThreshold: decayRetentionThreshold, + baseHalfLifeDays: decayBaseHalfLifeDays, + importanceMultiplier: decayImportanceMultiplier, + decayCurves, + agentId, + }); + + if (decayed.length > 0) { + const ids = decayed.map((m) => m.id); + result.decay.memoriesPruned = await db.pruneMemories(ids); + onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`); + } + + logger.info( + `memory-neo4j: [sleep] Phase 5 complete — ${result.decay.memoriesPruned} memories pruned`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`); + } + } + + // -------------------------------------------------------------------------- + // Phase 6: Orphan Cleanup + 
// -------------------------------------------------------------------------- + if (!abortSignal?.aborted) { + onPhaseStart?.("cleanup"); + logger.info("memory-neo4j: [sleep] Phase 6: Orphan Cleanup"); + + try { + // Clean up orphan entities + if (!abortSignal?.aborted) { + const orphanEntities = await db.findOrphanEntities(); + if (orphanEntities.length > 0) { + result.cleanup.entitiesRemoved = await db.deleteOrphanEntities( + orphanEntities.map((e) => e.id), + ); + onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`); + } + } + + // Clean up orphan tags + if (!abortSignal?.aborted) { + const orphanTags = await db.findOrphanTags(); + if (orphanTags.length > 0) { + result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id)); + onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`); + } + } + + logger.info( + `memory-neo4j: [sleep] Phase 6 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`); + } + } + + result.durationMs = Date.now() - startTime; + result.aborted = abortSignal?.aborted ?? false; + + logger.info( + `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` + + (result.aborted ? " (aborted)" : ""), + ); + + return result; +}