diff --git a/extensions/memory-neo4j/attention-gate.ts b/extensions/memory-neo4j/attention-gate.ts
index 7f334fe25c6..c05f25e47ba 100644
--- a/extensions/memory-neo4j/attention-gate.ts
+++ b/extensions/memory-neo4j/attention-gate.ts
@@ -62,6 +62,30 @@ const MIN_CAPTURE_CHARS = 30;
 /** Minimum word count — short contextual phrases lack standalone meaning. */
 const MIN_WORD_COUNT = 8;
 
+/** Returns true when text fails a check shared by the user and assistant attention gates. */
+function failsSharedGateChecks(trimmed: string): boolean {
+  // Reject text containing context tags injected by the memory system itself
+  if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
+    return true;
+  }
+
+  // Reject text matching any of the NOISE_PATTERNS regexes
+  if (NOISE_PATTERNS.some((r) => r.test(trimmed))) {
+    return true;
+  }
+
+  // Reject text with more than 3 emoji (likely a reaction, not substance)
+  const emojiCount = (
+    trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) ||
+    []
+  ).length;
+  if (emojiCount > 3) {
+    return true;
+  }
+
+  return false;
+}
+
 export function passesAttentionGate(text: string): boolean {
   const trimmed = text.trim();
 
@@ -76,22 +100,7 @@ export function passesAttentionGate(text: string): boolean {
     return false;
   }
 
-  // Injected context from the memory system itself
-  if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
-    return false;
-  }
-
-  // Noise patterns
-  if (NOISE_PATTERNS.some((r) => r.test(trimmed))) {
-    return false;
-  }
-
-  // Excessive emoji (likely reaction, not substance)
-  const emojiCount = (
-    trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) ||
-    []
-  ).length;
-  if (emojiCount > 3) {
+  if (failsSharedGateChecks(trimmed)) {
     return false;
   }
 
@@ -183,13 +192,7 @@ export function passesAssistantAttentionGate(text: string): boolean {
     return false;
   }
 
-  // Injected context from the memory system itself
-  if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
-    return false;
-  }
-
-  // Noise patterns (same as user gate)
-  if (NOISE_PATTERNS.some((r) => r.test(trimmed))) {
+  if (failsSharedGateChecks(trimmed)) {
     return false;
   }
 
@@ -198,14 +201,5 @@ export function passesAssistantAttentionGate(text: string): boolean {
     return false;
   }
 
-  // Excessive emoji (likely reaction, not substance)
-  const emojiCount = (
-    trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) ||
-    []
-  ).length;
-  if (emojiCount > 3) {
-    return false;
-  }
-
   return true;
 }
diff --git a/extensions/memory-neo4j/auto-capture.test.ts b/extensions/memory-neo4j/auto-capture.test.ts
new file mode 100644
index 00000000000..141dd0f5327
--- /dev/null
+++ b/extensions/memory-neo4j/auto-capture.test.ts
@@ -0,0 +1,573 @@
+/**
+ * Tests for the auto-capture pipeline: captureMessage and runAutoCapture.
+ *
+ * Tests the embed → dedup → rate → store pipeline including:
+ * - Pre-computed vector usage (batch embedding optimization)
+ * - Exact dedup (≥0.95 score band)
+ * - Semantic dedup (0.75-0.95 score band via LLM)
+ * - Importance pre-screening for assistant messages
+ * - Batch embedding in runAutoCapture
+ */
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { ExtractionConfig } from "./config.js";
+import type { Embeddings } from "./embeddings.js";
+import type { Neo4jMemoryClient } from "./neo4j-client.js";
+import { _captureMessage as captureMessage, _runAutoCapture as runAutoCapture } from "./index.js";
+
+// ============================================================================
+// Mocks
+// ============================================================================
+
+const enabledConfig: ExtractionConfig = {
+  enabled: true,
+  apiKey: "test-key",
+  model: "test-model",
+  baseUrl: "https://test.ai/api/v1",
+  temperature: 0.0,
+  maxRetries: 0,
+};
+
+const disabledConfig: ExtractionConfig = {
+  ...enabledConfig,
+  enabled: false,
+};
+
+const mockLogger = {
+  info: vi.fn(),
+  warn: vi.fn(),
+  debug: vi.fn(),
+};
+
+function createMockDb(overrides?: Partial<Neo4jMemoryClient>): Neo4jMemoryClient {
+  return {
+    findSimilar: vi.fn().mockResolvedValue([]),
+    storeMemory: vi.fn().mockResolvedValue(undefined),
+    ...overrides,
+  } as unknown as Neo4jMemoryClient;
+}
+
+function createMockEmbeddings(overrides?: Partial<Embeddings>): Embeddings {
+  return {
+    embed: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]),
+    embedBatch: vi.fn().mockResolvedValue([[0.1, 0.2, 0.3]]),
+    ...overrides,
+  } as unknown as Embeddings;
+}
+
+// ============================================================================
+// captureMessage
+// ============================================================================
+
+describe("captureMessage", () => {
+  const originalFetch = globalThis.fetch;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  it("should store a new memory when no duplicates exist", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+
+    // Mock rateImportance (LLM call via fetch)
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
+        }),
+    });
+
+    const result = await captureMessage(
+      "I prefer TypeScript over JavaScript",
+      "auto-capture",
+      0.5,
+      1.0,
+      "test-agent",
+      "session-1",
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(true);
+    expect(result.semanticDeduped).toBe(false);
+    expect(db.storeMemory).toHaveBeenCalledOnce();
+    expect(embeddings.embed).toHaveBeenCalledWith("I prefer TypeScript over JavaScript");
+  });
+
+  it("should use pre-computed vector when provided", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+    const precomputedVector = [0.5, 0.6, 0.7];
+
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
+        }),
+    });
+
+    const result = await captureMessage(
+      "test text",
+      "auto-capture",
+      0.5,
+      1.0,
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+      precomputedVector,
+    );
+
+    expect(result.stored).toBe(true);
+    // Should NOT call embed() since pre-computed vector was provided
+    expect(embeddings.embed).not.toHaveBeenCalled();
+    // Should use the pre-computed vector for findSimilar
+    expect(db.findSimilar).toHaveBeenCalledWith(precomputedVector, 0.75, 3, "test-agent");
+  });
+
+  it("should skip storage when exact duplicate found (score >= 0.95)", async () => {
+    const db = createMockDb({
+      findSimilar: vi
+        .fn()
+        .mockResolvedValue([{ id: "existing-1", text: "duplicate text", score: 0.97 }]),
+    });
+    const embeddings = createMockEmbeddings();
+
+    const result = await captureMessage(
+      "duplicate text",
+      "auto-capture",
+      0.5,
+      1.0,
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(false);
+    expect(result.semanticDeduped).toBe(false);
+    expect(db.storeMemory).not.toHaveBeenCalled();
+  });
+
+  it("should semantic dedup when candidate in 0.75-0.95 band is LLM-confirmed duplicate", async () => {
+    const db = createMockDb({
+      findSimilar: vi
+        .fn()
+        .mockResolvedValue([{ id: "candidate-1", text: "User prefers TypeScript", score: 0.88 }]),
+    });
+    const embeddings = createMockEmbeddings();
+
+    // First call: rateImportance, second call: isSemanticDuplicate
+    let callCount = 0;
+    globalThis.fetch = vi.fn().mockImplementation(() => {
+      callCount++;
+      if (callCount === 1) {
+        // rateImportance response
+        return Promise.resolve({
+          ok: true,
+          json: () =>
+            Promise.resolve({
+              choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
+            }),
+        });
+      }
+      // isSemanticDuplicate response
+      return Promise.resolve({
+        ok: true,
+        json: () =>
+          Promise.resolve({
+            choices: [
+              {
+                message: {
+                  content: JSON.stringify({
+                    verdict: "duplicate",
+                    reason: "same preference",
+                  }),
+                },
+              },
+            ],
+          }),
+      });
+    });
+
+    const result = await captureMessage(
+      "I like TypeScript",
+      "auto-capture",
+      0.5,
+      1.0,
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(false);
+    expect(result.semanticDeduped).toBe(true);
+    expect(db.storeMemory).not.toHaveBeenCalled();
+  });
+
+  it("should skip importance check when extraction is disabled", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+
+    // With extraction disabled, rateImportance returns 0.5 fallback,
+    // so the threshold check is skipped entirely
+    const result = await captureMessage(
+      "some text to store",
+      "auto-capture",
+      0.5,
+      1.0,
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      disabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(true);
+    expect(db.storeMemory).toHaveBeenCalledOnce();
+    // Verify stored with fallback importance * discount
+    const storeCall = (db.storeMemory as ReturnType<typeof vi.fn>).mock.calls[0][0];
+    expect(storeCall.importance).toBe(0.5); // 0.5 fallback * 1.0 discount
+    expect(storeCall.extractionStatus).toBe("skipped");
+  });
+
+  it("should apply importance discount for assistant messages", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+
+    // For assistant messages, importance is rated first
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 8 }) } }],
+        }),
+    });
+
+    const result = await captureMessage(
+      "Here's what I know about Neo4j graph databases...",
+      "auto-capture-assistant",
+      0.8, // higher threshold for assistant
+      0.75, // 25% discount
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(true);
+    const storeCall = (db.storeMemory as ReturnType<typeof vi.fn>).mock.calls[0][0];
+    // importance 0.8 (score 8/10) * 0.75 discount ≈ 0.6
+    expect(storeCall.importance).toBeCloseTo(0.6);
+    expect(storeCall.source).toBe("auto-capture-assistant");
+  });
+
+  it("should reject assistant messages below importance threshold", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+
+    // Low importance score
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 3 }) } }],
+        }),
+    });
+
+    const result = await captureMessage(
+      "Sure, I can help with that.",
+      "auto-capture-assistant",
+      0.8, // threshold 0.8
+      0.75,
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(false);
+    // Should not even embed since importance pre-screen failed
+    expect(embeddings.embed).not.toHaveBeenCalled();
+    expect(db.storeMemory).not.toHaveBeenCalled();
+  });
+
+  it("should reject user messages below importance threshold", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+
+    // Low importance score
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 2 }) } }],
+        }),
+    });
+
+    const result = await captureMessage(
+      "okay thanks",
+      "auto-capture",
+      0.5, // threshold 0.5
+      1.0,
+      "test-agent",
+      undefined,
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(result.stored).toBe(false);
+    expect(db.storeMemory).not.toHaveBeenCalled();
+  });
+});
+
+// ============================================================================
+// runAutoCapture
+// ============================================================================
+
+describe("runAutoCapture", () => {
+  const originalFetch = globalThis.fetch;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  it("should batch-embed all retained messages at once", async () => {
+    const db = createMockDb();
+    const embedBatchMock = vi.fn().mockResolvedValue([
+      [0.1, 0.2],
+      [0.3, 0.4],
+    ]);
+    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
+
+    // Mock rateImportance calls
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
+        }),
+    });
+
+    const messages = [
+      {
+        role: "user",
+        content: "I prefer TypeScript over JavaScript for backend development",
+      },
+      {
+        role: "assistant",
+        content:
+          "TypeScript is great for type safety and developer experience, especially with Node.js projects",
+      },
+    ];
+
+    await runAutoCapture(
+      messages,
+      "test-agent",
+      "session-1",
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    // Should call embedBatch once with both texts
+    expect(embedBatchMock).toHaveBeenCalledOnce();
+    const batchTexts = embedBatchMock.mock.calls[0][0];
+    expect(batchTexts.length).toBe(2);
+  });
+
+  it("should not call embedBatch when no messages pass the gate", async () => {
+    const db = createMockDb();
+    const embedBatchMock = vi.fn().mockResolvedValue([]);
+    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
+
+    // Short messages that won't pass attention gate
+    const messages = [
+      { role: "user", content: "ok" },
+      { role: "assistant", content: "yes" },
+    ];
+
+    await runAutoCapture(
+      messages,
+      "test-agent",
+      "session-1",
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    expect(embedBatchMock).not.toHaveBeenCalled();
+    expect(db.storeMemory).not.toHaveBeenCalled();
+  });
+
+  it("should handle empty messages array", async () => {
+    const db = createMockDb();
+    const embeddings = createMockEmbeddings();
+
+    await runAutoCapture([], "test-agent", undefined, db, embeddings, enabledConfig, mockLogger);
+
+    expect(db.storeMemory).not.toHaveBeenCalled();
+  });
+
+  it("should continue processing if one message fails", async () => {
+    // First findSimilar call rejects (simulated DB failure), later calls succeed.
+    // Embedding is not the failure point here: embedBatch below resolves normally.
+    let findSimilarCallCount = 0;
+    const findSimilarMock = vi.fn().mockImplementation(() => {
+      findSimilarCallCount++;
+      if (findSimilarCallCount === 1) {
+        return Promise.reject(new Error("DB connection failed"));
+      }
+      return Promise.resolve([]);
+    });
+    const embedBatchMock = vi.fn().mockResolvedValue([
+      [0.1, 0.2],
+      [0.3, 0.4],
+    ]);
+    const dbWithError = createMockDb({
+      findSimilar: findSimilarMock,
+    });
+    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
+
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
+        }),
+    });
+
+    const messages = [
+      {
+        role: "user",
+        content: "First message that is long enough to pass the attention gate filter",
+      },
+      {
+        role: "user",
+        content: "Second message that is also long enough to pass the attention gate",
+      },
+    ];
+
+    // Should not throw — errors are caught per-message
+    await runAutoCapture(
+      messages,
+      "test-agent",
+      "session-1",
+      dbWithError,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    // The second message should still have been attempted
+    expect(findSimilarMock).toHaveBeenCalledTimes(2);
+  });
+
+  it("should use different thresholds for user vs assistant messages", async () => {
+    // Capture storeMemory calls so the stored source and importance can be inspected
+    const storeMemoryMock = vi.fn().mockResolvedValue(undefined);
+    const dbWithStore = createMockDb({ storeMemory: storeMemoryMock });
+    const embedBatchMock = vi.fn().mockResolvedValue([
+      [0.1, 0.2],
+      [0.3, 0.4],
+    ]);
+    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
+
+    // Always return high importance so both pass
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: JSON.stringify({ score: 9 }) } }],
+        }),
+    });
+
+    const messages = [
+      {
+        role: "user",
+        content: "I really love working with graph databases like Neo4j for my projects",
+      },
+      {
+        role: "assistant",
+        content:
+          "Graph databases like Neo4j excel at modeling connected data and relationship queries",
+      },
+    ];
+
+    await runAutoCapture(
+      messages,
+      "test-agent",
+      "session-1",
+      dbWithStore,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    // Both should be stored. Assert the count unconditionally so the checks below
+    // always run (an `if (length === 2)` guard would let the test pass vacuously).
+    const storeCalls = storeMemoryMock.mock.calls;
+    expect(storeCalls).toHaveLength(2);
+    // User message: importance * 1.0 discount
+    expect(storeCalls[0][0].source).toBe("auto-capture");
+    // Assistant message: importance * 0.75 discount
+    expect(storeCalls[1][0].source).toBe("auto-capture-assistant");
+    expect(storeCalls[1][0].importance).toBeLessThan(storeCalls[0][0].importance);
+  });
+
+  it("should log capture errors without throwing", async () => {
+    const embedBatchMock = vi.fn().mockRejectedValue(new Error("embedding service down"));
+    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
+    const db = createMockDb();
+
+    const messages = [
+      {
+        role: "user",
+        content: "A long enough message to pass the attention gate for testing purposes",
+      },
+    ];
+
+    // Should not throw
+    await runAutoCapture(
+      messages,
+      "test-agent",
+      "session-1",
+      db,
+      embeddings,
+      enabledConfig,
+      mockLogger,
+    );
+
+    // Should have logged the error
+    expect(mockLogger.warn).toHaveBeenCalled();
+  });
+});
diff --git a/extensions/memory-neo4j/cli.ts b/extensions/memory-neo4j/cli.ts
new file mode 100644
index 00000000000..e6271beea90
--- /dev/null
+++ b/extensions/memory-neo4j/cli.ts
@@ -0,0 +1,514 @@
+/**
+ * CLI command registration for memory-neo4j.
+ *
+ * Registers the `openclaw memory neo4j` subcommand group with commands:
+ * - list: List memory counts by agent and category
+ * - search: Search memories via hybrid search
+ * - stats: Show memory statistics and configuration
+ * - sleep: Run sleep cycle (seven-phase memory consolidation)
+ * - promote: Manually promote a memory to core
+ * - index: Re-embed all memories after changing embedding model
+ * - cleanup: Retroactively apply attention gate to stored memories
+ */
+
+import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
+import type { ExtractionConfig, MemoryNeo4jConfig } from "./config.js";
+import type { Embeddings } from "./embeddings.js";
+import type { Neo4jMemoryClient } from "./neo4j-client.js";
+import { passesAttentionGate } from "./attention-gate.js";
+import { stripMessageWrappers } from "./message-utils.js";
+import { hybridSearch } from "./search.js";
+import { runSleepCycle } from "./sleep-cycle.js";
+
+export type CliDeps = {
+  db: Neo4jMemoryClient;
+  embeddings: Embeddings;
+  cfg: MemoryNeo4jConfig;
+  extractionConfig: ExtractionConfig;
+  vectorDim: number;
+};
+
+/**
+ * Register the `openclaw memory neo4j` CLI subcommand group.
+ */
+export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
+  const { db, embeddings, cfg, extractionConfig, vectorDim } = deps;
+
+  api.registerCli(
+    ({ program }) => {
+      // Find existing memory command or create fallback
+      let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory");
+      if (!memoryCmd) {
+        // Fallback if core memory CLI not registered yet
+        memoryCmd = program.command("memory").description("Memory commands");
+      }
+
+      // Add neo4j memory subcommand group
+      const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands");
+
+      memory
+        .command("list")
+        .description("List memory counts by agent and category")
+        .option("--json", "Output as JSON")
+        .action(async (opts: { json?: boolean }) => {
+          try {
+            await db.ensureInitialized();
+            const stats = await db.getMemoryStats();
+
+            if (opts.json) {
+              console.log(JSON.stringify(stats, null, 2));
+              return;
+            }
+
+            if (stats.length === 0) {
+              console.log("No memories stored.");
+              return;
+            }
+
+            // Group by agentId
+            const byAgent = new Map<
+              string,
+              Array<{ category: string; count: number; avgImportance: number }>
+            >();
+            for (const row of stats) {
+              const list = byAgent.get(row.agentId) || [];
+              list.push({
+                category: row.category,
+                count: row.count,
+                avgImportance: row.avgImportance,
+              });
+              byAgent.set(row.agentId, list);
+            }
+
+            // Print table for each agent
+            for (const [agentId, categories] of byAgent) {
+              const total = categories.reduce((sum, c) => sum + c.count, 0);
+              console.log(`\n┌─ ${agentId} (${total} total)`);
+              console.log("│");
+              console.log("│  Category      Count   Avg Importance");
+              console.log("│  ─────────────────────────────────────");
+              for (const { category, count, avgImportance } of categories) {
+                const cat = category.padEnd(12);
+                const cnt = String(count).padStart(5);
+                const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%";
+                console.log(`│  ${cat} ${cnt}   ${imp}`);
+              }
+              console.log("└");
+            }
+            console.log("");
+          } catch (err) {
+            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+            process.exitCode = 1;
+          }
+        });
+
+      memory
+        .command("search")
+        .description("Search memories")
+        .argument("<query>", "Search query")
+        .option("--limit <n>", "Max results", "5")
+        .option("--agent <id>", "Agent id (default: default)")
+        .action(async (query: string, opts: { limit: string; agent?: string }) => {
+          try {
+            const results = await hybridSearch(
+              db,
+              embeddings,
+              query,
+              parseInt(opts.limit, 10),
+              opts.agent ?? "default",
+              extractionConfig.enabled,
+              { graphSearchDepth: cfg.graphSearchDepth },
+            );
+            const output = results.map((r) => ({
+              id: r.id,
+              text: r.text,
+              category: r.category,
+              importance: r.importance,
+              score: r.score,
+            }));
+            console.log(JSON.stringify(output, null, 2));
+          } catch (err) {
+            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+            process.exitCode = 1;
+          }
+        });
+
+      memory
+        .command("stats")
+        .description("Show memory statistics and configuration")
+        .action(async () => {
+          try {
+            await db.ensureInitialized();
+            const stats = await db.getMemoryStats();
+            const total = stats.reduce((sum, s) => sum + s.count, 0);
+
+            console.log("\nMemory (Neo4j) Statistics");
+            console.log("─────────────────────────");
+            console.log(`Total memories: ${total}`);
+            console.log(`Neo4j URI:      ${cfg.neo4j.uri}`);
+            console.log(`Embedding:      ${cfg.embedding.provider}/${cfg.embedding.model}`);
+            console.log(
+              `Extraction:     ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`,
+            );
+            console.log(`Auto-capture:   ${cfg.autoCapture ? "enabled" : "disabled"}`);
+            console.log(`Auto-recall:    ${cfg.autoRecall ? "enabled" : "disabled"}`);
+            console.log(`Core memory:    ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`);
+
+            if (stats.length > 0) {
+              // Group by category across all agents
+              const byCategory = new Map<string, number>();
+              for (const row of stats) {
+                byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count);
+              }
+              console.log("\nBy Category:");
+              for (const [category, count] of byCategory) {
+                console.log(`  ${category.padEnd(12)} ${count}`);
+              }
+
+              // Show agent count
+              const agents = new Set(stats.map((s) => s.agentId));
+              console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`);
+            }
+            console.log("");
+          } catch (err) {
+            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+            process.exitCode = 1;
+          }
+        });
+
+      memory
+        .command("sleep")
+        .description("Run sleep cycle — consolidate memories with Pareto-based promotion")
+        .option("--agent <id>", "Agent id (default: all agents)")
+        .option("--dedup-threshold <n>", "Vector similarity threshold for dedup (default: 0.95)")
+        .option("--pareto <n>", "Top N% for core memory (default: 0.2 = top 20%)")
+        .option("--promotion-min-age <days>", "Min age in days before promotion (default: 7)")
+        .option("--decay-threshold <n>", "Decay score threshold for pruning (default: 0.1)")
+        .option("--decay-half-life <days>", "Base half-life in days (default: 30)")
+        .option("--batch-size <n>", "Extraction batch size (default: 50)")
+        .option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
+        .option("--max-semantic-pairs <n>", "Max LLM-checked semantic dedup pairs (default: 500)")
+        .option("--concurrency <n>", "Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)")
+        .option(
+          "--skip-semantic",
+          "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
+        )
+        .action(
+          async (opts: {
+            agent?: string;
+            dedupThreshold?: string;
+            pareto?: string;
+            promotionMinAge?: string;
+            decayThreshold?: string;
+            decayHalfLife?: string;
+            batchSize?: string;
+            delay?: string;
+            maxSemanticPairs?: string;
+            concurrency?: string;
+            skipSemantic?: boolean;
+          }) => {
+            console.log("\n🌙 Memory Sleep Cycle");
+            console.log("═════════════════════════════════════════════════════════════");
+            console.log("Seven-phase memory consolidation (Pareto-based):\n");
+            console.log("  Phase 1:  Deduplication    — Merge near-duplicate memories");
+            console.log(
+              "  Phase 1b: Semantic Dedup   — LLM-based paraphrase detection (0.75–0.95 band)",
+            );
+            console.log("  Phase 1c: Conflict Detection — Resolve contradictory memories");
+            console.log(
+              "  Phase 2:  Pareto Scoring   — Calculate effective scores for all memories",
+            );
+            console.log("  Phase 3: Core Promotion   — Regular memories above threshold → core");
+            console.log("  Phase 4: Core Demotion    — Core memories below threshold → regular");
+            console.log("  Phase 5: Extraction       — Extract entities and categorize");
+            console.log("  Phase 6: Decay & Pruning  — Remove stale low-importance memories");
+            console.log("  Phase 7: Orphan Cleanup   — Remove disconnected nodes\n");
+
+            try {
+              // Validate sleep cycle CLI parameters before running
+              const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : undefined;
+              const delay = opts.delay ? parseInt(opts.delay, 10) : undefined;
+              const decayHalfLife = opts.decayHalfLife
+                ? parseInt(opts.decayHalfLife, 10)
+                : undefined;
+              const decayThreshold = opts.decayThreshold
+                ? parseFloat(opts.decayThreshold)
+                : undefined;
+              const pareto = opts.pareto ? parseFloat(opts.pareto) : undefined;
+              const promotionMinAge = opts.promotionMinAge
+                ? parseInt(opts.promotionMinAge, 10)
+                : undefined;
+
+              if (batchSize != null && (Number.isNaN(batchSize) || batchSize <= 0)) {
+                console.error("Error: --batch-size must be greater than 0");
+                process.exitCode = 1;
+                return;
+              }
+              if (delay != null && (Number.isNaN(delay) || delay < 0)) {
+                console.error("Error: --delay must be >= 0");
+                process.exitCode = 1;
+                return;
+              }
+              if (decayHalfLife != null && (Number.isNaN(decayHalfLife) || decayHalfLife <= 0)) {
+                console.error("Error: --decay-half-life must be greater than 0");
+                process.exitCode = 1;
+                return;
+              }
+              if (
+                decayThreshold != null &&
+                (Number.isNaN(decayThreshold) || decayThreshold < 0 || decayThreshold > 1)
+              ) {
+                console.error("Error: --decay-threshold must be between 0 and 1");
+                process.exitCode = 1;
+                return;
+              }
+              if (pareto != null && (Number.isNaN(pareto) || pareto < 0 || pareto > 1)) {
+                console.error("Error: --pareto must be between 0 and 1");
+                process.exitCode = 1;
+                return;
+              }
+              if (
+                promotionMinAge != null &&
+                (Number.isNaN(promotionMinAge) || promotionMinAge < 0)
+              ) {
+                console.error("Error: --promotion-min-age must be >= 0");
+                process.exitCode = 1;
+                return;
+              }
+
+              const maxSemanticPairs = opts.maxSemanticPairs
+                ? parseInt(opts.maxSemanticPairs, 10)
+                : undefined;
+              if (
+                maxSemanticPairs != null &&
+                (Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0)
+              ) {
+                console.error("Error: --max-semantic-pairs must be greater than 0");
+                process.exitCode = 1;
+                return;
+              }
+
+              const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
+              if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) {
+                console.error("Error: --concurrency must be greater than 0");
+                process.exitCode = 1;
+                return;
+              }
+
+              await db.ensureInitialized();
+
+              const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
+                agentId: opts.agent,
+                dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
+                skipSemanticDedup: opts.skipSemantic === true,
+                maxSemanticDedupPairs: maxSemanticPairs,
+                llmConcurrency: concurrency,
+                paretoPercentile: pareto,
+                promotionMinAgeDays: promotionMinAge,
+                decayRetentionThreshold: decayThreshold,
+                decayBaseHalfLifeDays: decayHalfLife,
+                decayCurves: Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined,
+                extractionBatchSize: batchSize,
+                extractionDelayMs: delay,
+                onPhaseStart: (phase) => {
+                  const phaseNames: Record<string, string> = {
+                    dedup: "Phase 1: Deduplication",
+                    semanticDedup: "Phase 1b: Semantic Deduplication",
+                    conflict: "Phase 1c: Conflict Detection",
+                    pareto: "Phase 2: Pareto Scoring",
+                    promotion: "Phase 3: Core Promotion",
+                    extraction: "Phase 4: Extraction",
+                    decay: "Phase 5: Decay & Pruning",
+                    cleanup: "Phase 6: Orphan Cleanup",
+                  };
+                  console.log(`\n▶ ${phaseNames[phase]}`);
+                  console.log("─────────────────────────────────────────────────────────────");
+                },
+                onProgress: (_phase, message) => {
+                  console.log(`   ${message}`);
+                },
+              });
+
+              console.log("\n═════════════════════════════════════════════════════════════");
+              console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`);
+              console.log("─────────────────────────────────────────────────────────────");
+              console.log(
+                `   Deduplication:  ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`,
+              );
+              console.log(
+                `   Conflicts:      ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
+              );
+              console.log(
+                `   Semantic Dedup: ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
+              );
+              console.log(
+                `   Pareto:         ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
+              );
+              console.log(
+                `                   Threshold: ${result.pareto.threshold.toFixed(4)} (top 20%)`,
+              );
+              console.log(
+                `   Promotion:      ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`,
+              );
+              console.log(`   Decay/Pruning:  ${result.decay.memoriesPruned} memories pruned`);
+              console.log(
+                `   Extraction:     ${result.extraction.succeeded}/${result.extraction.total} extracted` +
+                  (result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""),
+              );
+              console.log(
+                `   Cleanup:        ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
+              );
+              if (result.aborted) {
+                console.log("\n⚠️  Sleep cycle was aborted before completion.");
+              }
+              console.log("");
+            } catch (err) {
+              console.error(
+                `\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`,
+              );
+              process.exitCode = 1;
+            }
+          },
+        );
+
+      memory
+        .command("promote")
+        .description("Manually promote a memory to core status")
+        .argument("<id>", "Memory ID to promote")
+        .action(async (id: string) => {
+          try {
+            await db.ensureInitialized();
+            const promoted = await db.promoteToCore([id]);
+            if (promoted > 0) {
+              console.log(`✅ Memory ${id} promoted to core.`);
+            } else {
+              console.error(`❌ Memory ${id} not found.`);
+              process.exitCode = 1;
+            }
+          } catch (err) {
+            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+            process.exitCode = 1;
+          }
+        });
+
+      memory
+        .command("index")
+        .description(
+          "Re-embed all memories and entities — use after changing embedding model/provider",
+        )
+        .option("--batch-size <n>", "Embedding batch size (default: 50)")
+        .action(async (opts: { batchSize?: string }) => {
+          const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : 50;
+          if (Number.isNaN(batchSize) || batchSize <= 0) {
+            console.error("Error: --batch-size must be greater than 0");
+            process.exitCode = 1;
+            return;
+          }
+
+          console.log("\nMemory Neo4j — Reindex Embeddings");
+          console.log("═════════════════════════════════════════════════════════════");
+          console.log(`Model:      ${cfg.embedding.provider}/${cfg.embedding.model}`);
+          console.log(`Dimensions: ${vectorDim}`);
+          console.log(`Batch size: ${batchSize}\n`);
+
+          try {
+            const startedAt = Date.now();
+            const result = await db.reindex((texts) => embeddings.embedBatch(texts), {
+              batchSize,
+              onProgress: (phase, done, total) => {
+                if (phase === "drop-indexes" && done === 0) {
+                  console.log("▶ Dropping old vector index…");
+                } else if (phase === "memories") {
+                  console.log(`   Memories: ${done}/${total}`);
+                } else if (phase === "create-indexes" && done === 0) {
+                  console.log("▶ Recreating vector index…");
+                }
+              },
+            });
+
+            const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
+            console.log("\n═════════════════════════════════════════════════════════════");
+            console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`);
+            console.log("");
+          } catch (err) {
+            console.error(
+              `\n❌ Reindex failed: ${err instanceof Error ? err.message : String(err)}`,
+            );
+            process.exitCode = 1;
+          }
+        });
+
+      memory
+        .command("cleanup")
+        .description(
+          "Retroactively apply the attention gate — find and remove low-substance memories",
+        )
+        .option("--execute", "Actually delete (default: dry-run preview)")
+        .option("--all", "Include explicitly-stored memories (default: auto-capture only)")
+        .option("--agent <id>", "Only clean up memories for a specific agent")
+        .action(async (opts: { execute?: boolean; all?: boolean; agent?: string }) => {
+          try {
+            await db.ensureInitialized();
+
+            // Fetch memories — by default only auto-capture (explicit stores are trusted)
+            const conditions: string[] = [];
+            if (!opts.all) {
+              conditions.push("m.source = 'auto-capture'");
+            }
+            if (opts.agent) {
+              conditions.push("m.agentId = $agentId");
+            }
+            const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
+            const allMemories = await db.runQuery<{
+              id: string;
+              text: string;
+              source: string;
+            }>(
+              `MATCH (m:Memory) ${where}
+               RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source
+               ORDER BY m.createdAt ASC`,
+              opts.agent ? { agentId: opts.agent } : {},
+            );
+
+            // Strip channel metadata wrappers (same as the real pipeline) then gate
+            const noise: Array<{ id: string; text: string; source: string }> = [];
+            for (const mem of allMemories) {
+              const stripped = stripMessageWrappers(mem.text);
+              if (!passesAttentionGate(stripped)) {
+                noise.push(mem);
+              }
+            }
+
+            if (noise.length === 0) {
+              console.log("\nNo low-substance memories found. Everything passes the gate.");
+              return;
+            }
+
+            console.log(
+              `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`,
+            );
+
+            for (const mem of noise) {
+              const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text;
+              console.log(`  [${mem.source}] "${preview}"`);
+            }
+
+            if (!opts.execute) {
+              console.log(
+                `\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`,
+              );
+              return;
+            }
+
+            // Delete in batch
+            const deleted = await db.pruneMemories(noise.map((m) => m.id));
+            console.log(`\nDeleted ${deleted} low-substance memories.\n`);
+          } catch (err) {
+            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+            process.exitCode = 1;
+          }
+        });
+    },
+    { commands: [] }, // Adds subcommands to existing "memory" command, no conflict
+  );
+}
diff --git a/extensions/memory-neo4j/config.ts b/extensions/memory-neo4j/config.ts
index 50dcdc1827c..d6bb3665087 100644
--- a/extensions/memory-neo4j/config.ts
+++ b/extensions/memory-neo4j/config.ts
@@ -92,24 +92,27 @@ export const EMBEDDING_DIMENSIONS: Record<string, number> = {
 // Default dimension for unknown models (Ollama models vary)
 export const DEFAULT_EMBEDDING_DIMS = 1024;
 
-export function vectorDimsForModel(model: string): number {
-  // Check exact match first
-  if (EMBEDDING_DIMENSIONS[model]) {
-    return EMBEDDING_DIMENSIONS[model];
+/**
+ * Look up a value by exact key, falling back to the longest table key that is a prefix of it.
+ * Only the table's own properties are consulted; returns undefined if nothing matches.
+ */
+function lookupByPrefix<T>(table: Record<string, T>, key: string): T | undefined {
+  if (Object.prototype.hasOwnProperty.call(table, key) && table[key] !== undefined) {
+    return table[key];
   }
-  // Prefer longest matching prefix (e.g. "mxbai-embed-large-2k" over "mxbai-embed-large")
-  let best: { dims: number; keyLen: number } | undefined;
-  for (const [known, dims] of Object.entries(EMBEDDING_DIMENSIONS)) {
-    if (model.startsWith(known) && (!best || known.length > best.keyLen)) {
-      best = { dims, keyLen: known.length };
+  let best: { value: T; keyLen: number } | undefined;
+  for (const [known, value] of Object.entries(table)) {
+    if (key.startsWith(known) && (!best || known.length > best.keyLen)) {
+      best = { value, keyLen: known.length };
     }
   }
-  if (best) {
-    return best.dims;
-  }
+  return best?.value;
+}
+
+export function vectorDimsForModel(model: string): number {
   // Return default for unknown models — callers should warn when this path is taken,
   // as the default 1024 dimensions may not match the actual model's output.
-  return DEFAULT_EMBEDDING_DIMS;
+  return lookupByPrefix(EMBEDDING_DIMENSIONS, model) ?? DEFAULT_EMBEDDING_DIMS;
 }
 
 /** Max input token lengths for known embedding models. */
@@ -129,17 +132,7 @@ export const EMBEDDING_CONTEXT_LENGTHS: Record<string, number> = {
 export const DEFAULT_EMBEDDING_CONTEXT_LENGTH = 512;
 
 export function contextLengthForModel(model: string): number {
-  if (EMBEDDING_CONTEXT_LENGTHS[model]) {
-    return EMBEDDING_CONTEXT_LENGTHS[model];
-  }
-  // Prefer longest matching prefix (e.g. "mxbai-embed-large-8k" over "mxbai-embed-large")
-  let best: { len: number; keyLen: number } | undefined;
-  for (const [known, len] of Object.entries(EMBEDDING_CONTEXT_LENGTHS)) {
-    if (model.startsWith(known) && (!best || known.length > best.keyLen)) {
-      best = { len, keyLen: known.length };
-    }
-  }
-  return best?.len ?? DEFAULT_EMBEDDING_CONTEXT_LENGTH;
+  return lookupByPrefix(EMBEDDING_CONTEXT_LENGTHS, model) ?? DEFAULT_EMBEDDING_CONTEXT_LENGTH;
 }
 
 /**
diff --git a/extensions/memory-neo4j/embeddings.ts b/extensions/memory-neo4j/embeddings.ts
index 44b101175c9..931eb97a24d 100644
--- a/extensions/memory-neo4j/embeddings.ts
+++ b/extensions/memory-neo4j/embeddings.ts
@@ -8,15 +8,9 @@
 import { createHash } from "node:crypto";
 import OpenAI from "openai";
 import type { EmbeddingProvider } from "./config.js";
+import type { Logger } from "./schema.js";
 import { contextLengthForModel } from "./config.js";
 
-type Logger = {
-  info: (msg: string) => void;
-  warn: (msg: string) => void;
-  error: (msg: string) => void;
-  debug?: (msg: string) => void;
-};
-
 /**
  * Simple LRU cache for embedding vectors.
  * Keyed by SHA-256 hash of the input text to avoid storing large strings.
diff --git a/extensions/memory-neo4j/extractor.test.ts b/extensions/memory-neo4j/extractor.test.ts
index 7bb41012826..9b277154aea 100644
--- a/extensions/memory-neo4j/extractor.test.ts
+++ b/extensions/memory-neo4j/extractor.test.ts
@@ -8,19 +8,22 @@
 
 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import type { ExtractionConfig } from "./config.js";
+import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
 import {
-  extractUserMessages,
-  extractAssistantMessages,
-  stripAssistantWrappers,
   extractEntities,
   runBackgroundExtraction,
   rateImportance,
   resolveConflict,
   isSemanticDuplicate,
-  isTransientError,
-  runSleepCycle,
+  SEMANTIC_DEDUP_VECTOR_THRESHOLD,
 } from "./extractor.js";
-import { passesAttentionGate, passesAssistantAttentionGate } from "./index.js";
+import { isTransientError } from "./llm-client.js";
+import {
+  extractUserMessages,
+  extractAssistantMessages,
+  stripAssistantWrappers,
+} from "./message-utils.js";
+import { runSleepCycle } from "./sleep-cycle.js";
 
 // ============================================================================
 // passesAttentionGate()
@@ -1756,7 +1759,6 @@ describe("runSleepCycle", () => {
       calculateAllEffectiveScores: vi.fn().mockResolvedValue([]),
       calculateParetoThreshold: vi.fn().mockReturnValue(0.5),
       promoteToCore: vi.fn().mockResolvedValue(0),
-      demoteFromCore: vi.fn().mockResolvedValue(0),
       findDecayedMemories: vi.fn().mockResolvedValue([]),
       pruneMemories: vi.fn().mockResolvedValue(0),
       countByExtractionStatus: vi
@@ -1768,11 +1770,6 @@ describe("runSleepCycle", () => {
       findOrphanTags: vi.fn().mockResolvedValue([]),
       deleteOrphanTags: vi.fn().mockResolvedValue(0),
       updateExtractionStatus: vi.fn().mockResolvedValue(undefined),
-      mergeEntity: vi.fn().mockResolvedValue({ id: "e1", name: "test" }),
-      createMentions: vi.fn().mockResolvedValue(undefined),
-      createEntityRelationship: vi.fn().mockResolvedValue(undefined),
-      tagMemory: vi.fn().mockResolvedValue(undefined),
-      updateMemoryCategory: vi.fn().mockResolvedValue(undefined),
     };
   });
 
@@ -2252,64 +2249,7 @@ describe("runSleepCycle", () => {
     });
   });
 
-  // Phase 4: Demotion
-  describe("Phase 4: Core Demotion", () => {
-    it("should demote core memories below threshold", async () => {
-      const scores = [
-        {
-          id: "m1",
-          text: "test",
-          category: "core",
-          importance: 0.3,
-          retrievalCount: 1,
-          ageDays: 30,
-          effectiveScore: 0.3,
-        },
-        {
-          id: "m2",
-          text: "test",
-          category: "core",
-          importance: 0.9,
-          retrievalCount: 10,
-          ageDays: 5,
-          effectiveScore: 0.95,
-        },
-      ];
-      mockDb.calculateAllEffectiveScores.mockResolvedValue(scores);
-      mockDb.calculateParetoThreshold.mockReturnValue(0.7);
-      mockDb.demoteFromCore.mockResolvedValue(1);
-
-      const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger);
-
-      // m1 should be demoted (category=core, score=0.30 < 0.70)
-      expect(mockDb.demoteFromCore).toHaveBeenCalledWith(["m1"]);
-      expect(result.demotion.candidatesFound).toBe(1);
-      expect(result.demotion.demoted).toBe(1);
-    });
-
-    it("should not demote regular memories", async () => {
-      const scores = [
-        {
-          id: "m1",
-          text: "test",
-          category: "fact",
-          importance: 0.2,
-          retrievalCount: 0,
-          ageDays: 50,
-          effectiveScore: 0.1,
-        },
-      ];
-      mockDb.calculateAllEffectiveScores.mockResolvedValue(scores);
-      mockDb.calculateParetoThreshold.mockReturnValue(0.7);
-
-      const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger);
-
-      expect(result.demotion.candidatesFound).toBe(0);
-      expect(mockDb.demoteFromCore).not.toHaveBeenCalled();
-    });
-  });
-
-  // Phase 5: Extraction
+  // Phase 4: Extraction
   describe("Phase 5: Entity Extraction", () => {
     it("should process pending extractions in batches", async () => {
       mockDb.countByExtractionStatus.mockResolvedValue({
@@ -2606,7 +2546,6 @@ describe("runSleepCycle", () => {
       expect(onPhaseStart).toHaveBeenCalledWith("semanticDedup");
       expect(onPhaseStart).toHaveBeenCalledWith("pareto");
       expect(onPhaseStart).toHaveBeenCalledWith("promotion");
-      expect(onPhaseStart).toHaveBeenCalledWith("demotion");
       expect(onPhaseStart).toHaveBeenCalledWith("extraction");
       expect(onPhaseStart).toHaveBeenCalledWith("decay");
       expect(onPhaseStart).toHaveBeenCalledWith("cleanup");
@@ -2642,7 +2581,6 @@ describe("runSleepCycle", () => {
       expect(result).toHaveProperty("semanticDedup");
       expect(result).toHaveProperty("pareto");
       expect(result).toHaveProperty("promotion");
-      expect(result).toHaveProperty("demotion");
       expect(result).toHaveProperty("decay");
       expect(result).toHaveProperty("extraction");
       expect(result).toHaveProperty("cleanup");
@@ -2669,6 +2607,208 @@ describe("runSleepCycle", () => {
 // isTransientError()
 // ============================================================================
 
+// ============================================================================
+// isSemanticDuplicate
+// ============================================================================
+
+describe("isSemanticDuplicate", () => {
+  const originalFetch = globalThis.fetch;
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  const enabledConfig: ExtractionConfig = {
+    enabled: true,
+    apiKey: "test-key",
+    model: "test-model",
+    baseUrl: "https://test.ai/api/v1",
+    temperature: 0.0,
+    maxRetries: 0,
+  };
+
+  const disabledConfig: ExtractionConfig = {
+    ...enabledConfig,
+    enabled: false,
+  };
+
+  it("should return false when extraction is disabled", async () => {
+    const result = await isSemanticDuplicate("new text", "existing text", disabledConfig);
+    expect(result).toBe(false);
+  });
+
+  it("should return true when LLM says duplicate", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [
+            {
+              message: {
+                content: JSON.stringify({ verdict: "duplicate", reason: "same fact" }),
+              },
+            },
+          ],
+        }),
+    });
+
+    const result = await isSemanticDuplicate("I like Neo4j", "User prefers Neo4j", enabledConfig);
+    expect(result).toBe(true);
+  });
+
+  it("should return false when LLM says unique", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [
+            {
+              message: {
+                content: JSON.stringify({ verdict: "unique", reason: "different topic" }),
+              },
+            },
+          ],
+        }),
+    });
+
+    const result = await isSemanticDuplicate("I like coffee", "User lives in NYC", enabledConfig);
+    expect(result).toBe(false);
+  });
+
+  it("should skip LLM call when vector similarity is below threshold", async () => {
+    const fetchSpy = vi.fn();
+    globalThis.fetch = fetchSpy;
+
+    const result = await isSemanticDuplicate(
+      "text a",
+      "text b",
+      enabledConfig,
+      SEMANTIC_DEDUP_VECTOR_THRESHOLD - 0.01,
+    );
+    expect(result).toBe(false);
+    expect(fetchSpy).not.toHaveBeenCalled();
+  });
+
+  it("should call LLM when vector similarity is at or above threshold", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [
+            {
+              message: {
+                content: JSON.stringify({ verdict: "duplicate", reason: "same" }),
+              },
+            },
+          ],
+        }),
+    });
+
+    const result = await isSemanticDuplicate(
+      "text a",
+      "text b",
+      enabledConfig,
+      SEMANTIC_DEDUP_VECTOR_THRESHOLD,
+    );
+    expect(result).toBe(true);
+    expect(globalThis.fetch).toHaveBeenCalled();
+  });
+
+  it("should call LLM when no vector similarity is provided", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [
+            {
+              message: {
+                content: JSON.stringify({ verdict: "unique", reason: "different" }),
+              },
+            },
+          ],
+        }),
+    });
+
+    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
+    expect(result).toBe(false);
+    expect(globalThis.fetch).toHaveBeenCalled();
+  });
+
+  it("should return false on fetch error (fail-open)", async () => {
+    globalThis.fetch = vi
+      .fn()
+      .mockRejectedValue(new DOMException("signal timed out", "TimeoutError"));
+
+    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
+    expect(result).toBe(false);
+  });
+
+  it("should return false on invalid JSON response", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: "not valid json" } }],
+        }),
+    });
+
+    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
+    expect(result).toBe(false);
+  });
+
+  it("should return false when verdict is missing from response", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [
+            {
+              message: {
+                content: JSON.stringify({ reason: "no verdict field" }),
+              },
+            },
+          ],
+        }),
+    });
+
+    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
+    expect(result).toBe(false);
+  });
+
+  it("should return false when LLM returns null content", async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: null } }],
+        }),
+    });
+
+    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
+    expect(result).toBe(false);
+  });
+
+  it("should respect abort signal", async () => {
+    const controller = new AbortController();
+    controller.abort();
+
+    globalThis.fetch = vi.fn().mockRejectedValue(new DOMException("signal aborted", "AbortError"));
+
+    const result = await isSemanticDuplicate(
+      "text a",
+      "text b",
+      enabledConfig,
+      undefined,
+      controller.signal,
+    );
+    expect(result).toBe(false);
+  });
+});
+
+// ============================================================================
+// isTransientError
+// ============================================================================
+
 describe("isTransientError", () => {
   it("should return false for non-Error values", () => {
     expect(isTransientError("string error")).toBe(false);
diff --git a/extensions/memory-neo4j/extractor.ts b/extensions/memory-neo4j/extractor.ts
index 3b9c29d334f..c7d37f724cd 100644
--- a/extensions/memory-neo4j/extractor.ts
+++ b/extensions/memory-neo4j/extractor.ts
@@ -1,9 +1,12 @@
 /**
- * LLM-based entity extraction and sleep cycle for memory-neo4j.
+ * LLM-based entity extraction and memory operations for memory-neo4j.
  *
  * Extraction uses a configurable OpenAI-compatible LLM (OpenRouter, Ollama, etc.) to:
  * - Extract entities, relationships, and tags from stored memories
  * - Classify memories into categories (preference, fact, decision, etc.)
+ * - Rate memory importance on a 1-10 scale
+ * - Detect semantic duplicates via LLM comparison
+ * - Resolve conflicting memories
  *
  * Runs as background fire-and-forget operations with graceful degradation.
  */
@@ -12,20 +15,10 @@ import { randomUUID } from "node:crypto";
 import type { ExtractionConfig } from "./config.js";
 import type { Embeddings } from "./embeddings.js";
 import type { Neo4jMemoryClient } from "./neo4j-client.js";
-import type { EntityType, ExtractionResult, MemoryCategory } from "./schema.js";
+import type { EntityType, ExtractionResult, Logger, MemoryCategory } from "./schema.js";
+import { callOpenRouter, callOpenRouterStream, isTransientError } from "./llm-client.js";
 import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES, MEMORY_CATEGORIES } from "./schema.js";
 
-// ============================================================================
-// Types
-// ============================================================================
-
-type Logger = {
-  info: (msg: string) => void;
-  warn: (msg: string) => void;
-  error: (msg: string) => void;
-  debug?: (msg: string) => void;
-};
-
 // ============================================================================
 // Extraction Prompt
 // ============================================================================
@@ -58,161 +51,6 @@ Rules:
 - Keep entity descriptions brief (1 sentence max)
 - Category: "preference" for opinions/preferences, "fact" for factual info, "decision" for choices made, "entity" for entity-focused, "other" for miscellaneous`;
 
-// ============================================================================
-// OpenRouter API Client
-// ============================================================================
-
-// Timeout for LLM and embedding fetch calls to prevent hanging indefinitely
-const FETCH_TIMEOUT_MS = 30_000;
-
-async function callOpenRouter(
-  config: ExtractionConfig,
-  prompt: string | Array<{ role: string; content: string }>,
-  abortSignal?: AbortSignal,
-): Promise<string | null> {
-  const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt;
-
-  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
-    try {
-      // Combine the caller's abort signal with a per-request timeout
-      const signal = abortSignal
-        ? AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)])
-        : AbortSignal.timeout(FETCH_TIMEOUT_MS);
-
-      const response = await fetch(`${config.baseUrl}/chat/completions`, {
-        method: "POST",
-        headers: {
-          Authorization: `Bearer ${config.apiKey}`,
-          "Content-Type": "application/json",
-        },
-        body: JSON.stringify({
-          model: config.model,
-          messages,
-          temperature: config.temperature,
-          response_format: { type: "json_object" },
-        }),
-        signal,
-      });
-
-      if (!response.ok) {
-        const body = await response.text().catch(() => "");
-        throw new Error(`OpenRouter API error ${response.status}: ${body}`);
-      }
-
-      const data = (await response.json()) as {
-        choices?: Array<{ message?: { content?: string } }>;
-      };
-      return data.choices?.[0]?.message?.content ?? null;
-    } catch (err) {
-      if (attempt >= config.maxRetries) {
-        throw err;
-      }
-      // Exponential backoff
-      await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt)));
-    }
-  }
-  return null;
-}
-
-/**
- * Streaming variant of callOpenRouter. Uses the streaming API to receive chunks
- * incrementally, allowing earlier cancellation via abort signal and better
- * latency characteristics for long responses.
- *
- * Accumulates all chunks into a single response string since extraction
- * uses JSON mode (which requires the complete object to parse).
- */
-async function callOpenRouterStream(
-  config: ExtractionConfig,
-  prompt: string | Array<{ role: string; content: string }>,
-  abortSignal?: AbortSignal,
-): Promise<string | null> {
-  const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt;
-
-  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
-    try {
-      const signal = abortSignal
-        ? AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)])
-        : AbortSignal.timeout(FETCH_TIMEOUT_MS);
-
-      const response = await fetch(`${config.baseUrl}/chat/completions`, {
-        method: "POST",
-        headers: {
-          Authorization: `Bearer ${config.apiKey}`,
-          "Content-Type": "application/json",
-        },
-        body: JSON.stringify({
-          model: config.model,
-          messages,
-          temperature: config.temperature,
-          response_format: { type: "json_object" },
-          stream: true,
-        }),
-        signal,
-      });
-
-      if (!response.ok) {
-        const body = await response.text().catch(() => "");
-        throw new Error(`OpenRouter API error ${response.status}: ${body}`);
-      }
-
-      if (!response.body) {
-        throw new Error("No response body for streaming request");
-      }
-
-      // Read SSE stream and accumulate content chunks
-      const reader = response.body.getReader();
-      const decoder = new TextDecoder();
-      let accumulated = "";
-      let buffer = "";
-
-      for (;;) {
-        // Check abort between chunks for responsive cancellation
-        if (abortSignal?.aborted) {
-          reader.cancel().catch(() => {});
-          return null;
-        }
-
-        const { done, value } = await reader.read();
-        if (done) break;
-
-        buffer += decoder.decode(value, { stream: true });
-
-        // Parse SSE lines
-        const lines = buffer.split("\n");
-        buffer = lines.pop() ?? "";
-
-        for (const line of lines) {
-          const trimmed = line.trim();
-          if (!trimmed.startsWith("data: ")) continue;
-          const data = trimmed.slice(6);
-          if (data === "[DONE]") continue;
-
-          try {
-            const parsed = JSON.parse(data) as {
-              choices?: Array<{ delta?: { content?: string } }>;
-            };
-            const chunk = parsed.choices?.[0]?.delta?.content;
-            if (chunk) {
-              accumulated += chunk;
-            }
-          } catch {
-            // Skip malformed SSE chunks
-          }
-        }
-      }
-
-      return accumulated || null;
-    } catch (err) {
-      if (attempt >= config.maxRetries) {
-        throw err;
-      }
-      await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt)));
-    }
-  }
-  return null;
-}
-
 // ============================================================================
 // Entity Extraction
 // ============================================================================
@@ -227,32 +65,6 @@ async function callOpenRouterStream(
  */
 const MAX_EXTRACTION_RETRIES = 3;
 
-/**
- * Check if an error is transient (network/timeout) vs permanent (JSON parse, etc.)
- */
-export function isTransientError(err: unknown): boolean {
-  if (!(err instanceof Error)) {
-    return false;
-  }
-  const msg = err.message.toLowerCase();
-  return (
-    err.name === "AbortError" ||
-    err.name === "TimeoutError" ||
-    msg.includes("timeout") ||
-    msg.includes("econnrefused") ||
-    msg.includes("econnreset") ||
-    msg.includes("etimedout") ||
-    msg.includes("enotfound") ||
-    msg.includes("network") ||
-    msg.includes("fetch failed") ||
-    msg.includes("socket hang up") ||
-    msg.includes("api error 429") ||
-    msg.includes("api error 502") ||
-    msg.includes("api error 503") ||
-    msg.includes("api error 504")
-  );
-}
-
 /**
  * Extract entities and relationships from a memory text using LLM.
  *
@@ -526,712 +338,6 @@ export async function runBackgroundExtraction(
   }
 }
 
-// ============================================================================
-// Sleep Cycle - Seven Phase Memory Consolidation
-// ============================================================================
-
-/**
- * Sleep Cycle Result - aggregated stats from all phases.
- */
-export type SleepCycleResult = {
-  // Phase 1: Deduplication
-  dedup: {
-    clustersFound: number;
-    memoriesMerged: number;
-  };
-  // Phase 1b: Conflict Detection
-  conflict: {
-    pairsFound: number;
-    resolved: number;
-    invalidated: number;
-  };
-  // Phase 1c: Semantic Deduplication
-  semanticDedup: {
-    pairsChecked: number;
-    duplicatesMerged: number;
-  };
-  // Phase 2: Pareto Scoring & Threshold
-  pareto: {
-    totalMemories: number;
-    coreMemories: number;
-    regularMemories: number;
-    threshold: number; // The 80th percentile effective score
-  };
-  // Phase 3: Core Promotion
-  promotion: {
-    candidatesFound: number;
-    promoted: number;
-  };
-  // Phase 4: Core Demotion
-  demotion: {
-    candidatesFound: number;
-    demoted: number;
-  };
-  // Phase 6: Decay & Pruning
-  decay: {
-    memoriesPruned: number;
-  };
-  // Phase 5: Entity Extraction
-  extraction: {
-    total: number;
-    processed: number;
-    succeeded: number;
-    failed: number;
-  };
-  // Phase 7: Orphan Cleanup
-  cleanup: {
-    entitiesRemoved: number;
-    tagsRemoved: number;
-  };
-  // Overall
-  durationMs: number;
-  aborted: boolean;
-};
-
-export type SleepCycleOptions = {
-  // Common
-  agentId?: string;
-  abortSignal?: AbortSignal;
-
-  // Phase 1: Deduplication
-  dedupThreshold?: number; // Vector similarity threshold (default: 0.95)
-  skipSemanticDedup?: boolean; // Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)
-
-  // Phase 2-4: Pareto-based Promotion/Demotion
-  paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%)
-  promotionMinAgeDays?: number; // Min age before promotion (default: 7)
-
-  // Phase 1b: Semantic Dedup
-  maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500)
-
-  // Concurrency
-  llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL)
-
-  // Phase 5: Extraction
-  extractionBatchSize?: number; // Memories per batch (default: 50)
-  extractionDelayMs?: number; // Delay between batches (default: 1000)
-
-  // Phase 6: Decay
-  decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
-  decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
-  decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
-  decayCurves?: Record<string, { halfLifeDays: number }>; // Per-category decay curve overrides
-
-  // Progress callback
-  onPhaseStart?: (
-    phase:
-      | "dedup"
-      | "conflict"
-      | "semanticDedup"
-      | "pareto"
-      | "promotion"
-      | "demotion"
-      | "decay"
-      | "extraction"
-      | "cleanup",
-  ) => void;
-  onProgress?: (phase: string, message: string) => void;
-};
-
-/**
- * Run the full sleep cycle - seven phases of memory consolidation.
- *
- * This implements a Pareto-based memory ecosystem where core memory
- * is bounded to the top 20% of memories by effective score.
- *
- * Phases:
- * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
- * 2. PARETO SCORING - Calculate effective scores for all memories
- * 3. CORE PROMOTION - Regular memories above threshold → core
- * 4. CORE DEMOTION - Core memories below threshold → regular
- * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
- * 6. EXTRACTION - Form entity relationships (strengthen connections)
- * 7. CLEANUP - Remove orphaned entities/tags (garbage collection)
- *
- * Effective Score Formulas:
- * - Regular memories: importance × freq_boost × recency
- * - Core memories: importance × freq_boost × recency (same for threshold comparison)
- * - Core memory retrieval ranking: freq_boost × recency (pure usage-based)
- *
- * Where:
- * - freq_boost = 1 + log(1 + retrievalCount) × 0.3
- * - recency = 2^(-days_since_last / 14)
- *
- * Benefits:
- * - Self-regulating core memory size (Pareto distribution)
- * - Memories can be promoted AND demoted based on usage
- * - Simulates human memory consolidation during sleep
- *
- * Research basis:
- * - Pareto principle (20/80 rule) for memory tiering
- * - ACT-R memory model for retrieval-based importance
- * - Ebbinghaus forgetting curve for decay
- * - MemGPT/Letta for tiered memory architecture
- */
-export async function runSleepCycle(
-  db: Neo4jMemoryClient,
-  embeddings: Embeddings,
-  config: ExtractionConfig,
-  logger: Logger,
-  options: SleepCycleOptions = {},
-): Promise<SleepCycleResult> {
-  const startTime = Date.now();
-  const {
-    agentId,
-    abortSignal,
-    dedupThreshold = 0.95,
-    skipSemanticDedup = false,
-    maxSemanticDedupPairs = 500,
-    llmConcurrency = 8,
-    paretoPercentile = 0.2,
-    promotionMinAgeDays = 7,
-    decayRetentionThreshold = 0.1,
-    decayBaseHalfLifeDays = 30,
-    decayImportanceMultiplier = 2,
-    decayCurves,
-    extractionBatchSize = 50,
-    extractionDelayMs = 1000,
-    onPhaseStart,
-    onProgress,
-  } = options;
-
-  const result: SleepCycleResult = {
-    dedup: { clustersFound: 0, memoriesMerged: 0 },
-    conflict: { pairsFound: 0, resolved: 0, invalidated: 0 },
-    semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 },
-    pareto: { totalMemories: 0, coreMemories: 0, regularMemories: 0, threshold: 0 },
-    promotion: { candidatesFound: 0, promoted: 0 },
-    demotion: { candidatesFound: 0, demoted: 0 },
-    decay: { memoriesPruned: 0 },
-    extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
-    cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
-    durationMs: 0,
-    aborted: false,
-  };
-
-  const LLM_CONCURRENCY = llmConcurrency;
-
-  // --------------------------------------------------------------------------
-  // Phase 1: Deduplication (Optimized - combined vector + semantic dedup)
-  // Call findDuplicateClusters ONCE at 0.75 threshold, then split by similarity band:
-  // - ≥0.95: vector merge (high-confidence duplicates)
-  // - 0.75-0.95: semantic dedup via LLM (paraphrases)
-  // --------------------------------------------------------------------------
-  if (!abortSignal?.aborted) {
-    onPhaseStart?.("dedup");
-    logger.info("memory-neo4j: [sleep] Phase 1: Deduplication (vector + semantic)");
-
-    try {
-      // Fetch clusters at 0.75 threshold with similarity scores
-      const allClusters = await db.findDuplicateClusters(0.75, agentId, true);
-
-      // Helper to create canonical pair key (sorted)
-      const makePairKey = (a: string, b: string): string => {
-        return a < b ? `${a}:${b}` : `${b}:${a}`;
-      };
-
-      // Separate clusters into high-similarity (≥0.95) and medium-similarity (0.75-0.95)
-      const highSimClusters: typeof allClusters = [];
-      const mediumSimClusters: typeof allClusters = [];
-
-      for (const cluster of allClusters) {
-        if (abortSignal?.aborted) break;
-        if (!cluster.similarities || cluster.memoryIds.length < 2) continue;
-
-        // Check if ANY pair in this cluster has similarity ≥ dedupThreshold
-        let hasHighSim = false;
-        for (const [pairKey, score] of cluster.similarities.entries()) {
-          if (score >= dedupThreshold) {
-            hasHighSim = true;
-            break;
-          }
-        }
-
-        if (hasHighSim) {
-          // Split this cluster into high-sim and medium-sim sub-clusters
-          // For simplicity, if a cluster has ANY high-sim pair, treat the whole cluster as high-sim
-          // (This matches the old behavior where Phase 1 would merge them all)
-          highSimClusters.push(cluster);
-        } else {
-          mediumSimClusters.push(cluster);
-        }
-      }
-
-      // Part 1a: Vector merge for high-similarity clusters (≥0.95)
-      result.dedup.clustersFound = highSimClusters.length;
-
-      for (const cluster of highSimClusters) {
-        if (abortSignal?.aborted) break;
-
-        const { deletedCount } = await db.mergeMemoryCluster(
-          cluster.memoryIds,
-          cluster.importances,
-        );
-        result.dedup.memoriesMerged += deletedCount;
-        onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} → 1 (vector)`);
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 1a (vector) complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`,
-      );
-
-      // Part 1b: Semantic dedup for medium-similarity clusters (0.75-0.95)
-      if (skipSemanticDedup) {
-        onPhaseStart?.("semanticDedup");
-        logger.info("memory-neo4j: [sleep] Phase 1b: Skipped (--skip-semantic)");
-        onProgress?.("semanticDedup", "Skipped — semantic dedup disabled");
-      } else {
-        onPhaseStart?.("semanticDedup");
-        logger.info("memory-neo4j: [sleep] Phase 1b: Semantic Deduplication (0.75-0.95 band)");
-
-        // Collect all candidate pairs upfront (with pairwise similarity for pre-screening)
-        type DedupPair = {
-          textA: string;
-          textB: string;
-          idA: string;
-          idB: string;
-          importanceA: number;
-          importanceB: number;
-          similarity?: number;
-        };
-        const allPairs: DedupPair[] = [];
-
-        for (const cluster of mediumSimClusters) {
-          if (cluster.memoryIds.length < 2) continue;
-          for (let i = 0; i < cluster.memoryIds.length - 1; i++) {
-            for (let j = i + 1; j < cluster.memoryIds.length; j++) {
-              const pairKey = makePairKey(cluster.memoryIds[i], cluster.memoryIds[j]);
-              allPairs.push({
-                textA: cluster.texts[i],
-                textB: cluster.texts[j],
-                idA: cluster.memoryIds[i],
-                idB: cluster.memoryIds[j],
-                importanceA: cluster.importances[i],
-                importanceB: cluster.importances[j],
-                similarity: cluster.similarities?.get(pairKey),
-              });
-            }
-          }
-        }
-
-        // Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
-        // Sort by similarity descending so higher-similarity pairs (more likely
-        // to be duplicates) are checked first.
-        if (allPairs.length > maxSemanticDedupPairs) {
-          allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
-          const skipped = allPairs.length - maxSemanticDedupPairs;
-          allPairs.length = maxSemanticDedupPairs;
-          onProgress?.(
-            "semanticDedup",
-            `Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`,
-          );
-          logger.info(
-            `memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`,
-          );
-        }
-
-        // Process pairs in concurrent batches
-        const invalidatedIds = new Set<string>();
-
-        for (let i = 0; i < allPairs.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) {
-          const batch = allPairs.slice(i, i + LLM_CONCURRENCY);
-
-          // Filter out pairs where one side was already invalidated
-          const activeBatch = batch.filter(
-            (p) => !invalidatedIds.has(p.idA) && !invalidatedIds.has(p.idB),
-          );
-
-          if (activeBatch.length === 0) continue;
-
-          const outcomes = await Promise.allSettled(
-            activeBatch.map((p) =>
-              isSemanticDuplicate(p.textA, p.textB, config, p.similarity, abortSignal),
-            ),
-          );
-
-          for (let k = 0; k < outcomes.length; k++) {
-            const pair = activeBatch[k];
-            result.semanticDedup.pairsChecked++;
-
-            if (
-              outcomes[k].status === "fulfilled" &&
-              (outcomes[k] as PromiseFulfilledResult<boolean>).value
-            ) {
-              // Skip if either side was invalidated by an earlier result in this batch
-              if (invalidatedIds.has(pair.idA) || invalidatedIds.has(pair.idB)) continue;
-
-              const keepId = pair.importanceA >= pair.importanceB ? pair.idA : pair.idB;
-              const removeId = keepId === pair.idA ? pair.idB : pair.idA;
-              const keepText = keepId === pair.idA ? pair.textA : pair.textB;
-              const removeText = removeId === pair.idA ? pair.textA : pair.textB;
-
-              await db.invalidateMemory(removeId);
-              invalidatedIds.add(removeId);
-              result.semanticDedup.duplicatesMerged++;
-
-              onProgress?.(
-                "semanticDedup",
-                `Merged: "${removeText.slice(0, 50)}..." → kept "${keepText.slice(0, 50)}..."`,
-              );
-            }
-          }
-        }
-
-        logger.info(
-          `memory-neo4j: [sleep] Phase 1b (semantic) complete — ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
-        );
-      } // close skipSemanticDedup else
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`);
-    }
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 1c: Conflict Detection (formerly Phase 1b)
-  // --------------------------------------------------------------------------
-  if (!abortSignal?.aborted && !skipSemanticDedup) {
-    onPhaseStart?.("conflict");
-    logger.info("memory-neo4j: [sleep] Phase 1c: Conflict Detection");
-
-    try {
-      const pairs = await db.findConflictingMemories(agentId);
-      result.conflict.pairsFound = pairs.length;
-
-      // Process conflict pairs in parallel chunks of LLM_CONCURRENCY
-      for (let i = 0; i < pairs.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) {
-        const chunk = pairs.slice(i, i + LLM_CONCURRENCY);
-        const outcomes = await Promise.allSettled(
-          chunk.map((pair) =>
-            resolveConflict(pair.memoryA.text, pair.memoryB.text, config, abortSignal),
-          ),
-        );
-
-        for (let k = 0; k < outcomes.length; k++) {
-          if (abortSignal?.aborted) break;
-          const pair = chunk[k];
-          const outcome = outcomes[k];
-          if (outcome.status !== "fulfilled") continue;
-
-          const decision = outcome.value;
-          if (decision === "a") {
-            await db.invalidateMemory(pair.memoryB.id);
-            result.conflict.invalidated++;
-            result.conflict.resolved++;
-            onProgress?.(
-              "conflict",
-              `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`,
-            );
-          } else if (decision === "b") {
-            await db.invalidateMemory(pair.memoryA.id);
-            result.conflict.invalidated++;
-            result.conflict.resolved++;
-            onProgress?.(
-              "conflict",
-              `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`,
-            );
-          } else if (decision === "both") {
-            result.conflict.resolved++;
-            onProgress?.("conflict", `Kept both: no real conflict`);
-          }
-          // "skip" = LLM unavailable, don't count as resolved
-        }
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 1c complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 1c error: ${String(err)}`);
-    }
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 2: Pareto Scoring & Threshold Calculation
-  // --------------------------------------------------------------------------
-  let paretoThreshold = 0;
-  let allScores: Awaited<ReturnType<typeof db.calculateAllEffectiveScores>> = [];
-  if (!abortSignal?.aborted) {
-    onPhaseStart?.("pareto");
-    logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring");
-
-    try {
-      allScores = await db.calculateAllEffectiveScores(agentId);
-      result.pareto.totalMemories = allScores.length;
-      result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length;
-      result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length;
-
-      // Calculate the threshold for top N% (default: top 20%)
-      paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile);
-      result.pareto.threshold = paretoThreshold;
-
-      onProgress?.(
-        "pareto",
-        `Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
-      );
-      onProgress?.(
-        "pareto",
-        `Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`,
-      );
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`);
-    }
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 3: Core Promotion (using pre-computed scores from Phase 2)
-  //
-  // Design note on staleness: The effective scores and Pareto threshold were
-  // computed in Phase 2 and may be slightly stale by the time Phases 3/4 run.
-  // This is acceptable because: (a) the sleep cycle is a background maintenance
-  // task that runs infrequently (not concurrent with itself), (b) the scoring
-  // formula is deterministic based on stored properties that change slowly, and
-  // (c) promotion/demotion are reversible in the next cycle. The alternative
-  // (re-querying scores per phase) adds latency without meaningful accuracy gain.
-  // --------------------------------------------------------------------------
-  if (!abortSignal?.aborted && paretoThreshold > 0) {
-    onPhaseStart?.("promotion");
-    logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion");
-
-    try {
-      const candidates = allScores.filter(
-        (s) =>
-          s.category !== "core" &&
-          s.effectiveScore >= paretoThreshold &&
-          s.ageDays >= promotionMinAgeDays,
-      );
-      result.promotion.candidatesFound = candidates.length;
-
-      if (candidates.length > 0) {
-        const ids = candidates.map((m) => m.id);
-        result.promotion.promoted = await db.promoteToCore(ids);
-        for (const c of candidates) {
-          onProgress?.(
-            "promotion",
-            `Promoted "${c.text.slice(0, 40)}..." (score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`,
-          );
-        }
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 3 complete — ${result.promotion.promoted} memories promoted to core`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`);
-    }
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 4: Core Demotion (using pre-computed scores from Phase 2)
-  // --------------------------------------------------------------------------
-  if (!abortSignal?.aborted && paretoThreshold > 0) {
-    onPhaseStart?.("demotion");
-    logger.info("memory-neo4j: [sleep] Phase 4: Core Demotion");
-
-    try {
-      const candidates = allScores.filter(
-        (s) => s.category === "core" && s.effectiveScore < paretoThreshold,
-      );
-      result.demotion.candidatesFound = candidates.length;
-
-      if (candidates.length > 0) {
-        const ids = candidates.map((m) => m.id);
-        result.demotion.demoted = await db.demoteFromCore(ids);
-        for (const c of candidates) {
-          onProgress?.(
-            "demotion",
-            `Demoted "${c.text.slice(0, 40)}..." (score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`,
-          );
-        }
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 4 complete — ${result.demotion.demoted} memories demoted from core`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`);
-    }
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 5: Entity Extraction (moved before decay so new memories get
-  // extracted before pruning can remove them)
-  // --------------------------------------------------------------------------
-  // Extraction uses LLM_CONCURRENCY (defined above, matches OLLAMA_NUM_PARALLEL)
-  if (!abortSignal?.aborted && config.enabled) {
-    onPhaseStart?.("extraction");
-    logger.info("memory-neo4j: [sleep] Phase 5: Entity Extraction");
-
-    try {
-      // Get initial count
-      const counts = await db.countByExtractionStatus(agentId);
-      result.extraction.total = counts.pending;
-
-      if (result.extraction.total > 0) {
-        let hasMore = true;
-        while (hasMore && !abortSignal?.aborted) {
-          const pending = await db.listPendingExtractions(extractionBatchSize, agentId);
-
-          if (pending.length === 0) {
-            hasMore = false;
-            break;
-          }
-
-          // Process in parallel chunks of LLM_CONCURRENCY
-          for (let i = 0; i < pending.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) {
-            const chunk = pending.slice(i, i + LLM_CONCURRENCY);
-            const outcomes = await Promise.allSettled(
-              chunk.map((memory) =>
-                runBackgroundExtraction(
-                  memory.id,
-                  memory.text,
-                  db,
-                  embeddings,
-                  config,
-                  logger,
-                  memory.extractionRetries,
-                  abortSignal,
-                ),
-              ),
-            );
-
-            for (const outcome of outcomes) {
-              result.extraction.processed++;
-              if (outcome.status === "fulfilled" && outcome.value.success) {
-                result.extraction.succeeded++;
-              } else {
-                result.extraction.failed++;
-              }
-            }
-
-            if (result.extraction.processed % 10 === 0 || i + LLM_CONCURRENCY >= pending.length) {
-              onProgress?.(
-                "extraction",
-                `${result.extraction.processed}/${result.extraction.total} processed`,
-              );
-            }
-          }
-
-          // Delay between batches (abort-aware)
-          if (hasMore && !abortSignal?.aborted) {
-            await new Promise<void>((resolve) => {
-              const timer = setTimeout(resolve, extractionDelayMs);
-              // If abort fires during delay, resolve immediately
-              abortSignal?.addEventListener(
-                "abort",
-                () => {
-                  clearTimeout(timer);
-                  resolve();
-                },
-                { once: true },
-              );
-            });
-          }
-        }
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 5 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
-    }
-  } else if (!config.enabled) {
-    logger.info("memory-neo4j: [sleep] Phase 5 skipped — extraction not enabled");
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 6: Decay & Pruning (after extraction so freshly extracted memories
-  // aren't pruned before they build entity connections)
-  // --------------------------------------------------------------------------
-  if (!abortSignal?.aborted) {
-    onPhaseStart?.("decay");
-    logger.info("memory-neo4j: [sleep] Phase 6: Decay & Pruning");
-
-    try {
-      const decayed = await db.findDecayedMemories({
-        retentionThreshold: decayRetentionThreshold,
-        baseHalfLifeDays: decayBaseHalfLifeDays,
-        importanceMultiplier: decayImportanceMultiplier,
-        decayCurves,
-        agentId,
-      });
-
-      if (decayed.length > 0) {
-        const ids = decayed.map((m) => m.id);
-        result.decay.memoriesPruned = await db.pruneMemories(ids);
-        onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 6 complete — ${result.decay.memoriesPruned} memories pruned`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
-    }
-  }
-
-  // --------------------------------------------------------------------------
-  // Phase 7: Orphan Cleanup
-  // --------------------------------------------------------------------------
-  if (!abortSignal?.aborted) {
-    onPhaseStart?.("cleanup");
-    logger.info("memory-neo4j: [sleep] Phase 7: Orphan Cleanup");
-
-    try {
-      // Clean up orphan entities
-      if (!abortSignal?.aborted) {
-        const orphanEntities = await db.findOrphanEntities();
-        if (orphanEntities.length > 0) {
-          result.cleanup.entitiesRemoved = await db.deleteOrphanEntities(
-            orphanEntities.map((e) => e.id),
-          );
-          onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`);
-        }
-      }
-
-      // Clean up orphan tags
-      if (!abortSignal?.aborted) {
-        const orphanTags = await db.findOrphanTags();
-        if (orphanTags.length > 0) {
-          result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id));
-          onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`);
-        }
-      }
-
-      logger.info(
-        `memory-neo4j: [sleep] Phase 7 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
-      );
-    } catch (err) {
-      logger.warn(`memory-neo4j: [sleep] Phase 7 error: ${String(err)}`);
-    }
-  }
-
-  result.durationMs = Date.now() - startTime;
-  result.aborted = abortSignal?.aborted ?? false;
-
-  logger.info(
-    `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` +
-      (result.aborted ? " (aborted)" : ""),
-  );
-
-  return result;
-}
-
-// ============================================================================
-// Message Extraction (re-exported from message-utils.ts)
-// ============================================================================
-
-export {
-  extractUserMessages,
-  extractAssistantMessages,
-  stripMessageWrappers,
-  stripAssistantWrappers,
-} from "./message-utils.js";
-
 // ============================================================================
 // LLM-Judged Importance Rating
 // ============================================================================
diff --git a/extensions/memory-neo4j/index.ts b/extensions/memory-neo4j/index.ts
index 5d533a068b9..c89c2e084db 100644
--- a/extensions/memory-neo4j/index.ts
+++ b/extensions/memory-neo4j/index.ts
@@ -16,8 +16,9 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
 import { Type } from "@sinclair/typebox";
 import { randomUUID } from "node:crypto";
 import { stringEnum } from "openclaw/plugin-sdk";
-import type { MemoryCategory, MemorySource } from "./schema.js";
+import type { Logger, MemoryCategory, MemorySource } from "./schema.js";
 import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
+import { registerCli } from "./cli.js";
 import {
   DEFAULT_EMBEDDING_DIMS,
   EMBEDDING_DIMENSIONS,
@@ -27,14 +28,8 @@ import {
   vectorDimsForModel,
 } from "./config.js";
 import { Embeddings } from "./embeddings.js";
-import {
-  extractUserMessages,
-  extractAssistantMessages,
-  stripMessageWrappers,
-  runSleepCycle,
-  isSemanticDuplicate,
-  rateImportance,
-} from "./extractor.js";
+import { isSemanticDuplicate, rateImportance } from "./extractor.js";
+import { extractUserMessages, extractAssistantMessages } from "./message-utils.js";
 import { Neo4jMemoryClient } from "./neo4j-client.js";
 import { hybridSearch } from "./search.js";
 
@@ -127,7 +122,7 @@ const memoryNeo4jPlugin = {
               limit,
               agentId,
               extractionConfig.enabled,
-              { graphSearchDepth: cfg.graphSearchDepth },
+              { graphSearchDepth: cfg.graphSearchDepth, logger: api.logger },
             );
 
             if (results.length === 0) {
@@ -216,17 +211,21 @@ const memoryNeo4jPlugin = {
             }
 
             // 3. Store memory immediately (fast path)
+            // User-stored core memories get pinned: importance locked at 1.0,
+            // immune from decay, scoring recalculation, and pruning.
+            const isUserPinnedCore = category === "core";
             const memoryId = randomUUID();
             await db.storeMemory({
               id: memoryId,
               text,
               embedding: vector,
-              importance: Math.min(1, Math.max(0, importance)),
+              importance: isUserPinnedCore ? 1.0 : Math.min(1, Math.max(0, importance)),
               category,
               source: "user" as MemorySource,
               extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
               agentId,
               sessionKey,
+              userPinned: isUserPinnedCore,
             });
 
             // 4. Extraction is deferred to sleep cycle (like human memory consolidation)
@@ -352,492 +351,10 @@ const memoryNeo4jPlugin = {
     );
 
     // ========================================================================
-    // CLI Commands
+    // CLI Commands (delegated to cli.ts)
     // ========================================================================
 
-    api.registerCli(
-      ({ program }) => {
-        // Find existing memory command or create fallback
-        let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory");
-        if (!memoryCmd) {
-          // Fallback if core memory CLI not registered yet
-          memoryCmd = program.command("memory").description("Memory commands");
-        }
-
-        // Add neo4j memory subcommand group
-        const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands");
-
-        memory
-          .command("list")
-          .description("List memory counts by agent and category")
-          .option("--json", "Output as JSON")
-          .action(async (opts: { json?: boolean }) => {
-            try {
-              await db.ensureInitialized();
-              const stats = await db.getMemoryStats();
-
-              if (opts.json) {
-                console.log(JSON.stringify(stats, null, 2));
-                return;
-              }
-
-              if (stats.length === 0) {
-                console.log("No memories stored.");
-                return;
-              }
-
-              // Group by agentId
-              const byAgent = new Map<
-                string,
-                Array<{ category: string; count: number; avgImportance: number }>
-              >();
-              for (const row of stats) {
-                const list = byAgent.get(row.agentId) || [];
-                list.push({
-                  category: row.category,
-                  count: row.count,
-                  avgImportance: row.avgImportance,
-                });
-                byAgent.set(row.agentId, list);
-              }
-
-              // Print table for each agent
-              for (const [agentId, categories] of byAgent) {
-                const total = categories.reduce((sum, c) => sum + c.count, 0);
-                console.log(`\n┌─ ${agentId} (${total} total)`);
-                console.log("│");
-                console.log("│  Category      Count   Avg Importance");
-                console.log("│  ─────────────────────────────────────");
-                for (const { category, count, avgImportance } of categories) {
-                  const cat = category.padEnd(12);
-                  const cnt = String(count).padStart(5);
-                  const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%";
-                  console.log(`│  ${cat} ${cnt}   ${imp}`);
-                }
-                console.log("└");
-              }
-              console.log("");
-            } catch (err) {
-              console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
-              process.exitCode = 1;
-            }
-          });
-
-        memory
-          .command("search")
-          .description("Search memories")
-          .argument("<query>", "Search query")
-          .option("--limit <n>", "Max results", "5")
-          .option("--agent <id>", "Agent id (default: default)")
-          .action(async (query: string, opts: { limit: string; agent?: string }) => {
-            try {
-              const results = await hybridSearch(
-                db,
-                embeddings,
-                query,
-                parseInt(opts.limit, 10),
-                opts.agent ?? "default",
-                extractionConfig.enabled,
-                { graphSearchDepth: cfg.graphSearchDepth },
-              );
-              const output = results.map((r) => ({
-                id: r.id,
-                text: r.text,
-                category: r.category,
-                importance: r.importance,
-                score: r.score,
-              }));
-              console.log(JSON.stringify(output, null, 2));
-            } catch (err) {
-              console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
-              process.exitCode = 1;
-            }
-          });
-
-        memory
-          .command("stats")
-          .description("Show memory statistics and configuration")
-          .action(async () => {
-            try {
-              await db.ensureInitialized();
-              const stats = await db.getMemoryStats();
-              const total = stats.reduce((sum, s) => sum + s.count, 0);
-
-              console.log("\nMemory (Neo4j) Statistics");
-              console.log("─────────────────────────");
-              console.log(`Total memories: ${total}`);
-              console.log(`Neo4j URI:      ${cfg.neo4j.uri}`);
-              console.log(`Embedding:      ${cfg.embedding.provider}/${cfg.embedding.model}`);
-              console.log(
-                `Extraction:     ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`,
-              );
-              console.log(`Auto-capture:   ${cfg.autoCapture ? "enabled" : "disabled"}`);
-              console.log(`Auto-recall:    ${cfg.autoRecall ? "enabled" : "disabled"}`);
-              console.log(`Core memory:    ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`);
-
-              if (stats.length > 0) {
-                // Group by category across all agents
-                const byCategory = new Map<string, number>();
-                for (const row of stats) {
-                  byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count);
-                }
-                console.log("\nBy Category:");
-                for (const [category, count] of byCategory) {
-                  console.log(`  ${category.padEnd(12)} ${count}`);
-                }
-
-                // Show agent count
-                const agents = new Set(stats.map((s) => s.agentId));
-                console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`);
-              }
-              console.log("");
-            } catch (err) {
-              console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
-              process.exitCode = 1;
-            }
-          });
-
-        memory
-          .command("sleep")
-          .description(
-            "Run sleep cycle — consolidate memories with Pareto-based promotion/demotion",
-          )
-          .option("--agent <id>", "Agent id (default: all agents)")
-          .option("--dedup-threshold <n>", "Vector similarity threshold for dedup (default: 0.95)")
-          .option("--pareto <n>", "Top N% for core memory (default: 0.2 = top 20%)")
-          .option("--promotion-min-age <days>", "Min age in days before promotion (default: 7)")
-          .option("--decay-threshold <n>", "Decay score threshold for pruning (default: 0.1)")
-          .option("--decay-half-life <days>", "Base half-life in days (default: 30)")
-          .option("--batch-size <n>", "Extraction batch size (default: 50)")
-          .option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
-          .option("--max-semantic-pairs <n>", "Max LLM-checked semantic dedup pairs (default: 500)")
-          .option(
-            "--concurrency <n>",
-            "Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)",
-          )
-          .option(
-            "--skip-semantic",
-            "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
-          )
-          .action(
-            async (opts: {
-              agent?: string;
-              dedupThreshold?: string;
-              pareto?: string;
-              promotionMinAge?: string;
-              decayThreshold?: string;
-              decayHalfLife?: string;
-              batchSize?: string;
-              delay?: string;
-              maxSemanticPairs?: string;
-              concurrency?: string;
-              skipSemantic?: boolean;
-            }) => {
-              console.log("\n🌙 Memory Sleep Cycle");
-              console.log("═════════════════════════════════════════════════════════════");
-              console.log("Seven-phase memory consolidation (Pareto-based):\n");
-              console.log("  Phase 1:  Deduplication    — Merge near-duplicate memories");
-              console.log(
-                "  Phase 1b: Semantic Dedup   — LLM-based paraphrase detection (0.75–0.95 band)",
-              );
-              console.log("  Phase 1c: Conflict Detection — Resolve contradictory memories");
-              console.log(
-                "  Phase 2:  Pareto Scoring   — Calculate effective scores for all memories",
-              );
-              console.log("  Phase 3: Core Promotion   — Regular memories above threshold → core");
-              console.log("  Phase 4: Core Demotion    — Core memories below threshold → regular");
-              console.log("  Phase 5: Extraction       — Extract entities and categorize");
-              console.log("  Phase 6: Decay & Pruning  — Remove stale low-importance memories");
-              console.log("  Phase 7: Orphan Cleanup   — Remove disconnected nodes\n");
-
-              try {
-                // Validate sleep cycle CLI parameters before running
-                const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : undefined;
-                const delay = opts.delay ? parseInt(opts.delay, 10) : undefined;
-                const decayHalfLife = opts.decayHalfLife
-                  ? parseInt(opts.decayHalfLife, 10)
-                  : undefined;
-                const decayThreshold = opts.decayThreshold
-                  ? parseFloat(opts.decayThreshold)
-                  : undefined;
-                const pareto = opts.pareto ? parseFloat(opts.pareto) : undefined;
-                const promotionMinAge = opts.promotionMinAge
-                  ? parseInt(opts.promotionMinAge, 10)
-                  : undefined;
-
-                if (batchSize != null && (Number.isNaN(batchSize) || batchSize <= 0)) {
-                  console.error("Error: --batch-size must be greater than 0");
-                  process.exitCode = 1;
-                  return;
-                }
-                if (delay != null && (Number.isNaN(delay) || delay < 0)) {
-                  console.error("Error: --delay must be >= 0");
-                  process.exitCode = 1;
-                  return;
-                }
-                if (decayHalfLife != null && (Number.isNaN(decayHalfLife) || decayHalfLife <= 0)) {
-                  console.error("Error: --decay-half-life must be greater than 0");
-                  process.exitCode = 1;
-                  return;
-                }
-                if (
-                  decayThreshold != null &&
-                  (Number.isNaN(decayThreshold) || decayThreshold < 0 || decayThreshold > 1)
-                ) {
-                  console.error("Error: --decay-threshold must be between 0 and 1");
-                  process.exitCode = 1;
-                  return;
-                }
-                if (pareto != null && (Number.isNaN(pareto) || pareto < 0 || pareto > 1)) {
-                  console.error("Error: --pareto must be between 0 and 1");
-                  process.exitCode = 1;
-                  return;
-                }
-                if (
-                  promotionMinAge != null &&
-                  (Number.isNaN(promotionMinAge) || promotionMinAge < 0)
-                ) {
-                  console.error("Error: --promotion-min-age must be >= 0");
-                  process.exitCode = 1;
-                  return;
-                }
-
-                const maxSemanticPairs = opts.maxSemanticPairs
-                  ? parseInt(opts.maxSemanticPairs, 10)
-                  : undefined;
-                if (
-                  maxSemanticPairs != null &&
-                  (Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0)
-                ) {
-                  console.error("Error: --max-semantic-pairs must be greater than 0");
-                  process.exitCode = 1;
-                  return;
-                }
-
-                const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
-                if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) {
-                  console.error("Error: --concurrency must be greater than 0");
-                  process.exitCode = 1;
-                  return;
-                }
-
-                await db.ensureInitialized();
-
-                const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
-                  agentId: opts.agent,
-                  dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
-                  skipSemanticDedup: opts.skipSemantic === true,
-                  maxSemanticDedupPairs: maxSemanticPairs,
-                  llmConcurrency: concurrency,
-                  paretoPercentile: pareto,
-                  promotionMinAgeDays: promotionMinAge,
-                  decayRetentionThreshold: decayThreshold,
-                  decayBaseHalfLifeDays: decayHalfLife,
-                  decayCurves:
-                    Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined,
-                  extractionBatchSize: batchSize,
-                  extractionDelayMs: delay,
-                  onPhaseStart: (phase) => {
-                    const phaseNames: Record<string, string> = {
-                      dedup: "Phase 1: Deduplication",
-                      semanticDedup: "Phase 1b: Semantic Deduplication",
-                      conflict: "Phase 1c: Conflict Detection",
-                      pareto: "Phase 2: Pareto Scoring",
-                      promotion: "Phase 3: Core Promotion",
-                      demotion: "Phase 4: Core Demotion",
-                      extraction: "Phase 5: Extraction",
-                      decay: "Phase 6: Decay & Pruning",
-                      cleanup: "Phase 7: Orphan Cleanup",
-                    };
-                    console.log(`\n▶ ${phaseNames[phase]}`);
-                    console.log("─────────────────────────────────────────────────────────────");
-                  },
-                  onProgress: (_phase, message) => {
-                    console.log(`   ${message}`);
-                  },
-                });
-
-                console.log("\n═════════════════════════════════════════════════════════════");
-                console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`);
-                console.log("─────────────────────────────────────────────────────────────");
-                console.log(
-                  `   Deduplication:  ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`,
-                );
-                console.log(
-                  `   Conflicts:      ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
-                );
-                console.log(
-                  `   Semantic Dedup: ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
-                );
-                console.log(
-                  `   Pareto:         ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
-                );
-                console.log(
-                  `                   Threshold: ${result.pareto.threshold.toFixed(4)} (top 20%)`,
-                );
-                console.log(
-                  `   Promotion:      ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`,
-                );
-                console.log(
-                  `   Demotion:       ${result.demotion.demoted}/${result.demotion.candidatesFound} demoted from core`,
-                );
-                console.log(`   Decay/Pruning:  ${result.decay.memoriesPruned} memories pruned`);
-                console.log(
-                  `   Extraction:     ${result.extraction.succeeded}/${result.extraction.total} extracted` +
-                    (result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""),
-                );
-                console.log(
-                  `   Cleanup:        ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
-                );
-                if (result.aborted) {
-                  console.log("\n⚠️  Sleep cycle was aborted before completion.");
-                }
-                console.log("");
-              } catch (err) {
-                console.error(
-                  `\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`,
-                );
-                process.exitCode = 1;
-              }
-            },
-          );
-
-        memory
-          .command("promote")
-          .description("Manually promote a memory to core status")
-          .argument("<id>", "Memory ID to promote")
-          .action(async (id: string) => {
-            try {
-              await db.ensureInitialized();
-              const promoted = await db.promoteToCore([id]);
-              if (promoted > 0) {
-                console.log(`✅ Memory ${id} promoted to core.`);
-              } else {
-                console.log(`❌ Memory ${id} not found.`);
-                process.exitCode = 1;
-              }
-            } catch (err) {
-              console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
-              process.exitCode = 1;
-            }
-          });
-
-        memory
-          .command("index")
-          .description(
-            "Re-embed all memories and entities — use after changing embedding model/provider",
-          )
-          .option("--batch-size <n>", "Embedding batch size (default: 50)")
-          .action(async (opts: { batchSize?: string }) => {
-            const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : 50;
-            if (Number.isNaN(batchSize) || batchSize <= 0) {
-              console.error("Error: --batch-size must be greater than 0");
-              process.exitCode = 1;
-              return;
-            }
-
-            console.log("\nMemory Neo4j — Reindex Embeddings");
-            console.log("═════════════════════════════════════════════════════════════");
-            console.log(`Model:      ${cfg.embedding.provider}/${cfg.embedding.model}`);
-            console.log(`Dimensions: ${vectorDim}`);
-            console.log(`Batch size: ${batchSize}\n`);
-
-            try {
-              const startedAt = Date.now();
-              const result = await db.reindex((texts) => embeddings.embedBatch(texts), {
-                batchSize,
-                onProgress: (phase, done, total) => {
-                  if (phase === "drop-indexes" && done === 0) {
-                    console.log("▶ Dropping old vector index…");
-                  } else if (phase === "memories") {
-                    console.log(`   Memories: ${done}/${total}`);
-                  } else if (phase === "create-indexes" && done === 0) {
-                    console.log("▶ Recreating vector index…");
-                  }
-                },
-              });
-
-              const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
-              console.log("\n═════════════════════════════════════════════════════════════");
-              console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`);
-              console.log("");
-            } catch (err) {
-              console.error(
-                `\n❌ Reindex failed: ${err instanceof Error ? err.message : String(err)}`,
-              );
-              process.exitCode = 1;
-            }
-          });
-
-        memory
-          .command("cleanup")
-          .description(
-            "Retroactively apply the attention gate — find and remove low-substance memories",
-          )
-          .option("--execute", "Actually delete (default: dry-run preview)")
-          .option("--all", "Include explicitly-stored memories (default: auto-capture only)")
-          .option("--agent <id>", "Only clean up memories for a specific agent")
-          .action(async (opts: { execute?: boolean; all?: boolean; agent?: string }) => {
-            try {
-              await db.ensureInitialized();
-
-              // Fetch memories — by default only auto-capture (explicit stores are trusted)
-              const conditions: string[] = [];
-              if (!opts.all) {
-                conditions.push("m.source = 'auto-capture'");
-              }
-              if (opts.agent) {
-                conditions.push("m.agentId = $agentId");
-              }
-              const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
-              const allMemories = await db.runQuery<{ id: string; text: string; source: string }>(
-                `MATCH (m:Memory) ${where}
-                 RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source
-                 ORDER BY m.createdAt ASC`,
-                opts.agent ? { agentId: opts.agent } : {},
-              );
-
-              // Strip channel metadata wrappers (same as the real pipeline) then gate
-              const noise: Array<{ id: string; text: string; source: string }> = [];
-              for (const mem of allMemories) {
-                const stripped = stripMessageWrappers(mem.text);
-                if (!passesAttentionGate(stripped)) {
-                  noise.push(mem);
-                }
-              }
-
-              if (noise.length === 0) {
-                console.log("\nNo low-substance memories found. Everything passes the gate.");
-                return;
-              }
-
-              console.log(
-                `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`,
-              );
-
-              for (const mem of noise) {
-                const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text;
-                console.log(`  [${mem.source}] "${preview}"`);
-              }
-
-              if (!opts.execute) {
-                console.log(
-                  `\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`,
-                );
-                return;
-              }
-
-              // Delete in batch
-              const deleted = await db.pruneMemories(noise.map((m) => m.id));
-              console.log(`\nDeleted ${deleted} low-substance memories.\n`);
-            } catch (err) {
-              console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
-              process.exitCode = 1;
-            }
-          });
-      },
-      { commands: [] }, // Adds subcommands to existing "memory" command, no conflict
-    );
+    registerCli(api, { db, embeddings, cfg, extractionConfig, vectorDim });
 
     // ========================================================================
     // Lifecycle Hooks
@@ -952,8 +469,9 @@ const memoryNeo4jPlugin = {
         }
 
         try {
+          const t0 = performance.now();
           const maxEntries = cfg.coreMemory.maxEntries;
-          const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
+          const coreMemories = await db.listCoreForInjection(maxEntries, agentId);
 
           if (coreMemories.length === 0) {
             return;
@@ -964,8 +482,9 @@ const memoryNeo4jPlugin = {
           touchSession(sessionKey);
 
           const content = coreMemories.map((m) => `- ${m.text}`).join("\n");
+          const totalMs = performance.now() - t0;
           api.logger.info?.(
-            `memory-neo4j: mid-session core refresh at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`,
+            `memory-neo4j: [bench] core-refresh ${totalMs.toFixed(0)}ms at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`,
           );
 
           return {
@@ -1009,6 +528,7 @@ const memoryNeo4jPlugin = {
             : event.prompt;
 
         try {
+          const t0 = performance.now();
           let results = await hybridSearch(
             db,
             embeddings,
@@ -1016,8 +536,9 @@ const memoryNeo4jPlugin = {
             3,
             agentId,
             extractionConfig.enabled,
-            { graphSearchDepth: cfg.graphSearchDepth },
+            { graphSearchDepth: cfg.graphSearchDepth, logger: api.logger },
           );
+          const tSearch = performance.now();
 
           // Feature 1: Filter out low-relevance results below min RRF score
           results = results.filter((r) => r.score >= cfg.autoRecallMinScore);
@@ -1029,13 +550,17 @@ const memoryNeo4jPlugin = {
             results = results.filter((r) => !coreIds.has(r.id));
           }
 
+          const totalMs = performance.now() - t0;
+          api.logger.info?.(
+            `memory-neo4j: [bench] auto-recall ${totalMs.toFixed(0)}ms total (search=${(tSearch - t0).toFixed(0)}ms), ${results.length} results`,
+          );
+
           if (results.length === 0) {
             return;
           }
 
           const memoryContext = results.map((r) => `- [${r.category}] ${r.text}`).join("\n");
 
-          api.logger.info?.(`memory-neo4j: injecting ${results.length} memories into context`);
           api.logger.debug?.(
             `memory-neo4j: auto-recall memories: ${JSON.stringify(results.map((r) => ({ id: r.id, text: r.text.slice(0, 80), category: r.category, score: r.score })))}`,
           );
@@ -1075,23 +600,25 @@ const memoryNeo4jPlugin = {
         }
 
         try {
+          const t0 = performance.now();
           const agentId = ctx.agentId || "default";
           const maxEntries = cfg.coreMemory.maxEntries;
 
           api.logger.debug?.(
             `memory-neo4j: loading core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
           );
-          // Core memories are always included (no importance filter) - if marked as core, it's important
-          // Results are ordered by importance desc, so most important come first up to maxEntries
-          const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
+          // All user-pinned core memories are always included (no limit).
+          // Non-pinned core memories fill remaining slots up to maxEntries, ordered by importance.
+          const coreMemories = await db.listCoreForInjection(maxEntries, agentId);
+          const tQuery = performance.now();
 
           if (coreMemories.length === 0) {
             if (sessionKey) {
               bootstrappedSessions.add(sessionKey);
               touchSession(sessionKey);
             }
-            api.logger.debug?.(
-              `memory-neo4j: no core memories found for agent=${agentId}, marking session as bootstrapped`,
+            api.logger.info?.(
+              `memory-neo4j: [bench] core-inject ${(tQuery - t0).toFixed(0)}ms (0 memories, skipped)`,
             );
             return;
           }
@@ -1128,9 +655,10 @@ const memoryNeo4jPlugin = {
             coreMemoryIdsBySession.set(sessionKey, new Set(coreMemories.map((m) => m.id)));
             touchSession(sessionKey);
           }
-          // Log at info level when actually injecting, debug for skips
+
+          const totalMs = performance.now() - t0;
           api.logger.info?.(
-            `memory-neo4j: ${action} MEMORY.md with ${coreMemories.length} core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
+            `memory-neo4j: [bench] core-inject ${totalMs.toFixed(0)}ms (query=${(tQuery - t0).toFixed(0)}ms), ${action} MEMORY.md with ${coreMemories.length} memories`,
           );
 
           return { files };
@@ -1152,7 +680,7 @@ const memoryNeo4jPlugin = {
     //
     // Phase 3 — Sleep consolidation (deferred to `openclaw memory neo4j sleep`):
     //   The sleep cycle handles entity extraction, categorization, Pareto
-    //   scoring, promotion/demotion, and decay — mirroring hippocampal replay.
+    //   scoring, promotion, and decay — mirroring hippocampal replay.
     api.logger.debug?.(
       `memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`,
     );
@@ -1228,12 +756,6 @@ const memoryNeo4jPlugin = {
 // Auto-capture pipeline (fire-and-forget from agent_end hook)
 // ============================================================================
 
-type AutoCaptureLogger = {
-  info: (msg: string) => void;
-  warn: (msg: string) => void;
-  debug?: (msg: string) => void;
-};
-
 /**
  * Shared capture logic for both user and assistant messages.
  * Extracts the common embed → dedup → rate → store pipeline.
@@ -1248,7 +770,8 @@ async function captureMessage(
   db: import("./neo4j-client.js").Neo4jMemoryClient,
   embeddings: import("./embeddings.js").Embeddings,
   extractionConfig: import("./config.js").ExtractionConfig,
-  logger: AutoCaptureLogger,
+  logger: Logger,
+  precomputedVector?: number[],
 ): Promise<{ stored: boolean; semanticDeduped: boolean }> {
   // For assistant messages, rate importance first (before embedding) to skip early.
   // When extraction is disabled, rateImportance returns 0.5 (the fallback), so we
@@ -1263,11 +786,14 @@ async function captureMessage(
     }
   }
 
-  const vector = await embeddings.embed(text);
+  const vector = precomputedVector ?? (await embeddings.embed(text));
 
-  // Quick dedup (same content already stored — cosine >= 0.95)
-  const existing = await db.findSimilar(vector, 0.95, 1, agentId);
-  if (existing.length > 0) {
+  // Single vector search at lower threshold, split by score band
+  const candidates = await db.findSimilar(vector, 0.75, 3, agentId);
+
+  // Near-exact dedup: any candidate scoring >= 0.95 is treated as a duplicate
+  const exactDup = candidates.find((c) => c.score >= 0.95);
+  if (exactDup) {
     return { stored: false, semanticDeduped: false };
   }
 
@@ -1281,10 +807,9 @@ async function captureMessage(
     }
   }
 
-  // Semantic dedup: check moderate-similarity memories (0.75-0.95)
+  // Semantic dedup: remaining candidates in 0.75-0.95 band
   // Pass the vector similarity score as a pre-screen to skip LLM calls
   // for pairs below SEMANTIC_DEDUP_VECTOR_THRESHOLD.
-  const candidates = await db.findSimilar(vector, 0.75, 3, agentId);
   if (candidates.length > 0) {
     for (const candidate of candidates) {
       if (await isSemanticDuplicate(text, candidate.text, extractionConfig, candidate.score)) {
@@ -1321,9 +846,10 @@ async function runAutoCapture(
   db: import("./neo4j-client.js").Neo4jMemoryClient,
   embeddings: import("./embeddings.js").Embeddings,
   extractionConfig: import("./config.js").ExtractionConfig,
-  logger: AutoCaptureLogger,
+  logger: Logger,
 ): Promise<void> {
   try {
+    const t0 = performance.now();
     let stored = 0;
     let semanticDeduped = 0;
 
@@ -1331,19 +857,51 @@ async function runAutoCapture(
     const userMessages = extractUserMessages(messages);
     const retained = userMessages.filter((text) => passesAttentionGate(text));
 
+    // Process assistant messages
+    const assistantMessages = extractAssistantMessages(messages);
+    const retainedAssistant = assistantMessages.filter((text) =>
+      passesAssistantAttentionGate(text),
+    );
+    const tGate = performance.now();
+
+    // Collect all texts to embed in a single batch
+    const allTexts: string[] = [];
+    const allMeta: Array<{
+      text: string;
+      source: "auto-capture" | "auto-capture-assistant";
+      threshold: number;
+      discount: number;
+    }> = [];
+
     for (const text of retained) {
+      allTexts.push(text);
+      allMeta.push({ text, source: "auto-capture", threshold: 0.5, discount: 1.0 });
+    }
+    for (const text of retainedAssistant) {
+      allTexts.push(text);
+      allMeta.push({ text, source: "auto-capture-assistant", threshold: 0.8, discount: 0.75 });
+    }
+
+    // Batch embed all at once
+    const vectors = allTexts.length > 0 ? await embeddings.embedBatch(allTexts) : [];
+    const tEmbed = performance.now();
+
+    // Process each with pre-computed vector
+    for (let i = 0; i < allMeta.length; i++) {
       try {
+        const meta = allMeta[i];
         const result = await captureMessage(
-          text,
-          "auto-capture",
-          0.5,
-          1.0,
+          meta.text,
+          meta.source,
+          meta.threshold,
+          meta.discount,
           agentId,
           sessionKey,
           db,
           embeddings,
           extractionConfig,
           logger,
+          vectors[i],
         );
         if (result.stored) stored++;
         if (result.semanticDeduped) semanticDeduped++;
@@ -1351,50 +909,23 @@ async function runAutoCapture(
         logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`);
       }
     }
+    const tProcess = performance.now();
 
-    // Process assistant messages
-    const assistantMessages = extractAssistantMessages(messages);
-    const retainedAssistant = assistantMessages.filter((text) =>
-      passesAssistantAttentionGate(text),
+    const totalMs = tProcess - t0;
+    const gateMs = tGate - t0;
+    const embedMs = tEmbed - tGate;
+    const processMs = tProcess - tEmbed;
+    logger.info(
+      `memory-neo4j: [bench] auto-capture ${totalMs.toFixed(0)}ms total (gate=${gateMs.toFixed(0)}ms, embed=${embedMs.toFixed(0)}ms, process=${processMs.toFixed(0)}ms), ` +
+        `${retained.length}+${retainedAssistant.length} gated, ${stored} stored, ${semanticDeduped} deduped`,
     );
-
-    for (const text of retainedAssistant) {
-      try {
-        const result = await captureMessage(
-          text,
-          "auto-capture-assistant",
-          0.8,
-          0.75,
-          agentId,
-          sessionKey,
-          db,
-          embeddings,
-          extractionConfig,
-          logger,
-        );
-        if (result.stored) stored++;
-        if (result.semanticDeduped) semanticDeduped++;
-      } catch (err) {
-        logger.debug?.(`memory-neo4j: assistant auto-capture item failed: ${String(err)}`);
-      }
-    }
-
-    if (stored > 0 || semanticDeduped > 0) {
-      logger.info(
-        `memory-neo4j: auto-captured ${stored} memories (attention-gated)${semanticDeduped > 0 ? `, ${semanticDeduped} semantic dupes skipped` : ""}`,
-      );
-    } else if (userMessages.length > 0 || assistantMessages.length > 0) {
-      logger.info(
-        `memory-neo4j: auto-capture ran (0 stored, ${userMessages.length} user msgs, ${retained.length} passed gate, ${assistantMessages.length} assistant msgs, ${retainedAssistant.length} passed gate)`,
-      );
-    }
   } catch (err) {
     logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`);
   }
 }
 
-// Re-export attention gate for backwards compatibility (tests import from here)
-export { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
+// Export auto-capture internals for testing
+export { captureMessage as _captureMessage, runAutoCapture as _runAutoCapture };
 
 // ============================================================================
 // Export
diff --git a/extensions/memory-neo4j/llm-client.ts b/extensions/memory-neo4j/llm-client.ts
new file mode 100644
index 00000000000..3698c96ea6e
--- /dev/null
+++ b/extensions/memory-neo4j/llm-client.ts
@@ -0,0 +1,188 @@
+/**
+ * OpenRouter/OpenAI-compatible LLM API client for memory-neo4j.
+ *
+ * Handles non-streaming and streaming chat completion requests with
+ * retry logic, timeout handling, and abort signal support.
+ */
+
+import type { ExtractionConfig } from "./config.js";
+
+// Timeout for LLM and embedding fetch calls to prevent hanging indefinitely
+export const FETCH_TIMEOUT_MS = 30_000;
+
+/**
+ * Produce the signal for one request: it fires after FETCH_TIMEOUT_MS, or as soon
+ * as the caller's signal (when one is supplied) aborts — whichever happens first.
+ */
+function buildSignal(abortSignal?: AbortSignal): AbortSignal {
+  const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS);
+  return abortSignal ? AbortSignal.any([abortSignal, timeoutSignal]) : timeoutSignal;
+}
+
+/**
+ * Shared request/retry logic for OpenRouter API calls: builds the request, retries
+ * failures with exponential backoff, and rethrows a caller-initiated abort at once.
+ * The `parseFn` callback processes the Response differently for streaming vs non-streaming.
+ */
+async function openRouterRequest(
+  config: ExtractionConfig,
+  messages: Array<{ role: string; content: string }>,
+  abortSignal: AbortSignal | undefined,
+  stream: boolean,
+  parseFn: (response: Response, abortSignal?: AbortSignal) => Promise<string | null>,
+): Promise<string | null> {
+  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
+    try {
+      const signal = buildSignal(abortSignal);
+      const response = await fetch(`${config.baseUrl}/chat/completions`, {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${config.apiKey}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          model: config.model,
+          messages,
+          temperature: config.temperature,
+          response_format: { type: "json_object" },
+          ...(stream ? { stream: true } : {}),
+        }),
+        signal,
+      });
+
+      if (!response.ok) {
+        const body = await response.text().catch(() => "");
+        throw new Error(`OpenRouter API error ${response.status}: ${body}`);
+      }
+
+      return await parseFn(response, abortSignal);
+    } catch (err) {
+      // Stop when out of attempts or the caller aborted (a retry would just fail again).
+      if (attempt >= config.maxRetries || abortSignal?.aborted) {
+        throw err;
+      }
+      // Exponential backoff
+      await new Promise((resolve) => setTimeout(resolve, 500 * 2 ** attempt));
+    }
+  }
+  return null; // reached only if the loop never runs (e.g. maxRetries < 0)
+}
+
+/**
+ * Read the full JSON body of a non-streaming completion and return the first
+ * choice's message content, or null when the response carries none.
+ */
+async function parseNonStreaming(response: Response): Promise<string | null> {
+  type CompletionBody = { choices?: Array<{ message?: { content?: string } }> };
+  // The payload is untyped JSON; the optional chain tolerates absent choices/message/content.
+  const payload = (await response.json()) as CompletionBody;
+  const firstContent = payload.choices?.[0]?.message?.content;
+  return firstContent ?? null;
+}
+
+/**
+ * Parse a streaming SSE response, accumulating chunks into a single string.
+ * Returns null if the caller aborts between reads or no content arrives. The final
+ * line is parsed even when the stream ends without a trailing newline.
+ */
+async function parseStreaming(
+  response: Response,
+  abortSignal?: AbortSignal,
+): Promise<string | null> {
+  if (!response.body) {
+    throw new Error("No response body for streaming request");
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let accumulated = "";
+  let buffer = "";
+
+  // Append the delta content carried by one SSE line, if any.
+  const consumeLine = (line: string): void => {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith("data: ")) return;
+    const data = trimmed.slice(6);
+    if (data === "[DONE]") return;
+    try {
+      const parsed = JSON.parse(data) as {
+        choices?: Array<{ delta?: { content?: string } }>;
+      };
+      const chunk = parsed.choices?.[0]?.delta?.content;
+      if (chunk) accumulated += chunk;
+    } catch {
+      // Skip malformed SSE chunks
+    }
+  };
+
+  for (;;) {
+    // Check abort between chunks for responsive cancellation
+    if (abortSignal?.aborted) {
+      reader.cancel().catch(() => {});
+      return null;
+    }
+
+    const { done, value } = await reader.read();
+    // At end of stream, flush bytes still held by the decoder instead of dropping them.
+    buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
+    // Mid-stream the unterminated tail waits for the next chunk; at the end it is parsed too.
+    const lines = buffer.split("\n");
+    buffer = done ? "" : (lines.pop() ?? "");
+    lines.forEach(consumeLine);
+    if (done) break;
+  }
+
+  return accumulated || null;
+}
+
+/** Non-streaming chat completion: resolves with the reply text, or null if none is present. */
+export async function callOpenRouter(
+  config: ExtractionConfig,
+  prompt: string | Array<{ role: string; content: string }>,
+  abortSignal?: AbortSignal,
+): Promise<string | null> {
+  const chat = typeof prompt !== "string" ? prompt : [{ role: "user", content: prompt }];
+  return openRouterRequest(config, chat, abortSignal, false, parseNonStreaming);
+}
+
+/**
+ * Streaming variant of callOpenRouter. Requests a streamed response so chunks
+ * arrive incrementally and a caller abort can take effect between chunks rather
+ * than only after the full body. The chunks are still joined into one response
+ * string before returning, since extraction uses JSON mode (which requires the
+ * complete object to parse).
+ */
+export async function callOpenRouterStream(
+  config: ExtractionConfig,
+  prompt: string | Array<{ role: string; content: string }>,
+  abortSignal?: AbortSignal,
+): Promise<string | null> {
+  const chat = typeof prompt !== "string" ? prompt : [{ role: "user", content: prompt }];
+  return openRouterRequest(config, chat, abortSignal, true, parseStreaming);
+}
+
+/**
+ * Check if an error is transient (network/timeout) vs permanent (JSON parse, etc.)
+ *
+ * Non-Error values are never transient. An Error is transient when its name is
+ * "AbortError" or "TimeoutError", or when its lower-cased message contains one
+ * of the network, timeout, or HTTP 429/502/503/504 markers below.
+ */
+export function isTransientError(err: unknown): boolean {
+  if (!(err instanceof Error)) {
+    return false;
+  }
+
+  const message = err.message.toLowerCase();
+  if (err.name === "AbortError" || err.name === "TimeoutError") {
+    return true;
+  }
+  // Substrings that mark a network failure, a timeout, or a retryable HTTP status.
+  const transientMarkers = [
+    "timeout", "etimedout",
+    "econnrefused", "econnreset", "enotfound",
+    "network", "fetch failed", "socket hang up",
+    "api error 429", "api error 502", "api error 503", "api error 504",
+  ];
+  return transientMarkers.some((marker) => message.includes(marker));
+}
diff --git a/extensions/memory-neo4j/message-utils.ts b/extensions/memory-neo4j/message-utils.ts
index 8eac03368a4..7d60693c97f 100644
--- a/extensions/memory-neo4j/message-utils.ts
+++ b/extensions/memory-neo4j/message-utils.ts
@@ -8,14 +8,18 @@
  */
 
 // ============================================================================
-// User Message Extraction
+// Core Extraction
 // ============================================================================
 
 /**
- * Extract user message texts from the event.messages array.
- * Handles both string content and content block arrays.
+ * Extract text blocks from messages with a given role, apply a strip function,
+ * and filter out short results. Handles both string content and content block arrays.
  */
-export function extractUserMessages(messages: unknown[]): string[] {
+function extractMessagesByRole(
+  messages: unknown[],
+  role: string,
+  stripFn: (text: string) => string,
+): string[] {
   const texts: string[] = [];
 
   for (const msg of messages) {
@@ -24,8 +28,7 @@ export function extractUserMessages(messages: unknown[]): string[] {
     }
     const msgObj = msg as Record<string, unknown>;
 
-    // Only process user messages for auto-capture
-    if (msgObj.role !== "user") {
+    if (msgObj.role !== role) {
       continue;
     }
 
@@ -51,8 +54,18 @@ export function extractUserMessages(messages: unknown[]): string[] {
     }
   }
 
-  // Strip wrappers then filter by length
-  return texts.map(stripMessageWrappers).filter((t) => t.length >= 10);
+  return texts.map(stripFn).filter((t) => t.length >= 10);
+}
+
+// ============================================================================
+// User Message Extraction
+// ============================================================================
+
+/**
+ * Extract user message texts from the event.messages array.
+ */
+export function extractUserMessages(messages: unknown[]): string[] {
+  return extractMessagesByRole(messages, "user", stripMessageWrappers);
 }
 
 /**
@@ -84,9 +97,7 @@ export function stripMessageWrappers(text: string): string {
   s = s.replace(/---\s*Queued #\d+\s*/g, "");
   // Telegram wrapper — may now be at start after previous strips
   s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, "");
-  // "[message_id: NNN]" suffix (Telegram)
-  s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, "");
-  // "[message_id: UUID]" suffix (non-numeric Telegram/channel IDs)
+  // "[message_id: ...]" suffix (Telegram and other channel IDs)
   s = s.replace(/\n?\[message_id:\s*[^\]]+\]\s*$/i, "");
   // Slack wrapper — "[Slack <workspace> #channel @user] MESSAGE [slack message id: ...]"
   s = s.replace(/^\s*\[Slack\s[^\]]+\]\s*/i, "");
@@ -118,42 +129,7 @@ export function stripAssistantWrappers(text: string): string {
 
 /**
  * Extract assistant message texts from the event.messages array.
- * Handles both string content and content block arrays.
  */
 export function extractAssistantMessages(messages: unknown[]): string[] {
-  const texts: string[] = [];
-
-  for (const msg of messages) {
-    if (!msg || typeof msg !== "object") {
-      continue;
-    }
-    const msgObj = msg as Record<string, unknown>;
-
-    if (msgObj.role !== "assistant") {
-      continue;
-    }
-
-    const content = msgObj.content;
-    if (typeof content === "string") {
-      texts.push(content);
-      continue;
-    }
-
-    if (Array.isArray(content)) {
-      for (const block of content) {
-        if (
-          block &&
-          typeof block === "object" &&
-          "type" in block &&
-          (block as Record<string, unknown>).type === "text" &&
-          "text" in block &&
-          typeof (block as Record<string, unknown>).text === "string"
-        ) {
-          texts.push((block as Record<string, unknown>).text as string);
-        }
-      }
-    }
-  }
-
-  return texts.map(stripAssistantWrappers).filter((t) => t.length >= 10);
+  return extractMessagesByRole(messages, "assistant", stripAssistantWrappers);
 }
diff --git a/extensions/memory-neo4j/neo4j-client.test.ts b/extensions/memory-neo4j/neo4j-client.test.ts
index 08eb426e378..9678b8227c5 100644
--- a/extensions/memory-neo4j/neo4j-client.test.ts
+++ b/extensions/memory-neo4j/neo4j-client.test.ts
@@ -7,7 +7,7 @@
 
 import type { Driver } from "neo4j-driver";
 import { describe, it, expect, vi, beforeEach } from "vitest";
-import type { StoreMemoryInput, MergeEntityInput } from "./schema.js";
+import type { StoreMemoryInput } from "./schema.js";
 import { Neo4jMemoryClient } from "./neo4j-client.js";
 
 // ============================================================================
@@ -867,10 +867,10 @@ describe("Neo4jMemoryClient", () => {
   });
 
   // ------------------------------------------------------------------------
-  // promoteToCore() / demoteFromCore()
+  // promoteToCore()
   // ------------------------------------------------------------------------
 
-  describe("Core promotion/demotion", () => {
+  describe("Core promotion", () => {
     it("should promote memories to core category", async () => {
       mockSession.run.mockResolvedValue({
         records: [{ get: vi.fn().mockReturnValue(2) }],
@@ -885,26 +885,10 @@ describe("Neo4jMemoryClient", () => {
       );
     });
 
-    it("should demote memories from core category", async () => {
-      mockSession.run.mockResolvedValue({
-        records: [{ get: vi.fn().mockReturnValue(1) }],
-      });
-
-      const result = await client.demoteFromCore(["m1"]);
-
-      expect(result).toBe(1);
-      expect(mockSession.run).toHaveBeenCalledWith(
-        expect.stringContaining("category = 'fact'"),
-        expect.objectContaining({ ids: ["m1"] }),
-      );
-    });
-
     it("should handle empty ID arrays", async () => {
       const promoteResult = await client.promoteToCore([]);
-      const demoteResult = await client.demoteFromCore([]);
 
       expect(promoteResult).toBe(0);
-      expect(demoteResult).toBe(0);
     });
   });
 
@@ -1157,115 +1141,6 @@ describe("Neo4jMemoryClient", () => {
     });
   });
 
-  // ------------------------------------------------------------------------
-  // Entity and Tag operations
-  // ------------------------------------------------------------------------
-
-  describe("Entity operations", () => {
-    it("should merge entity idempotently", async () => {
-      mockSession.run.mockResolvedValue({
-        records: [
-          {
-            get: vi.fn((key) => {
-              const data: Record<string, any> = { id: "e1", name: "tarun" };
-              return data[key];
-            }),
-          },
-        ],
-      });
-
-      const input: MergeEntityInput = {
-        id: "e1",
-        name: "Tarun",
-        type: "person",
-        aliases: ["boss"],
-        description: "CEO",
-      };
-
-      const result = await client.mergeEntity(input);
-
-      expect(result).toEqual({ id: "e1", name: "tarun" });
-      expect(mockSession.run).toHaveBeenCalledWith(
-        expect.stringContaining("MERGE (e:Entity {name: $name})"),
-        expect.objectContaining({
-          name: "tarun", // normalized
-        }),
-      );
-    });
-
-    it("should create MENTIONS relationship", async () => {
-      mockSession.run.mockResolvedValue({ records: [] });
-
-      await client.createMentions("mem-1", "Tarun", "context", 0.95);
-
-      expect(mockSession.run).toHaveBeenCalledWith(
-        expect.stringContaining("MERGE (m)-[r:MENTIONS]->(e)"),
-        expect.objectContaining({
-          memoryId: "mem-1",
-          entityName: "tarun", // normalized
-          role: "context",
-          confidence: 0.95,
-        }),
-      );
-    });
-
-    it("should create entity relationships with validated type", async () => {
-      mockSession.run.mockResolvedValue({ records: [] });
-
-      await client.createEntityRelationship("Alice", "Acme", "WORKS_AT", 0.9);
-
-      expect(mockSession.run).toHaveBeenCalledWith(
-        expect.stringContaining("MERGE (e1)-[r:WORKS_AT]->(e2)"),
-        expect.objectContaining({
-          sourceName: "alice",
-          targetName: "acme",
-          confidence: 0.9,
-        }),
-      );
-    });
-
-    it("should reject invalid relationship types", async () => {
-      await client.createEntityRelationship("a", "b", "INVALID_TYPE", 0.9);
-
-      expect(mockLogger.warn).toHaveBeenCalledWith(
-        expect.stringContaining("rejected invalid relationship type"),
-      );
-      expect(mockSession.run).not.toHaveBeenCalled();
-    });
-  });
-
-  describe("Tag operations", () => {
-    it("should tag memory with normalized tag name", async () => {
-      mockSession.run.mockResolvedValue({ records: [] });
-
-      await client.tagMemory("mem-1", "Neo4j", "technology", 0.95);
-
-      expect(mockSession.run).toHaveBeenCalledWith(
-        expect.stringContaining("MERGE (t:Tag {name: $tagName})"),
-        expect.objectContaining({
-          memoryId: "mem-1",
-          tagName: "neo4j", // normalized
-          tagCategory: "technology",
-          confidence: 0.95,
-        }),
-      );
-    });
-
-    it("should update memory category only when current is 'other'", async () => {
-      mockSession.run.mockResolvedValue({ records: [] });
-
-      await client.updateMemoryCategory("mem-1", "fact");
-
-      expect(mockSession.run).toHaveBeenCalledWith(
-        expect.stringContaining("WHERE m.category = 'other'"),
-        expect.objectContaining({
-          id: "mem-1",
-          category: "fact",
-        }),
-      );
-    });
-  });
-
   // ------------------------------------------------------------------------
   // Extraction status tracking
   // ------------------------------------------------------------------------
@@ -1296,16 +1171,6 @@ describe("Neo4jMemoryClient", () => {
       );
     });
 
-    it("should get extraction retry count", async () => {
-      mockSession.run.mockResolvedValue({
-        records: [{ get: vi.fn().mockReturnValue(3) }],
-      });
-
-      const result = await client.getExtractionRetries("mem-1");
-
-      expect(result).toBe(3);
-    });
-
     it("should count memories by extraction status", async () => {
       mockSession.run.mockResolvedValue({
         records: [
diff --git a/extensions/memory-neo4j/neo4j-client.ts b/extensions/memory-neo4j/neo4j-client.ts
index 2443221e41d..8e0ee44c712 100644
--- a/extensions/memory-neo4j/neo4j-client.ts
+++ b/extensions/memory-neo4j/neo4j-client.ts
@@ -10,13 +10,13 @@
 
 import neo4j, { type Driver } from "neo4j-driver";
 import { randomUUID } from "node:crypto";
-import type {
-  ExtractionStatus,
-  MergeEntityInput,
-  SearchSignalResult,
-  StoreMemoryInput,
+import type { ExtractionStatus, Logger, SearchSignalResult, StoreMemoryInput } from "./schema.js";
+import {
+  ALLOWED_RELATIONSHIP_TYPES,
+  escapeLucene,
+  makePairKey,
+  validateRelationshipType,
 } from "./schema.js";
-import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } from "./schema.js";
 
 // SAFETY: This pattern is built from the hardcoded ALLOWED_RELATIONSHIP_TYPES constant,
 // not from user input. It's used in Cypher variable-length path patterns like
@@ -24,17 +24,6 @@ import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } fr
 // constant, there is no injection risk.
 const RELATIONSHIP_TYPE_PATTERN = [...ALLOWED_RELATIONSHIP_TYPES].join("|");
 
-// ============================================================================
-// Types
-// ============================================================================
-
-type Logger = {
-  info: (msg: string) => void;
-  warn: (msg: string) => void;
-  error: (msg: string) => void;
-  debug?: (msg: string) => void;
-};
-
 // Retry configuration for transient Neo4j errors (deadlocks, etc.)
 const TRANSIENT_RETRY_ATTEMPTS = 3;
 const TRANSIENT_RETRY_BASE_DELAY_MS = 500;
@@ -159,7 +148,7 @@ export class Neo4jMemoryClient {
         "CREATE INDEX entity_name_index IF NOT EXISTS FOR (e:Entity) ON (e.name)",
       );
       // Composite index for queries that filter by both agentId and category
-      // (e.g. listByCategory, promotion/demotion filtering in sleep cycle)
+      // (e.g. listByCategory, promotion filtering in sleep cycle)
       await this.runSafe(
         session,
         "CREATE INDEX memory_agent_category_index IF NOT EXISTS FOR (m:Memory) ON (m.agentId, m.category)",
@@ -256,12 +245,14 @@ export class Neo4jMemoryClient {
             agentId: $agentId, sessionKey: $sessionKey,
             createdAt: $createdAt, updatedAt: $updatedAt,
             retrievalCount: $retrievalCount, lastRetrievedAt: $lastRetrievedAt,
-            extractionRetries: $extractionRetries
+            extractionRetries: $extractionRetries,
+            userPinned: $userPinned
           })
           RETURN m.id AS id`,
           {
             ...input,
             sessionKey: input.sessionKey ?? null,
+            userPinned: input.userPinned ?? false,
             createdAt: now,
             updatedAt: now,
             retrievalCount: 0,
@@ -397,6 +388,47 @@ export class Neo4jMemoryClient {
     }
   }
 
+  /**
+   * Load core memories for injection: ALL user-pinned core memories (no limit)
+   * plus up to maxRegular non-pinned core memories ordered by importance.
+   * maxRegular is floored and clamped to >= 0: a negative end bound in a Cypher
+   * list slice counts from the end and would return far more than requested.
+   */
+  async listCoreForInjection(
+    maxRegular: number,
+    agentId?: string,
+  ): Promise<{ id: string; text: string; category: string; importance: number }[]> {
+    await this.ensureInitialized();
+    const session = this.driver!.session();
+    try {
+      const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
+      const result = await session.run(
+        `MATCH (m:Memory)
+         WHERE m.category = 'core' ${agentFilter}
+         WITH m, coalesce(m.userPinned, false) AS pinned
+         ORDER BY m.importance DESC
+         WITH collect({id: m.id, text: m.text, category: m.category, importance: m.importance, pinned: pinned}) AS all
+         WITH [x IN all WHERE x.pinned] AS pinnedList,
+              [x IN all WHERE NOT x.pinned][0..$maxRegular] AS regularList
+         UNWIND (pinnedList + regularList) AS mem
+         RETURN mem.id AS id, mem.text AS text, mem.category AS category, mem.importance AS importance`,
+        {
+          maxRegular: neo4j.int(Math.max(0, Math.floor(maxRegular))),
+          ...(agentId ? { agentId } : {}),
+        },
+      );
+
+      return result.records.map((r) => ({
+        id: r.get("id") as string,
+        text: r.get("text") as string,
+        category: r.get("category") as string,
+        importance: r.get("importance") as number,
+      }));
+    } finally {
+      await session.close();
+    }
+  }
+
   // --------------------------------------------------------------------------
   // Search Signals
   // --------------------------------------------------------------------------
@@ -549,7 +581,7 @@ export class Neo4jMemoryClient {
           // Variable-length relationship pattern: 1..maxHops hops through entity relationships
           const hopRange = `1..${Math.max(1, Math.min(3, maxHops))}`;
           const result = await session.run(
-            `// Find matching entities via fulltext index
+            `// Find matching entities via fulltext index (SINGLE lookup)
              CALL db.index.fulltext.queryNodes('entity_fulltext_index', $query)
              YIELD node AS entity, score
              WHERE score >= 0.5
@@ -557,37 +589,32 @@ export class Neo4jMemoryClient {
              ORDER BY score DESC
              LIMIT 5
 
-             // Direct: Entity ← MENTIONS ← Memory
+             // Collect direct mentions
              OPTIONAL MATCH (entity)<-[rm:MENTIONS]-(m:Memory)
              WHERE m IS NOT NULL ${agentFilter}
-             WITH m, coalesce(rm.confidence, 1.0) AS directScore, entity
-             WHERE m IS NOT NULL
+             WITH entity, collect({
+               id: m.id, text: m.text, category: m.category,
+               importance: m.importance, createdAt: m.createdAt,
+               score: coalesce(rm.confidence, 1.0)
+             }) AS directResults
 
-             RETURN m.id AS id, m.text AS text, m.category AS category,
-                    m.importance AS importance, m.createdAt AS createdAt,
-                    max(directScore) AS graphScore
-
-             UNION
-
-             // Find matching entities via fulltext index (repeated for UNION)
-             CALL db.index.fulltext.queryNodes('entity_fulltext_index', $query)
-             YIELD node AS entity, score
-             WHERE score >= 0.5
-             WITH entity
-             ORDER BY score DESC
-             LIMIT 5
-
-             // N-hop: Entity -[rels*1..N]-> Entity ← MENTIONS ← Memory
+             // N-hop spreading activation
              OPTIONAL MATCH (entity)-[rels:${RELATIONSHIP_TYPE_PATTERN}*${hopRange}]-(e2:Entity)
              WHERE ALL(r IN rels WHERE coalesce(r.confidence, 0.7) >= $firingThreshold)
-             OPTIONAL MATCH (e2)<-[rm:MENTIONS]-(m:Memory)
-             WHERE m IS NOT NULL ${agentFilter}
-             WITH m, reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm.confidence, 1.0) AS hopScore
-             WHERE m IS NOT NULL
+             OPTIONAL MATCH (e2)<-[rm2:MENTIONS]-(m2:Memory)
+             WHERE m2 IS NOT NULL ${agentFilter}
+             WITH directResults, collect({
+               id: m2.id, text: m2.text, category: m2.category,
+               importance: m2.importance, createdAt: m2.createdAt,
+               score: reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm2.confidence, 1.0)
+             }) AS hopResults
 
-             RETURN m.id AS id, m.text AS text, m.category AS category,
-                    m.importance AS importance, m.createdAt AS createdAt,
-                    max(hopScore) AS graphScore`,
+             // Combine and return
+             UNWIND (directResults + hopResults) AS row
+             WITH row WHERE row.id IS NOT NULL
+             RETURN row.id AS id, row.text AS text, row.category AS category,
+                    row.importance AS importance, row.createdAt AS createdAt,
+                    max(row.score) AS graphScore`,
             { query: escaped, firingThreshold, ...(agentId ? { agentId } : {}) },
           );
 
@@ -613,7 +640,6 @@ export class Neo4jMemoryClient {
           }
 
           return Array.from(byId.values())
-            .slice()
             .sort((a, b) => b.score - a.score)
             .slice(0, limit);
         } finally {
@@ -713,159 +739,6 @@ export class Neo4jMemoryClient {
   // Entity & Relationship Operations
   // --------------------------------------------------------------------------
 
-  /**
-   * Merge (upsert) an Entity node using MERGE pattern.
-   * Idempotent — safe to call multiple times for the same entity name.
-   */
-  async mergeEntity(input: MergeEntityInput): Promise<{ id: string; name: string }> {
-    await this.ensureInitialized();
-    return this.retryOnTransient(async () => {
-      const session = this.driver!.session();
-      try {
-        const result = await session.run(
-          `MERGE (e:Entity {name: $name})
-           ON CREATE SET
-             e.id = $id, e.type = $type, e.aliases = $aliases,
-             e.description = $description,
-             e.firstSeen = $now, e.lastSeen = $now, e.mentionCount = 1
-           ON MATCH SET
-             e.type = COALESCE($type, e.type),
-             e.description = COALESCE($description, e.description),
-             e.lastSeen = $now,
-             e.mentionCount = e.mentionCount + 1
-           RETURN e.id AS id, e.name AS name`,
-          {
-            id: input.id,
-            name: input.name.trim().toLowerCase(),
-            type: input.type,
-            aliases: input.aliases ?? [],
-            description: input.description ?? null,
-            now: new Date().toISOString(),
-          },
-        );
-        const record = result.records[0];
-        return {
-          id: record.get("id") as string,
-          name: record.get("name") as string,
-        };
-      } finally {
-        await session.close();
-      }
-    });
-  }
-
-  /**
-   * Create a MENTIONS relationship between a Memory and an Entity.
-   */
-  async createMentions(
-    memoryId: string,
-    entityName: string,
-    role: string = "context",
-    confidence: number = 1.0,
-  ): Promise<void> {
-    await this.ensureInitialized();
-    const session = this.driver!.session();
-    try {
-      await session.run(
-        `MATCH (m:Memory {id: $memoryId})
-         MATCH (e:Entity {name: $entityName})
-         MERGE (m)-[r:MENTIONS]->(e)
-         ON CREATE SET r.role = $role, r.confidence = $confidence`,
-        { memoryId, entityName: entityName.trim().toLowerCase(), role, confidence },
-      );
-    } finally {
-      await session.close();
-    }
-  }
-
-  /**
-   * Create a typed relationship between two Entity nodes.
-   * The relationship type is validated against an allowlist before injection.
-   */
-  async createEntityRelationship(
-    sourceName: string,
-    targetName: string,
-    relType: string,
-    confidence: number = 1.0,
-  ): Promise<void> {
-    if (!validateRelationshipType(relType)) {
-      this.logger.warn(`memory-neo4j: rejected invalid relationship type: ${relType}`);
-      return;
-    }
-
-    await this.ensureInitialized();
-    const session = this.driver!.session();
-    try {
-      await session.run(
-        `MATCH (e1:Entity {name: $sourceName})
-         MATCH (e2:Entity {name: $targetName})
-         MERGE (e1)-[r:${relType}]->(e2)
-         ON CREATE SET r.confidence = $confidence, r.createdAt = $now
-         ON MATCH SET r.confidence = CASE WHEN $confidence > r.confidence THEN $confidence ELSE r.confidence END`,
-        {
-          sourceName: sourceName.trim().toLowerCase(),
-          targetName: targetName.trim().toLowerCase(),
-          confidence,
-          now: new Date().toISOString(),
-        },
-      );
-    } finally {
-      await session.close();
-    }
-  }
-
-  /**
-   * Merge a Tag node and link it to a Memory.
-   */
-  async tagMemory(
-    memoryId: string,
-    tagName: string,
-    tagCategory: string,
-    confidence: number = 1.0,
-  ): Promise<void> {
-    await this.ensureInitialized();
-    const session = this.driver!.session();
-    try {
-      await session.run(
-        `MERGE (t:Tag {name: $tagName})
-         ON CREATE SET t.id = $tagId, t.category = $tagCategory, t.createdAt = $now
-         WITH t
-         MATCH (m:Memory {id: $memoryId})
-         MERGE (m)-[r:TAGGED]->(t)
-         ON CREATE SET r.confidence = $confidence`,
-        {
-          memoryId,
-          tagName: tagName.trim().toLowerCase(),
-          tagId: randomUUID(),
-          tagCategory,
-          confidence,
-          now: new Date().toISOString(),
-        },
-      );
-    } finally {
-      await session.close();
-    }
-  }
-
-  /**
-   * Update a memory's category. Only updates if current category is 'other'
-   * (auto-assigned) to avoid overriding user-explicit categorization.
-   */
-  async updateMemoryCategory(id: string, category: string): Promise<void> {
-    await this.ensureInitialized();
-    const session = this.driver!.session();
-    try {
-      await session.run(
-        `MATCH (m:Memory {id: $id})
-         WHERE m.category = 'other'
-         SET m.category = $category, m.updatedAt = $now`,
-        { id, category, now: new Date().toISOString() },
-      );
-    } finally {
-      await session.close();
-    }
-  }
-
   /**
    * Update the extraction status of a Memory node.
    * Optionally increments the extractionRetries counter (for transient failure tracking).
@@ -891,24 +764,6 @@ export class Neo4jMemoryClient {
     }
   }
 
-  /**
-   * Get the current extraction retry count for a memory.
-   */
-  async getExtractionRetries(id: string): Promise<number> {
-    await this.ensureInitialized();
-    const session = this.driver!.session();
-    try {
-      const result = await session.run(
-        `MATCH (m:Memory {id: $id})
-         RETURN coalesce(m.extractionRetries, 0) AS retries`,
-        { id },
-      );
-      return (result.records[0]?.get("retries") as number) ?? 0;
-    } finally {
-      await session.close();
-    }
-  }
-
   /**
    * Batch all entity operations from an extraction result into a single managed
    * transaction. Replaces the previous pattern of N individual session-per-call
@@ -1154,21 +1009,20 @@ export class Neo4jMemoryClient {
   > {
     await this.ensureInitialized();
 
-    // Step 1: Fetch all memory metadata in a short-lived session
-    const memoryData = new Map<string, { text: string; importance: number }>();
+    // Step 1: Fetch only IDs and importance (not text) to reduce data transfer
+    const memoryMeta = new Map<string, { importance: number }>();
     {
       const session = this.driver!.session();
       try {
         const agentFilter = agentId ? "WHERE m.agentId = $agentId" : "";
         const allResult = await session.run(
           `MATCH (m:Memory) ${agentFilter}
-           RETURN m.id AS id, m.text AS text, m.importance AS importance`,
+           RETURN m.id AS id, m.importance AS importance`,
           agentId ? { agentId } : {},
         );
 
         for (const r of allResult.records) {
-          memoryData.set(r.get("id") as string, {
-            text: r.get("text") as string,
+          memoryMeta.set(r.get("id") as string, {
             importance: r.get("importance") as number,
           });
         }
@@ -1177,7 +1031,7 @@ export class Neo4jMemoryClient {
       }
     }
 
-    if (memoryData.size < 2) {
+    if (memoryMeta.size < 2) {
       return [];
     }
 
@@ -1207,16 +1061,11 @@ export class Neo4jMemoryClient {
       }
     };
 
-    // Helper to create a canonical pair key (sorted)
-    const makePairKey = (a: string, b: string): string => {
-      return a < b ? `${a}:${b}` : `${b}:${a}`;
-    };
-
     // Process vector queries in concurrent batches to avoid overwhelming Neo4j
     // while still being much faster than fully sequential execution.
     const DEDUP_CONCURRENCY = 8;
     let pairsFound = 0;
-    const allIds = [...memoryData.keys()];
+    const allIds = [...memoryMeta.keys()];
 
     for (let batchStart = 0; batchStart < allIds.length; batchStart += DEDUP_CONCURRENCY) {
       if (pairsFound > 500) {
@@ -1253,7 +1102,7 @@ export class Neo4jMemoryClient {
 
         for (const r of similar.records) {
           const matchId = r.get("matchId") as string;
-          if (memoryData.has(matchId)) {
+          if (memoryMeta.has(matchId)) {
             union(id, matchId);
             pairsFound++;
 
@@ -1274,7 +1123,7 @@ export class Neo4jMemoryClient {
 
     // Step 3: Group by root
     const clusters = new Map<string, string[]>();
-    for (const id of memoryData.keys()) {
+    for (const id of memoryMeta.keys()) {
       if (!parent.has(id)) {
         continue;
       }
@@ -1285,38 +1134,61 @@ export class Neo4jMemoryClient {
       clusters.get(root)!.push(id);
     }
 
-    // Return clusters with 2+ members
-    return Array.from(clusters.values())
-      .filter((ids) => ids.length >= 2)
-      .map((ids) => {
-        const cluster: {
-          memoryIds: string[];
-          texts: string[];
-          importances: number[];
-          similarities?: Map<string, number>;
-        } = {
-          memoryIds: ids,
-          texts: ids.map((id) => memoryData.get(id)!.text),
-          importances: ids.map((id) => memoryData.get(id)!.importance),
-        };
+    // Step 4: Fetch text only for memories that are in clusters (not all memories)
+    const duplicateClusters = Array.from(clusters.values()).filter((ids) => ids.length >= 2);
+    const clusteredIds = new Set<string>();
+    for (const ids of duplicateClusters) {
+      for (const id of ids) clusteredIds.add(id);
+    }
 
-        // Include similarities for this cluster if requested
-        if (pairwiseSimilarities) {
-          const clusterSims = new Map<string, number>();
-          for (let i = 0; i < ids.length - 1; i++) {
-            for (let j = i + 1; j < ids.length; j++) {
-              const pairKey = makePairKey(ids[i], ids[j]);
-              const score = pairwiseSimilarities.get(pairKey);
-              if (score !== undefined) {
-                clusterSims.set(pairKey, score);
-              }
+    const textMap = new Map<string, string>();
+    if (clusteredIds.size > 0) {
+      const session = this.driver!.session();
+      try {
+        const result = await session.run(
+          `UNWIND $ids AS memId
+           MATCH (m:Memory {id: memId})
+           RETURN m.id AS id, m.text AS text`,
+          { ids: [...clusteredIds] },
+        );
+        for (const r of result.records) {
+          textMap.set(r.get("id") as string, r.get("text") as string);
+        }
+      } finally {
+        await session.close();
+      }
+    }
+
+    // Return clusters with 2+ members
+    return duplicateClusters.map((ids) => {
+      const cluster: {
+        memoryIds: string[];
+        texts: string[];
+        importances: number[];
+        similarities?: Map<string, number>;
+      } = {
+        memoryIds: ids,
+        texts: ids.map((id) => textMap.get(id) ?? ""),
+        importances: ids.map((id) => memoryMeta.get(id)!.importance),
+      };
+
+      // Include similarities for this cluster if requested
+      if (pairwiseSimilarities) {
+        const clusterSims = new Map<string, number>();
+        for (let i = 0; i < ids.length - 1; i++) {
+          for (let j = i + 1; j < ids.length; j++) {
+            const pairKey = makePairKey(ids[i], ids[j]);
+            const score = pairwiseSimilarities.get(pairKey);
+            if (score !== undefined) {
+              clusterSims.set(pairKey, score);
             }
           }
-          cluster.similarities = clusterSims;
         }
+        cluster.similarities = clusterSims;
+      }
 
-        return cluster;
-      });
+      return cluster;
+    });
   }
 
   /**
@@ -1420,8 +1292,8 @@ export class Neo4jMemoryClient {
    *
    * A memory with importance=1.0 decays slower than one with importance=0.3.
    *
-   * IMPORTANT: Core memories (category='core') are EXEMPT from decay.
-   * They persist indefinitely regardless of age.
+   * IMPORTANT: Core memories (category='core') and user-pinned memories
+   * are EXEMPT from decay. They persist indefinitely regardless of age.
    */
   async findDecayedMemories(
     options: {
@@ -1473,6 +1345,7 @@ export class Neo4jMemoryClient {
         `MATCH (m:Memory)
          WHERE m.createdAt IS NOT NULL
            AND m.category <> 'core'
+           AND coalesce(m.userPinned, false) = false
            ${agentFilter}
          WITH m,
               duration.between(datetime(m.createdAt), datetime()).days AS ageDays,
@@ -1659,7 +1532,7 @@ export class Neo4jMemoryClient {
   /**
    * Find memory pairs that share at least one entity (via MENTIONS relationships).
    * These are candidates for conflict resolution — the LLM decides if they truly conflict.
-   * Excludes core memories (conflicts there are handled by promotion/demotion).
+   * Excludes core memories (conflicts there are handled by promotion).
    */
   async findConflictingMemories(agentId?: string): Promise<
     Array<{
@@ -1729,8 +1602,8 @@ export class Neo4jMemoryClient {
    * Calculate effective scores for all memories to determine Pareto threshold.
    *
    * Uses: importance × freq_boost × recency for ALL memories (including core).
-   * This gives core memories a slight disadvantage (they need strong retrieval
-   * patterns to stay in top 20%), creating healthy churn.
+   * User-pinned core memories are excluded — they have fixed importance=1.0
+   * and should not influence the Pareto threshold calculation.
    */
   async calculateAllEffectiveScores(agentId?: string): Promise<
     Array<{
@@ -1747,8 +1620,8 @@ export class Neo4jMemoryClient {
     const session = this.driver!.session();
     try {
       const agentFilter = agentId
-        ? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL"
-        : "WHERE m.createdAt IS NOT NULL";
+        ? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL AND coalesce(m.userPinned, false) = false"
+        : "WHERE m.createdAt IS NOT NULL AND coalesce(m.userPinned, false) = false";
       const result = await session.run(
         `MATCH (m:Memory)
          ${agentFilter}
@@ -1761,7 +1634,7 @@ export class Neo4jMemoryClient {
               END AS daysSinceRetrieval
          WITH m, retrievalCount, ageDays, daysSinceRetrieval,
               // Effective score: importance × freq_boost × recency
-              // This is used for global ranking (promotion/demotion threshold)
+              // This is used for global ranking (promotion threshold)
               m.importance * (1 + log(1 + retrievalCount) * 0.3) *
                 CASE
                   WHEN daysSinceRetrieval IS NULL THEN 0.1
@@ -1788,7 +1661,7 @@ export class Neo4jMemoryClient {
   }
 
   /**
-   * Calculate the Pareto threshold (80th percentile) for promotion/demotion.
+   * Calculate the Pareto threshold (80th percentile) for promotion.
    * Returns the effective score that separates top 20% from bottom 80%.
    */
   calculateParetoThreshold(
@@ -1836,33 +1709,6 @@ export class Neo4jMemoryClient {
     }
   }
 
-  /**
-   * Demote memories from core back to their original category.
-   * Uses 'fact' as default since we don't track original category.
-   */
-  async demoteFromCore(memoryIds: string[]): Promise<number> {
-    if (memoryIds.length === 0) {
-      return 0;
-    }
-
-    await this.ensureInitialized();
-    const session = this.driver!.session();
-    try {
-      const result = await session.run(
-        `UNWIND $ids AS memId
-         MATCH (m:Memory {id: memId})
-         WHERE m.category = 'core'
-         SET m.category = 'fact', m.demotedAt = $now, m.updatedAt = $now
-         RETURN count(*) AS demoted`,
-        { ids: memoryIds, now: new Date().toISOString() },
-      );
-
-      return (result.records[0]?.get("demoted") as number) ?? 0;
-    } finally {
-      await session.close();
-    }
-  }
-
   // --------------------------------------------------------------------------
   // Reindex: re-embed all Memory and Entity nodes
   // --------------------------------------------------------------------------
diff --git a/extensions/memory-neo4j/schema.ts b/extensions/memory-neo4j/schema.ts
index 745239c653f..bd4a60589da 100644
--- a/extensions/memory-neo4j/schema.ts
+++ b/extensions/memory-neo4j/schema.ts
@@ -2,6 +2,17 @@
  * Graph schema types, Cypher query templates, and constants for memory-neo4j.
  */
 
+// ============================================================================
+// Shared Types
+// ============================================================================
+
+export type Logger = {
+  info: (msg: string) => void;
+  warn: (msg: string) => void;
+  error: (msg: string) => void;
+  debug?: (msg: string) => void; // Optional member: call as logger.debug?.(...)
+};
+
 // ============================================================================
 // Node Types
 // ============================================================================
@@ -32,7 +43,7 @@ export type MemoryNode = {
   retrievalCount: number;
   lastRetrievedAt?: string;
   promotedAt?: string;
-  demotedAt?: string;
+  userPinned?: boolean;
 };
 
 export type EntityNode = {
@@ -119,6 +130,7 @@ export type StoreMemoryInput = {
   extractionStatus: ExtractionStatus;
   agentId: string;
   sessionKey?: string;
+  userPinned?: boolean;
 };
 
 export type MergeEntityInput = {
@@ -174,3 +186,10 @@ export function escapeLucene(query: string): string {
 export function validateRelationshipType(type: string): boolean {
   return ALLOWED_RELATIONSHIP_TYPES.has(type);
 }
+
+/**
+ * Create a canonical key for a pair of IDs: the two IDs joined by ":" in ascending string order, so (a, b) and (b, a) yield the same key.
+ */
+export function makePairKey(a: string, b: string): string {
+  return a < b ? `${a}:${b}` : `${b}:${a}`;
+}
diff --git a/extensions/memory-neo4j/search.ts b/extensions/memory-neo4j/search.ts
index 60a787e747e..9ced74784fe 100644
--- a/extensions/memory-neo4j/search.ts
+++ b/extensions/memory-neo4j/search.ts
@@ -14,7 +14,7 @@
 
 import type { Embeddings } from "./embeddings.js";
 import type { Neo4jMemoryClient } from "./neo4j-client.js";
-import type { HybridSearchResult, SearchSignalResult } from "./schema.js";
+import type { HybridSearchResult, Logger, SearchSignalResult } from "./schema.js";
 
 // ============================================================================
 // Query Classification
@@ -214,6 +214,7 @@ export async function hybridSearch(
     candidateMultiplier?: number;
     graphFiringThreshold?: number;
     graphSearchDepth?: number;
+    logger?: Logger;
   } = {},
 ): Promise<HybridSearchResult[]> {
   // Guard against empty queries
@@ -226,12 +227,15 @@ export async function hybridSearch(
     candidateMultiplier = 4,
     graphFiringThreshold = 0.3,
     graphSearchDepth = 1,
+    logger,
   } = options;
 
   const candidateLimit = Math.floor(Math.min(200, Math.max(1, limit * candidateMultiplier)));
 
   // 1. Generate query embedding
+  const t0 = performance.now();
   const queryEmbedding = await embeddings.embed(query);
+  const tEmbed = performance.now();
 
   // 2. Classify query and get adaptive weights
   const queryType = classifyQuery(query);
@@ -245,9 +249,11 @@ export async function hybridSearch(
       ? db.graphSearch(query, candidateLimit, graphFiringThreshold, agentId, graphSearchDepth)
       : Promise.resolve([] as SearchSignalResult[]),
   ]);
+  const tSignals = performance.now();
 
   // 4. Fuse with confidence-weighted RRF
   const fused = fuseWithConfidenceRRF([vectorResults, bm25Results, graphResults], rrfK, weights);
+  const tFuse = performance.now();
 
   // 5. Return top results, normalized to 0-100% display scores.
   // Only normalize when maxRrf is above a minimum threshold to avoid
@@ -275,5 +281,11 @@ export async function hybridSearch(
     });
   }
 
+  // Log search timing breakdown
+  logger?.info?.(
+    `memory-neo4j: [bench] hybridSearch ${(tFuse - t0).toFixed(0)}ms (embed=${(tEmbed - t0).toFixed(0)}ms, signals=${(tSignals - tEmbed).toFixed(0)}ms, fuse=${(tFuse - tSignals).toFixed(0)}ms) ` +
+      `type=${queryType} vec=${vectorResults.length} bm25=${bm25Results.length} graph=${graphResults.length} → ${results.length} results`,
+  );
+
   return results;
 }
diff --git a/extensions/memory-neo4j/sleep-cycle.ts b/extensions/memory-neo4j/sleep-cycle.ts
new file mode 100644
index 00000000000..62b680f695f
--- /dev/null
+++ b/extensions/memory-neo4j/sleep-cycle.ts
@@ -0,0 +1,663 @@
+/**
+ * Seven-phase sleep cycle for memory consolidation.
+ *
+ * Implements a Pareto-based memory ecosystem where core memory
+ * is bounded to the top 20% of memories by effective score.
+ *
+ * Phases:
+ * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
+ * 2. PARETO SCORING - Calculate effective scores for all memories
+ * 3. CORE PROMOTION - Regular memories above threshold -> core
+ * 4. (removed) CORE DEMOTION - no longer performed; promotion is one-way and core memories are never auto-demoted
+ * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
+ * 6. EXTRACTION - Form entity relationships (strengthen connections)
+ * 7. CLEANUP - Remove orphaned entities/tags (garbage collection)
+ *
+ * Research basis:
+ * - Pareto principle (20/80 rule) for memory tiering
+ * - ACT-R memory model for retrieval-based importance
+ * - Ebbinghaus forgetting curve for decay
+ * - MemGPT/Letta for tiered memory architecture
+ */
+
+import type { ExtractionConfig } from "./config.js";
+import type { Embeddings } from "./embeddings.js";
+import type { Neo4jMemoryClient } from "./neo4j-client.js";
+import type { Logger } from "./schema.js";
+import { isSemanticDuplicate, resolveConflict, runBackgroundExtraction } from "./extractor.js";
+import { makePairKey } from "./schema.js";
+
+/**
+ * Sleep Cycle Result - aggregated per-phase counters returned by runSleepCycle.
+ */
+export type SleepCycleResult = {
+  // Phase 1 (1a): Vector deduplication of high-similarity clusters
+  dedup: {
+    clustersFound: number; // Clusters with at least one pair at or above dedupThreshold
+    memoriesMerged: number; // Sum of deletedCount returned by mergeMemoryCluster
+  };
+  // Phase 1c: Conflict Detection (skipped when skipSemanticDedup is set)
+  conflict: {
+    pairsFound: number; // Candidate pairs returned by findConflictingMemories
+    resolved: number; // Pairs with a decision of "a", "b" or "both"
+    invalidated: number; // Memories invalidated as the losing side ("a" or "b" decisions)
+  };
+  // Phase 1b: Semantic Deduplication (LLM check on clusters below dedupThreshold)
+  semanticDedup: {
+    pairsChecked: number; // Pairs submitted to isSemanticDuplicate, including checks that rejected
+    duplicatesMerged: number; // Memories invalidated as semantic duplicates
+  };
+  // Phase 2: Pareto Scoring & Threshold
+  pareto: {
+    totalMemories: number; // Rows returned by calculateAllEffectiveScores
+    coreMemories: number; // Scored memories with category "core"
+    regularMemories: number; // Scored memories with any other category
+    threshold: number; // Effective-score cutoff at the (1 - paretoPercentile) percentile (80th by default)
+  };
+  // Phase 3: Core Promotion
+  promotion: {
+    candidatesFound: number; // Non-core memories at/above the threshold and at least promotionMinAgeDays old
+    promoted: number; // Count returned by promoteToCore
+  };
+  // Phase 4: Entity Extraction — NOTE(review): "Phase 4" is also used for decay below; confirm numbering
+  extraction: {
+    total: number;
+    processed: number;
+    succeeded: number;
+    failed: number;
+  };
+  // Phase 4: Decay & Pruning
+  decay: {
+    memoriesPruned: number;
+  };
+  // Phase 5: Orphan Cleanup
+  cleanup: {
+    entitiesRemoved: number;
+    tagsRemoved: number;
+  };
+  // Overall
+  durationMs: number;
+  aborted: boolean;
+};
+
+export type SleepCycleOptions = {
+  // Common
+  agentId?: string; // Passed through to the db queries to scope them to one agent
+  abortSignal?: AbortSignal; // Checked before each phase and between batches; remaining work is skipped once aborted
+
+  // Phase 1: Deduplication
+  dedupThreshold?: number; // Similarity at/above which a cluster is vector-merged without the LLM (default: 0.95); clusters are always fetched at 0.75
+  skipSemanticDedup?: boolean; // Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)
+
+  // Phase 2-3: Pareto-based Promotion
+  paretoPercentile?: number; // Top fraction eligible for core (default: 0.2 = top 20%)
+  promotionMinAgeDays?: number; // Min age before promotion (default: 7)
+
+  // Phase 1b: Semantic Dedup
+  maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500); when capped, the highest-similarity pairs are kept
+
+  // Concurrency
+  llmConcurrency?: number; // Parallel LLM calls per batch (default: 8, match OLLAMA_NUM_PARALLEL)
+
+  // Phase 4: Extraction
+  extractionBatchSize?: number; // Memories per batch (default: 50)
+  extractionDelayMs?: number; // Delay between batches (default: 1000)
+
+  // Phase 4: Decay — NOTE(review): "Phase 4" is also used for extraction above; confirm numbering
+  decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
+  decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
+  decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
+  decayCurves?: Record<string, { halfLifeDays: number }>; // Per-category decay curve overrides
+
+  // Progress callbacks: onPhaseStart fires when a phase begins, onProgress carries per-item messages
+  onPhaseStart?: (
+    phase:
+      | "dedup"
+      | "conflict"
+      | "semanticDedup"
+      | "pareto"
+      | "promotion"
+      | "decay"
+      | "extraction"
+      | "cleanup",
+  ) => void;
+  onProgress?: (phase: string, message: string) => void;
+};
+
+// ============================================================================
+// Sleep Cycle Implementation
+// ============================================================================
+
+/**
+ * Run the full sleep cycle - seven phases of memory consolidation.
+ *
+ * This implements a Pareto-based memory ecosystem where core memory
+ * is bounded to the top 20% of memories by effective score.
+ *
+ * Effective Score Formulas:
+ * - Regular memories: importance x freq_boost x recency
+ * - Core memories: importance x freq_boost x recency (same for threshold comparison)
+ * - Core memory retrieval ranking: freq_boost x recency (pure usage-based)
+ *
+ * Where:
+ * - freq_boost = 1 + log(1 + retrievalCount) x 0.3
+ * - recency = 2^(-days_since_last / 14)
+ */
+export async function runSleepCycle(
+  db: Neo4jMemoryClient,
+  embeddings: Embeddings,
+  config: ExtractionConfig,
+  logger: Logger,
+  options: SleepCycleOptions = {},
+): Promise<SleepCycleResult> {
+  const startTime = Date.now();
+  const {
+    agentId,
+    abortSignal,
+    dedupThreshold = 0.95,
+    skipSemanticDedup = false,
+    maxSemanticDedupPairs = 500,
+    llmConcurrency = 8,
+    paretoPercentile = 0.2,
+    promotionMinAgeDays = 7,
+    decayRetentionThreshold = 0.1,
+    decayBaseHalfLifeDays = 30,
+    decayImportanceMultiplier = 2,
+    decayCurves,
+    extractionBatchSize = 50,
+    extractionDelayMs = 1000,
+    onPhaseStart,
+    onProgress,
+  } = options;
+
+  const result: SleepCycleResult = {
+    dedup: { clustersFound: 0, memoriesMerged: 0 },
+    conflict: { pairsFound: 0, resolved: 0, invalidated: 0 },
+    semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 },
+    pareto: {
+      totalMemories: 0,
+      coreMemories: 0,
+      regularMemories: 0,
+      threshold: 0,
+    },
+    promotion: { candidatesFound: 0, promoted: 0 },
+    decay: { memoriesPruned: 0 },
+    extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
+    cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
+    durationMs: 0,
+    aborted: false,
+  };
+
+  // --------------------------------------------------------------------------
+  // Phase 1: Deduplication (Optimized - combined vector + semantic dedup)
+  // Call findDuplicateClusters ONCE at 0.75 threshold, then split by similarity band:
+  // - >=0.95: vector merge (high-confidence duplicates)
+  // - 0.75-0.95: semantic dedup via LLM (paraphrases)
+  // --------------------------------------------------------------------------
+  if (!abortSignal?.aborted) {
+    onPhaseStart?.("dedup");
+    logger.info("memory-neo4j: [sleep] Phase 1: Deduplication (vector + semantic)");
+
+    try {
+      // Fetch clusters at 0.75 threshold with similarity scores
+      const allClusters = await db.findDuplicateClusters(0.75, agentId, true);
+
+      // Separate clusters into high-similarity (>=0.95) and medium-similarity (0.75-0.95)
+      const highSimClusters: typeof allClusters = [];
+      const mediumSimClusters: typeof allClusters = [];
+
+      for (const cluster of allClusters) {
+        if (abortSignal?.aborted) break;
+        if (!cluster.similarities || cluster.memoryIds.length < 2) continue;
+
+        // Check if ANY pair in this cluster has similarity >= dedupThreshold
+        let hasHighSim = false;
+        for (const [pairKey, score] of cluster.similarities.entries()) {
+          if (score >= dedupThreshold) {
+            hasHighSim = true;
+            break;
+          }
+        }
+
+        if (hasHighSim) {
+          // Split this cluster into high-sim and medium-sim sub-clusters
+          // For simplicity, if a cluster has ANY high-sim pair, treat the whole cluster as high-sim
+          // (This matches the old behavior where Phase 1 would merge them all)
+          highSimClusters.push(cluster);
+        } else {
+          mediumSimClusters.push(cluster);
+        }
+      }
+
+      // Part 1a: Vector merge for high-similarity clusters (>=0.95)
+      result.dedup.clustersFound = highSimClusters.length;
+
+      for (const cluster of highSimClusters) {
+        if (abortSignal?.aborted) break;
+
+        const { deletedCount } = await db.mergeMemoryCluster(
+          cluster.memoryIds,
+          cluster.importances,
+        );
+        result.dedup.memoriesMerged += deletedCount;
+        onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} -> 1 (vector)`);
+      }
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 1a (vector) complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`,
+      );
+
+      // Part 1b: Semantic dedup for medium-similarity clusters (0.75-0.95)
+      if (skipSemanticDedup) {
+        onPhaseStart?.("semanticDedup");
+        logger.info("memory-neo4j: [sleep] Phase 1b: Skipped (--skip-semantic)");
+        onProgress?.("semanticDedup", "Skipped — semantic dedup disabled");
+      } else {
+        onPhaseStart?.("semanticDedup");
+        logger.info("memory-neo4j: [sleep] Phase 1b: Semantic Deduplication (0.75-0.95 band)");
+
+        // Collect all candidate pairs upfront (with pairwise similarity for pre-screening)
+        type DedupPair = {
+          textA: string;
+          textB: string;
+          idA: string;
+          idB: string;
+          importanceA: number;
+          importanceB: number;
+          similarity?: number;
+        };
+        const allPairs: DedupPair[] = [];
+
+        for (const cluster of mediumSimClusters) {
+          if (cluster.memoryIds.length < 2) continue;
+          for (let i = 0; i < cluster.memoryIds.length - 1; i++) {
+            for (let j = i + 1; j < cluster.memoryIds.length; j++) {
+              const pairKey = makePairKey(cluster.memoryIds[i], cluster.memoryIds[j]);
+              allPairs.push({
+                textA: cluster.texts[i],
+                textB: cluster.texts[j],
+                idA: cluster.memoryIds[i],
+                idB: cluster.memoryIds[j],
+                importanceA: cluster.importances[i],
+                importanceB: cluster.importances[j],
+                similarity: cluster.similarities?.get(pairKey),
+              });
+            }
+          }
+        }
+
+        // Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
+        // Sort by similarity descending so higher-similarity pairs (more likely
+        // to be duplicates) are checked first.
+        if (allPairs.length > maxSemanticDedupPairs) {
+          allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
+          const skipped = allPairs.length - maxSemanticDedupPairs;
+          allPairs.length = maxSemanticDedupPairs;
+          onProgress?.(
+            "semanticDedup",
+            `Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`,
+          );
+          logger.info(
+            `memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`,
+          );
+        }
+
+        // Process pairs in concurrent batches
+        const invalidatedIds = new Set<string>();
+
+        for (let i = 0; i < allPairs.length && !abortSignal?.aborted; i += llmConcurrency) {
+          const batch = allPairs.slice(i, i + llmConcurrency);
+
+          // Filter out pairs where one side was already invalidated
+          const activeBatch = batch.filter(
+            (p) => !invalidatedIds.has(p.idA) && !invalidatedIds.has(p.idB),
+          );
+
+          if (activeBatch.length === 0) continue;
+
+          const outcomes = await Promise.allSettled(
+            activeBatch.map((p) =>
+              isSemanticDuplicate(p.textA, p.textB, config, p.similarity, abortSignal),
+            ),
+          );
+
+          for (let k = 0; k < outcomes.length; k++) {
+            const pair = activeBatch[k];
+            result.semanticDedup.pairsChecked++;
+
+            if (
+              outcomes[k].status === "fulfilled" &&
+              (outcomes[k] as PromiseFulfilledResult<boolean>).value
+            ) {
+              // Skip if either side was invalidated by an earlier result in this batch
+              if (invalidatedIds.has(pair.idA) || invalidatedIds.has(pair.idB)) continue;
+
+              const keepId = pair.importanceA >= pair.importanceB ? pair.idA : pair.idB;
+              const removeId = keepId === pair.idA ? pair.idB : pair.idA;
+              const keepText = keepId === pair.idA ? pair.textA : pair.textB;
+              const removeText = removeId === pair.idA ? pair.textA : pair.textB;
+
+              await db.invalidateMemory(removeId);
+              invalidatedIds.add(removeId);
+              result.semanticDedup.duplicatesMerged++;
+
+              onProgress?.(
+                "semanticDedup",
+                `Merged: "${removeText.slice(0, 50)}..." -> kept "${keepText.slice(0, 50)}..."`,
+              );
+            }
+          }
+        }
+
+        logger.info(
+          `memory-neo4j: [sleep] Phase 1b (semantic) complete — ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
+        );
+      } // close skipSemanticDedup else
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // Phase 1c: Conflict Detection (formerly Phase 1b)
+  // --------------------------------------------------------------------------
+  if (!abortSignal?.aborted && !skipSemanticDedup) {
+    onPhaseStart?.("conflict");
+    logger.info("memory-neo4j: [sleep] Phase 1c: Conflict Detection");
+
+    try {
+      const pairs = await db.findConflictingMemories(agentId);
+      result.conflict.pairsFound = pairs.length;
+
+      // Process conflict pairs in parallel chunks of llmConcurrency
+      for (let i = 0; i < pairs.length && !abortSignal?.aborted; i += llmConcurrency) {
+        const chunk = pairs.slice(i, i + llmConcurrency);
+        const outcomes = await Promise.allSettled(
+          chunk.map((pair) =>
+            resolveConflict(pair.memoryA.text, pair.memoryB.text, config, abortSignal),
+          ),
+        );
+
+        for (let k = 0; k < outcomes.length; k++) {
+          if (abortSignal?.aborted) break;
+          const pair = chunk[k];
+          const outcome = outcomes[k];
+          if (outcome.status !== "fulfilled") continue;
+
+          const decision = outcome.value;
+          if (decision === "a") {
+            await db.invalidateMemory(pair.memoryB.id);
+            result.conflict.invalidated++;
+            result.conflict.resolved++;
+            onProgress?.(
+              "conflict",
+              `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`,
+            );
+          } else if (decision === "b") {
+            await db.invalidateMemory(pair.memoryA.id);
+            result.conflict.invalidated++;
+            result.conflict.resolved++;
+            onProgress?.(
+              "conflict",
+              `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`,
+            );
+          } else if (decision === "both") {
+            result.conflict.resolved++;
+            onProgress?.("conflict", `Kept both: no real conflict`);
+          }
+          // "skip" = LLM unavailable, don't count as resolved
+        }
+      }
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 1c complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
+      );
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 1c error: ${String(err)}`);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // Phase 2: Pareto Scoring & Threshold Calculation
+  // --------------------------------------------------------------------------
+  let paretoThreshold = 0;
+  let allScores: Awaited<ReturnType<typeof db.calculateAllEffectiveScores>> = [];
+  if (!abortSignal?.aborted) {
+    onPhaseStart?.("pareto");
+    logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring");
+
+    try {
+      allScores = await db.calculateAllEffectiveScores(agentId);
+      result.pareto.totalMemories = allScores.length;
+      result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length;
+      result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length;
+
+      // Calculate the threshold for top N% (default: top 20%)
+      paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile);
+      result.pareto.threshold = paretoThreshold;
+
+      onProgress?.(
+        "pareto",
+        `Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
+      );
+      onProgress?.(
+        "pareto",
+        `Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`,
+      );
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`,
+      );
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // Phase 3: Core Promotion (using pre-computed scores from Phase 2)
+  //
+  // Design note on staleness: The effective scores and Pareto threshold were
+  // computed in Phase 2 and may be slightly stale by the time Phases 3/4 run.
+  // This is acceptable because: (a) the sleep cycle is a background maintenance
+  // task that runs infrequently (not concurrent with itself), (b) the scoring
+  // formula is deterministic based on stored properties that change slowly, and
+  // (c) promotion is a one-way operation (core memories are never auto-demoted;
+  // bad core memories are handled manually via memory_forget). The alternative
+  // (re-querying scores per phase) adds latency without meaningful accuracy gain.
+  // --------------------------------------------------------------------------
+  if (!abortSignal?.aborted && paretoThreshold > 0) {
+    onPhaseStart?.("promotion");
+    logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion");
+
+    try {
+      const candidates = allScores.filter(
+        (s) =>
+          s.category !== "core" &&
+          s.effectiveScore >= paretoThreshold &&
+          s.ageDays >= promotionMinAgeDays,
+      );
+      result.promotion.candidatesFound = candidates.length;
+
+      if (candidates.length > 0) {
+        const ids = candidates.map((m) => m.id);
+        result.promotion.promoted = await db.promoteToCore(ids);
+        for (const c of candidates) {
+          onProgress?.(
+            "promotion",
+            `Promoted "${c.text.slice(0, 40)}..." (score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`,
+          );
+        }
+      }
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 3 complete — ${result.promotion.promoted} memories promoted to core`,
+      );
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // Phase 4: Entity Extraction (moved before decay so new memories get
+  // extracted before pruning can remove them)
+  // --------------------------------------------------------------------------
+  // Extraction uses llmConcurrency (defined above, matches OLLAMA_NUM_PARALLEL)
+  if (!abortSignal?.aborted && config.enabled) {
+    onPhaseStart?.("extraction");
+    logger.info("memory-neo4j: [sleep] Phase 4: Entity Extraction");
+
+    try {
+      // Get initial count
+      const counts = await db.countByExtractionStatus(agentId);
+      result.extraction.total = counts.pending;
+
+      if (result.extraction.total > 0) {
+        let hasMore = true;
+        while (hasMore && !abortSignal?.aborted) {
+          const pending = await db.listPendingExtractions(extractionBatchSize, agentId);
+
+          if (pending.length === 0) {
+            hasMore = false;
+            break;
+          }
+
+          // Process in parallel chunks of llmConcurrency
+          for (let i = 0; i < pending.length && !abortSignal?.aborted; i += llmConcurrency) {
+            const chunk = pending.slice(i, i + llmConcurrency);
+            const outcomes = await Promise.allSettled(
+              chunk.map((memory) =>
+                runBackgroundExtraction(
+                  memory.id,
+                  memory.text,
+                  db,
+                  embeddings,
+                  config,
+                  logger,
+                  memory.extractionRetries,
+                  abortSignal,
+                ),
+              ),
+            );
+
+            for (const outcome of outcomes) {
+              result.extraction.processed++;
+              if (outcome.status === "fulfilled" && outcome.value.success) {
+                result.extraction.succeeded++;
+              } else {
+                result.extraction.failed++;
+              }
+            }
+
+            if (result.extraction.processed % 10 === 0 || i + llmConcurrency >= pending.length) {
+              onProgress?.(
+                "extraction",
+                `${result.extraction.processed}/${result.extraction.total} processed`,
+              );
+            }
+          }
+
+          // Delay between batches (abort-aware)
+          if (hasMore && !abortSignal?.aborted) {
+            await new Promise<void>((resolve) => {
+              const timer = setTimeout(resolve, extractionDelayMs);
+              // If abort fires during delay, resolve immediately
+              abortSignal?.addEventListener(
+                "abort",
+                () => {
+                  clearTimeout(timer);
+                  resolve();
+                },
+                { once: true },
+              );
+            });
+          }
+        }
+      }
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 4 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
+      );
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`);
+    }
+  } else if (!config.enabled) {
+    logger.info("memory-neo4j: [sleep] Phase 4 skipped — extraction not enabled");
+  }
+
+  // --------------------------------------------------------------------------
+  // Phase 5: Decay & Pruning (after extraction so freshly extracted memories
+  // aren't pruned before they build entity connections)
+  // --------------------------------------------------------------------------
+  if (!abortSignal?.aborted) {
+    onPhaseStart?.("decay");
+    logger.info("memory-neo4j: [sleep] Phase 5: Decay & Pruning");
+
+    try {
+      const decayed = await db.findDecayedMemories({
+        retentionThreshold: decayRetentionThreshold,
+        baseHalfLifeDays: decayBaseHalfLifeDays,
+        importanceMultiplier: decayImportanceMultiplier,
+        decayCurves,
+        agentId,
+      });
+
+      if (decayed.length > 0) {
+        const ids = decayed.map((m) => m.id);
+        result.decay.memoriesPruned = await db.pruneMemories(ids);
+        onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
+      }
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 5 complete — ${result.decay.memoriesPruned} memories pruned`,
+      );
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // Phase 6: Orphan Cleanup
+  // --------------------------------------------------------------------------
+  if (!abortSignal?.aborted) {
+    onPhaseStart?.("cleanup");
+    logger.info("memory-neo4j: [sleep] Phase 6: Orphan Cleanup");
+
+    try {
+      // Clean up orphan entities
+      if (!abortSignal?.aborted) {
+        const orphanEntities = await db.findOrphanEntities();
+        if (orphanEntities.length > 0) {
+          result.cleanup.entitiesRemoved = await db.deleteOrphanEntities(
+            orphanEntities.map((e) => e.id),
+          );
+          onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`);
+        }
+      }
+
+      // Clean up orphan tags
+      if (!abortSignal?.aborted) {
+        const orphanTags = await db.findOrphanTags();
+        if (orphanTags.length > 0) {
+          result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id));
+          onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`);
+        }
+      }
+
+      logger.info(
+        `memory-neo4j: [sleep] Phase 6 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
+      );
+    } catch (err) {
+      logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
+    }
+  }
+
+  result.durationMs = Date.now() - startTime;
+  result.aborted = abortSignal?.aborted ?? false;
+
+  logger.info(
+    `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` +
+      (result.aborted ? " (aborted)" : ""),
+  );
+
+  return result;
+}