memory-neo4j: add userPinned flag, remove demotion, add benchmarking, audit fixes

- Add userPinned boolean on Memory nodes: user-stored core memories are
  immune to importance recalculation, decay, and pruning. They can only be
  removed via memory_forget, and their importance is locked at 1.0.
- Add listCoreForInjection(): always injects ALL userPinned core memories
  plus top N non-pinned core memories by importance (no silent drop-off
  for user-pinned memories regardless of maxEntries cap).
- Remove core demotion entirely: promotion is now one-way. Bad core
  memories are handled manually via memory_forget.
- Add [bench] performance timing to auto-recall, auto-capture, core
  memory injection, core refresh, and hybridSearch.
- Audit fixes: remove dead entity/tag methods, dead test blocks, orphaned
  demoteFromCore docstring, unnecessary .slice() in graphSearch.
- Refactor attention gate into shared checks for user/assistant gates.
- Consolidate LLM client, message utils, and config helpers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Tarun Sukhani
2026-02-11 12:56:34 +08:00
parent e562ff4e31
commit e9b9da5a1f
15 changed files with 2492 additions and 2078 deletions

View File

@@ -62,6 +62,30 @@ const MIN_CAPTURE_CHARS = 30;
/** Minimum word count — short contextual phrases lack standalone meaning. */
const MIN_WORD_COUNT = 8;
/**
 * Rejection checks common to the user and assistant attention gates.
 *
 * Returns `true` when the already-trimmed text should be rejected because it
 * contains memory-system injection markers, matches a noise pattern, or holds
 * more than three emoji.
 */
function failsSharedGateChecks(trimmed: string): boolean {
  // Text that echoes context injected by the memory system itself
  const injectedMarkers = ["<relevant-memories>", "<core-memory-refresh>"];
  if (injectedMarkers.some((marker) => trimmed.includes(marker))) {
    return true;
  }

  // Known noise patterns; stop at the first match
  for (const pattern of NOISE_PATTERNS) {
    if (pattern.test(trimmed)) {
      return true;
    }
  }

  // More than three emoji reads as a reaction rather than substance
  const emojiMatches = trimmed.match(
    /[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu,
  );
  return emojiMatches !== null && emojiMatches.length > 3;
}
export function passesAttentionGate(text: string): boolean {
const trimmed = text.trim();
@@ -76,22 +100,7 @@ export function passesAttentionGate(text: string): boolean {
return false;
}
// Injected context from the memory system itself
if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
return false;
}
// Noise patterns
if (NOISE_PATTERNS.some((r) => r.test(trimmed))) {
return false;
}
// Excessive emoji (likely reaction, not substance)
const emojiCount = (
trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) ||
[]
).length;
if (emojiCount > 3) {
if (failsSharedGateChecks(trimmed)) {
return false;
}
@@ -183,13 +192,7 @@ export function passesAssistantAttentionGate(text: string): boolean {
return false;
}
// Injected context from the memory system itself
if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
return false;
}
// Noise patterns (same as user gate)
if (NOISE_PATTERNS.some((r) => r.test(trimmed))) {
if (failsSharedGateChecks(trimmed)) {
return false;
}
@@ -198,14 +201,5 @@ export function passesAssistantAttentionGate(text: string): boolean {
return false;
}
// Excessive emoji (likely reaction, not substance)
const emojiCount = (
trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) ||
[]
).length;
if (emojiCount > 3) {
return false;
}
return true;
}

View File

@@ -0,0 +1,573 @@
/**
* Tests for the auto-capture pipeline: captureMessage and runAutoCapture.
*
* Tests the embed → dedup → rate → store pipeline including:
* - Pre-computed vector usage (batch embedding optimization)
* - Exact dedup (≥0.95 score band)
* - Semantic dedup (0.75-0.95 score band via LLM)
* - Importance pre-screening for assistant messages
* - Batch embedding in runAutoCapture
*/
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { ExtractionConfig } from "./config.js";
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import { _captureMessage as captureMessage, _runAutoCapture as runAutoCapture } from "./index.js";
// ============================================================================
// Mocks
// ============================================================================
/** Extraction config with the LLM enabled, pointing at a fake endpoint and never retrying. */
const enabledConfig: ExtractionConfig = {
  enabled: true,
  baseUrl: "https://test.ai/api/v1",
  apiKey: "test-key",
  model: "test-model",
  temperature: 0,
  maxRetries: 0,
};

/** Same settings as `enabledConfig`, but with extraction switched off. */
const disabledConfig: ExtractionConfig = { ...enabledConfig, enabled: false };
/** Logger stub shared by every test; each level is a spy so calls can be asserted. */
const mockLogger = {
  debug: vi.fn(),
  info: vi.fn(),
  warn: vi.fn(),
};
/**
 * Build a stub Neo4j client for the capture tests.
 *
 * By default `findSimilar` resolves to an empty list and `storeMemory`
 * resolves to `undefined`; anything in `overrides` replaces those defaults.
 */
function createMockDb(overrides?: Partial<Neo4jMemoryClient>): Neo4jMemoryClient {
  const defaults = {
    findSimilar: vi.fn().mockResolvedValue([]),
    storeMemory: vi.fn().mockResolvedValue(undefined),
  };
  const stub = { ...defaults, ...overrides };
  return stub as unknown as Neo4jMemoryClient;
}
/**
 * Build a stub embeddings provider for the capture tests.
 *
 * By default `embed` resolves to a fixed three-element vector and `embedBatch`
 * resolves to a single such vector; anything in `overrides` replaces those defaults.
 */
function createMockEmbeddings(overrides?: Partial<Embeddings>): Embeddings {
  const defaults = {
    embed: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]),
    embedBatch: vi.fn().mockResolvedValue([[0.1, 0.2, 0.3]]),
  };
  const stub = { ...defaults, ...overrides };
  return stub as unknown as Embeddings;
}
// ============================================================================
// captureMessage
// ============================================================================
describe("captureMessage", () => {
  const realFetch = globalThis.fetch;

  /** Fetch-style response whose JSON body is a single chat completion carrying `payload`. */
  const completionResponse = (payload: unknown) => ({
    ok: true,
    json: () =>
      Promise.resolve({
        choices: [{ message: { content: JSON.stringify(payload) } }],
      }),
  });

  /** Stub global fetch so every LLM call answers with the given importance score (out of 10). */
  const stubImportanceScore = (score: number): void => {
    globalThis.fetch = vi.fn().mockResolvedValue(completionResponse({ score }));
  };

  beforeEach(() => {
    vi.clearAllMocks();
  });

  afterEach(() => {
    globalThis.fetch = realFetch;
  });

  it("should store a new memory when no duplicates exist", async () => {
    const memoryDb = createMockDb();
    const embedder = createMockEmbeddings();
    // rateImportance reaches the LLM through fetch
    stubImportanceScore(7);

    const outcome = await captureMessage(
      "I prefer TypeScript over JavaScript",
      "auto-capture",
      0.5,
      1.0,
      "test-agent",
      "session-1",
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(true);
    expect(outcome.semanticDeduped).toBe(false);
    expect(memoryDb.storeMemory).toHaveBeenCalledOnce();
    expect(embedder.embed).toHaveBeenCalledWith("I prefer TypeScript over JavaScript");
  });

  it("should use pre-computed vector when provided", async () => {
    const memoryDb = createMockDb();
    const embedder = createMockEmbeddings();
    const suppliedVector = [0.5, 0.6, 0.7];
    stubImportanceScore(7);

    const outcome = await captureMessage(
      "test text",
      "auto-capture",
      0.5,
      1.0,
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
      suppliedVector,
    );

    expect(outcome.stored).toBe(true);
    // A supplied vector makes the embed() call unnecessary
    expect(embedder.embed).not.toHaveBeenCalled();
    // The supplied vector must be the one used for the similarity lookup
    expect(memoryDb.findSimilar).toHaveBeenCalledWith(suppliedVector, 0.75, 3, "test-agent");
  });

  it("should skip storage when exact duplicate found (score >= 0.95)", async () => {
    const nearIdentical = [{ id: "existing-1", text: "duplicate text", score: 0.97 }];
    const memoryDb = createMockDb({
      findSimilar: vi.fn().mockResolvedValue(nearIdentical),
    });
    const embedder = createMockEmbeddings();

    const outcome = await captureMessage(
      "duplicate text",
      "auto-capture",
      0.5,
      1.0,
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(false);
    expect(outcome.semanticDeduped).toBe(false);
    expect(memoryDb.storeMemory).not.toHaveBeenCalled();
  });

  it("should semantic dedup when candidate in 0.75-0.95 band is LLM-confirmed duplicate", async () => {
    const bandCandidate = [{ id: "candidate-1", text: "User prefers TypeScript", score: 0.88 }];
    const memoryDb = createMockDb({
      findSimilar: vi.fn().mockResolvedValue(bandCandidate),
    });
    const embedder = createMockEmbeddings();
    // First LLM call is rateImportance; every later call is isSemanticDuplicate
    globalThis.fetch = vi
      .fn()
      .mockResolvedValueOnce(completionResponse({ score: 7 }))
      .mockResolvedValue(
        completionResponse({
          verdict: "duplicate",
          reason: "same preference",
        }),
      );

    const outcome = await captureMessage(
      "I like TypeScript",
      "auto-capture",
      0.5,
      1.0,
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(false);
    expect(outcome.semanticDeduped).toBe(true);
    expect(memoryDb.storeMemory).not.toHaveBeenCalled();
  });

  it("should skip importance check when extraction is disabled", async () => {
    const memoryDb = createMockDb();
    const embedder = createMockEmbeddings();

    // With extraction off, rateImportance yields its 0.5 fallback and the
    // threshold comparison is bypassed altogether
    const outcome = await captureMessage(
      "some text to store",
      "auto-capture",
      0.5,
      1.0,
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      disabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(true);
    expect(memoryDb.storeMemory).toHaveBeenCalledOnce();
    // Stored importance is the fallback multiplied by the discount
    const persisted = (memoryDb.storeMemory as ReturnType<typeof vi.fn>).mock.calls[0][0];
    expect(persisted.importance).toBe(0.5); // 0.5 fallback * 1.0 discount
    expect(persisted.extractionStatus).toBe("skipped");
  });

  it("should apply importance discount for assistant messages", async () => {
    const memoryDb = createMockDb();
    const embedder = createMockEmbeddings();
    // Assistant messages have their importance rated before anything else
    stubImportanceScore(8);

    const outcome = await captureMessage(
      "Here's what I know about Neo4j graph databases...",
      "auto-capture-assistant",
      0.8, // stricter threshold for assistant output
      0.75, // 25% discount
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(true);
    const persisted = (memoryDb.storeMemory as ReturnType<typeof vi.fn>).mock.calls[0][0];
    // importance 0.8 (score 8/10) * 0.75 discount ≈ 0.6
    expect(persisted.importance).toBeCloseTo(0.6);
    expect(persisted.source).toBe("auto-capture-assistant");
  });

  it("should reject assistant messages below importance threshold", async () => {
    const memoryDb = createMockDb();
    const embedder = createMockEmbeddings();
    // The LLM rates this message as unimportant
    stubImportanceScore(3);

    const outcome = await captureMessage(
      "Sure, I can help with that.",
      "auto-capture-assistant",
      0.8, // threshold 0.8
      0.75,
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(false);
    // The importance pre-screen fails, so no embedding should be requested
    expect(embedder.embed).not.toHaveBeenCalled();
    expect(memoryDb.storeMemory).not.toHaveBeenCalled();
  });

  it("should reject user messages below importance threshold", async () => {
    const memoryDb = createMockDb();
    const embedder = createMockEmbeddings();
    // The LLM rates this message as unimportant
    stubImportanceScore(2);

    const outcome = await captureMessage(
      "okay thanks",
      "auto-capture",
      0.5, // threshold 0.5
      1.0,
      "test-agent",
      undefined,
      memoryDb,
      embedder,
      enabledConfig,
      mockLogger,
    );

    expect(outcome.stored).toBe(false);
    expect(memoryDb.storeMemory).not.toHaveBeenCalled();
  });
});
// ============================================================================
// runAutoCapture
// ============================================================================
describe("runAutoCapture", () => {
  const originalFetch = globalThis.fetch;

  beforeEach(() => {
    vi.clearAllMocks();
  });

  afterEach(() => {
    globalThis.fetch = originalFetch;
  });

  it("should batch-embed all retained messages at once", async () => {
    const db = createMockDb();
    const embedBatchMock = vi.fn().mockResolvedValue([
      [0.1, 0.2],
      [0.3, 0.4],
    ]);
    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
    // Mock rateImportance calls
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () =>
        Promise.resolve({
          choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
        }),
    });
    const messages = [
      {
        role: "user",
        content: "I prefer TypeScript over JavaScript for backend development",
      },
      {
        role: "assistant",
        content:
          "TypeScript is great for type safety and developer experience, especially with Node.js projects",
      },
    ];

    await runAutoCapture(
      messages,
      "test-agent",
      "session-1",
      db,
      embeddings,
      enabledConfig,
      mockLogger,
    );

    // Should call embedBatch once with both texts
    expect(embedBatchMock).toHaveBeenCalledOnce();
    const batchTexts = embedBatchMock.mock.calls[0][0];
    expect(batchTexts.length).toBe(2);
  });

  it("should not call embedBatch when no messages pass the gate", async () => {
    const db = createMockDb();
    const embedBatchMock = vi.fn().mockResolvedValue([]);
    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
    // Short messages that won't pass attention gate
    const messages = [
      { role: "user", content: "ok" },
      { role: "assistant", content: "yes" },
    ];

    await runAutoCapture(
      messages,
      "test-agent",
      "session-1",
      db,
      embeddings,
      enabledConfig,
      mockLogger,
    );

    expect(embedBatchMock).not.toHaveBeenCalled();
    expect(db.storeMemory).not.toHaveBeenCalled();
  });

  it("should handle empty messages array", async () => {
    const db = createMockDb();
    const embeddings = createMockEmbeddings();

    await runAutoCapture([], "test-agent", undefined, db, embeddings, enabledConfig, mockLogger);

    expect(db.storeMemory).not.toHaveBeenCalled();
  });

  it("should continue processing if one message fails", async () => {
    // First findSimilar call rejects (simulated DB failure); later calls succeed
    const findSimilarMock = vi
      .fn()
      .mockRejectedValueOnce(new Error("DB connection failed"))
      .mockResolvedValue([]);
    const embedBatchMock = vi.fn().mockResolvedValue([
      [0.1, 0.2],
      [0.3, 0.4],
    ]);
    const dbWithError = createMockDb({
      findSimilar: findSimilarMock,
    });
    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () =>
        Promise.resolve({
          choices: [{ message: { content: JSON.stringify({ score: 7 }) } }],
        }),
    });
    const messages = [
      {
        role: "user",
        content: "First message that is long enough to pass the attention gate filter",
      },
      {
        role: "user",
        content: "Second message that is also long enough to pass the attention gate",
      },
    ];

    // Should not throw — errors are caught per-message
    await runAutoCapture(
      messages,
      "test-agent",
      "session-1",
      dbWithError,
      embeddings,
      enabledConfig,
      mockLogger,
    );

    // The second message should still have been attempted
    expect(findSimilarMock).toHaveBeenCalledTimes(2);
  });

  it("should use different thresholds for user vs assistant messages", async () => {
    const storeMemoryMock = vi.fn().mockResolvedValue(undefined);
    const dbWithStore = createMockDb({ storeMemory: storeMemoryMock });
    const embedBatchMock = vi.fn().mockResolvedValue([
      [0.1, 0.2],
      [0.3, 0.4],
    ]);
    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
    // Always return high importance so both pass
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () =>
        Promise.resolve({
          choices: [{ message: { content: JSON.stringify({ score: 9 }) } }],
        }),
    });
    const messages = [
      {
        role: "user",
        content: "I really love working with graph databases like Neo4j for my projects",
      },
      {
        role: "assistant",
        content:
          "Graph databases like Neo4j excel at modeling connected data and relationship queries",
      },
    ];

    await runAutoCapture(
      messages,
      "test-agent",
      "session-1",
      dbWithStore,
      embeddings,
      enabledConfig,
      mockLogger,
    );

    // Both should be stored. Assert the count unconditionally so the test
    // cannot pass vacuously when nothing (or only one message) was stored.
    const storeCalls = storeMemoryMock.mock.calls;
    expect(storeCalls).toHaveLength(2);
    // User message: importance * 1.0 discount
    expect(storeCalls[0][0].source).toBe("auto-capture");
    // Assistant message: importance * 0.75 discount
    expect(storeCalls[1][0].source).toBe("auto-capture-assistant");
    expect(storeCalls[1][0].importance).toBeLessThan(storeCalls[0][0].importance);
  });

  it("should log capture errors without throwing", async () => {
    const embedBatchMock = vi.fn().mockRejectedValue(new Error("embedding service down"));
    const embeddings = createMockEmbeddings({ embedBatch: embedBatchMock });
    const db = createMockDb();
    const messages = [
      {
        role: "user",
        content: "A long enough message to pass the attention gate for testing purposes",
      },
    ];

    // Should not throw
    await runAutoCapture(
      messages,
      "test-agent",
      "session-1",
      db,
      embeddings,
      enabledConfig,
      mockLogger,
    );

    // Should have logged the error
    expect(mockLogger.warn).toHaveBeenCalled();
  });
});

View File

@@ -0,0 +1,514 @@
/**
* CLI command registration for memory-neo4j.
*
* Registers the `openclaw memory neo4j` subcommand group with commands:
* - list: List memory counts by agent and category
* - search: Search memories via hybrid search
* - stats: Show memory statistics and configuration
* - sleep: Run sleep cycle (six-phase memory consolidation)
* - promote: Manually promote a memory to core
* - index: Re-embed all memories after changing embedding model
* - cleanup: Retroactively apply attention gate to stored memories
*/
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
import type { ExtractionConfig, MemoryNeo4jConfig } from "./config.js";
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import { passesAttentionGate } from "./attention-gate.js";
import { stripMessageWrappers } from "./message-utils.js";
import { hybridSearch } from "./search.js";
import { runSleepCycle } from "./sleep-cycle.js";
/**
 * Dependencies that `registerCli` destructures and shares across all of the
 * `openclaw memory neo4j` subcommands.
 */
export type CliDeps = {
/** Neo4j client used for stats, promotion, reindexing, cleanup queries and pruning. */
db: Neo4jMemoryClient;
/** Embeddings provider passed to hybrid search, the sleep cycle and reindexing. */
embeddings: Embeddings;
/** Plugin configuration; the CLI reads connection, embedding, feature-flag and decay settings from it. */
cfg: MemoryNeo4jConfig;
/** LLM extraction settings; `enabled` and `model` are read directly by the CLI. */
extractionConfig: ExtractionConfig;
/** Embedding vector dimension, printed by the `index` command. */
vectorDim: number;
};
/**
 * Register the `openclaw memory neo4j` CLI subcommand group.
 *
 * Attaches a `neo4j` subcommand to the program's `memory` command (creating a
 * fallback `memory` command when none is registered yet) and wires up the
 * `list`, `search`, `stats`, `sleep`, `promote`, `index` and `cleanup`
 * commands. Each action reports failures on stderr and sets
 * `process.exitCode = 1` rather than throwing.
 *
 * @param api - Plugin API; provides `registerCli` and the logger handed to the sleep cycle.
 * @param deps - Database client, embeddings provider and configuration shared by all commands.
 */
export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
  const { db, embeddings, cfg, extractionConfig, vectorDim } = deps;

  api.registerCli(
    ({ program }) => {
      // Find existing memory command or create fallback
      let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory");
      if (!memoryCmd) {
        // Fallback if core memory CLI not registered yet
        memoryCmd = program.command("memory").description("Memory commands");
      }

      // Add neo4j memory subcommand group
      const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands");

      memory
        .command("list")
        .description("List memory counts by agent and category")
        .option("--json", "Output as JSON")
        .action(async (opts: { json?: boolean }) => {
          try {
            await db.ensureInitialized();
            const stats = await db.getMemoryStats();

            if (opts.json) {
              console.log(JSON.stringify(stats, null, 2));
              return;
            }

            if (stats.length === 0) {
              console.log("No memories stored.");
              return;
            }

            // Group by agentId
            const byAgent = new Map<
              string,
              Array<{ category: string; count: number; avgImportance: number }>
            >();
            for (const row of stats) {
              const list = byAgent.get(row.agentId) || [];
              list.push({
                category: row.category,
                count: row.count,
                avgImportance: row.avgImportance,
              });
              byAgent.set(row.agentId, list);
            }

            // Print table for each agent
            for (const [agentId, categories] of byAgent) {
              const total = categories.reduce((sum, c) => sum + c.count, 0);
              console.log(`\n┌─ ${agentId} (${total} total)`);
              console.log("│");
              console.log("│ Category Count Avg Importance");
              console.log("│ ─────────────────────────────────────");
              for (const { category, count, avgImportance } of categories) {
                const cat = category.padEnd(12);
                const cnt = String(count).padStart(5);
                const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%";
                console.log(`${cat} ${cnt} ${imp}`);
              }
              console.log("└");
            }
            console.log("");
          } catch (err) {
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exitCode = 1;
          }
        });

      memory
        .command("search")
        .description("Search memories")
        .argument("<query>", "Search query")
        .option("--limit <n>", "Max results", "5")
        .option("--agent <id>", "Agent id (default: default)")
        .action(async (query: string, opts: { limit: string; agent?: string }) => {
          // Reject a non-numeric or non-positive limit up front instead of
          // handing NaN to the search.
          const limit = parseInt(opts.limit, 10);
          if (Number.isNaN(limit) || limit <= 0) {
            console.error("Error: --limit must be greater than 0");
            process.exitCode = 1;
            return;
          }

          try {
            const results = await hybridSearch(
              db,
              embeddings,
              query,
              limit,
              opts.agent ?? "default",
              extractionConfig.enabled,
              { graphSearchDepth: cfg.graphSearchDepth },
            );
            const output = results.map((r) => ({
              id: r.id,
              text: r.text,
              category: r.category,
              importance: r.importance,
              score: r.score,
            }));
            console.log(JSON.stringify(output, null, 2));
          } catch (err) {
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exitCode = 1;
          }
        });

      memory
        .command("stats")
        .description("Show memory statistics and configuration")
        .action(async () => {
          try {
            await db.ensureInitialized();
            const stats = await db.getMemoryStats();
            const total = stats.reduce((sum, s) => sum + s.count, 0);

            console.log("\nMemory (Neo4j) Statistics");
            console.log("─────────────────────────");
            console.log(`Total memories: ${total}`);
            console.log(`Neo4j URI: ${cfg.neo4j.uri}`);
            console.log(`Embedding: ${cfg.embedding.provider}/${cfg.embedding.model}`);
            console.log(
              `Extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`,
            );
            console.log(`Auto-capture: ${cfg.autoCapture ? "enabled" : "disabled"}`);
            console.log(`Auto-recall: ${cfg.autoRecall ? "enabled" : "disabled"}`);
            console.log(`Core memory: ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`);

            if (stats.length > 0) {
              // Group by category across all agents
              const byCategory = new Map<string, number>();
              for (const row of stats) {
                byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count);
              }
              console.log("\nBy Category:");
              for (const [category, count] of byCategory) {
                console.log(` ${category.padEnd(12)} ${count}`);
              }

              // Show agent count
              const agents = new Set(stats.map((s) => s.agentId));
              console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`);
            }
            console.log("");
          } catch (err) {
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exitCode = 1;
          }
        });

      memory
        .command("sleep")
        .description("Run sleep cycle — consolidate memories with Pareto-based promotion")
        .option("--agent <id>", "Agent id (default: all agents)")
        .option("--dedup-threshold <n>", "Vector similarity threshold for dedup (default: 0.95)")
        .option("--pareto <n>", "Top N% for core memory (default: 0.2 = top 20%)")
        .option("--promotion-min-age <days>", "Min age in days before promotion (default: 7)")
        .option("--decay-threshold <n>", "Decay score threshold for pruning (default: 0.1)")
        .option("--decay-half-life <days>", "Base half-life in days (default: 30)")
        .option("--batch-size <n>", "Extraction batch size (default: 50)")
        .option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
        .option("--max-semantic-pairs <n>", "Max LLM-checked semantic dedup pairs (default: 500)")
        .option("--concurrency <n>", "Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)")
        .option(
          "--skip-semantic",
          "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
        )
        .action(
          async (opts: {
            agent?: string;
            dedupThreshold?: string;
            pareto?: string;
            promotionMinAge?: string;
            decayThreshold?: string;
            decayHalfLife?: string;
            batchSize?: string;
            delay?: string;
            maxSemanticPairs?: string;
            concurrency?: string;
            skipSemantic?: boolean;
          }) => {
            // Banner numbering mirrors the phaseNames map below (core demotion
            // is no longer a phase, so numbering stops at 6).
            console.log("\n🌙 Memory Sleep Cycle");
            console.log("═════════════════════════════════════════════════════════════");
            console.log("Six-phase memory consolidation (Pareto-based):\n");
            console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
            console.log(
              " Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.75–0.95 band)",
            );
            console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories");
            console.log(
              " Phase 2: Pareto Scoring — Calculate effective scores for all memories",
            );
            console.log(" Phase 3: Core Promotion — Regular memories above threshold → core");
            console.log(" Phase 4: Extraction — Extract entities and categorize");
            console.log(" Phase 5: Decay & Pruning — Remove stale low-importance memories");
            console.log(" Phase 6: Orphan Cleanup — Remove disconnected nodes\n");

            try {
              // Validate sleep cycle CLI parameters before running
              const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : undefined;
              const delay = opts.delay ? parseInt(opts.delay, 10) : undefined;
              const decayHalfLife = opts.decayHalfLife
                ? parseInt(opts.decayHalfLife, 10)
                : undefined;
              const decayThreshold = opts.decayThreshold
                ? parseFloat(opts.decayThreshold)
                : undefined;
              const dedupThreshold = opts.dedupThreshold
                ? parseFloat(opts.dedupThreshold)
                : undefined;
              const pareto = opts.pareto ? parseFloat(opts.pareto) : undefined;
              const promotionMinAge = opts.promotionMinAge
                ? parseInt(opts.promotionMinAge, 10)
                : undefined;

              if (batchSize != null && (Number.isNaN(batchSize) || batchSize <= 0)) {
                console.error("Error: --batch-size must be greater than 0");
                process.exitCode = 1;
                return;
              }
              if (delay != null && (Number.isNaN(delay) || delay < 0)) {
                console.error("Error: --delay must be >= 0");
                process.exitCode = 1;
                return;
              }
              if (decayHalfLife != null && (Number.isNaN(decayHalfLife) || decayHalfLife <= 0)) {
                console.error("Error: --decay-half-life must be greater than 0");
                process.exitCode = 1;
                return;
              }
              if (
                decayThreshold != null &&
                (Number.isNaN(decayThreshold) || decayThreshold < 0 || decayThreshold > 1)
              ) {
                console.error("Error: --decay-threshold must be between 0 and 1");
                process.exitCode = 1;
                return;
              }
              // Validated like the other numeric options so a typo cannot reach
              // the sleep cycle as NaN.
              if (
                dedupThreshold != null &&
                (Number.isNaN(dedupThreshold) || dedupThreshold < 0 || dedupThreshold > 1)
              ) {
                console.error("Error: --dedup-threshold must be between 0 and 1");
                process.exitCode = 1;
                return;
              }
              if (pareto != null && (Number.isNaN(pareto) || pareto < 0 || pareto > 1)) {
                console.error("Error: --pareto must be between 0 and 1");
                process.exitCode = 1;
                return;
              }
              if (
                promotionMinAge != null &&
                (Number.isNaN(promotionMinAge) || promotionMinAge < 0)
              ) {
                console.error("Error: --promotion-min-age must be >= 0");
                process.exitCode = 1;
                return;
              }

              const maxSemanticPairs = opts.maxSemanticPairs
                ? parseInt(opts.maxSemanticPairs, 10)
                : undefined;
              if (
                maxSemanticPairs != null &&
                (Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0)
              ) {
                console.error("Error: --max-semantic-pairs must be greater than 0");
                process.exitCode = 1;
                return;
              }

              const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
              if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) {
                console.error("Error: --concurrency must be greater than 0");
                process.exitCode = 1;
                return;
              }

              await db.ensureInitialized();

              const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
                agentId: opts.agent,
                dedupThreshold,
                skipSemanticDedup: opts.skipSemantic === true,
                maxSemanticDedupPairs: maxSemanticPairs,
                llmConcurrency: concurrency,
                paretoPercentile: pareto,
                promotionMinAgeDays: promotionMinAge,
                decayRetentionThreshold: decayThreshold,
                decayBaseHalfLifeDays: decayHalfLife,
                decayCurves: Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined,
                extractionBatchSize: batchSize,
                extractionDelayMs: delay,
                onPhaseStart: (phase) => {
                  const phaseNames: Record<string, string> = {
                    dedup: "Phase 1: Deduplication",
                    semanticDedup: "Phase 1b: Semantic Deduplication",
                    conflict: "Phase 1c: Conflict Detection",
                    pareto: "Phase 2: Pareto Scoring",
                    promotion: "Phase 3: Core Promotion",
                    extraction: "Phase 4: Extraction",
                    decay: "Phase 5: Decay & Pruning",
                    cleanup: "Phase 6: Orphan Cleanup",
                  };
                  // Fall back to the raw phase key so an unmapped phase never
                  // prints "undefined".
                  console.log(`\n▶ ${phaseNames[phase] ?? phase}`);
                  console.log("─────────────────────────────────────────────────────────────");
                },
                onProgress: (_phase, message) => {
                  console.log(` ${message}`);
                },
              });

              // Report the percentile actually requested; 0.2 mirrors the
              // default advertised in the --pareto help text.
              const paretoLabel = `${Number(((pareto ?? 0.2) * 100).toFixed(2))}%`;

              console.log("\n═════════════════════════════════════════════════════════════");
              console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`);
              console.log("─────────────────────────────────────────────────────────────");
              console.log(
                ` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`,
              );
              console.log(
                ` Conflicts: ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
              );
              console.log(
                ` Semantic Dedup: ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
              );
              console.log(
                ` Pareto: ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
              );
              console.log(
                ` Threshold: ${result.pareto.threshold.toFixed(4)} (top ${paretoLabel})`,
              );
              console.log(
                ` Promotion: ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`,
              );
              console.log(` Decay/Pruning: ${result.decay.memoriesPruned} memories pruned`);
              console.log(
                ` Extraction: ${result.extraction.succeeded}/${result.extraction.total} extracted` +
                  (result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""),
              );
              console.log(
                ` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
              );
              if (result.aborted) {
                console.log("\n⚠ Sleep cycle was aborted before completion.");
              }
              console.log("");
            } catch (err) {
              console.error(
                `\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`,
              );
              process.exitCode = 1;
            }
          },
        );

      memory
        .command("promote")
        .description("Manually promote a memory to core status")
        .argument("<id>", "Memory ID to promote")
        .action(async (id: string) => {
          try {
            await db.ensureInitialized();
            const promoted = await db.promoteToCore([id]);
            if (promoted > 0) {
              console.log(`✅ Memory ${id} promoted to core.`);
            } else {
              console.log(`❌ Memory ${id} not found.`);
              process.exitCode = 1;
            }
          } catch (err) {
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exitCode = 1;
          }
        });

      memory
        .command("index")
        .description(
          "Re-embed all memories and entities — use after changing embedding model/provider",
        )
        .option("--batch-size <n>", "Embedding batch size (default: 50)")
        .action(async (opts: { batchSize?: string }) => {
          const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : 50;
          if (Number.isNaN(batchSize) || batchSize <= 0) {
            console.error("Error: --batch-size must be greater than 0");
            process.exitCode = 1;
            return;
          }

          console.log("\nMemory Neo4j — Reindex Embeddings");
          console.log("═════════════════════════════════════════════════════════════");
          console.log(`Model: ${cfg.embedding.provider}/${cfg.embedding.model}`);
          console.log(`Dimensions: ${vectorDim}`);
          console.log(`Batch size: ${batchSize}\n`);

          try {
            const startedAt = Date.now();
            const result = await db.reindex((texts) => embeddings.embedBatch(texts), {
              batchSize,
              onProgress: (phase, done, total) => {
                if (phase === "drop-indexes" && done === 0) {
                  console.log("▶ Dropping old vector index…");
                } else if (phase === "memories") {
                  console.log(` Memories: ${done}/${total}`);
                } else if (phase === "create-indexes" && done === 0) {
                  console.log("▶ Recreating vector index…");
                }
              },
            });
            const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
            console.log("\n═════════════════════════════════════════════════════════════");
            console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`);
            console.log("");
          } catch (err) {
            console.error(
              `\n❌ Reindex failed: ${err instanceof Error ? err.message : String(err)}`,
            );
            process.exitCode = 1;
          }
        });

      memory
        .command("cleanup")
        .description(
          "Retroactively apply the attention gate — find and remove low-substance memories",
        )
        .option("--execute", "Actually delete (default: dry-run preview)")
        .option("--all", "Include explicitly-stored memories (default: auto-capture only)")
        .option("--agent <id>", "Only clean up memories for a specific agent")
        .action(async (opts: { execute?: boolean; all?: boolean; agent?: string }) => {
          try {
            await db.ensureInitialized();

            // Fetch memories — by default only auto-capture (explicit stores are trusted)
            const conditions: string[] = [];
            if (!opts.all) {
              conditions.push("m.source = 'auto-capture'");
            }
            if (opts.agent) {
              // The agent id is bound as a query parameter, never interpolated
              conditions.push("m.agentId = $agentId");
            }
            const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
            const allMemories = await db.runQuery<{
              id: string;
              text: string;
              source: string;
            }>(
              `MATCH (m:Memory) ${where}
RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source
ORDER BY m.createdAt ASC`,
              opts.agent ? { agentId: opts.agent } : {},
            );

            // Strip channel metadata wrappers (same as the real pipeline) then gate
            const noise: Array<{ id: string; text: string; source: string }> = [];
            for (const mem of allMemories) {
              const stripped = stripMessageWrappers(mem.text);
              if (!passesAttentionGate(stripped)) {
                noise.push(mem);
              }
            }

            if (noise.length === 0) {
              console.log("\nNo low-substance memories found. Everything passes the gate.");
              return;
            }

            console.log(
              `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`,
            );
            for (const mem of noise) {
              const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text;
              console.log(` [${mem.source}] "${preview}"`);
            }

            if (!opts.execute) {
              console.log(
                `\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`,
              );
              return;
            }

            // Delete in batch
            const deleted = await db.pruneMemories(noise.map((m) => m.id));
            console.log(`\nDeleted ${deleted} low-substance memories.\n`);
          } catch (err) {
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exitCode = 1;
          }
        });
    },
    { commands: [] }, // Adds subcommands to existing "memory" command, no conflict
  );
}

View File

@@ -92,24 +92,27 @@ export const EMBEDDING_DIMENSIONS: Record<string, number> = {
// Default dimension for unknown models (Ollama models vary)
export const DEFAULT_EMBEDDING_DIMS = 1024;
/**
 * Look up a value by exact key or, failing that, by the longest table key
 * that is a prefix of `key`.
 *
 * @param table - Mapping from known names (e.g. model ids) to values.
 * @param key - Name to resolve; may carry a suffix after a known name.
 * @returns The matched value, or `undefined` when no table key matches.
 */
function lookupByPrefix<T>(table: Record<string, T>, key: string): T | undefined {
  // Own-property check so names such as "constructor" never resolve to
  // members inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(table, key) && table[key] !== undefined) {
    return table[key];
  }

  // Prefer longest matching prefix (e.g. "mxbai-embed-large-2k" over "mxbai-embed-large")
  let best: { value: T; keyLen: number } | undefined;
  for (const [known, value] of Object.entries(table)) {
    if (key.startsWith(known) && (!best || known.length > best.keyLen)) {
      best = { value, keyLen: known.length };
    }
  }
  return best?.value;
}
/**
 * Resolve the embedding vector dimension for a model name.
 *
 * @param model - Embedding model identifier; matched exactly or by longest known prefix.
 * @returns The known dimension, or `DEFAULT_EMBEDDING_DIMS` for unknown models.
 */
export function vectorDimsForModel(model: string): number {
  // Unknown models fall back to the default — callers should warn when this path is taken,
  // as the default 1024 dimensions may not match the actual model's output.
  return lookupByPrefix(EMBEDDING_DIMENSIONS, model) ?? DEFAULT_EMBEDDING_DIMS;
}
/** Max input token lengths for known embedding models. */
@@ -129,17 +132,7 @@ export const EMBEDDING_CONTEXT_LENGTHS: Record<string, number> = {
export const DEFAULT_EMBEDDING_CONTEXT_LENGTH = 512;
/**
 * Resolve the maximum input length (in tokens) for an embedding model.
 *
 * @param model - Embedding model identifier; matched exactly or by longest known prefix
 *   (e.g. "mxbai-embed-large-8k" over "mxbai-embed-large").
 * @returns The known context length, or `DEFAULT_EMBEDDING_CONTEXT_LENGTH` for unknown models.
 */
export function contextLengthForModel(model: string): number {
  return lookupByPrefix(EMBEDDING_CONTEXT_LENGTHS, model) ?? DEFAULT_EMBEDDING_CONTEXT_LENGTH;
}
/**

View File

@@ -8,15 +8,9 @@
import { createHash } from "node:crypto";
import OpenAI from "openai";
import type { EmbeddingProvider } from "./config.js";
import type { Logger } from "./schema.js";
import { contextLengthForModel } from "./config.js";
/**
 * Minimal logging interface: `info`, `warn` and `error` are required,
 * `debug` is optional and must be guarded before calling.
 *
 * NOTE(review): a `Logger` type is also imported from "./schema.js" in this
 * file's import block — confirm whether this local alias is still needed,
 * since the two declarations share a name.
 */
type Logger = {
info: (msg: string) => void;
warn: (msg: string) => void;
error: (msg: string) => void;
debug?: (msg: string) => void;
};
/**
* Simple LRU cache for embedding vectors.
* Keyed by SHA-256 hash of the input text to avoid storing large strings.

View File

@@ -8,19 +8,22 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import type { ExtractionConfig } from "./config.js";
import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
import {
extractUserMessages,
extractAssistantMessages,
stripAssistantWrappers,
extractEntities,
runBackgroundExtraction,
rateImportance,
resolveConflict,
isSemanticDuplicate,
isTransientError,
runSleepCycle,
SEMANTIC_DEDUP_VECTOR_THRESHOLD,
} from "./extractor.js";
import { passesAttentionGate, passesAssistantAttentionGate } from "./index.js";
import { isTransientError } from "./llm-client.js";
import {
extractUserMessages,
extractAssistantMessages,
stripAssistantWrappers,
} from "./message-utils.js";
import { runSleepCycle } from "./sleep-cycle.js";
// ============================================================================
// passesAttentionGate()
@@ -1756,7 +1759,6 @@ describe("runSleepCycle", () => {
calculateAllEffectiveScores: vi.fn().mockResolvedValue([]),
calculateParetoThreshold: vi.fn().mockReturnValue(0.5),
promoteToCore: vi.fn().mockResolvedValue(0),
demoteFromCore: vi.fn().mockResolvedValue(0),
findDecayedMemories: vi.fn().mockResolvedValue([]),
pruneMemories: vi.fn().mockResolvedValue(0),
countByExtractionStatus: vi
@@ -1768,11 +1770,6 @@ describe("runSleepCycle", () => {
findOrphanTags: vi.fn().mockResolvedValue([]),
deleteOrphanTags: vi.fn().mockResolvedValue(0),
updateExtractionStatus: vi.fn().mockResolvedValue(undefined),
mergeEntity: vi.fn().mockResolvedValue({ id: "e1", name: "test" }),
createMentions: vi.fn().mockResolvedValue(undefined),
createEntityRelationship: vi.fn().mockResolvedValue(undefined),
tagMemory: vi.fn().mockResolvedValue(undefined),
updateMemoryCategory: vi.fn().mockResolvedValue(undefined),
};
});
@@ -2252,64 +2249,7 @@ describe("runSleepCycle", () => {
});
});
// Phase 4: Demotion
// Exercises the demotion phase of runSleepCycle: core memories scoring below
// the Pareto threshold are passed to db.demoteFromCore; other categories are not.
// NOTE(review): this suite relies on `mockDb.demoteFromCore` and
// `result.demotion` — confirm both still exist before keeping it.
describe("Phase 4: Core Demotion", () => {
it("should demote core memories below threshold", async () => {
// Two core memories: m1 scores 0.3 (below the 0.7 threshold), m2 scores 0.95 (above).
const scores = [
{
id: "m1",
text: "test",
category: "core",
importance: 0.3,
retrievalCount: 1,
ageDays: 30,
effectiveScore: 0.3,
},
{
id: "m2",
text: "test",
category: "core",
importance: 0.9,
retrievalCount: 10,
ageDays: 5,
effectiveScore: 0.95,
},
];
mockDb.calculateAllEffectiveScores.mockResolvedValue(scores);
mockDb.calculateParetoThreshold.mockReturnValue(0.7);
mockDb.demoteFromCore.mockResolvedValue(1);
const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger);
// m1 should be demoted (category=core, score=0.30 < 0.70)
expect(mockDb.demoteFromCore).toHaveBeenCalledWith(["m1"]);
expect(result.demotion.candidatesFound).toBe(1);
expect(result.demotion.demoted).toBe(1);
});
it("should not demote regular memories", async () => {
// A low-scoring memory whose category is "fact" rather than "core".
const scores = [
{
id: "m1",
text: "test",
category: "fact",
importance: 0.2,
retrievalCount: 0,
ageDays: 50,
effectiveScore: 0.1,
},
];
mockDb.calculateAllEffectiveScores.mockResolvedValue(scores);
mockDb.calculateParetoThreshold.mockReturnValue(0.7);
const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger);
// Below the threshold, but not core: nothing to demote.
expect(result.demotion.candidatesFound).toBe(0);
expect(mockDb.demoteFromCore).not.toHaveBeenCalled();
});
});
// Phase 5: Extraction
// Phase 4: Extraction
describe("Phase 5: Entity Extraction", () => {
it("should process pending extractions in batches", async () => {
mockDb.countByExtractionStatus.mockResolvedValue({
@@ -2606,7 +2546,6 @@ describe("runSleepCycle", () => {
expect(onPhaseStart).toHaveBeenCalledWith("semanticDedup");
expect(onPhaseStart).toHaveBeenCalledWith("pareto");
expect(onPhaseStart).toHaveBeenCalledWith("promotion");
expect(onPhaseStart).toHaveBeenCalledWith("demotion");
expect(onPhaseStart).toHaveBeenCalledWith("extraction");
expect(onPhaseStart).toHaveBeenCalledWith("decay");
expect(onPhaseStart).toHaveBeenCalledWith("cleanup");
@@ -2642,7 +2581,6 @@ describe("runSleepCycle", () => {
expect(result).toHaveProperty("semanticDedup");
expect(result).toHaveProperty("pareto");
expect(result).toHaveProperty("promotion");
expect(result).toHaveProperty("demotion");
expect(result).toHaveProperty("decay");
expect(result).toHaveProperty("extraction");
expect(result).toHaveProperty("cleanup");
@@ -2669,6 +2607,208 @@ describe("runSleepCycle", () => {
// isTransientError()
// ============================================================================
// ============================================================================
// isSemanticDuplicate
// ============================================================================
describe("isSemanticDuplicate", () => {
  // Every test swaps out the global fetch; put the real one back afterwards.
  const savedFetch = globalThis.fetch;

  afterEach(() => {
    globalThis.fetch = savedFetch;
  });

  const enabledConfig: ExtractionConfig = {
    enabled: true,
    apiKey: "test-key",
    model: "test-model",
    baseUrl: "https://test.ai/api/v1",
    temperature: 0.0,
    maxRetries: 0,
  };

  const disabledConfig: ExtractionConfig = {
    ...enabledConfig,
    enabled: false,
  };

  /** Install a fetch mock that resolves to an OK chat completion carrying `content`. */
  function stubCompletion(content: string | null) {
    const mock = vi.fn().mockResolvedValue({
      ok: true,
      json: () =>
        Promise.resolve({
          choices: [{ message: { content } }],
        }),
    });
    globalThis.fetch = mock;
    return mock;
  }

  /** Install a fetch mock whose completion content is a JSON verdict object. */
  function stubVerdict(verdict: string, reason: string) {
    return stubCompletion(JSON.stringify({ verdict, reason }));
  }

  /** Install a fetch mock that rejects with a DOMException of the given name. */
  function stubFetchFailure(message: string, name: string) {
    const mock = vi.fn().mockRejectedValue(new DOMException(message, name));
    globalThis.fetch = mock;
    return mock;
  }

  it("should return false when extraction is disabled", async () => {
    const result = await isSemanticDuplicate("new text", "existing text", disabledConfig);
    expect(result).toBe(false);
  });

  it("should return true when LLM says duplicate", async () => {
    stubVerdict("duplicate", "same fact");

    const result = await isSemanticDuplicate("I like Neo4j", "User prefers Neo4j", enabledConfig);
    expect(result).toBe(true);
  });

  it("should return false when LLM says unique", async () => {
    stubVerdict("unique", "different topic");

    const result = await isSemanticDuplicate("I like coffee", "User lives in NYC", enabledConfig);
    expect(result).toBe(false);
  });

  it("should skip LLM call when vector similarity is below threshold", async () => {
    const fetchSpy = vi.fn();
    globalThis.fetch = fetchSpy;

    const justBelow = SEMANTIC_DEDUP_VECTOR_THRESHOLD - 0.01;
    const result = await isSemanticDuplicate("text a", "text b", enabledConfig, justBelow);

    expect(result).toBe(false);
    expect(fetchSpy).not.toHaveBeenCalled();
  });

  it("should call LLM when vector similarity is at or above threshold", async () => {
    const fetchMock = stubVerdict("duplicate", "same");

    const result = await isSemanticDuplicate(
      "text a",
      "text b",
      enabledConfig,
      SEMANTIC_DEDUP_VECTOR_THRESHOLD,
    );

    expect(result).toBe(true);
    expect(fetchMock).toHaveBeenCalled();
  });

  it("should call LLM when no vector similarity is provided", async () => {
    const fetchMock = stubVerdict("unique", "different");

    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);

    expect(result).toBe(false);
    expect(fetchMock).toHaveBeenCalled();
  });

  it("should return false on fetch error (fail-open)", async () => {
    stubFetchFailure("signal timed out", "TimeoutError");

    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
    expect(result).toBe(false);
  });

  it("should return false on invalid JSON response", async () => {
    stubCompletion("not valid json");

    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
    expect(result).toBe(false);
  });

  it("should return false when verdict is missing from response", async () => {
    stubCompletion(JSON.stringify({ reason: "no verdict field" }));

    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
    expect(result).toBe(false);
  });

  it("should return false when LLM returns null content", async () => {
    stubCompletion(null);

    const result = await isSemanticDuplicate("text a", "text b", enabledConfig);
    expect(result).toBe(false);
  });

  it("should respect abort signal", async () => {
    // Signal is already aborted before the call is made.
    const controller = new AbortController();
    controller.abort();
    stubFetchFailure("signal aborted", "AbortError");

    const result = await isSemanticDuplicate(
      "text a",
      "text b",
      enabledConfig,
      undefined,
      controller.signal,
    );
    expect(result).toBe(false);
  });
});
// ============================================================================
// isTransientError
// ============================================================================
describe("isTransientError", () => {
it("should return false for non-Error values", () => {
expect(isTransientError("string error")).toBe(false);

View File

@@ -1,9 +1,12 @@
/**
* LLM-based entity extraction and memory operations for memory-neo4j.
*
* Extraction uses a configurable OpenAI-compatible LLM (OpenRouter, Ollama, etc.) to:
* - Extract entities, relationships, and tags from stored memories
* - Classify memories into categories (preference, fact, decision, etc.)
* - Rate memory importance on a 1-10 scale
* - Detect semantic duplicates via LLM comparison
* - Resolve conflicting memories
*
* Runs as background fire-and-forget operations with graceful degradation.
*/
@@ -12,20 +15,10 @@ import { randomUUID } from "node:crypto";
import type { ExtractionConfig } from "./config.js";
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import type { EntityType, ExtractionResult, MemoryCategory } from "./schema.js";
import type { EntityType, ExtractionResult, Logger, MemoryCategory } from "./schema.js";
import { callOpenRouter, callOpenRouterStream, isTransientError } from "./llm-client.js";
import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES, MEMORY_CATEGORIES } from "./schema.js";
// ============================================================================
// Types
// ============================================================================
/**
 * Minimal logging interface: `info`, `warn` and `error` are required,
 * `debug` is optional and must be guarded before calling.
 *
 * NOTE(review): `Logger` also appears in this file's type imports from
 * "./schema.js" — confirm whether this local alias is still needed, since
 * the two declarations share a name.
 */
type Logger = {
info: (msg: string) => void;
warn: (msg: string) => void;
error: (msg: string) => void;
debug?: (msg: string) => void;
};
// ============================================================================
// Extraction Prompt
// ============================================================================
@@ -58,161 +51,6 @@ Rules:
- Keep entity descriptions brief (1 sentence max)
- Category: "preference" for opinions/preferences, "fact" for factual info, "decision" for choices made, "entity" for entity-focused, "other" for miscellaneous`;
// ============================================================================
// OpenRouter API Client
// ============================================================================
// Timeout for LLM and embedding fetch calls to prevent hanging indefinitely
const FETCH_TIMEOUT_MS = 30_000;

/**
 * Send a non-streaming chat-completion request to the configured
 * OpenAI-compatible endpoint and return the first choice's message content.
 *
 * Failed attempts are retried with exponential backoff (500ms, 1s, 2s, …) up to
 * `config.maxRetries` times. Once the caller's `abortSignal` has fired the
 * error is rethrown immediately, since every further attempt would be aborted too.
 *
 * @param config - Endpoint, credentials, model, temperature and retry budget.
 * @param prompt - A single user prompt, or a full list of chat messages.
 * @param abortSignal - Optional caller cancellation, combined with a per-request timeout.
 * @returns The message content, or `null` when the response carries none.
 * @throws The last error once retries are exhausted or the caller has aborted.
 */
async function callOpenRouter(
  config: ExtractionConfig,
  prompt: string | Array<{ role: string; content: string }>,
  abortSignal?: AbortSignal,
): Promise<string | null> {
  const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt;

  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    try {
      // Combine the caller's abort signal with a per-request timeout
      const signal = abortSignal
        ? AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)])
        : AbortSignal.timeout(FETCH_TIMEOUT_MS);

      const response = await fetch(`${config.baseUrl}/chat/completions`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${config.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: config.model,
          messages,
          temperature: config.temperature,
          response_format: { type: "json_object" },
        }),
        signal,
      });

      if (!response.ok) {
        // The body is best-effort context for the error message only.
        const body = await response.text().catch(() => "");
        throw new Error(`OpenRouter API error ${response.status}: ${body}`);
      }

      const data = (await response.json()) as {
        choices?: Array<{ message?: { content?: string } }>;
      };
      return data.choices?.[0]?.message?.content ?? null;
    } catch (err) {
      // Stop when the retry budget is spent, or when the caller cancelled:
      // a retry after cancellation can only fail again after a pointless backoff.
      if (attempt >= config.maxRetries || abortSignal?.aborted) {
        throw err;
      }
      // Exponential backoff
      await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt)));
    }
  }
  return null;
}
/**
 * Extract the content delta carried by one SSE line of a streamed chat completion.
 * Returns "" for non-`data:` lines, the `[DONE]` sentinel, and malformed JSON.
 */
function sseContentDelta(line: string): string {
  const trimmed = line.trim();
  if (!trimmed.startsWith("data: ")) return "";
  const data = trimmed.slice(6);
  if (data === "[DONE]") return "";
  try {
    const parsed = JSON.parse(data) as {
      choices?: Array<{ delta?: { content?: string } }>;
    };
    return parsed.choices?.[0]?.delta?.content || "";
  } catch {
    // Skip malformed SSE chunks
    return "";
  }
}

/**
 * Streaming variant of callOpenRouter. Uses the streaming API to receive chunks
 * incrementally, allowing earlier cancellation via abort signal and better
 * latency characteristics for long responses.
 *
 * Accumulates all chunks into a single response string since extraction
 * uses JSON mode (which requires the complete object to parse).
 *
 * Failed attempts are retried with exponential backoff up to `config.maxRetries`
 * times, except after the caller's `abortSignal` has fired, when the error is
 * rethrown immediately.
 *
 * @param config - Endpoint, credentials, model, temperature and retry budget.
 * @param prompt - A single user prompt, or a full list of chat messages.
 * @param abortSignal - Optional caller cancellation, combined with a per-request timeout.
 * @returns The accumulated content, or `null` when nothing was received or the
 *   caller aborted between chunks.
 * @throws The last error once retries are exhausted or the caller has aborted.
 */
async function callOpenRouterStream(
  config: ExtractionConfig,
  prompt: string | Array<{ role: string; content: string }>,
  abortSignal?: AbortSignal,
): Promise<string | null> {
  const messages = typeof prompt === "string" ? [{ role: "user", content: prompt }] : prompt;

  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    try {
      const signal = abortSignal
        ? AbortSignal.any([abortSignal, AbortSignal.timeout(FETCH_TIMEOUT_MS)])
        : AbortSignal.timeout(FETCH_TIMEOUT_MS);

      const response = await fetch(`${config.baseUrl}/chat/completions`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${config.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: config.model,
          messages,
          temperature: config.temperature,
          response_format: { type: "json_object" },
          stream: true,
        }),
        signal,
      });

      if (!response.ok) {
        const body = await response.text().catch(() => "");
        throw new Error(`OpenRouter API error ${response.status}: ${body}`);
      }
      if (!response.body) {
        throw new Error("No response body for streaming request");
      }

      // Read SSE stream and accumulate content chunks
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let accumulated = "";
      let buffer = "";

      for (;;) {
        // Check abort between chunks for responsive cancellation
        if (abortSignal?.aborted) {
          reader.cancel().catch(() => {});
          return null;
        }

        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });

        // Parse complete SSE lines; keep the trailing partial line for the next chunk.
        const lines = buffer.split("\n");
        buffer = lines.pop() ?? "";
        for (const line of lines) {
          accumulated += sseContentDelta(line);
        }
      }

      // Flush the decoder and handle a final line that arrived without a
      // trailing newline, so its content is not dropped.
      buffer += decoder.decode();
      if (buffer) {
        accumulated += sseContentDelta(buffer);
      }

      return accumulated || null;
    } catch (err) {
      // Stop when the retry budget is spent, or when the caller cancelled:
      // a retry after cancellation can only fail again after a pointless backoff.
      if (attempt >= config.maxRetries || abortSignal?.aborted) {
        throw err;
      }
      await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt)));
    }
  }
  return null;
}
// ============================================================================
// Entity Extraction
// ============================================================================
@@ -227,32 +65,6 @@ async function callOpenRouterStream(
*/
const MAX_EXTRACTION_RETRIES = 3;
/**
 * Decide whether a failure is worth retrying.
 *
 * Transient: abort/timeout errors (by `name`) and messages mentioning network
 * trouble or an HTTP 429/502/503/504 API error. Everything else — including
 * any thrown value that is not an `Error` — is treated as permanent.
 */
export function isTransientError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }

  const lowered = err.message.toLowerCase();
  if (err.name === "AbortError" || err.name === "TimeoutError") {
    return true;
  }

  // Case-insensitive substrings that mark a retryable failure.
  const transientMarkers = [
    "timeout",
    "econnrefused",
    "econnreset",
    "etimedout",
    "enotfound",
    "network",
    "fetch failed",
    "socket hang up",
    "api error 429",
    "api error 502",
    "api error 503",
    "api error 504",
  ];
  return transientMarkers.some((marker) => lowered.includes(marker));
}
/**
* Extract entities and relationships from a memory text using LLM.
*
@@ -526,712 +338,6 @@ export async function runBackgroundExtraction(
}
}
// ============================================================================
// Sleep Cycle - Seven Phase Memory Consolidation
// ============================================================================
/**
 * Sleep Cycle Result - aggregated stats from all phases.
 *
 * Every counter starts at zero and is only incremented by the phase that owns
 * it, so a phase that was skipped or aborted leaves its section at zero.
 */
export type SleepCycleResult = {
// Phase 1a: Vector deduplication (clusters containing a pair at or above dedupThreshold)
dedup: {
clustersFound: number;
memoriesMerged: number;
};
// Phase 1c: Conflict Detection (formerly Phase 1b)
// resolved counts "a", "b" and "both" decisions; invalidated counts only "a"/"b".
conflict: {
pairsFound: number;
resolved: number;
invalidated: number;
};
// Phase 1b: Semantic Deduplication (LLM check on the 0.75–dedupThreshold similarity band)
semanticDedup: {
pairsChecked: number;
duplicatesMerged: number;
};
// Phase 2: Pareto Scoring & Threshold
pareto: {
totalMemories: number;
coreMemories: number;
regularMemories: number;
threshold: number; // The 80th percentile effective score
};
// Phase 3: Core Promotion
promotion: {
candidatesFound: number;
promoted: number;
};
// Phase 4: Core Demotion
demotion: {
candidatesFound: number;
demoted: number;
};
// Phase 6: Decay & Pruning
// NOTE(review): the runSleepCycle doc comment lists decay as step 5 and
// extraction as step 6 — confirm which numbering is current.
decay: {
memoriesPruned: number;
};
// Phase 5: Entity Extraction
extraction: {
total: number;
processed: number;
succeeded: number;
failed: number;
};
// Phase 7: Orphan Cleanup
cleanup: {
entitiesRemoved: number;
tagsRemoved: number;
};
// Overall
durationMs: number;
aborted: boolean;
};
/**
 * Tuning knobs for runSleepCycle. Every field is optional; the defaults
 * quoted below are the ones runSleepCycle applies when a field is omitted.
 */
export type SleepCycleOptions = {
// Common
agentId?: string;
abortSignal?: AbortSignal;
// Phase 1: Deduplication
// Clusters are always fetched at 0.75 similarity; dedupThreshold decides which of
// them are merged by vector similarity alone versus sent to the LLM check.
dedupThreshold?: number; // Vector similarity threshold (default: 0.95)
skipSemanticDedup?: boolean; // Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)
// Phase 2-4: Pareto-based Promotion/Demotion
paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%)
promotionMinAgeDays?: number; // Min age before promotion (default: 7)
// Phase 1b: Semantic Dedup
// When more candidate pairs exist, the highest-similarity ones are checked first.
maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500)
// Concurrency
llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL)
// Phase 5: Extraction
extractionBatchSize?: number; // Memories per batch (default: 50)
extractionDelayMs?: number; // Delay between batches (default: 1000)
// Phase 6: Decay
decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
decayCurves?: Record<string, { halfLifeDays: number }>; // Per-category decay curve overrides
// Progress callback
// onPhaseStart receives the phase identifier as that phase begins.
onPhaseStart?: (
phase:
| "dedup"
| "conflict"
| "semanticDedup"
| "pareto"
| "promotion"
| "demotion"
| "decay"
| "extraction"
| "cleanup",
) => void;
// onProgress receives a phase name plus a human-readable status message.
onProgress?: (phase: string, message: string) => void;
};
/**
* Run the full sleep cycle - seven phases of memory consolidation.
*
* This implements a Pareto-based memory ecosystem where core memory
* is bounded to the top 20% of memories by effective score.
*
* Phases:
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
* 2. PARETO SCORING - Calculate effective scores for all memories
* 3. CORE PROMOTION - Regular memories above threshold → core
* 4. CORE DEMOTION - Core memories below threshold → regular
* 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
* 6. EXTRACTION - Form entity relationships (strengthen connections)
* 7. CLEANUP - Remove orphaned entities/tags (garbage collection)
*
* Effective Score Formulas:
* - Regular memories: importance × freq_boost × recency
* - Core memories: importance × freq_boost × recency (same for threshold comparison)
* - Core memory retrieval ranking: freq_boost × recency (pure usage-based)
*
* Where:
* - freq_boost = 1 + log(1 + retrievalCount) × 0.3
* - recency = 2^(-days_since_last / 14)
*
* Benefits:
* - Self-regulating core memory size (Pareto distribution)
* - Memories can be promoted AND demoted based on usage
* - Simulates human memory consolidation during sleep
*
* Research basis:
* - Pareto principle (20/80 rule) for memory tiering
* - ACT-R memory model for retrieval-based importance
* - Ebbinghaus forgetting curve for decay
* - MemGPT/Letta for tiered memory architecture
*/
export async function runSleepCycle(
db: Neo4jMemoryClient,
embeddings: Embeddings,
config: ExtractionConfig,
logger: Logger,
options: SleepCycleOptions = {},
): Promise<SleepCycleResult> {
const startTime = Date.now();
const {
agentId,
abortSignal,
dedupThreshold = 0.95,
skipSemanticDedup = false,
maxSemanticDedupPairs = 500,
llmConcurrency = 8,
paretoPercentile = 0.2,
promotionMinAgeDays = 7,
decayRetentionThreshold = 0.1,
decayBaseHalfLifeDays = 30,
decayImportanceMultiplier = 2,
decayCurves,
extractionBatchSize = 50,
extractionDelayMs = 1000,
onPhaseStart,
onProgress,
} = options;
const result: SleepCycleResult = {
dedup: { clustersFound: 0, memoriesMerged: 0 },
conflict: { pairsFound: 0, resolved: 0, invalidated: 0 },
semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 },
pareto: { totalMemories: 0, coreMemories: 0, regularMemories: 0, threshold: 0 },
promotion: { candidatesFound: 0, promoted: 0 },
demotion: { candidatesFound: 0, demoted: 0 },
decay: { memoriesPruned: 0 },
extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
durationMs: 0,
aborted: false,
};
const LLM_CONCURRENCY = llmConcurrency;
// --------------------------------------------------------------------------
// Phase 1: Deduplication (Optimized - combined vector + semantic dedup)
// Call findDuplicateClusters ONCE at 0.75 threshold, then split by similarity band:
// - ≥0.95: vector merge (high-confidence duplicates)
// - 0.75-0.95: semantic dedup via LLM (paraphrases)
// --------------------------------------------------------------------------
if (!abortSignal?.aborted) {
onPhaseStart?.("dedup");
logger.info("memory-neo4j: [sleep] Phase 1: Deduplication (vector + semantic)");
try {
// Fetch clusters at 0.75 threshold with similarity scores
const allClusters = await db.findDuplicateClusters(0.75, agentId, true);
// Helper to create canonical pair key (sorted)
const makePairKey = (a: string, b: string): string => {
return a < b ? `${a}:${b}` : `${b}:${a}`;
};
// Separate clusters into high-similarity (≥0.95) and medium-similarity (0.75-0.95)
const highSimClusters: typeof allClusters = [];
const mediumSimClusters: typeof allClusters = [];
for (const cluster of allClusters) {
if (abortSignal?.aborted) break;
if (!cluster.similarities || cluster.memoryIds.length < 2) continue;
// Check if ANY pair in this cluster has similarity ≥ dedupThreshold
let hasHighSim = false;
for (const [pairKey, score] of cluster.similarities.entries()) {
if (score >= dedupThreshold) {
hasHighSim = true;
break;
}
}
if (hasHighSim) {
// Split this cluster into high-sim and medium-sim sub-clusters
// For simplicity, if a cluster has ANY high-sim pair, treat the whole cluster as high-sim
// (This matches the old behavior where Phase 1 would merge them all)
highSimClusters.push(cluster);
} else {
mediumSimClusters.push(cluster);
}
}
// Part 1a: Vector merge for high-similarity clusters (≥0.95)
result.dedup.clustersFound = highSimClusters.length;
for (const cluster of highSimClusters) {
if (abortSignal?.aborted) break;
const { deletedCount } = await db.mergeMemoryCluster(
cluster.memoryIds,
cluster.importances,
);
result.dedup.memoriesMerged += deletedCount;
onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} → 1 (vector)`);
}
logger.info(
`memory-neo4j: [sleep] Phase 1a (vector) complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`,
);
// Part 1b: Semantic dedup for medium-similarity clusters (0.75-0.95)
if (skipSemanticDedup) {
onPhaseStart?.("semanticDedup");
logger.info("memory-neo4j: [sleep] Phase 1b: Skipped (--skip-semantic)");
onProgress?.("semanticDedup", "Skipped — semantic dedup disabled");
} else {
onPhaseStart?.("semanticDedup");
logger.info("memory-neo4j: [sleep] Phase 1b: Semantic Deduplication (0.75-0.95 band)");
// Collect all candidate pairs upfront (with pairwise similarity for pre-screening)
type DedupPair = {
textA: string;
textB: string;
idA: string;
idB: string;
importanceA: number;
importanceB: number;
similarity?: number;
};
const allPairs: DedupPair[] = [];
for (const cluster of mediumSimClusters) {
if (cluster.memoryIds.length < 2) continue;
for (let i = 0; i < cluster.memoryIds.length - 1; i++) {
for (let j = i + 1; j < cluster.memoryIds.length; j++) {
const pairKey = makePairKey(cluster.memoryIds[i], cluster.memoryIds[j]);
allPairs.push({
textA: cluster.texts[i],
textB: cluster.texts[j],
idA: cluster.memoryIds[i],
idB: cluster.memoryIds[j],
importanceA: cluster.importances[i],
importanceB: cluster.importances[j],
similarity: cluster.similarities?.get(pairKey),
});
}
}
}
// Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
// Sort by similarity descending so higher-similarity pairs (more likely
// to be duplicates) are checked first.
if (allPairs.length > maxSemanticDedupPairs) {
allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
const skipped = allPairs.length - maxSemanticDedupPairs;
allPairs.length = maxSemanticDedupPairs;
onProgress?.(
"semanticDedup",
`Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`,
);
logger.info(
`memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`,
);
}
// Process pairs in concurrent batches
const invalidatedIds = new Set<string>();
for (let i = 0; i < allPairs.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) {
const batch = allPairs.slice(i, i + LLM_CONCURRENCY);
// Filter out pairs where one side was already invalidated
const activeBatch = batch.filter(
(p) => !invalidatedIds.has(p.idA) && !invalidatedIds.has(p.idB),
);
if (activeBatch.length === 0) continue;
const outcomes = await Promise.allSettled(
activeBatch.map((p) =>
isSemanticDuplicate(p.textA, p.textB, config, p.similarity, abortSignal),
),
);
for (let k = 0; k < outcomes.length; k++) {
const pair = activeBatch[k];
result.semanticDedup.pairsChecked++;
if (
outcomes[k].status === "fulfilled" &&
(outcomes[k] as PromiseFulfilledResult<boolean>).value
) {
// Skip if either side was invalidated by an earlier result in this batch
if (invalidatedIds.has(pair.idA) || invalidatedIds.has(pair.idB)) continue;
const keepId = pair.importanceA >= pair.importanceB ? pair.idA : pair.idB;
const removeId = keepId === pair.idA ? pair.idB : pair.idA;
const keepText = keepId === pair.idA ? pair.textA : pair.textB;
const removeText = removeId === pair.idA ? pair.textA : pair.textB;
await db.invalidateMemory(removeId);
invalidatedIds.add(removeId);
result.semanticDedup.duplicatesMerged++;
onProgress?.(
"semanticDedup",
`Merged: "${removeText.slice(0, 50)}..." → kept "${keepText.slice(0, 50)}..."`,
);
}
}
}
logger.info(
`memory-neo4j: [sleep] Phase 1b (semantic) complete — ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
);
} // close skipSemanticDedup else
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`);
}
}
// --------------------------------------------------------------------------
// Phase 1c: Conflict Detection (formerly Phase 1b)
// --------------------------------------------------------------------------
if (!abortSignal?.aborted && !skipSemanticDedup) {
  // Phase 1c asks the LLM to arbitrate each pair of conflicting memories and
  // invalidates the losing side. Skipped together with semantic dedup because
  // both depend on LLM calls.
  onPhaseStart?.("conflict");
  logger.info("memory-neo4j: [sleep] Phase 1c: Conflict Detection");
  try {
    const pairs = await db.findConflictingMemories(agentId);
    result.conflict.pairsFound = pairs.length;
    // IDs invalidated during this phase. A memory may appear in several pairs;
    // once it has been invalidated it must not be allowed to "win" a later
    // pair, otherwise the surviving memory would be invalidated in favour of
    // one that no longer exists (mirrors the guard used by Phase 1b).
    const conflictInvalidatedIds = new Set<string>();
    const isStalePair = (idA: string, idB: string): boolean =>
      conflictInvalidatedIds.has(idA) || conflictInvalidatedIds.has(idB);
    // Process conflict pairs in parallel chunks of LLM_CONCURRENCY
    for (let i = 0; i < pairs.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) {
      // Drop pairs already made stale by previous chunks before spending LLM calls on them
      const chunk = pairs
        .slice(i, i + LLM_CONCURRENCY)
        .filter((pair) => !isStalePair(pair.memoryA.id, pair.memoryB.id));
      if (chunk.length === 0) continue;
      const outcomes = await Promise.allSettled(
        chunk.map((pair) =>
          resolveConflict(pair.memoryA.text, pair.memoryB.text, config, abortSignal),
        ),
      );
      for (let k = 0; k < outcomes.length; k++) {
        if (abortSignal?.aborted) break;
        const pair = chunk[k];
        const outcome = outcomes[k];
        // Rejected LLM calls are ignored; the pair is simply left unresolved
        if (outcome.status !== "fulfilled") continue;
        // Skip if either side was invalidated by an earlier result in this chunk
        if (isStalePair(pair.memoryA.id, pair.memoryB.id)) continue;
        const decision = outcome.value;
        if (decision === "a") {
          await db.invalidateMemory(pair.memoryB.id);
          conflictInvalidatedIds.add(pair.memoryB.id);
          result.conflict.invalidated++;
          result.conflict.resolved++;
          onProgress?.(
            "conflict",
            `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`,
          );
        } else if (decision === "b") {
          await db.invalidateMemory(pair.memoryA.id);
          conflictInvalidatedIds.add(pair.memoryA.id);
          result.conflict.invalidated++;
          result.conflict.resolved++;
          onProgress?.(
            "conflict",
            `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`,
          );
        } else if (decision === "both") {
          result.conflict.resolved++;
          onProgress?.("conflict", `Kept both: no real conflict`);
        }
        // "skip" = LLM unavailable, don't count as resolved
      }
    }
    logger.info(
      `memory-neo4j: [sleep] Phase 1c complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
    );
  } catch (err) {
    // Phase failures are logged and swallowed so later phases still run
    logger.warn(`memory-neo4j: [sleep] Phase 1c error: ${String(err)}`);
  }
}
// --------------------------------------------------------------------------
// Phase 2: Pareto Scoring & Threshold Calculation
// --------------------------------------------------------------------------
let paretoThreshold = 0;
let allScores: Awaited<ReturnType<typeof db.calculateAllEffectiveScores>> = [];
if (!abortSignal?.aborted) {
onPhaseStart?.("pareto");
logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring");
try {
allScores = await db.calculateAllEffectiveScores(agentId);
result.pareto.totalMemories = allScores.length;
result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length;
result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length;
// Calculate the threshold for top N% (default: top 20%)
paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile);
result.pareto.threshold = paretoThreshold;
onProgress?.(
"pareto",
`Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
);
onProgress?.(
"pareto",
`Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`,
);
logger.info(
`memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`);
}
}
// --------------------------------------------------------------------------
// Phase 3: Core Promotion (using pre-computed scores from Phase 2)
//
// Design note on staleness: The effective scores and Pareto threshold were
// computed in Phase 2 and may be slightly stale by the time Phases 3/4 run.
// This is acceptable because: (a) the sleep cycle is a background maintenance
// task that runs infrequently (not concurrent with itself), (b) the scoring
// formula is deterministic based on stored properties that change slowly, and
// (c) promotion/demotion are reversible in the next cycle. The alternative
// (re-querying scores per phase) adds latency without meaningful accuracy gain.
// --------------------------------------------------------------------------
if (!abortSignal?.aborted && paretoThreshold > 0) {
onPhaseStart?.("promotion");
logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion");
try {
const candidates = allScores.filter(
(s) =>
s.category !== "core" &&
s.effectiveScore >= paretoThreshold &&
s.ageDays >= promotionMinAgeDays,
);
result.promotion.candidatesFound = candidates.length;
if (candidates.length > 0) {
const ids = candidates.map((m) => m.id);
result.promotion.promoted = await db.promoteToCore(ids);
for (const c of candidates) {
onProgress?.(
"promotion",
`Promoted "${c.text.slice(0, 40)}..." (score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`,
);
}
}
logger.info(
`memory-neo4j: [sleep] Phase 3 complete — ${result.promotion.promoted} memories promoted to core`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`);
}
}
// --------------------------------------------------------------------------
// Phase 4: Core Demotion (using pre-computed scores from Phase 2)
// --------------------------------------------------------------------------
if (!abortSignal?.aborted && paretoThreshold > 0) {
onPhaseStart?.("demotion");
logger.info("memory-neo4j: [sleep] Phase 4: Core Demotion");
try {
const candidates = allScores.filter(
(s) => s.category === "core" && s.effectiveScore < paretoThreshold,
);
result.demotion.candidatesFound = candidates.length;
if (candidates.length > 0) {
const ids = candidates.map((m) => m.id);
result.demotion.demoted = await db.demoteFromCore(ids);
for (const c of candidates) {
onProgress?.(
"demotion",
`Demoted "${c.text.slice(0, 40)}..." (score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`,
);
}
}
logger.info(
`memory-neo4j: [sleep] Phase 4 complete — ${result.demotion.demoted} memories demoted from core`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`);
}
}
// --------------------------------------------------------------------------
// Phase 5: Entity Extraction (moved before decay so new memories get
// extracted before pruning can remove them)
// --------------------------------------------------------------------------
// Extraction uses LLM_CONCURRENCY (defined above, matches OLLAMA_NUM_PARALLEL)
if (!abortSignal?.aborted && config.enabled) {
  // Phase 5 drains the queue of memories whose extraction is still pending:
  // fetch a batch, run extraction in parallel chunks of LLM_CONCURRENCY,
  // pause, and repeat until no pending memories remain or the cycle is aborted.
  onPhaseStart?.("extraction");
  logger.info("memory-neo4j: [sleep] Phase 5: Entity Extraction");
  try {
    // Get initial count (used as the denominator in progress messages)
    const counts = await db.countByExtractionStatus(agentId);
    result.extraction.total = counts.pending;
    if (result.extraction.total > 0) {
      let hasMore = true;
      while (hasMore && !abortSignal?.aborted) {
        const pending = await db.listPendingExtractions(extractionBatchSize, agentId);
        if (pending.length === 0) {
          hasMore = false;
          break;
        }
        // Process in parallel chunks of LLM_CONCURRENCY
        for (let i = 0; i < pending.length && !abortSignal?.aborted; i += LLM_CONCURRENCY) {
          const chunk = pending.slice(i, i + LLM_CONCURRENCY);
          const outcomes = await Promise.allSettled(
            chunk.map((memory) =>
              runBackgroundExtraction(
                memory.id,
                memory.text,
                db,
                embeddings,
                config,
                logger,
                memory.extractionRetries,
                abortSignal,
              ),
            ),
          );
          // A rejected promise and a fulfilled-but-unsuccessful result both count as failures
          for (const outcome of outcomes) {
            result.extraction.processed++;
            if (outcome.status === "fulfilled" && outcome.value.success) {
              result.extraction.succeeded++;
            } else {
              result.extraction.failed++;
            }
          }
          // Report every 10 processed items and always at the end of a batch
          if (result.extraction.processed % 10 === 0 || i + LLM_CONCURRENCY >= pending.length) {
            onProgress?.(
              "extraction",
              `${result.extraction.processed}/${result.extraction.total} processed`,
            );
          }
        }
        // Delay between batches (abort-aware)
        if (hasMore && !abortSignal?.aborted) {
          await new Promise<void>((resolve) => {
            // If abort fires during delay, cancel the timer and resolve immediately
            const onAbort = (): void => {
              clearTimeout(timer);
              resolve();
            };
            const timer = setTimeout(() => {
              // Timer won: detach the abort listener so listeners do not
              // accumulate on the signal across batches ({ once: true } only
              // removes the listener when the abort event actually fires).
              abortSignal?.removeEventListener("abort", onAbort);
              resolve();
            }, extractionDelayMs);
            abortSignal?.addEventListener("abort", onAbort, { once: true });
          });
        }
      }
    }
    logger.info(
      `memory-neo4j: [sleep] Phase 5 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
    );
  } catch (err) {
    // Phase failures are logged and swallowed so later phases still run
    logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
  }
} else if (!config.enabled) {
  logger.info("memory-neo4j: [sleep] Phase 5 skipped — extraction not enabled");
}
// --------------------------------------------------------------------------
// Phase 6: Decay & Pruning (after extraction so freshly extracted memories
// aren't pruned before they build entity connections)
// --------------------------------------------------------------------------
if (!abortSignal?.aborted) {
onPhaseStart?.("decay");
logger.info("memory-neo4j: [sleep] Phase 6: Decay & Pruning");
try {
const decayed = await db.findDecayedMemories({
retentionThreshold: decayRetentionThreshold,
baseHalfLifeDays: decayBaseHalfLifeDays,
importanceMultiplier: decayImportanceMultiplier,
decayCurves,
agentId,
});
if (decayed.length > 0) {
const ids = decayed.map((m) => m.id);
result.decay.memoriesPruned = await db.pruneMemories(ids);
onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
}
logger.info(
`memory-neo4j: [sleep] Phase 6 complete — ${result.decay.memoriesPruned} memories pruned`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
}
}
// --------------------------------------------------------------------------
// Phase 7: Orphan Cleanup
// --------------------------------------------------------------------------
if (!abortSignal?.aborted) {
onPhaseStart?.("cleanup");
logger.info("memory-neo4j: [sleep] Phase 7: Orphan Cleanup");
try {
// Clean up orphan entities
if (!abortSignal?.aborted) {
const orphanEntities = await db.findOrphanEntities();
if (orphanEntities.length > 0) {
result.cleanup.entitiesRemoved = await db.deleteOrphanEntities(
orphanEntities.map((e) => e.id),
);
onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`);
}
}
// Clean up orphan tags
if (!abortSignal?.aborted) {
const orphanTags = await db.findOrphanTags();
if (orphanTags.length > 0) {
result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id));
onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`);
}
}
logger.info(
`memory-neo4j: [sleep] Phase 7 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 7 error: ${String(err)}`);
}
}
result.durationMs = Date.now() - startTime;
result.aborted = abortSignal?.aborted ?? false;
logger.info(
`memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` +
(result.aborted ? " (aborted)" : ""),
);
return result;
}
// ============================================================================
// Message Extraction (re-exported from message-utils.ts)
// ============================================================================
export {
extractUserMessages,
extractAssistantMessages,
stripMessageWrappers,
stripAssistantWrappers,
} from "./message-utils.js";
// ============================================================================
// LLM-Judged Importance Rating
// ============================================================================

View File

@@ -16,8 +16,9 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
import { Type } from "@sinclair/typebox";
import { randomUUID } from "node:crypto";
import { stringEnum } from "openclaw/plugin-sdk";
import type { MemoryCategory, MemorySource } from "./schema.js";
import type { Logger, MemoryCategory, MemorySource } from "./schema.js";
import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
import { registerCli } from "./cli.js";
import {
DEFAULT_EMBEDDING_DIMS,
EMBEDDING_DIMENSIONS,
@@ -27,14 +28,8 @@ import {
vectorDimsForModel,
} from "./config.js";
import { Embeddings } from "./embeddings.js";
import {
extractUserMessages,
extractAssistantMessages,
stripMessageWrappers,
runSleepCycle,
isSemanticDuplicate,
rateImportance,
} from "./extractor.js";
import { isSemanticDuplicate, rateImportance } from "./extractor.js";
import { extractUserMessages, extractAssistantMessages } from "./message-utils.js";
import { Neo4jMemoryClient } from "./neo4j-client.js";
import { hybridSearch } from "./search.js";
@@ -127,7 +122,7 @@ const memoryNeo4jPlugin = {
limit,
agentId,
extractionConfig.enabled,
{ graphSearchDepth: cfg.graphSearchDepth },
{ graphSearchDepth: cfg.graphSearchDepth, logger: api.logger },
);
if (results.length === 0) {
@@ -216,17 +211,21 @@ const memoryNeo4jPlugin = {
}
// 3. Store memory immediately (fast path)
// User-stored core memories get pinned: importance locked at 1.0,
// immune from decay, scoring recalculation, and pruning.
const isUserPinnedCore = category === "core";
const memoryId = randomUUID();
await db.storeMemory({
id: memoryId,
text,
embedding: vector,
importance: Math.min(1, Math.max(0, importance)),
importance: isUserPinnedCore ? 1.0 : Math.min(1, Math.max(0, importance)),
category,
source: "user" as MemorySource,
extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
agentId,
sessionKey,
userPinned: isUserPinnedCore,
});
// 4. Extraction is deferred to sleep cycle (like human memory consolidation)
@@ -352,492 +351,10 @@ const memoryNeo4jPlugin = {
);
// ========================================================================
// CLI Commands
// CLI Commands (delegated to cli.ts)
// ========================================================================
api.registerCli(
({ program }) => {
// Find existing memory command or create fallback
let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory");
if (!memoryCmd) {
// Fallback if core memory CLI not registered yet
memoryCmd = program.command("memory").description("Memory commands");
}
// Add neo4j memory subcommand group
const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands");
memory
.command("list")
.description("List memory counts by agent and category")
.option("--json", "Output as JSON")
.action(async (opts: { json?: boolean }) => {
try {
await db.ensureInitialized();
const stats = await db.getMemoryStats();
if (opts.json) {
console.log(JSON.stringify(stats, null, 2));
return;
}
if (stats.length === 0) {
console.log("No memories stored.");
return;
}
// Group by agentId
const byAgent = new Map<
string,
Array<{ category: string; count: number; avgImportance: number }>
>();
for (const row of stats) {
const list = byAgent.get(row.agentId) || [];
list.push({
category: row.category,
count: row.count,
avgImportance: row.avgImportance,
});
byAgent.set(row.agentId, list);
}
// Print table for each agent
for (const [agentId, categories] of byAgent) {
const total = categories.reduce((sum, c) => sum + c.count, 0);
console.log(`\n┌─ ${agentId} (${total} total)`);
console.log("│");
console.log("│ Category Count Avg Importance");
console.log("│ ─────────────────────────────────────");
for (const { category, count, avgImportance } of categories) {
const cat = category.padEnd(12);
const cnt = String(count).padStart(5);
const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%";
console.log(`${cat} ${cnt} ${imp}`);
}
console.log("└");
}
console.log("");
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("search")
.description("Search memories")
.argument("<query>", "Search query")
.option("--limit <n>", "Max results", "5")
.option("--agent <id>", "Agent id (default: default)")
.action(async (query: string, opts: { limit: string; agent?: string }) => {
try {
const results = await hybridSearch(
db,
embeddings,
query,
parseInt(opts.limit, 10),
opts.agent ?? "default",
extractionConfig.enabled,
{ graphSearchDepth: cfg.graphSearchDepth },
);
const output = results.map((r) => ({
id: r.id,
text: r.text,
category: r.category,
importance: r.importance,
score: r.score,
}));
console.log(JSON.stringify(output, null, 2));
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("stats")
.description("Show memory statistics and configuration")
.action(async () => {
try {
await db.ensureInitialized();
const stats = await db.getMemoryStats();
const total = stats.reduce((sum, s) => sum + s.count, 0);
console.log("\nMemory (Neo4j) Statistics");
console.log("─────────────────────────");
console.log(`Total memories: ${total}`);
console.log(`Neo4j URI: ${cfg.neo4j.uri}`);
console.log(`Embedding: ${cfg.embedding.provider}/${cfg.embedding.model}`);
console.log(
`Extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`,
);
console.log(`Auto-capture: ${cfg.autoCapture ? "enabled" : "disabled"}`);
console.log(`Auto-recall: ${cfg.autoRecall ? "enabled" : "disabled"}`);
console.log(`Core memory: ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`);
if (stats.length > 0) {
// Group by category across all agents
const byCategory = new Map<string, number>();
for (const row of stats) {
byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count);
}
console.log("\nBy Category:");
for (const [category, count] of byCategory) {
console.log(` ${category.padEnd(12)} ${count}`);
}
// Show agent count
const agents = new Set(stats.map((s) => s.agentId));
console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`);
}
console.log("");
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("sleep")
.description(
"Run sleep cycle — consolidate memories with Pareto-based promotion/demotion",
)
.option("--agent <id>", "Agent id (default: all agents)")
.option("--dedup-threshold <n>", "Vector similarity threshold for dedup (default: 0.95)")
.option("--pareto <n>", "Top N% for core memory (default: 0.2 = top 20%)")
.option("--promotion-min-age <days>", "Min age in days before promotion (default: 7)")
.option("--decay-threshold <n>", "Decay score threshold for pruning (default: 0.1)")
.option("--decay-half-life <days>", "Base half-life in days (default: 30)")
.option("--batch-size <n>", "Extraction batch size (default: 50)")
.option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
.option("--max-semantic-pairs <n>", "Max LLM-checked semantic dedup pairs (default: 500)")
.option(
"--concurrency <n>",
"Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)",
)
.option(
"--skip-semantic",
"Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
)
.action(
async (opts: {
agent?: string;
dedupThreshold?: string;
pareto?: string;
promotionMinAge?: string;
decayThreshold?: string;
decayHalfLife?: string;
batchSize?: string;
delay?: string;
maxSemanticPairs?: string;
concurrency?: string;
skipSemantic?: boolean;
}) => {
console.log("\n🌙 Memory Sleep Cycle");
console.log("═════════════════════════════════════════════════════════════");
console.log("Seven-phase memory consolidation (Pareto-based):\n");
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
console.log(
" Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.750.95 band)",
);
console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories");
console.log(
" Phase 2: Pareto Scoring — Calculate effective scores for all memories",
);
console.log(" Phase 3: Core Promotion — Regular memories above threshold → core");
console.log(" Phase 4: Core Demotion — Core memories below threshold → regular");
console.log(" Phase 5: Extraction — Extract entities and categorize");
console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories");
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n");
try {
// Validate sleep cycle CLI parameters before running
const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : undefined;
const delay = opts.delay ? parseInt(opts.delay, 10) : undefined;
const decayHalfLife = opts.decayHalfLife
? parseInt(opts.decayHalfLife, 10)
: undefined;
const decayThreshold = opts.decayThreshold
? parseFloat(opts.decayThreshold)
: undefined;
const pareto = opts.pareto ? parseFloat(opts.pareto) : undefined;
const promotionMinAge = opts.promotionMinAge
? parseInt(opts.promotionMinAge, 10)
: undefined;
if (batchSize != null && (Number.isNaN(batchSize) || batchSize <= 0)) {
console.error("Error: --batch-size must be greater than 0");
process.exitCode = 1;
return;
}
if (delay != null && (Number.isNaN(delay) || delay < 0)) {
console.error("Error: --delay must be >= 0");
process.exitCode = 1;
return;
}
if (decayHalfLife != null && (Number.isNaN(decayHalfLife) || decayHalfLife <= 0)) {
console.error("Error: --decay-half-life must be greater than 0");
process.exitCode = 1;
return;
}
if (
decayThreshold != null &&
(Number.isNaN(decayThreshold) || decayThreshold < 0 || decayThreshold > 1)
) {
console.error("Error: --decay-threshold must be between 0 and 1");
process.exitCode = 1;
return;
}
if (pareto != null && (Number.isNaN(pareto) || pareto < 0 || pareto > 1)) {
console.error("Error: --pareto must be between 0 and 1");
process.exitCode = 1;
return;
}
if (
promotionMinAge != null &&
(Number.isNaN(promotionMinAge) || promotionMinAge < 0)
) {
console.error("Error: --promotion-min-age must be >= 0");
process.exitCode = 1;
return;
}
const maxSemanticPairs = opts.maxSemanticPairs
? parseInt(opts.maxSemanticPairs, 10)
: undefined;
if (
maxSemanticPairs != null &&
(Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0)
) {
console.error("Error: --max-semantic-pairs must be greater than 0");
process.exitCode = 1;
return;
}
const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) {
console.error("Error: --concurrency must be greater than 0");
process.exitCode = 1;
return;
}
await db.ensureInitialized();
const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
agentId: opts.agent,
dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
skipSemanticDedup: opts.skipSemantic === true,
maxSemanticDedupPairs: maxSemanticPairs,
llmConcurrency: concurrency,
paretoPercentile: pareto,
promotionMinAgeDays: promotionMinAge,
decayRetentionThreshold: decayThreshold,
decayBaseHalfLifeDays: decayHalfLife,
decayCurves:
Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined,
extractionBatchSize: batchSize,
extractionDelayMs: delay,
onPhaseStart: (phase) => {
const phaseNames: Record<string, string> = {
dedup: "Phase 1: Deduplication",
semanticDedup: "Phase 1b: Semantic Deduplication",
conflict: "Phase 1c: Conflict Detection",
pareto: "Phase 2: Pareto Scoring",
promotion: "Phase 3: Core Promotion",
demotion: "Phase 4: Core Demotion",
extraction: "Phase 5: Extraction",
decay: "Phase 6: Decay & Pruning",
cleanup: "Phase 7: Orphan Cleanup",
};
console.log(`\n▶ ${phaseNames[phase]}`);
console.log("─────────────────────────────────────────────────────────────");
},
onProgress: (_phase, message) => {
console.log(` ${message}`);
},
});
console.log("\n═════════════════════════════════════════════════════════════");
console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`);
console.log("─────────────────────────────────────────────────────────────");
console.log(
` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`,
);
console.log(
` Conflicts: ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
);
console.log(
` Semantic Dedup: ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
);
console.log(
` Pareto: ${result.pareto.totalMemories} total (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
);
console.log(
` Threshold: ${result.pareto.threshold.toFixed(4)} (top 20%)`,
);
console.log(
` Promotion: ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`,
);
console.log(
` Demotion: ${result.demotion.demoted}/${result.demotion.candidatesFound} demoted from core`,
);
console.log(` Decay/Pruning: ${result.decay.memoriesPruned} memories pruned`);
console.log(
` Extraction: ${result.extraction.succeeded}/${result.extraction.total} extracted` +
(result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""),
);
console.log(
` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
);
if (result.aborted) {
console.log("\n⚠ Sleep cycle was aborted before completion.");
}
console.log("");
} catch (err) {
console.error(
`\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`,
);
process.exitCode = 1;
}
},
);
memory
.command("promote")
.description("Manually promote a memory to core status")
.argument("<id>", "Memory ID to promote")
.action(async (id: string) => {
try {
await db.ensureInitialized();
const promoted = await db.promoteToCore([id]);
if (promoted > 0) {
console.log(`✅ Memory ${id} promoted to core.`);
} else {
console.log(`❌ Memory ${id} not found.`);
process.exitCode = 1;
}
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("index")
.description(
"Re-embed all memories and entities — use after changing embedding model/provider",
)
.option("--batch-size <n>", "Embedding batch size (default: 50)")
.action(async (opts: { batchSize?: string }) => {
const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : 50;
if (Number.isNaN(batchSize) || batchSize <= 0) {
console.error("Error: --batch-size must be greater than 0");
process.exitCode = 1;
return;
}
console.log("\nMemory Neo4j — Reindex Embeddings");
console.log("═════════════════════════════════════════════════════════════");
console.log(`Model: ${cfg.embedding.provider}/${cfg.embedding.model}`);
console.log(`Dimensions: ${vectorDim}`);
console.log(`Batch size: ${batchSize}\n`);
try {
const startedAt = Date.now();
const result = await db.reindex((texts) => embeddings.embedBatch(texts), {
batchSize,
onProgress: (phase, done, total) => {
if (phase === "drop-indexes" && done === 0) {
console.log("▶ Dropping old vector index…");
} else if (phase === "memories") {
console.log(` Memories: ${done}/${total}`);
} else if (phase === "create-indexes" && done === 0) {
console.log("▶ Recreating vector index…");
}
},
});
const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
console.log("\n═════════════════════════════════════════════════════════════");
console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`);
console.log("");
} catch (err) {
console.error(
`\n❌ Reindex failed: ${err instanceof Error ? err.message : String(err)}`,
);
process.exitCode = 1;
}
});
memory
.command("cleanup")
.description(
"Retroactively apply the attention gate — find and remove low-substance memories",
)
.option("--execute", "Actually delete (default: dry-run preview)")
.option("--all", "Include explicitly-stored memories (default: auto-capture only)")
.option("--agent <id>", "Only clean up memories for a specific agent")
.action(async (opts: { execute?: boolean; all?: boolean; agent?: string }) => {
try {
await db.ensureInitialized();
// Fetch memories — by default only auto-capture (explicit stores are trusted)
const conditions: string[] = [];
if (!opts.all) {
conditions.push("m.source = 'auto-capture'");
}
if (opts.agent) {
conditions.push("m.agentId = $agentId");
}
const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
const allMemories = await db.runQuery<{ id: string; text: string; source: string }>(
`MATCH (m:Memory) ${where}
RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source
ORDER BY m.createdAt ASC`,
opts.agent ? { agentId: opts.agent } : {},
);
// Strip channel metadata wrappers (same as the real pipeline) then gate
const noise: Array<{ id: string; text: string; source: string }> = [];
for (const mem of allMemories) {
const stripped = stripMessageWrappers(mem.text);
if (!passesAttentionGate(stripped)) {
noise.push(mem);
}
}
if (noise.length === 0) {
console.log("\nNo low-substance memories found. Everything passes the gate.");
return;
}
console.log(
`\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`,
);
for (const mem of noise) {
const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text;
console.log(` [${mem.source}] "${preview}"`);
}
if (!opts.execute) {
console.log(
`\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`,
);
return;
}
// Delete in batch
const deleted = await db.pruneMemories(noise.map((m) => m.id));
console.log(`\nDeleted ${deleted} low-substance memories.\n`);
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
},
{ commands: [] }, // Adds subcommands to existing "memory" command, no conflict
);
registerCli(api, { db, embeddings, cfg, extractionConfig, vectorDim });
// ========================================================================
// Lifecycle Hooks
@@ -952,8 +469,9 @@ const memoryNeo4jPlugin = {
}
try {
const t0 = performance.now();
const maxEntries = cfg.coreMemory.maxEntries;
const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
const coreMemories = await db.listCoreForInjection(maxEntries, agentId);
if (coreMemories.length === 0) {
return;
@@ -964,8 +482,9 @@ const memoryNeo4jPlugin = {
touchSession(sessionKey);
const content = coreMemories.map((m) => `- ${m.text}`).join("\n");
const totalMs = performance.now() - t0;
api.logger.info?.(
`memory-neo4j: mid-session core refresh at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`,
`memory-neo4j: [bench] core-refresh ${totalMs.toFixed(0)}ms at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`,
);
return {
@@ -1009,6 +528,7 @@ const memoryNeo4jPlugin = {
: event.prompt;
try {
const t0 = performance.now();
let results = await hybridSearch(
db,
embeddings,
@@ -1016,8 +536,9 @@ const memoryNeo4jPlugin = {
3,
agentId,
extractionConfig.enabled,
{ graphSearchDepth: cfg.graphSearchDepth },
{ graphSearchDepth: cfg.graphSearchDepth, logger: api.logger },
);
const tSearch = performance.now();
// Feature 1: Filter out low-relevance results below min RRF score
results = results.filter((r) => r.score >= cfg.autoRecallMinScore);
@@ -1029,13 +550,17 @@ const memoryNeo4jPlugin = {
results = results.filter((r) => !coreIds.has(r.id));
}
const totalMs = performance.now() - t0;
api.logger.info?.(
`memory-neo4j: [bench] auto-recall ${totalMs.toFixed(0)}ms total (search=${(tSearch - t0).toFixed(0)}ms), ${results.length} results`,
);
if (results.length === 0) {
return;
}
const memoryContext = results.map((r) => `- [${r.category}] ${r.text}`).join("\n");
api.logger.info?.(`memory-neo4j: injecting ${results.length} memories into context`);
api.logger.debug?.(
`memory-neo4j: auto-recall memories: ${JSON.stringify(results.map((r) => ({ id: r.id, text: r.text.slice(0, 80), category: r.category, score: r.score })))}`,
);
@@ -1075,23 +600,25 @@ const memoryNeo4jPlugin = {
}
try {
const t0 = performance.now();
const agentId = ctx.agentId || "default";
const maxEntries = cfg.coreMemory.maxEntries;
api.logger.debug?.(
`memory-neo4j: loading core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
);
// Core memories are always included (no importance filter) - if marked as core, it's important
// Results are ordered by importance desc, so most important come first up to maxEntries
const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
// All user-pinned core memories are always included (no limit).
// Non-pinned core memories fill remaining slots up to maxEntries, ordered by importance.
const coreMemories = await db.listCoreForInjection(maxEntries, agentId);
const tQuery = performance.now();
if (coreMemories.length === 0) {
if (sessionKey) {
bootstrappedSessions.add(sessionKey);
touchSession(sessionKey);
}
api.logger.debug?.(
`memory-neo4j: no core memories found for agent=${agentId}, marking session as bootstrapped`,
api.logger.info?.(
`memory-neo4j: [bench] core-inject ${(tQuery - t0).toFixed(0)}ms (0 memories, skipped)`,
);
return;
}
@@ -1128,9 +655,10 @@ const memoryNeo4jPlugin = {
coreMemoryIdsBySession.set(sessionKey, new Set(coreMemories.map((m) => m.id)));
touchSession(sessionKey);
}
// Log at info level when actually injecting, debug for skips
const totalMs = performance.now() - t0;
api.logger.info?.(
`memory-neo4j: ${action} MEMORY.md with ${coreMemories.length} core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
`memory-neo4j: [bench] core-inject ${totalMs.toFixed(0)}ms (query=${(tQuery - t0).toFixed(0)}ms), ${action} MEMORY.md with ${coreMemories.length} memories`,
);
return { files };
@@ -1152,7 +680,7 @@ const memoryNeo4jPlugin = {
//
// Phase 3 — Sleep consolidation (deferred to `openclaw memory neo4j sleep`):
// The sleep cycle handles entity extraction, categorization, Pareto
// scoring, promotion/demotion, and decay — mirroring hippocampal replay.
// scoring, promotion, and decay — mirroring hippocampal replay.
api.logger.debug?.(
`memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`,
);
@@ -1228,12 +756,6 @@ const memoryNeo4jPlugin = {
// Auto-capture pipeline (fire-and-forget from agent_end hook)
// ============================================================================
type AutoCaptureLogger = {
info: (msg: string) => void;
warn: (msg: string) => void;
debug?: (msg: string) => void;
};
/**
* Shared capture logic for both user and assistant messages.
* Extracts the common embed → dedup → rate → store pipeline.
@@ -1248,7 +770,8 @@ async function captureMessage(
db: import("./neo4j-client.js").Neo4jMemoryClient,
embeddings: import("./embeddings.js").Embeddings,
extractionConfig: import("./config.js").ExtractionConfig,
logger: AutoCaptureLogger,
logger: Logger,
precomputedVector?: number[],
): Promise<{ stored: boolean; semanticDeduped: boolean }> {
// For assistant messages, rate importance first (before embedding) to skip early.
// When extraction is disabled, rateImportance returns 0.5 (the fallback), so we
@@ -1263,11 +786,14 @@ async function captureMessage(
}
}
const vector = await embeddings.embed(text);
const vector = precomputedVector ?? (await embeddings.embed(text));
// Quick dedup (same content already stored — cosine >= 0.95)
const existing = await db.findSimilar(vector, 0.95, 1, agentId);
if (existing.length > 0) {
// Single vector search at lower threshold, split by score band
const candidates = await db.findSimilar(vector, 0.75, 3, agentId);
// Exact dedup: any candidate with score >= 0.95 means it's a duplicate
const exactDup = candidates.find((c) => c.score >= 0.95);
if (exactDup) {
return { stored: false, semanticDeduped: false };
}
@@ -1281,10 +807,9 @@ async function captureMessage(
}
}
// Semantic dedup: check moderate-similarity memories (0.75-0.95)
// Semantic dedup: remaining candidates in 0.75-0.95 band
// Pass the vector similarity score as a pre-screen to skip LLM calls
// for pairs below SEMANTIC_DEDUP_VECTOR_THRESHOLD.
const candidates = await db.findSimilar(vector, 0.75, 3, agentId);
if (candidates.length > 0) {
for (const candidate of candidates) {
if (await isSemanticDuplicate(text, candidate.text, extractionConfig, candidate.score)) {
@@ -1321,9 +846,10 @@ async function runAutoCapture(
db: import("./neo4j-client.js").Neo4jMemoryClient,
embeddings: import("./embeddings.js").Embeddings,
extractionConfig: import("./config.js").ExtractionConfig,
logger: AutoCaptureLogger,
logger: Logger,
): Promise<void> {
try {
const t0 = performance.now();
let stored = 0;
let semanticDeduped = 0;
@@ -1331,19 +857,51 @@ async function runAutoCapture(
const userMessages = extractUserMessages(messages);
const retained = userMessages.filter((text) => passesAttentionGate(text));
// Process assistant messages
const assistantMessages = extractAssistantMessages(messages);
const retainedAssistant = assistantMessages.filter((text) =>
passesAssistantAttentionGate(text),
);
const tGate = performance.now();
// Collect all texts to embed in a single batch
const allTexts: string[] = [];
const allMeta: Array<{
text: string;
source: "auto-capture" | "auto-capture-assistant";
threshold: number;
discount: number;
}> = [];
for (const text of retained) {
allTexts.push(text);
allMeta.push({ text, source: "auto-capture", threshold: 0.5, discount: 1.0 });
}
for (const text of retainedAssistant) {
allTexts.push(text);
allMeta.push({ text, source: "auto-capture-assistant", threshold: 0.8, discount: 0.75 });
}
// Batch embed all at once
const vectors = allTexts.length > 0 ? await embeddings.embedBatch(allTexts) : [];
const tEmbed = performance.now();
// Process each with pre-computed vector
for (let i = 0; i < allMeta.length; i++) {
try {
const meta = allMeta[i];
const result = await captureMessage(
text,
"auto-capture",
0.5,
1.0,
meta.text,
meta.source,
meta.threshold,
meta.discount,
agentId,
sessionKey,
db,
embeddings,
extractionConfig,
logger,
vectors[i],
);
if (result.stored) stored++;
if (result.semanticDeduped) semanticDeduped++;
@@ -1351,50 +909,23 @@ async function runAutoCapture(
logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`);
}
}
const tProcess = performance.now();
// Process assistant messages
const assistantMessages = extractAssistantMessages(messages);
const retainedAssistant = assistantMessages.filter((text) =>
passesAssistantAttentionGate(text),
const totalMs = tProcess - t0;
const gateMs = tGate - t0;
const embedMs = tEmbed - tGate;
const processMs = tProcess - tEmbed;
logger.info(
`memory-neo4j: [bench] auto-capture ${totalMs.toFixed(0)}ms total (gate=${gateMs.toFixed(0)}ms, embed=${embedMs.toFixed(0)}ms, process=${processMs.toFixed(0)}ms), ` +
`${retained.length}+${retainedAssistant.length} gated, ${stored} stored, ${semanticDeduped} deduped`,
);
for (const text of retainedAssistant) {
try {
const result = await captureMessage(
text,
"auto-capture-assistant",
0.8,
0.75,
agentId,
sessionKey,
db,
embeddings,
extractionConfig,
logger,
);
if (result.stored) stored++;
if (result.semanticDeduped) semanticDeduped++;
} catch (err) {
logger.debug?.(`memory-neo4j: assistant auto-capture item failed: ${String(err)}`);
}
}
if (stored > 0 || semanticDeduped > 0) {
logger.info(
`memory-neo4j: auto-captured ${stored} memories (attention-gated)${semanticDeduped > 0 ? `, ${semanticDeduped} semantic dupes skipped` : ""}`,
);
} else if (userMessages.length > 0 || assistantMessages.length > 0) {
logger.info(
`memory-neo4j: auto-capture ran (0 stored, ${userMessages.length} user msgs, ${retained.length} passed gate, ${assistantMessages.length} assistant msgs, ${retainedAssistant.length} passed gate)`,
);
}
} catch (err) {
logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`);
}
}
// Re-export attention gate for backwards compatibility (tests import from here)
export { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
// Export auto-capture internals for testing
export { captureMessage as _captureMessage, runAutoCapture as _runAutoCapture };
// ============================================================================
// Export

View File

@@ -0,0 +1,188 @@
/**
* OpenRouter/OpenAI-compatible LLM API client for memory-neo4j.
*
* Handles non-streaming and streaming chat completion requests with
* retry logic, timeout handling, and abort signal support.
*/
import type { ExtractionConfig } from "./config.js";
// Timeout for LLM and embedding fetch calls to prevent hanging indefinitely
export const FETCH_TIMEOUT_MS = 30_000;
/**
 * Produce the abort signal used for a single fetch attempt.
 *
 * A fresh FETCH_TIMEOUT_MS timeout signal is created on every call. When the
 * caller supplied its own signal, the two are merged so that whichever fires
 * first aborts the request.
 *
 * @param abortSignal - Optional caller-owned cancellation signal.
 * @returns The timeout signal alone, or the caller signal combined with it.
 */
function buildSignal(abortSignal?: AbortSignal): AbortSignal {
  if (!abortSignal) {
    return AbortSignal.timeout(FETCH_TIMEOUT_MS);
  }
  const perRequestTimeout = AbortSignal.timeout(FETCH_TIMEOUT_MS);
  return AbortSignal.any([abortSignal, perRequestTimeout]);
}
/**
 * Shared request/retry logic for OpenRouter API calls.
 *
 * POSTs a JSON-mode chat completion request to `${config.baseUrl}/chat/completions`
 * and hands the successful Response to `parseFn`, which differs between the
 * streaming and non-streaming callers.
 *
 * Retry behaviour: any failure (network error, timeout, non-2xx status, or an
 * error thrown by `parseFn`) is retried with exponential backoff
 * (500ms, 1000ms, 2000ms, ...) for up to `config.maxRetries` additional attempts.
 * The last error is rethrown once retries are exhausted. If the caller's own
 * `abortSignal` has fired, the error is rethrown immediately: every further
 * attempt would abort straight away, so backing off would only delay the
 * cancellation.
 *
 * @param config - Supplies baseUrl, apiKey, model, temperature and maxRetries.
 * @param messages - Chat messages sent as the request's `messages` array.
 * @param abortSignal - Optional caller cancellation signal; combined with a
 *   per-attempt timeout via buildSignal.
 * @param stream - When true, `stream: true` is added to the request body.
 * @param parseFn - Converts the successful Response into the completion text.
 * @returns Whatever `parseFn` resolves to; null only when the loop never runs
 *   (i.e. `config.maxRetries` is negative).
 * @throws The last error encountered once retries are exhausted or the caller aborted.
 */
async function openRouterRequest(
  config: ExtractionConfig,
  messages: Array<{ role: string; content: string }>,
  abortSignal: AbortSignal | undefined,
  stream: boolean,
  parseFn: (response: Response, abortSignal?: AbortSignal) => Promise<string | null>,
): Promise<string | null> {
  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    try {
      // A fresh signal per attempt so each retry gets its own full timeout window.
      const signal = buildSignal(abortSignal);

      const response = await fetch(`${config.baseUrl}/chat/completions`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${config.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: config.model,
          messages,
          temperature: config.temperature,
          response_format: { type: "json_object" },
          ...(stream ? { stream: true } : {}),
        }),
        signal,
      });

      if (!response.ok) {
        // Include the response body in the error when it can be read.
        const body = await response.text().catch(() => "");
        throw new Error(`OpenRouter API error ${response.status}: ${body}`);
      }

      return await parseFn(response, abortSignal);
    } catch (err) {
      // Out of retries, or the caller cancelled: surface the error right away.
      // Retrying after a caller abort is pointless because the combined signal
      // is already aborted and each new attempt would fail instantly.
      if (attempt >= config.maxRetries || abortSignal?.aborted) {
        throw err;
      }
      // Exponential backoff
      await new Promise((resolve) => setTimeout(resolve, 500 * 2 ** attempt));
    }
  }
  return null;
}
/**
 * Read a non-streaming chat completion response as JSON and pull out the
 * content of the first choice's message.
 *
 * @param response - Response returned by the chat completions endpoint.
 * @returns The first choice's message content, or null when it is absent.
 */
async function parseNonStreaming(response: Response): Promise<string | null> {
  const payload = (await response.json()) as {
    choices?: Array<{ message?: { content?: string } }>;
  };
  const firstChoice = payload.choices?.[0];
  return firstChoice?.message?.content ?? null;
}
/**
 * Parse a streaming SSE response, accumulating chunks into a single string.
 *
 * The body is read chunk by chunk, decoded as text and split on newlines.
 * Only lines starting with `data: ` are considered; the `[DONE]` sentinel and
 * payloads that fail to parse as JSON are skipped. The `delta.content` of the
 * first choice in each payload is appended to the result.
 *
 * When the stream ends, the decoder is flushed and any remaining partial line
 * is parsed too, so a final event that is not terminated by a newline is not
 * lost.
 *
 * @param response - Streaming Response from the chat completions endpoint.
 * @param abortSignal - Optional caller signal, checked before each read; when
 *   aborted the reader is cancelled and null is returned.
 * @returns The concatenated content, or null if nothing was accumulated or
 *   the caller aborted.
 * @throws Error when the response has no body.
 */
async function parseStreaming(
  response: Response,
  abortSignal?: AbortSignal,
): Promise<string | null> {
  if (!response.body) {
    throw new Error("No response body for streaming request");
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let accumulated = "";
  let buffer = "";

  // Append the delta content carried by one SSE line, if it carries any.
  const consumeLine = (line: string): void => {
    const trimmed = line.trim();
    if (!trimmed.startsWith("data: ")) return;

    const data = trimmed.slice(6);
    if (data === "[DONE]") return;

    try {
      const parsed = JSON.parse(data) as {
        choices?: Array<{ delta?: { content?: string } }>;
      };
      const chunk = parsed.choices?.[0]?.delta?.content;
      if (chunk) {
        accumulated += chunk;
      }
    } catch {
      // Skip malformed SSE chunks
    }
  };

  for (;;) {
    // Check abort between chunks for responsive cancellation
    if (abortSignal?.aborted) {
      reader.cancel().catch(() => {});
      return null;
    }

    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });

    // Everything before the last newline is complete; keep the tail for the next read.
    const lines = buffer.split("\n");
    buffer = lines.pop() ?? "";
    for (const line of lines) {
      consumeLine(line);
    }
  }

  // Flush bytes the decoder was still holding, then handle a final line that
  // arrived without a trailing newline.
  buffer += decoder.decode();
  if (buffer) {
    consumeLine(buffer);
  }

  return accumulated || null;
}
/**
 * Issue a non-streaming chat completion request and return the completion text.
 *
 * @param config - Extraction configuration forwarded to the shared request logic.
 * @param prompt - Either a ready-made message array, or a plain string that is
 *   sent as a single user-role message.
 * @param abortSignal - Optional caller cancellation signal.
 * @returns The completion content, or null when the response carries none.
 */
export async function callOpenRouter(
  config: ExtractionConfig,
  prompt: string | Array<{ role: string; content: string }>,
  abortSignal?: AbortSignal,
): Promise<string | null> {
  let chat: Array<{ role: string; content: string }>;
  if (typeof prompt === "string") {
    // A bare string is shorthand for one user message.
    chat = [{ role: "user", content: prompt }];
  } else {
    chat = prompt;
  }
  return openRouterRequest(config, chat, abortSignal, false, parseNonStreaming);
}
/**
 * Streaming counterpart of callOpenRouter.
 *
 * The request is made with streaming enabled and the chunks are read
 * incrementally, which lets an abort signal cancel the call between chunks.
 * The chunks are still joined into one string before returning, because the
 * request uses JSON mode and the result is only usable as a complete object.
 *
 * @param config - Extraction configuration forwarded to the shared request logic.
 * @param prompt - Either a ready-made message array, or a plain string that is
 *   sent as a single user-role message.
 * @param abortSignal - Optional caller cancellation signal.
 * @returns The accumulated completion content, or null when there is none.
 */
export async function callOpenRouterStream(
  config: ExtractionConfig,
  prompt: string | Array<{ role: string; content: string }>,
  abortSignal?: AbortSignal,
): Promise<string | null> {
  let chat: Array<{ role: string; content: string }>;
  if (typeof prompt === "string") {
    // A bare string is shorthand for one user message.
    chat = [{ role: "user", content: prompt }];
  } else {
    chat = prompt;
  }
  return openRouterRequest(config, chat, abortSignal, true, parseStreaming);
}
/**
 * Decide whether an error looks transient (worth retrying) rather than permanent.
 *
 * Non-Error values are never transient. An Error is transient when its name is
 * AbortError or TimeoutError, or when its lower-cased message contains one of
 * the known network/timeout markers or an "api error" marker for status
 * 429, 502, 503 or 504.
 *
 * @param err - The caught value to classify.
 * @returns true for transient failures, false otherwise.
 */
export function isTransientError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }

  const lowered = err.message.toLowerCase();

  const transientNames = ["AbortError", "TimeoutError"];
  if (transientNames.includes(err.name)) {
    return true;
  }

  // Substrings that identify network-level or rate-limit/gateway failures.
  const transientMarkers = [
    "timeout",
    "econnrefused",
    "econnreset",
    "etimedout",
    "enotfound",
    "network",
    "fetch failed",
    "socket hang up",
    "api error 429",
    "api error 502",
    "api error 503",
    "api error 504",
  ];
  return transientMarkers.some((marker) => lowered.includes(marker));
}

View File

@@ -8,14 +8,18 @@
*/
// ============================================================================
// User Message Extraction
// Core Extraction
// ============================================================================
/**
* Extract user message texts from the event.messages array.
* Handles both string content and content block arrays.
* Extract text blocks from messages with a given role, apply a strip function,
* and filter out short results. Handles both string content and content block arrays.
*/
export function extractUserMessages(messages: unknown[]): string[] {
function extractMessagesByRole(
messages: unknown[],
role: string,
stripFn: (text: string) => string,
): string[] {
const texts: string[] = [];
for (const msg of messages) {
@@ -24,8 +28,7 @@ export function extractUserMessages(messages: unknown[]): string[] {
}
const msgObj = msg as Record<string, unknown>;
// Only process user messages for auto-capture
if (msgObj.role !== "user") {
if (msgObj.role !== role) {
continue;
}
@@ -51,8 +54,18 @@ export function extractUserMessages(messages: unknown[]): string[] {
}
}
// Strip wrappers then filter by length
return texts.map(stripMessageWrappers).filter((t) => t.length >= 10);
return texts.map(stripFn).filter((t) => t.length >= 10);
}
// ============================================================================
// User Message Extraction
// ============================================================================
/**
 * Collect the text of every message whose role is "user".
 *
 * Delegates to extractMessagesByRole with stripMessageWrappers as the
 * per-text strip function.
 *
 * @param messages - The event.messages array.
 * @returns The texts produced by extractMessagesByRole for the "user" role.
 */
export function extractUserMessages(messages: unknown[]): string[] {
  const userTexts = extractMessagesByRole(messages, "user", stripMessageWrappers);
  return userTexts;
}
/**
@@ -84,9 +97,7 @@ export function stripMessageWrappers(text: string): string {
s = s.replace(/---\s*Queued #\d+\s*/g, "");
// Telegram wrapper — may now be at start after previous strips
s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, "");
// "[message_id: NNN]" suffix (Telegram)
s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, "");
// "[message_id: UUID]" suffix (non-numeric Telegram/channel IDs)
// "[message_id: ...]" suffix (Telegram and other channel IDs)
s = s.replace(/\n?\[message_id:\s*[^\]]+\]\s*$/i, "");
// Slack wrapper — "[Slack <workspace> #channel @user] MESSAGE [slack message id: ...]"
s = s.replace(/^\s*\[Slack\s[^\]]+\]\s*/i, "");
@@ -118,42 +129,7 @@ export function stripAssistantWrappers(text: string): string {
/**
* Extract assistant message texts from the event.messages array.
* Handles both string content and content block arrays.
*/
export function extractAssistantMessages(messages: unknown[]): string[] {
const texts: string[] = [];
for (const msg of messages) {
if (!msg || typeof msg !== "object") {
continue;
}
const msgObj = msg as Record<string, unknown>;
if (msgObj.role !== "assistant") {
continue;
}
const content = msgObj.content;
if (typeof content === "string") {
texts.push(content);
continue;
}
if (Array.isArray(content)) {
for (const block of content) {
if (
block &&
typeof block === "object" &&
"type" in block &&
(block as Record<string, unknown>).type === "text" &&
"text" in block &&
typeof (block as Record<string, unknown>).text === "string"
) {
texts.push((block as Record<string, unknown>).text as string);
}
}
}
}
return texts.map(stripAssistantWrappers).filter((t) => t.length >= 10);
return extractMessagesByRole(messages, "assistant", stripAssistantWrappers);
}

View File

@@ -7,7 +7,7 @@
import type { Driver } from "neo4j-driver";
import { describe, it, expect, vi, beforeEach } from "vitest";
import type { StoreMemoryInput, MergeEntityInput } from "./schema.js";
import type { StoreMemoryInput } from "./schema.js";
import { Neo4jMemoryClient } from "./neo4j-client.js";
// ============================================================================
@@ -867,10 +867,10 @@ describe("Neo4jMemoryClient", () => {
});
// ------------------------------------------------------------------------
// promoteToCore() / demoteFromCore()
// promoteToCore()
// ------------------------------------------------------------------------
describe("Core promotion/demotion", () => {
describe("Core promotion", () => {
it("should promote memories to core category", async () => {
mockSession.run.mockResolvedValue({
records: [{ get: vi.fn().mockReturnValue(2) }],
@@ -885,26 +885,10 @@ describe("Neo4jMemoryClient", () => {
);
});
it("should demote memories from core category", async () => {
mockSession.run.mockResolvedValue({
records: [{ get: vi.fn().mockReturnValue(1) }],
});
const result = await client.demoteFromCore(["m1"]);
expect(result).toBe(1);
expect(mockSession.run).toHaveBeenCalledWith(
expect.stringContaining("category = 'fact'"),
expect.objectContaining({ ids: ["m1"] }),
);
});
it("should handle empty ID arrays", async () => {
const promoteResult = await client.promoteToCore([]);
const demoteResult = await client.demoteFromCore([]);
expect(promoteResult).toBe(0);
expect(demoteResult).toBe(0);
});
});
@@ -1157,115 +1141,6 @@ describe("Neo4jMemoryClient", () => {
});
});
// ------------------------------------------------------------------------
// Entity and Tag operations
// ------------------------------------------------------------------------
describe("Entity operations", () => {
it("should merge entity idempotently", async () => {
mockSession.run.mockResolvedValue({
records: [
{
get: vi.fn((key) => {
const data: Record<string, any> = { id: "e1", name: "tarun" };
return data[key];
}),
},
],
});
const input: MergeEntityInput = {
id: "e1",
name: "Tarun",
type: "person",
aliases: ["boss"],
description: "CEO",
};
const result = await client.mergeEntity(input);
expect(result).toEqual({ id: "e1", name: "tarun" });
expect(mockSession.run).toHaveBeenCalledWith(
expect.stringContaining("MERGE (e:Entity {name: $name})"),
expect.objectContaining({
name: "tarun", // normalized
}),
);
});
it("should create MENTIONS relationship", async () => {
mockSession.run.mockResolvedValue({ records: [] });
await client.createMentions("mem-1", "Tarun", "context", 0.95);
expect(mockSession.run).toHaveBeenCalledWith(
expect.stringContaining("MERGE (m)-[r:MENTIONS]->(e)"),
expect.objectContaining({
memoryId: "mem-1",
entityName: "tarun", // normalized
role: "context",
confidence: 0.95,
}),
);
});
it("should create entity relationships with validated type", async () => {
mockSession.run.mockResolvedValue({ records: [] });
await client.createEntityRelationship("Alice", "Acme", "WORKS_AT", 0.9);
expect(mockSession.run).toHaveBeenCalledWith(
expect.stringContaining("MERGE (e1)-[r:WORKS_AT]->(e2)"),
expect.objectContaining({
sourceName: "alice",
targetName: "acme",
confidence: 0.9,
}),
);
});
it("should reject invalid relationship types", async () => {
await client.createEntityRelationship("a", "b", "INVALID_TYPE", 0.9);
expect(mockLogger.warn).toHaveBeenCalledWith(
expect.stringContaining("rejected invalid relationship type"),
);
expect(mockSession.run).not.toHaveBeenCalled();
});
});
describe("Tag operations", () => {
it("should tag memory with normalized tag name", async () => {
mockSession.run.mockResolvedValue({ records: [] });
await client.tagMemory("mem-1", "Neo4j", "technology", 0.95);
expect(mockSession.run).toHaveBeenCalledWith(
expect.stringContaining("MERGE (t:Tag {name: $tagName})"),
expect.objectContaining({
memoryId: "mem-1",
tagName: "neo4j", // normalized
tagCategory: "technology",
confidence: 0.95,
}),
);
});
it("should update memory category only when current is 'other'", async () => {
mockSession.run.mockResolvedValue({ records: [] });
await client.updateMemoryCategory("mem-1", "fact");
expect(mockSession.run).toHaveBeenCalledWith(
expect.stringContaining("WHERE m.category = 'other'"),
expect.objectContaining({
id: "mem-1",
category: "fact",
}),
);
});
});
// ------------------------------------------------------------------------
// Extraction status tracking
// ------------------------------------------------------------------------
@@ -1296,16 +1171,6 @@ describe("Neo4jMemoryClient", () => {
);
});
it("should get extraction retry count", async () => {
mockSession.run.mockResolvedValue({
records: [{ get: vi.fn().mockReturnValue(3) }],
});
const result = await client.getExtractionRetries("mem-1");
expect(result).toBe(3);
});
it("should count memories by extraction status", async () => {
mockSession.run.mockResolvedValue({
records: [

View File

@@ -10,13 +10,13 @@
import neo4j, { type Driver } from "neo4j-driver";
import { randomUUID } from "node:crypto";
import type {
ExtractionStatus,
MergeEntityInput,
SearchSignalResult,
StoreMemoryInput,
import type { ExtractionStatus, Logger, SearchSignalResult, StoreMemoryInput } from "./schema.js";
import {
ALLOWED_RELATIONSHIP_TYPES,
escapeLucene,
makePairKey,
validateRelationshipType,
} from "./schema.js";
import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } from "./schema.js";
// SAFETY: This pattern is built from the hardcoded ALLOWED_RELATIONSHIP_TYPES constant,
// not from user input. It's used in Cypher variable-length path patterns like
@@ -24,17 +24,6 @@ import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } fr
// constant, there is no injection risk.
const RELATIONSHIP_TYPE_PATTERN = [...ALLOWED_RELATIONSHIP_TYPES].join("|");
// ============================================================================
// Types
// ============================================================================
type Logger = {
info: (msg: string) => void;
warn: (msg: string) => void;
error: (msg: string) => void;
debug?: (msg: string) => void;
};
// Retry configuration for transient Neo4j errors (deadlocks, etc.)
const TRANSIENT_RETRY_ATTEMPTS = 3;
const TRANSIENT_RETRY_BASE_DELAY_MS = 500;
@@ -159,7 +148,7 @@ export class Neo4jMemoryClient {
"CREATE INDEX entity_name_index IF NOT EXISTS FOR (e:Entity) ON (e.name)",
);
// Composite index for queries that filter by both agentId and category
// (e.g. listByCategory, promotion/demotion filtering in sleep cycle)
// (e.g. listByCategory, promotion filtering in sleep cycle)
await this.runSafe(
session,
"CREATE INDEX memory_agent_category_index IF NOT EXISTS FOR (m:Memory) ON (m.agentId, m.category)",
@@ -256,12 +245,14 @@ export class Neo4jMemoryClient {
agentId: $agentId, sessionKey: $sessionKey,
createdAt: $createdAt, updatedAt: $updatedAt,
retrievalCount: $retrievalCount, lastRetrievedAt: $lastRetrievedAt,
extractionRetries: $extractionRetries
extractionRetries: $extractionRetries,
userPinned: $userPinned
})
RETURN m.id AS id`,
{
...input,
sessionKey: input.sessionKey ?? null,
userPinned: input.userPinned ?? false,
createdAt: now,
updatedAt: now,
retrievalCount: 0,
@@ -397,6 +388,47 @@ export class Neo4jMemoryClient {
}
}
/**
 * Load core memories for injection: ALL user-pinned core memories (no limit)
 * plus up to maxRegular non-pinned core memories ordered by importance.
 *
 * Total returned = (all userPinned core) + (top maxRegular non-pinned core).
 *
 * Memories whose `userPinned` property is missing are treated as non-pinned
 * (the query coalesces it to false). Rows come back with the pinned memories
 * first, followed by the non-pinned ones; the query sorts by importance
 * descending before collecting into those two lists.
 *
 * @param maxRegular - Cap on the number of NON-pinned core memories; floored
 *   to an integer before being sent to Neo4j. Pinned memories are not counted
 *   against it.
 *   NOTE(review): a negative value would reach the Cypher list slice as-is —
 *   confirm callers always pass a non-negative number.
 * @param agentId - When provided, only memories with this agentId are
 *   considered; when omitted, no agent filter is applied.
 * @returns id, text, category and importance for each selected memory.
 */
async listCoreForInjection(
maxRegular: number,
agentId?: string,
): Promise<{ id: string; text: string; category: string; importance: number }[]> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
// The filter clause is only spliced into the query when an agentId was given;
// the value itself is always passed as a query parameter.
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
const result = await session.run(
`MATCH (m:Memory)
WHERE m.category = 'core' ${agentFilter}
WITH m, coalesce(m.userPinned, false) AS pinned
ORDER BY m.importance DESC
WITH collect({id: m.id, text: m.text, category: m.category, importance: m.importance, pinned: pinned}) AS all
WITH [x IN all WHERE x.pinned] AS pinnedList,
[x IN all WHERE NOT x.pinned][0..$maxRegular] AS regularList
UNWIND (pinnedList + regularList) AS mem
RETURN mem.id AS id, mem.text AS text, mem.category AS category, mem.importance AS importance`,
{
// Sent as a Neo4j integer so it can be used as a list slice bound.
maxRegular: neo4j.int(Math.floor(maxRegular)),
...(agentId ? { agentId } : {}),
},
);
// Map each record to a plain object for the caller.
return result.records.map((r) => ({
id: r.get("id") as string,
text: r.get("text") as string,
category: r.get("category") as string,
importance: r.get("importance") as number,
}));
} finally {
// Always release the session, including when the query throws.
await session.close();
}
}
// --------------------------------------------------------------------------
// Search Signals
// --------------------------------------------------------------------------
@@ -549,7 +581,7 @@ export class Neo4jMemoryClient {
// Variable-length relationship pattern: 1..maxHops hops through entity relationships
const hopRange = `1..${Math.max(1, Math.min(3, maxHops))}`;
const result = await session.run(
`// Find matching entities via fulltext index
`// Find matching entities via fulltext index (SINGLE lookup)
CALL db.index.fulltext.queryNodes('entity_fulltext_index', $query)
YIELD node AS entity, score
WHERE score >= 0.5
@@ -557,37 +589,32 @@ export class Neo4jMemoryClient {
ORDER BY score DESC
LIMIT 5
// Direct: Entity ← MENTIONS ← Memory
// Collect direct mentions
OPTIONAL MATCH (entity)<-[rm:MENTIONS]-(m:Memory)
WHERE m IS NOT NULL ${agentFilter}
WITH m, coalesce(rm.confidence, 1.0) AS directScore, entity
WHERE m IS NOT NULL
WITH entity, collect({
id: m.id, text: m.text, category: m.category,
importance: m.importance, createdAt: m.createdAt,
score: coalesce(rm.confidence, 1.0)
}) AS directResults
RETURN m.id AS id, m.text AS text, m.category AS category,
m.importance AS importance, m.createdAt AS createdAt,
max(directScore) AS graphScore
UNION
// Find matching entities via fulltext index (repeated for UNION)
CALL db.index.fulltext.queryNodes('entity_fulltext_index', $query)
YIELD node AS entity, score
WHERE score >= 0.5
WITH entity
ORDER BY score DESC
LIMIT 5
// N-hop: Entity -[rels*1..N]-> Entity ← MENTIONS ← Memory
// N-hop spreading activation
OPTIONAL MATCH (entity)-[rels:${RELATIONSHIP_TYPE_PATTERN}*${hopRange}]-(e2:Entity)
WHERE ALL(r IN rels WHERE coalesce(r.confidence, 0.7) >= $firingThreshold)
OPTIONAL MATCH (e2)<-[rm:MENTIONS]-(m:Memory)
WHERE m IS NOT NULL ${agentFilter}
WITH m, reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm.confidence, 1.0) AS hopScore
WHERE m IS NOT NULL
OPTIONAL MATCH (e2)<-[rm2:MENTIONS]-(m2:Memory)
WHERE m2 IS NOT NULL ${agentFilter}
WITH directResults, collect({
id: m2.id, text: m2.text, category: m2.category,
importance: m2.importance, createdAt: m2.createdAt,
score: reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm2.confidence, 1.0)
}) AS hopResults
RETURN m.id AS id, m.text AS text, m.category AS category,
m.importance AS importance, m.createdAt AS createdAt,
max(hopScore) AS graphScore`,
// Combine and return
UNWIND (directResults + hopResults) AS row
WITH row WHERE row.id IS NOT NULL
RETURN row.id AS id, row.text AS text, row.category AS category,
row.importance AS importance, row.createdAt AS createdAt,
max(row.score) AS graphScore`,
{ query: escaped, firingThreshold, ...(agentId ? { agentId } : {}) },
);
@@ -613,7 +640,6 @@ export class Neo4jMemoryClient {
}
return Array.from(byId.values())
.slice()
.sort((a, b) => b.score - a.score)
.slice(0, limit);
} finally {
@@ -713,159 +739,6 @@ export class Neo4jMemoryClient {
// Entity & Relationship Operations
// --------------------------------------------------------------------------
/**
* Merge (upsert) an Entity node using MERGE pattern.
* Idempotent — safe to call multiple times for the same entity name.
*/
async mergeEntity(input: MergeEntityInput): Promise<{ id: string; name: string }> {
await this.ensureInitialized();
return this.retryOnTransient(async () => {
const session = this.driver!.session();
try {
const result = await session.run(
`MERGE (e:Entity {name: $name})
ON CREATE SET
e.id = $id, e.type = $type, e.aliases = $aliases,
e.description = $description,
e.firstSeen = $now, e.lastSeen = $now, e.mentionCount = 1
ON MATCH SET
e.type = COALESCE($type, e.type),
e.description = COALESCE($description, e.description),
e.lastSeen = $now,
e.mentionCount = e.mentionCount + 1
RETURN e.id AS id, e.name AS name`,
{
id: input.id,
name: input.name.trim().toLowerCase(),
type: input.type,
aliases: input.aliases ?? [],
description: input.description ?? null,
now: new Date().toISOString(),
},
);
const record = result.records[0];
return {
id: record.get("id") as string,
name: record.get("name") as string,
};
} finally {
await session.close();
}
});
}
/**
* Create a MENTIONS relationship between a Memory and an Entity.
*/
async createMentions(
memoryId: string,
entityName: string,
role: string = "context",
confidence: number = 1.0,
): Promise<void> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
await session.run(
`MATCH (m:Memory {id: $memoryId})
MATCH (e:Entity {name: $entityName})
MERGE (m)-[r:MENTIONS]->(e)
ON CREATE SET r.role = $role, r.confidence = $confidence`,
{ memoryId, entityName: entityName.trim().toLowerCase(), role, confidence },
);
} finally {
await session.close();
}
}
/**
* Create a typed relationship between two Entity nodes.
* The relationship type is validated against an allowlist before injection.
*/
async createEntityRelationship(
sourceName: string,
targetName: string,
relType: string,
confidence: number = 1.0,
): Promise<void> {
if (!validateRelationshipType(relType)) {
this.logger.warn(`memory-neo4j: rejected invalid relationship type: ${relType}`);
return;
}
await this.ensureInitialized();
const session = this.driver!.session();
try {
await session.run(
`MATCH (e1:Entity {name: $sourceName})
MATCH (e2:Entity {name: $targetName})
MERGE (e1)-[r:${relType}]->(e2)
ON CREATE SET r.confidence = $confidence, r.createdAt = $now
ON MATCH SET r.confidence = CASE WHEN $confidence > r.confidence THEN $confidence ELSE r.confidence END`,
{
sourceName: sourceName.trim().toLowerCase(),
targetName: targetName.trim().toLowerCase(),
confidence,
now: new Date().toISOString(),
},
);
} finally {
await session.close();
}
}
/**
* Merge a Tag node and link it to a Memory.
*/
async tagMemory(
memoryId: string,
tagName: string,
tagCategory: string,
confidence: number = 1.0,
): Promise<void> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
await session.run(
`MERGE (t:Tag {name: $tagName})
ON CREATE SET t.id = $tagId, t.category = $tagCategory, t.createdAt = $now
WITH t
MATCH (m:Memory {id: $memoryId})
MERGE (m)-[r:TAGGED]->(t)
ON CREATE SET r.confidence = $confidence`,
{
memoryId,
tagName: tagName.trim().toLowerCase(),
tagId: randomUUID(),
tagCategory,
confidence,
now: new Date().toISOString(),
},
);
} finally {
await session.close();
}
}
/**
* Update a memory's category. Only updates if current category is 'other'
* (auto-assigned) to avoid overriding user-explicit categorization.
*/
async updateMemoryCategory(id: string, category: string): Promise<void> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
await session.run(
`MATCH (m:Memory {id: $id})
WHERE m.category = 'other'
SET m.category = $category, m.updatedAt = $now`,
{ id, category, now: new Date().toISOString() },
);
} finally {
await session.close();
}
}
/**
* Update the extraction status of a Memory node.
* Optionally increments the extractionRetries counter (for transient failure tracking).
@@ -891,24 +764,6 @@ export class Neo4jMemoryClient {
}
}
/**
 * Read the extraction retry counter of a memory.
 *
 * @param id - id of the Memory node
 * @returns the stored `extractionRetries` value, or 0 when the property is
 *   unset (via `coalesce`) or when no record is returned for `id`.
 *
 * NOTE(review): the value is cast to `number` without conversion — confirm
 * the driver is configured to return native numbers for integer values.
 */
async getExtractionRetries(id: string): Promise<number> {
  await this.ensureInitialized();
  const cypher = `MATCH (m:Memory {id: $id})
RETURN coalesce(m.extractionRetries, 0) AS retries`;
  const neoSession = this.driver!.session();
  try {
    const { records } = await neoSession.run(cypher, { id });
    const retries = records[0]?.get("retries") as number;
    return retries ?? 0;
  } finally {
    // Always release the session, even when the query throws.
    await neoSession.close();
  }
}
/**
* Batch all entity operations from an extraction result into a single managed
* transaction. Replaces the previous pattern of N individual session-per-call
@@ -1154,21 +1009,20 @@ export class Neo4jMemoryClient {
> {
await this.ensureInitialized();
// Step 1: Fetch all memory metadata in a short-lived session
const memoryData = new Map<string, { text: string; importance: number }>();
// Step 1: Fetch only IDs and importance (not text) to reduce data transfer
const memoryMeta = new Map<string, { importance: number }>();
{
const session = this.driver!.session();
try {
const agentFilter = agentId ? "WHERE m.agentId = $agentId" : "";
const allResult = await session.run(
`MATCH (m:Memory) ${agentFilter}
RETURN m.id AS id, m.text AS text, m.importance AS importance`,
RETURN m.id AS id, m.importance AS importance`,
agentId ? { agentId } : {},
);
for (const r of allResult.records) {
memoryData.set(r.get("id") as string, {
text: r.get("text") as string,
memoryMeta.set(r.get("id") as string, {
importance: r.get("importance") as number,
});
}
@@ -1177,7 +1031,7 @@ export class Neo4jMemoryClient {
}
}
if (memoryData.size < 2) {
if (memoryMeta.size < 2) {
return [];
}
@@ -1207,16 +1061,11 @@ export class Neo4jMemoryClient {
}
};
// Helper to create a canonical pair key (sorted)
const makePairKey = (a: string, b: string): string => {
return a < b ? `${a}:${b}` : `${b}:${a}`;
};
// Process vector queries in concurrent batches to avoid overwhelming Neo4j
// while still being much faster than fully sequential execution.
const DEDUP_CONCURRENCY = 8;
let pairsFound = 0;
const allIds = [...memoryData.keys()];
const allIds = [...memoryMeta.keys()];
for (let batchStart = 0; batchStart < allIds.length; batchStart += DEDUP_CONCURRENCY) {
if (pairsFound > 500) {
@@ -1253,7 +1102,7 @@ export class Neo4jMemoryClient {
for (const r of similar.records) {
const matchId = r.get("matchId") as string;
if (memoryData.has(matchId)) {
if (memoryMeta.has(matchId)) {
union(id, matchId);
pairsFound++;
@@ -1274,7 +1123,7 @@ export class Neo4jMemoryClient {
// Step 3: Group by root
const clusters = new Map<string, string[]>();
for (const id of memoryData.keys()) {
for (const id of memoryMeta.keys()) {
if (!parent.has(id)) {
continue;
}
@@ -1285,38 +1134,61 @@ export class Neo4jMemoryClient {
clusters.get(root)!.push(id);
}
// Return clusters with 2+ members
return Array.from(clusters.values())
.filter((ids) => ids.length >= 2)
.map((ids) => {
const cluster: {
memoryIds: string[];
texts: string[];
importances: number[];
similarities?: Map<string, number>;
} = {
memoryIds: ids,
texts: ids.map((id) => memoryData.get(id)!.text),
importances: ids.map((id) => memoryData.get(id)!.importance),
};
// Step 4: Fetch text only for memories that are in clusters (not all memories)
const duplicateClusters = Array.from(clusters.values()).filter((ids) => ids.length >= 2);
const clusteredIds = new Set<string>();
for (const ids of duplicateClusters) {
for (const id of ids) clusteredIds.add(id);
}
// Include similarities for this cluster if requested
if (pairwiseSimilarities) {
const clusterSims = new Map<string, number>();
for (let i = 0; i < ids.length - 1; i++) {
for (let j = i + 1; j < ids.length; j++) {
const pairKey = makePairKey(ids[i], ids[j]);
const score = pairwiseSimilarities.get(pairKey);
if (score !== undefined) {
clusterSims.set(pairKey, score);
}
const textMap = new Map<string, string>();
if (clusteredIds.size > 0) {
const session = this.driver!.session();
try {
const result = await session.run(
`UNWIND $ids AS memId
MATCH (m:Memory {id: memId})
RETURN m.id AS id, m.text AS text`,
{ ids: [...clusteredIds] },
);
for (const r of result.records) {
textMap.set(r.get("id") as string, r.get("text") as string);
}
} finally {
await session.close();
}
}
// Return clusters with 2+ members
return duplicateClusters.map((ids) => {
const cluster: {
memoryIds: string[];
texts: string[];
importances: number[];
similarities?: Map<string, number>;
} = {
memoryIds: ids,
texts: ids.map((id) => textMap.get(id) ?? ""),
importances: ids.map((id) => memoryMeta.get(id)!.importance),
};
// Include similarities for this cluster if requested
if (pairwiseSimilarities) {
const clusterSims = new Map<string, number>();
for (let i = 0; i < ids.length - 1; i++) {
for (let j = i + 1; j < ids.length; j++) {
const pairKey = makePairKey(ids[i], ids[j]);
const score = pairwiseSimilarities.get(pairKey);
if (score !== undefined) {
clusterSims.set(pairKey, score);
}
}
cluster.similarities = clusterSims;
}
cluster.similarities = clusterSims;
}
return cluster;
});
return cluster;
});
}
/**
@@ -1420,8 +1292,8 @@ export class Neo4jMemoryClient {
*
* A memory with importance=1.0 decays slower than one with importance=0.3.
*
* IMPORTANT: Core memories (category='core') are EXEMPT from decay.
* They persist indefinitely regardless of age.
* IMPORTANT: Core memories (category='core') and user-pinned memories
* are EXEMPT from decay. They persist indefinitely regardless of age.
*/
async findDecayedMemories(
options: {
@@ -1473,6 +1345,7 @@ export class Neo4jMemoryClient {
`MATCH (m:Memory)
WHERE m.createdAt IS NOT NULL
AND m.category <> 'core'
AND coalesce(m.userPinned, false) = false
${agentFilter}
WITH m,
duration.between(datetime(m.createdAt), datetime()).days AS ageDays,
@@ -1659,7 +1532,7 @@ export class Neo4jMemoryClient {
/**
* Find memory pairs that share at least one entity (via MENTIONS relationships).
* These are candidates for conflict resolution — the LLM decides if they truly conflict.
* Excludes core memories (conflicts there are handled by promotion/demotion).
* Excludes core memories (conflicts there are handled by promotion).
*/
async findConflictingMemories(agentId?: string): Promise<
Array<{
@@ -1729,8 +1602,8 @@ export class Neo4jMemoryClient {
* Calculate effective scores for all memories to determine Pareto threshold.
*
* Uses: importance × freq_boost × recency for ALL memories (including core).
* This gives core memories a slight disadvantage (they need strong retrieval
* patterns to stay in top 20%), creating healthy churn.
* User-pinned core memories are excluded — they have fixed importance=1.0
* and should not influence the Pareto threshold calculation.
*/
async calculateAllEffectiveScores(agentId?: string): Promise<
Array<{
@@ -1747,8 +1620,8 @@ export class Neo4jMemoryClient {
const session = this.driver!.session();
try {
const agentFilter = agentId
? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL"
: "WHERE m.createdAt IS NOT NULL";
? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL AND coalesce(m.userPinned, false) = false"
: "WHERE m.createdAt IS NOT NULL AND coalesce(m.userPinned, false) = false";
const result = await session.run(
`MATCH (m:Memory)
${agentFilter}
@@ -1761,7 +1634,7 @@ export class Neo4jMemoryClient {
END AS daysSinceRetrieval
WITH m, retrievalCount, ageDays, daysSinceRetrieval,
// Effective score: importance × freq_boost × recency
// This is used for global ranking (promotion/demotion threshold)
// This is used for global ranking (promotion threshold)
m.importance * (1 + log(1 + retrievalCount) * 0.3) *
CASE
WHEN daysSinceRetrieval IS NULL THEN 0.1
@@ -1788,7 +1661,7 @@ export class Neo4jMemoryClient {
}
/**
* Calculate the Pareto threshold (80th percentile) for promotion/demotion.
* Calculate the Pareto threshold (80th percentile) for promotion.
* Returns the effective score that separates top 20% from bottom 80%.
*/
calculateParetoThreshold(
@@ -1836,33 +1709,6 @@ export class Neo4jMemoryClient {
}
}
/**
 * Move the given memories out of the 'core' category.
 *
 * Only nodes that are currently 'core' are touched. Their category becomes
 * 'fact' (the original category is not tracked, so 'fact' is the fallback)
 * and both `demotedAt` and `updatedAt` are stamped with the current time.
 *
 * @param memoryIds - ids of the memories to demote; an empty list is a no-op
 * @returns the number of memories that were demoted
 */
async demoteFromCore(memoryIds: string[]): Promise<number> {
  // Nothing to do — avoid opening a session for an empty batch.
  if (memoryIds.length === 0) return 0;

  await this.ensureInitialized();
  const cypher = `UNWIND $ids AS memId
MATCH (m:Memory {id: memId})
WHERE m.category = 'core'
SET m.category = 'fact', m.demotedAt = $now, m.updatedAt = $now
RETURN count(*) AS demoted`;
  const neoSession = this.driver!.session();
  try {
    const stamp = new Date().toISOString();
    const { records } = await neoSession.run(cypher, { ids: memoryIds, now: stamp });
    const demoted = records[0]?.get("demoted") as number;
    return demoted ?? 0;
  } finally {
    // Always release the session, even when the query throws.
    await neoSession.close();
  }
}
// --------------------------------------------------------------------------
// Reindex: re-embed all Memory and Entity nodes
// --------------------------------------------------------------------------

View File

@@ -2,6 +2,17 @@
* Graph schema types, Cypher query templates, and constants for memory-neo4j.
*/
// ============================================================================
// Shared Types
// ============================================================================
/**
 * Minimal logging interface shared across the memory-neo4j modules.
 * `info`, `warn` and `error` are required; `debug` is optional, so callers
 * must use optional chaining (`logger.debug?.(...)`) when emitting debug output.
 */
export type Logger = {
info: (msg: string) => void;
warn: (msg: string) => void;
error: (msg: string) => void;
debug?: (msg: string) => void;
};
// ============================================================================
// Node Types
// ============================================================================
@@ -32,7 +43,7 @@ export type MemoryNode = {
retrievalCount: number;
lastRetrievedAt?: string;
promotedAt?: string;
demotedAt?: string;
userPinned?: boolean;
};
export type EntityNode = {
@@ -119,6 +130,7 @@ export type StoreMemoryInput = {
extractionStatus: ExtractionStatus;
agentId: string;
sessionKey?: string;
userPinned?: boolean;
};
export type MergeEntityInput = {
@@ -174,3 +186,10 @@ export function escapeLucene(query: string): string {
export function validateRelationshipType(type: string): boolean {
// True only when `type` is an exact member of ALLOWED_RELATIONSHIP_TYPES.
// NOTE(review): presumably used to guard relationship types before they are
// interpolated into Cypher — confirm against callers.
return ALLOWED_RELATIONSHIP_TYPES.has(type);
}
/**
 * Build an order-independent key for a pair of IDs.
 *
 * The two IDs are ordered with the `<` string comparison and joined with a
 * colon, so `makePairKey(x, y)` and `makePairKey(y, x)` yield the same key.
 *
 * @param a - first ID
 * @param b - second ID
 * @returns `"<smaller>:<larger>"`
 */
export function makePairKey(a: string, b: string): string {
  const [first, second] = a < b ? [a, b] : [b, a];
  return `${first}:${second}`;
}

View File

@@ -14,7 +14,7 @@
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import type { HybridSearchResult, SearchSignalResult } from "./schema.js";
import type { HybridSearchResult, Logger, SearchSignalResult } from "./schema.js";
// ============================================================================
// Query Classification
@@ -214,6 +214,7 @@ export async function hybridSearch(
candidateMultiplier?: number;
graphFiringThreshold?: number;
graphSearchDepth?: number;
logger?: Logger;
} = {},
): Promise<HybridSearchResult[]> {
// Guard against empty queries
@@ -226,12 +227,15 @@ export async function hybridSearch(
candidateMultiplier = 4,
graphFiringThreshold = 0.3,
graphSearchDepth = 1,
logger,
} = options;
const candidateLimit = Math.floor(Math.min(200, Math.max(1, limit * candidateMultiplier)));
// 1. Generate query embedding
const t0 = performance.now();
const queryEmbedding = await embeddings.embed(query);
const tEmbed = performance.now();
// 2. Classify query and get adaptive weights
const queryType = classifyQuery(query);
@@ -245,9 +249,11 @@ export async function hybridSearch(
? db.graphSearch(query, candidateLimit, graphFiringThreshold, agentId, graphSearchDepth)
: Promise.resolve([] as SearchSignalResult[]),
]);
const tSignals = performance.now();
// 4. Fuse with confidence-weighted RRF
const fused = fuseWithConfidenceRRF([vectorResults, bm25Results, graphResults], rrfK, weights);
const tFuse = performance.now();
// 5. Return top results, normalized to 0-100% display scores.
// Only normalize when maxRrf is above a minimum threshold to avoid
@@ -275,5 +281,11 @@ export async function hybridSearch(
});
}
// Log search timing breakdown (total, then embed / signal fetch / fusion),
// followed by the query type, per-signal candidate counts and the number of
// results returned. The " -> " separator keeps the graph candidate count and
// the result count from running together into a single number.
logger?.info?.(
  `memory-neo4j: [bench] hybridSearch ${(tFuse - t0).toFixed(0)}ms (embed=${(tEmbed - t0).toFixed(0)}ms, signals=${(tSignals - tEmbed).toFixed(0)}ms, fuse=${(tFuse - tSignals).toFixed(0)}ms) ` +
    `type=${queryType} vec=${vectorResults.length} bm25=${bm25Results.length} graph=${graphResults.length} -> ${results.length} results`,
);
return results;
}

View File

@@ -0,0 +1,663 @@
/**
 * Multi-phase sleep cycle for memory consolidation.
 *
 * Implements a Pareto-based memory ecosystem where memories in the top 20%
 * by effective score are promoted to core memory.
 *
 * Phases (in execution order):
 * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
 *    1a. Vector merge, 1b. LLM semantic dedup, 1c. Conflict detection
 * 2. PARETO SCORING - Calculate effective scores for all memories
 * 3. CORE PROMOTION - Regular memories above threshold -> core
 *    (one-way: core memories are never auto-demoted; bad core memories
 *    are removed manually via memory_forget)
 * 4. EXTRACTION - Form entity relationships (strengthen connections)
 * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
 * 6. CLEANUP - Remove orphaned entities/tags (garbage collection)
 *
 * Research basis:
 * - Pareto principle (20/80 rule) for memory tiering
 * - ACT-R memory model for retrieval-based importance
 * - Ebbinghaus forgetting curve for decay
 * - MemGPT/Letta for tiered memory architecture
 */
import type { ExtractionConfig } from "./config.js";
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import type { Logger } from "./schema.js";
import { isSemanticDuplicate, resolveConflict, runBackgroundExtraction } from "./extractor.js";
import { makePairKey } from "./schema.js";
/**
* Sleep Cycle Result - aggregated stats from all phases.
*
* Every counter starts at zero and is only updated by the phase that owns it,
* so a phase that was skipped, aborted or failed leaves its section at zero.
*/
export type SleepCycleResult = {
// Phase 1a: Vector deduplication (clusters with a pair at/above dedupThreshold)
dedup: {
clustersFound: number;
memoriesMerged: number;
};
// Phase 1c: Conflict Detection
conflict: {
pairsFound: number;
resolved: number;
invalidated: number;
};
// Phase 1b: Semantic Deduplication (LLM-checked pairs)
semanticDedup: {
pairsChecked: number;
duplicatesMerged: number;
};
// Phase 2: Pareto Scoring & Threshold
pareto: {
totalMemories: number;
coreMemories: number;
regularMemories: number;
threshold: number; // Effective score at the (1 - paretoPercentile) percentile (80th by default)
};
// Phase 3: Core Promotion
promotion: {
candidatesFound: number;
promoted: number;
};
// Phase 4: Entity Extraction
extraction: {
total: number;
processed: number;
succeeded: number;
failed: number;
};
// Phase 5: Decay & Pruning
decay: {
memoriesPruned: number;
};
// Phase 6: Orphan Cleanup
cleanup: {
entitiesRemoved: number;
tagsRemoved: number;
};
// Overall
durationMs: number;
aborted: boolean;
};
/**
* Options accepted by runSleepCycle. Every field is optional; the defaults
* noted below are the ones applied when runSleepCycle destructures the options.
*/
export type SleepCycleOptions = {
// Common
agentId?: string;
abortSignal?: AbortSignal;
// Phase 1: Deduplication
// Clusters containing any pair at/above this similarity are merged without an LLM check.
dedupThreshold?: number; // Vector similarity threshold (default: 0.95)
skipSemanticDedup?: boolean; // Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)
// Phase 2-3: Pareto-based Promotion
paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%)
promotionMinAgeDays?: number; // Min age before promotion (default: 7)
// Phase 1b: Semantic Dedup
maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500)
// Concurrency (used by Phases 1b, 1c and 4)
llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL)
// Phase 4: Extraction
extractionBatchSize?: number; // Memories per batch (default: 50)
extractionDelayMs?: number; // Delay between batches in ms (default: 1000)
// Phase 5: Decay
decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
decayCurves?: Record<string, { halfLifeDays: number }>; // Per-category decay curve overrides
// Progress callback — invoked once when each phase begins
onPhaseStart?: (
phase:
| "dedup"
| "conflict"
| "semanticDedup"
| "pareto"
| "promotion"
| "decay"
| "extraction"
| "cleanup",
) => void;
// Free-form progress messages emitted from within a phase
onProgress?: (phase: string, message: string) => void;
};
// ============================================================================
// Sleep Cycle Implementation
// ============================================================================
/**
 * Resolve after `ms` milliseconds, or as soon as `signal` aborts.
 *
 * The abort listener is detached when the timer fires normally, so repeated
 * delays against the same long-lived signal do not accumulate listeners.
 */
function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve) => {
    if (signal?.aborted) {
      resolve();
      return;
    }
    let timer: ReturnType<typeof setTimeout> | undefined;
    const onAbort = (): void => {
      clearTimeout(timer);
      resolve();
    };
    timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}

/**
 * Run the full sleep cycle - six phases of memory consolidation.
 *
 * Phases, in execution order:
 *   1.  Deduplication (1a vector merge, 1b LLM semantic dedup, 1c conflict detection)
 *   2.  Pareto scoring and threshold calculation
 *   3.  Core promotion (one-way; nothing is auto-demoted)
 *   4.  Entity extraction (only when `config.enabled`)
 *   5.  Decay and pruning
 *   6.  Orphan entity/tag cleanup
 *
 * Each phase is wrapped in its own try/catch: a failure is logged as a warning
 * and the cycle continues with the next phase. Every phase is skipped once
 * `abortSignal` has fired; `result.aborted` reports whether that happened.
 *
 * Effective Score Formulas:
 * - Regular memories: importance x freq_boost x recency
 * - Core memories: importance x freq_boost x recency (same for threshold comparison)
 * - Core memory retrieval ranking: freq_boost x recency (pure usage-based)
 *
 * Where:
 * - freq_boost = 1 + log(1 + retrievalCount) x 0.3
 * - recency = 2^(-days_since_last / 14)
 *
 * @param db - Neo4j client used for all reads and writes
 * @param embeddings - embedding provider forwarded to background extraction
 * @param config - extraction/LLM configuration; `config.enabled` gates Phase 4
 * @param logger - receives per-phase info and warning messages
 * @param options - tuning knobs and progress callbacks (see SleepCycleOptions)
 * @returns aggregated per-phase statistics plus total duration and abort flag
 */
export async function runSleepCycle(
  db: Neo4jMemoryClient,
  embeddings: Embeddings,
  config: ExtractionConfig,
  logger: Logger,
  options: SleepCycleOptions = {},
): Promise<SleepCycleResult> {
  const startTime = Date.now();
  const {
    agentId,
    abortSignal,
    dedupThreshold = 0.95,
    skipSemanticDedup = false,
    maxSemanticDedupPairs = 500,
    llmConcurrency = 8,
    paretoPercentile = 0.2,
    promotionMinAgeDays = 7,
    decayRetentionThreshold = 0.1,
    decayBaseHalfLifeDays = 30,
    decayImportanceMultiplier = 2,
    decayCurves,
    extractionBatchSize = 50,
    extractionDelayMs = 1000,
    onPhaseStart,
    onProgress,
  } = options;

  const result: SleepCycleResult = {
    dedup: { clustersFound: 0, memoriesMerged: 0 },
    conflict: { pairsFound: 0, resolved: 0, invalidated: 0 },
    semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 },
    pareto: {
      totalMemories: 0,
      coreMemories: 0,
      regularMemories: 0,
      threshold: 0,
    },
    promotion: { candidatesFound: 0, promoted: 0 },
    decay: { memoriesPruned: 0 },
    extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
    cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
    durationMs: 0,
    aborted: false,
  };

  // --------------------------------------------------------------------------
  // Phase 1: Deduplication (Optimized - combined vector + semantic dedup)
  // Call findDuplicateClusters ONCE at 0.75 threshold, then split by similarity band:
  // - >=dedupThreshold (default 0.95): vector merge (high-confidence duplicates)
  // - 0.75 up to dedupThreshold: semantic dedup via LLM (paraphrases)
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("dedup");
    logger.info("memory-neo4j: [sleep] Phase 1: Deduplication (vector + semantic)");
    try {
      // Fetch clusters at 0.75 threshold with similarity scores
      const allClusters = await db.findDuplicateClusters(0.75, agentId, true);

      // Separate clusters into high-similarity and medium-similarity bands
      const highSimClusters: typeof allClusters = [];
      const mediumSimClusters: typeof allClusters = [];
      for (const cluster of allClusters) {
        if (abortSignal?.aborted) break;
        if (!cluster.similarities || cluster.memoryIds.length < 2) continue;

        // Check if ANY pair in this cluster has similarity >= dedupThreshold
        let hasHighSim = false;
        for (const score of cluster.similarities.values()) {
          if (score >= dedupThreshold) {
            hasHighSim = true;
            break;
          }
        }

        if (hasHighSim) {
          // For simplicity, if a cluster has ANY high-sim pair, treat the whole
          // cluster as high-sim (this matches the old behavior where Phase 1
          // would merge them all).
          highSimClusters.push(cluster);
        } else {
          mediumSimClusters.push(cluster);
        }
      }

      // Part 1a: Vector merge for high-similarity clusters
      result.dedup.clustersFound = highSimClusters.length;
      for (const cluster of highSimClusters) {
        if (abortSignal?.aborted) break;
        const { deletedCount } = await db.mergeMemoryCluster(
          cluster.memoryIds,
          cluster.importances,
        );
        result.dedup.memoriesMerged += deletedCount;
        onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} -> 1 (vector)`);
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 1a (vector) complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`,
      );

      // Part 1b: Semantic dedup for medium-similarity clusters
      onPhaseStart?.("semanticDedup");
      if (skipSemanticDedup) {
        logger.info("memory-neo4j: [sleep] Phase 1b: Skipped (--skip-semantic)");
        onProgress?.("semanticDedup", "Skipped — semantic dedup disabled");
      } else {
        logger.info("memory-neo4j: [sleep] Phase 1b: Semantic Deduplication (0.75-0.95 band)");

        // Collect all candidate pairs upfront (with pairwise similarity for pre-screening)
        type DedupPair = {
          textA: string;
          textB: string;
          idA: string;
          idB: string;
          importanceA: number;
          importanceB: number;
          similarity?: number;
        };
        const allPairs: DedupPair[] = [];
        for (const cluster of mediumSimClusters) {
          if (cluster.memoryIds.length < 2) continue;
          for (let i = 0; i < cluster.memoryIds.length - 1; i++) {
            for (let j = i + 1; j < cluster.memoryIds.length; j++) {
              const pairKey = makePairKey(cluster.memoryIds[i], cluster.memoryIds[j]);
              allPairs.push({
                textA: cluster.texts[i],
                textB: cluster.texts[j],
                idA: cluster.memoryIds[i],
                idB: cluster.memoryIds[j],
                importanceA: cluster.importances[i],
                importanceB: cluster.importances[j],
                similarity: cluster.similarities?.get(pairKey),
              });
            }
          }
        }

        // Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
        // Sort by similarity descending so higher-similarity pairs (more likely
        // to be duplicates) are checked first.
        if (allPairs.length > maxSemanticDedupPairs) {
          allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
          const skipped = allPairs.length - maxSemanticDedupPairs;
          allPairs.length = maxSemanticDedupPairs;
          onProgress?.(
            "semanticDedup",
            `Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`,
          );
          logger.info(
            `memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`,
          );
        }

        // Process pairs in concurrent batches
        const invalidatedIds = new Set<string>();
        for (let i = 0; i < allPairs.length && !abortSignal?.aborted; i += llmConcurrency) {
          const batch = allPairs.slice(i, i + llmConcurrency);
          // Filter out pairs where one side was already invalidated
          const activeBatch = batch.filter(
            (p) => !invalidatedIds.has(p.idA) && !invalidatedIds.has(p.idB),
          );
          if (activeBatch.length === 0) continue;

          const outcomes = await Promise.allSettled(
            activeBatch.map((p) =>
              isSemanticDuplicate(p.textA, p.textB, config, p.similarity, abortSignal),
            ),
          );

          for (let k = 0; k < outcomes.length; k++) {
            const pair = activeBatch[k];
            result.semanticDedup.pairsChecked++;
            if (
              outcomes[k].status === "fulfilled" &&
              (outcomes[k] as PromiseFulfilledResult<boolean>).value
            ) {
              // Skip if either side was invalidated by an earlier result in this batch
              if (invalidatedIds.has(pair.idA) || invalidatedIds.has(pair.idB)) continue;

              // Keep the more important memory (A wins ties)
              const keepId = pair.importanceA >= pair.importanceB ? pair.idA : pair.idB;
              const removeId = keepId === pair.idA ? pair.idB : pair.idA;
              const keepText = keepId === pair.idA ? pair.textA : pair.textB;
              const removeText = removeId === pair.idA ? pair.textA : pair.textB;
              await db.invalidateMemory(removeId);
              invalidatedIds.add(removeId);
              result.semanticDedup.duplicatesMerged++;
              onProgress?.(
                "semanticDedup",
                `Merged: "${removeText.slice(0, 50)}..." -> kept "${keepText.slice(0, 50)}..."`,
              );
            }
          }
        }
        logger.info(
          `memory-neo4j: [sleep] Phase 1b (semantic) complete — ${result.semanticDedup.pairsChecked} pairs checked, ${result.semanticDedup.duplicatesMerged} merged`,
        );
      }
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 1c: Conflict Detection (formerly Phase 1b)
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted && !skipSemanticDedup) {
    onPhaseStart?.("conflict");
    logger.info("memory-neo4j: [sleep] Phase 1c: Conflict Detection");
    try {
      const pairs = await db.findConflictingMemories(agentId);
      result.conflict.pairsFound = pairs.length;

      // Track memories invalidated during this phase. Without this, a memory
      // that already lost one conflict could still "win" a later pair and
      // cause its (live) counterpart to be invalidated as well.
      const invalidatedByConflict = new Set<string>();
      const isLive = (pair: (typeof pairs)[number]): boolean =>
        !invalidatedByConflict.has(pair.memoryA.id) &&
        !invalidatedByConflict.has(pair.memoryB.id);

      // Process conflict pairs in parallel chunks of llmConcurrency
      for (let i = 0; i < pairs.length && !abortSignal?.aborted; i += llmConcurrency) {
        const chunk = pairs.slice(i, i + llmConcurrency).filter(isLive);
        if (chunk.length === 0) continue;

        const outcomes = await Promise.allSettled(
          chunk.map((pair) =>
            resolveConflict(pair.memoryA.text, pair.memoryB.text, config, abortSignal),
          ),
        );

        for (let k = 0; k < outcomes.length; k++) {
          if (abortSignal?.aborted) break;
          const pair = chunk[k];
          const outcome = outcomes[k];
          if (outcome.status !== "fulfilled") continue;
          // Skip if either side was invalidated by an earlier result in this chunk
          if (!isLive(pair)) continue;

          const decision = outcome.value;
          if (decision === "a") {
            await db.invalidateMemory(pair.memoryB.id);
            invalidatedByConflict.add(pair.memoryB.id);
            result.conflict.invalidated++;
            result.conflict.resolved++;
            onProgress?.(
              "conflict",
              `Kept A, invalidated B: "${pair.memoryB.text.slice(0, 40)}..."`,
            );
          } else if (decision === "b") {
            await db.invalidateMemory(pair.memoryA.id);
            invalidatedByConflict.add(pair.memoryA.id);
            result.conflict.invalidated++;
            result.conflict.resolved++;
            onProgress?.(
              "conflict",
              `Kept B, invalidated A: "${pair.memoryA.text.slice(0, 40)}..."`,
            );
          } else if (decision === "both") {
            result.conflict.resolved++;
            onProgress?.("conflict", `Kept both: no real conflict`);
          }
          // "skip" = LLM unavailable, don't count as resolved
        }
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 1c complete — ${result.conflict.pairsFound} pairs, ${result.conflict.resolved} resolved, ${result.conflict.invalidated} invalidated`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 1c error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 2: Pareto Scoring & Threshold Calculation
  // --------------------------------------------------------------------------
  let paretoThreshold = 0;
  let allScores: Awaited<ReturnType<typeof db.calculateAllEffectiveScores>> = [];
  if (!abortSignal?.aborted) {
    onPhaseStart?.("pareto");
    logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring");
    try {
      allScores = await db.calculateAllEffectiveScores(agentId);
      result.pareto.totalMemories = allScores.length;
      result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length;
      result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length;

      // Calculate the threshold for top N% (default: top 20%)
      paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile);
      result.pareto.threshold = paretoThreshold;

      onProgress?.(
        "pareto",
        `Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
      );
      onProgress?.(
        "pareto",
        `Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`,
      );
      logger.info(
        `memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 3: Core Promotion (using pre-computed scores from Phase 2)
  //
  // Design note on staleness: The effective scores and Pareto threshold were
  // computed in Phase 2 and may be slightly stale by the time Phase 3 runs.
  // This is acceptable because: (a) the sleep cycle is a background maintenance
  // task that runs infrequently (not concurrent with itself), (b) the scoring
  // formula is deterministic based on stored properties that change slowly, and
  // (c) promotion is a one-way operation (core memories are never auto-demoted;
  // bad core memories are handled manually via memory_forget). The alternative
  // (re-querying scores per phase) adds latency without meaningful accuracy gain.
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted && paretoThreshold > 0) {
    onPhaseStart?.("promotion");
    logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion");
    try {
      const candidates = allScores.filter(
        (s) =>
          s.category !== "core" &&
          s.effectiveScore >= paretoThreshold &&
          s.ageDays >= promotionMinAgeDays,
      );
      result.promotion.candidatesFound = candidates.length;

      if (candidates.length > 0) {
        const ids = candidates.map((m) => m.id);
        result.promotion.promoted = await db.promoteToCore(ids);
        for (const c of candidates) {
          onProgress?.(
            "promotion",
            `Promoted "${c.text.slice(0, 40)}..." (score=${c.effectiveScore.toFixed(3)}, ${c.retrievalCount} retrievals)`,
          );
        }
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 3 complete — ${result.promotion.promoted} memories promoted to core`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 4: Entity Extraction (moved before decay so new memories get
  // extracted before pruning can remove them)
  // --------------------------------------------------------------------------
  // Extraction uses llmConcurrency (defined above, matches OLLAMA_NUM_PARALLEL)
  if (!abortSignal?.aborted && config.enabled) {
    onPhaseStart?.("extraction");
    logger.info("memory-neo4j: [sleep] Phase 4: Entity Extraction");
    try {
      // Get initial count
      const counts = await db.countByExtractionStatus(agentId);
      result.extraction.total = counts.pending;

      if (result.extraction.total > 0) {
        let hasMore = true;
        while (hasMore && !abortSignal?.aborted) {
          const pending = await db.listPendingExtractions(extractionBatchSize, agentId);
          if (pending.length === 0) {
            hasMore = false;
            break;
          }

          // Process in parallel chunks of llmConcurrency
          for (let i = 0; i < pending.length && !abortSignal?.aborted; i += llmConcurrency) {
            const chunk = pending.slice(i, i + llmConcurrency);
            const outcomes = await Promise.allSettled(
              chunk.map((memory) =>
                runBackgroundExtraction(
                  memory.id,
                  memory.text,
                  db,
                  embeddings,
                  config,
                  logger,
                  memory.extractionRetries,
                  abortSignal,
                ),
              ),
            );

            for (const outcome of outcomes) {
              result.extraction.processed++;
              if (outcome.status === "fulfilled" && outcome.value.success) {
                result.extraction.succeeded++;
              } else {
                result.extraction.failed++;
              }
            }

            if (result.extraction.processed % 10 === 0 || i + llmConcurrency >= pending.length) {
              onProgress?.(
                "extraction",
                `${result.extraction.processed}/${result.extraction.total} processed`,
              );
            }
          }

          // Delay between batches (abort-aware; resolves early if abort fires)
          if (hasMore && !abortSignal?.aborted) {
            await abortableDelay(extractionDelayMs, abortSignal);
          }
        }
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 4 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`);
    }
  } else if (!config.enabled) {
    logger.info("memory-neo4j: [sleep] Phase 4 skipped — extraction not enabled");
  }

  // --------------------------------------------------------------------------
  // Phase 5: Decay & Pruning (after extraction so freshly extracted memories
  // aren't pruned before they build entity connections)
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("decay");
    logger.info("memory-neo4j: [sleep] Phase 5: Decay & Pruning");
    try {
      const decayed = await db.findDecayedMemories({
        retentionThreshold: decayRetentionThreshold,
        baseHalfLifeDays: decayBaseHalfLifeDays,
        importanceMultiplier: decayImportanceMultiplier,
        decayCurves,
        agentId,
      });

      if (decayed.length > 0) {
        const ids = decayed.map((m) => m.id);
        result.decay.memoriesPruned = await db.pruneMemories(ids);
        onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 5 complete — ${result.decay.memoriesPruned} memories pruned`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 6: Orphan Cleanup
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("cleanup");
    logger.info("memory-neo4j: [sleep] Phase 6: Orphan Cleanup");
    try {
      // Clean up orphan entities
      if (!abortSignal?.aborted) {
        const orphanEntities = await db.findOrphanEntities();
        if (orphanEntities.length > 0) {
          result.cleanup.entitiesRemoved = await db.deleteOrphanEntities(
            orphanEntities.map((e) => e.id),
          );
          onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`);
        }
      }

      // Clean up orphan tags
      if (!abortSignal?.aborted) {
        const orphanTags = await db.findOrphanTags();
        if (orphanTags.length > 0) {
          result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id));
          onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`);
        }
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 6 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
    }
  }

  result.durationMs = Date.now() - startTime;
  result.aborted = abortSignal?.aborted ?? false;
  logger.info(
    `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` +
      (result.aborted ? " (aborted)" : ""),
  );
  return result;
}