mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-02 15:27:14 +00:00
memory-neo4j: configurable extraction model + sleep cycle optimizations
- Add extraction config section (apiKey, model, baseUrl) to plugin schema with env-var fallback and Ollama/local LLM support (no API key required) - Add category classification to extraction prompt; update memories from 'other' to LLM-assigned category - Reorder sleep phases: extraction before decay - Parallelize extraction (3 concurrent via Promise.allSettled) - Pre-compute effective scores once and reuse for promotion/demotion - Replace O(n²) Cartesian dedup with per-memory HNSW vector index queries - Use mentionCount for orphan entity detection instead of subquery - Remove dead auto-capture code (evaluateAutoCapture, CaptureItem, etc.)
This commit is contained in:
@@ -19,6 +19,11 @@ export type MemoryNeo4jConfig = {
|
|||||||
model: string;
|
model: string;
|
||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
};
|
};
|
||||||
|
extraction?: {
|
||||||
|
apiKey?: string;
|
||||||
|
model: string;
|
||||||
|
baseUrl: string;
|
||||||
|
};
|
||||||
autoCapture: boolean;
|
autoCapture: boolean;
|
||||||
autoRecall: boolean;
|
autoRecall: boolean;
|
||||||
coreMemory: {
|
coreMemory: {
|
||||||
@@ -101,16 +106,26 @@ function resolveEnvVars(value: string): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolve extraction config from environment variables.
|
* Resolve extraction config from plugin config with env var fallback.
|
||||||
* Returns enabled: false if OPENROUTER_API_KEY is not set.
|
* Enabled when an API key is available (cloud) or a baseUrl is explicitly
|
||||||
|
* configured (Ollama / local LLMs that don't need a key).
|
||||||
*/
|
*/
|
||||||
export function resolveExtractionConfig(): ExtractionConfig {
|
export function resolveExtractionConfig(
|
||||||
const apiKey = process.env.OPENROUTER_API_KEY ?? "";
|
cfgExtraction?: MemoryNeo4jConfig["extraction"],
|
||||||
|
): ExtractionConfig {
|
||||||
|
const apiKey = cfgExtraction?.apiKey ?? process.env.OPENROUTER_API_KEY ?? "";
|
||||||
|
const model =
|
||||||
|
cfgExtraction?.model ?? process.env.EXTRACTION_MODEL ?? "google/gemini-2.0-flash-001";
|
||||||
|
const baseUrl =
|
||||||
|
cfgExtraction?.baseUrl ?? process.env.EXTRACTION_BASE_URL ?? "https://openrouter.ai/api/v1";
|
||||||
|
// Enabled when an API key is set (cloud provider) or baseUrl was explicitly
|
||||||
|
// configured in the plugin config (Ollama / local — no key needed).
|
||||||
|
const enabled = apiKey.length > 0 || cfgExtraction?.baseUrl != null;
|
||||||
return {
|
return {
|
||||||
enabled: apiKey.length > 0,
|
enabled,
|
||||||
apiKey,
|
apiKey,
|
||||||
model: process.env.EXTRACTION_MODEL ?? "google/gemini-2.0-flash-001",
|
model,
|
||||||
baseUrl: process.env.EXTRACTION_BASE_URL ?? "https://openrouter.ai/api/v1",
|
baseUrl,
|
||||||
temperature: 0.0,
|
temperature: 0.0,
|
||||||
maxRetries: 2,
|
maxRetries: 2,
|
||||||
};
|
};
|
||||||
@@ -136,7 +151,7 @@ export const memoryNeo4jConfigSchema = {
|
|||||||
const cfg = value as Record<string, unknown>;
|
const cfg = value as Record<string, unknown>;
|
||||||
assertAllowedKeys(
|
assertAllowedKeys(
|
||||||
cfg,
|
cfg,
|
||||||
["embedding", "neo4j", "autoCapture", "autoRecall", "coreMemory"],
|
["embedding", "neo4j", "autoCapture", "autoRecall", "coreMemory", "extraction"],
|
||||||
"memory-neo4j config",
|
"memory-neo4j config",
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -205,6 +220,26 @@ export const memoryNeo4jConfigSchema = {
|
|||||||
? coreMemoryRaw.refreshAtContextPercent
|
? coreMemoryRaw.refreshAtContextPercent
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
|
// Parse extraction section (optional — falls back to env vars in resolveExtractionConfig)
|
||||||
|
const extractionRaw = cfg.extraction as Record<string, unknown> | undefined;
|
||||||
|
assertAllowedKeys(extractionRaw ?? {}, ["apiKey", "model", "baseUrl"], "extraction config");
|
||||||
|
let extraction: MemoryNeo4jConfig["extraction"];
|
||||||
|
if (extractionRaw) {
|
||||||
|
const exApiKey =
|
||||||
|
typeof extractionRaw.apiKey === "string" ? resolveEnvVars(extractionRaw.apiKey) : undefined;
|
||||||
|
const exModel = typeof extractionRaw.model === "string" ? extractionRaw.model : undefined;
|
||||||
|
const exBaseUrl =
|
||||||
|
typeof extractionRaw.baseUrl === "string" ? extractionRaw.baseUrl : undefined;
|
||||||
|
// Only include if at least one field was provided
|
||||||
|
if (exApiKey || exModel || exBaseUrl) {
|
||||||
|
extraction = {
|
||||||
|
apiKey: exApiKey,
|
||||||
|
model: exModel ?? (process.env.EXTRACTION_MODEL || "google/gemini-2.0-flash-001"),
|
||||||
|
baseUrl: exBaseUrl ?? (process.env.EXTRACTION_BASE_URL || "https://openrouter.ai/api/v1"),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
neo4j: {
|
neo4j: {
|
||||||
uri: neo4jRaw.uri,
|
uri: neo4jRaw.uri,
|
||||||
@@ -217,6 +252,7 @@ export const memoryNeo4jConfigSchema = {
|
|||||||
model: embeddingModel,
|
model: embeddingModel,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
},
|
},
|
||||||
|
extraction,
|
||||||
autoCapture: cfg.autoCapture !== false,
|
autoCapture: cfg.autoCapture !== false,
|
||||||
autoRecall: cfg.autoRecall !== false,
|
autoRecall: cfg.autoRecall !== false,
|
||||||
coreMemory: {
|
coreMemory: {
|
||||||
|
|||||||
@@ -1,19 +1,19 @@
|
|||||||
/**
|
/**
|
||||||
* LLM-based entity extraction and auto-capture decision for memory-neo4j.
|
* LLM-based entity extraction and sleep cycle for memory-neo4j.
|
||||||
*
|
*
|
||||||
* Uses Gemini Flash via OpenRouter for:
|
* Extraction uses a configurable OpenAI-compatible LLM (OpenRouter, Ollama, etc.) to:
|
||||||
* 1. Entity extraction: Extract entities and relationships from stored memories
|
* - Extract entities, relationships, and tags from stored memories
|
||||||
* 2. Auto-capture decision: Decide what's worth remembering from conversations
|
* - Classify memories into categories (preference, fact, decision, etc.)
|
||||||
*
|
*
|
||||||
* Both run as background fire-and-forget operations with graceful degradation.
|
* Runs as background fire-and-forget operations with graceful degradation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { randomUUID } from "node:crypto";
|
import { randomUUID } from "node:crypto";
|
||||||
import type { ExtractionConfig } from "./config.js";
|
import type { ExtractionConfig } from "./config.js";
|
||||||
import type { Embeddings } from "./embeddings.js";
|
import type { Embeddings } from "./embeddings.js";
|
||||||
import type { Neo4jMemoryClient } from "./neo4j-client.js";
|
import type { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||||
import type { CaptureItem, EntityType, ExtractionResult, MemoryCategory } from "./schema.js";
|
import type { EntityType, ExtractionResult, MemoryCategory } from "./schema.js";
|
||||||
import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES } from "./schema.js";
|
import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES, MEMORY_CATEGORIES } from "./schema.js";
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Types
|
// Types
|
||||||
@@ -31,12 +31,13 @@ type Logger = {
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
const ENTITY_EXTRACTION_PROMPT = `You are an entity extraction system for a personal memory store.
|
const ENTITY_EXTRACTION_PROMPT = `You are an entity extraction system for a personal memory store.
|
||||||
Extract entities and relationships from this memory text.
|
Extract entities and relationships from this memory text, and classify the memory.
|
||||||
|
|
||||||
Memory: "{text}"
|
Memory: "{text}"
|
||||||
|
|
||||||
Return JSON:
|
Return JSON:
|
||||||
{
|
{
|
||||||
|
"category": "preference|fact|decision|entity|other",
|
||||||
"entities": [
|
"entities": [
|
||||||
{"name": "tarun", "type": "person", "aliases": ["boss"], "description": "brief description"}
|
{"name": "tarun", "type": "person", "aliases": ["boss"], "description": "brief description"}
|
||||||
],
|
],
|
||||||
@@ -55,49 +56,8 @@ Rules:
|
|||||||
- Confidence: 0.0-1.0
|
- Confidence: 0.0-1.0
|
||||||
- Only extract what's explicitly stated or strongly implied
|
- Only extract what's explicitly stated or strongly implied
|
||||||
- Return empty arrays if nothing to extract
|
- Return empty arrays if nothing to extract
|
||||||
- Keep entity descriptions brief (1 sentence max)`;
|
- Keep entity descriptions brief (1 sentence max)
|
||||||
|
- Category: "preference" for opinions/preferences, "fact" for factual info, "decision" for choices made, "entity" for entity-focused, "other" for miscellaneous`;
|
||||||
// ============================================================================
|
|
||||||
// Auto-Capture Decision Prompt
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
const AUTO_CAPTURE_PROMPT = `You are an AI memory curator. Given these user messages from a conversation, identify information worth storing as long-term memories.
|
|
||||||
|
|
||||||
Only extract:
|
|
||||||
- Personal preferences and opinions ("I prefer dark mode", "I like TypeScript")
|
|
||||||
- Important facts about people, places, organizations
|
|
||||||
- Decisions made ("We decided to use Neo4j", "Going with plan A")
|
|
||||||
- Contact information (emails, phone numbers, usernames)
|
|
||||||
- Important events or dates
|
|
||||||
- Technical decisions and configurations
|
|
||||||
|
|
||||||
Do NOT extract:
|
|
||||||
- General questions or instructions to the AI
|
|
||||||
- Routine greetings or acknowledgments
|
|
||||||
- Information that is too vague or contextual
|
|
||||||
- Information already in system prompts or documentation
|
|
||||||
|
|
||||||
Categories:
|
|
||||||
- "core": Foundational identity info that should ALWAYS be remembered (user's name, role, company, key relationships, critical preferences that define who they are). Use sparingly - only for truly foundational facts.
|
|
||||||
- "preference": User preferences and opinions
|
|
||||||
- "fact": Facts about people, places, things
|
|
||||||
- "decision": Decisions made
|
|
||||||
- "entity": Entity-focused memories
|
|
||||||
- "other": Miscellaneous
|
|
||||||
|
|
||||||
Messages:
|
|
||||||
"""
|
|
||||||
{messages}
|
|
||||||
"""
|
|
||||||
|
|
||||||
Return JSON:
|
|
||||||
{
|
|
||||||
"memories": [
|
|
||||||
{"text": "concise memory text", "category": "core|preference|fact|decision|entity|other", "importance": 0.7}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
If nothing is worth remembering, return: {"memories": []}`;
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// OpenRouter API Client
|
// OpenRouter API Client
|
||||||
@@ -180,8 +140,13 @@ function validateExtractionResult(raw: Record<string, unknown>): ExtractionResul
|
|||||||
const tags = Array.isArray(raw.tags) ? raw.tags : [];
|
const tags = Array.isArray(raw.tags) ? raw.tags : [];
|
||||||
|
|
||||||
const validEntityTypes = new Set<string>(ENTITY_TYPES);
|
const validEntityTypes = new Set<string>(ENTITY_TYPES);
|
||||||
|
const validCategories = new Set<string>(MEMORY_CATEGORIES);
|
||||||
|
const rawCategory = typeof raw.category === "string" ? raw.category : undefined;
|
||||||
|
const category =
|
||||||
|
rawCategory && validCategories.has(rawCategory) ? (rawCategory as MemoryCategory) : undefined;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
category,
|
||||||
entities: entities
|
entities: entities
|
||||||
.filter(
|
.filter(
|
||||||
(e: unknown): e is Record<string, unknown> =>
|
(e: unknown): e is Record<string, unknown> =>
|
||||||
@@ -332,10 +297,16 @@ export async function runBackgroundExtraction(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update category if the LLM classified it (only overwrites 'other')
|
||||||
|
if (result.category) {
|
||||||
|
await db.updateMemoryCategory(memoryId, result.category);
|
||||||
|
}
|
||||||
|
|
||||||
await db.updateExtractionStatus(memoryId, "complete");
|
await db.updateExtractionStatus(memoryId, "complete");
|
||||||
logger.info(
|
logger.info(
|
||||||
`memory-neo4j: extraction complete for ${memoryId.slice(0, 8)} — ` +
|
`memory-neo4j: extraction complete for ${memoryId.slice(0, 8)} — ` +
|
||||||
`${result.entities.length} entities, ${result.relationships.length} rels, ${result.tags.length} tags`,
|
`${result.entities.length} entities, ${result.relationships.length} rels, ${result.tags.length} tags` +
|
||||||
|
(result.category ? `, category=${result.category}` : ""),
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.warn(`memory-neo4j: extraction failed for ${memoryId.slice(0, 8)}: ${String(err)}`);
|
logger.warn(`memory-neo4j: extraction failed for ${memoryId.slice(0, 8)}: ${String(err)}`);
|
||||||
@@ -528,12 +499,13 @@ export async function runSleepCycle(
|
|||||||
// Phase 2: Pareto Scoring & Threshold Calculation
|
// Phase 2: Pareto Scoring & Threshold Calculation
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
let paretoThreshold = 0;
|
let paretoThreshold = 0;
|
||||||
|
let allScores: Awaited<ReturnType<typeof db.calculateAllEffectiveScores>> = [];
|
||||||
if (!abortSignal?.aborted) {
|
if (!abortSignal?.aborted) {
|
||||||
onPhaseStart?.("pareto");
|
onPhaseStart?.("pareto");
|
||||||
logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring");
|
logger.info("memory-neo4j: [sleep] Phase 2: Pareto Scoring");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const allScores = await db.calculateAllEffectiveScores(agentId);
|
allScores = await db.calculateAllEffectiveScores(agentId);
|
||||||
result.pareto.totalMemories = allScores.length;
|
result.pareto.totalMemories = allScores.length;
|
||||||
result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length;
|
result.pareto.coreMemories = allScores.filter((s) => s.category === "core").length;
|
||||||
result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length;
|
result.pareto.regularMemories = allScores.filter((s) => s.category !== "core").length;
|
||||||
@@ -560,18 +532,19 @@ export async function runSleepCycle(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// Phase 3: Core Promotion (regular memories above threshold)
|
// Phase 3: Core Promotion (using pre-computed scores from Phase 2)
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
if (!abortSignal?.aborted && paretoThreshold > 0) {
|
if (!abortSignal?.aborted && paretoThreshold > 0) {
|
||||||
onPhaseStart?.("promotion");
|
onPhaseStart?.("promotion");
|
||||||
logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion");
|
logger.info("memory-neo4j: [sleep] Phase 3: Core Promotion");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const candidates = await db.findPromotionCandidates({
|
const candidates = allScores.filter(
|
||||||
paretoThreshold,
|
(s) =>
|
||||||
minAgeDays: promotionMinAgeDays,
|
s.category !== "core" &&
|
||||||
agentId,
|
s.effectiveScore >= paretoThreshold &&
|
||||||
});
|
s.ageDays >= promotionMinAgeDays,
|
||||||
|
);
|
||||||
result.promotion.candidatesFound = candidates.length;
|
result.promotion.candidatesFound = candidates.length;
|
||||||
|
|
||||||
if (candidates.length > 0) {
|
if (candidates.length > 0) {
|
||||||
@@ -594,17 +567,16 @@ export async function runSleepCycle(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// Phase 4: Core Demotion (core memories fallen below threshold)
|
// Phase 4: Core Demotion (using pre-computed scores from Phase 2)
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
if (!abortSignal?.aborted && paretoThreshold > 0) {
|
if (!abortSignal?.aborted && paretoThreshold > 0) {
|
||||||
onPhaseStart?.("demotion");
|
onPhaseStart?.("demotion");
|
||||||
logger.info("memory-neo4j: [sleep] Phase 4: Core Demotion");
|
logger.info("memory-neo4j: [sleep] Phase 4: Core Demotion");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const candidates = await db.findDemotionCandidates({
|
const candidates = allScores.filter(
|
||||||
paretoThreshold,
|
(s) => s.category === "core" && s.effectiveScore < paretoThreshold,
|
||||||
agentId,
|
);
|
||||||
});
|
|
||||||
result.demotion.candidatesFound = candidates.length;
|
result.demotion.candidatesFound = candidates.length;
|
||||||
|
|
||||||
if (candidates.length > 0) {
|
if (candidates.length > 0) {
|
||||||
@@ -627,40 +599,13 @@ export async function runSleepCycle(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// Phase 5: Decay & Pruning
|
// Phase 5: Entity Extraction (moved before decay so new memories get
|
||||||
// --------------------------------------------------------------------------
|
// extracted before pruning can remove them)
|
||||||
if (!abortSignal?.aborted) {
|
|
||||||
onPhaseStart?.("decay");
|
|
||||||
logger.info("memory-neo4j: [sleep] Phase 5: Decay & Pruning");
|
|
||||||
|
|
||||||
try {
|
|
||||||
const decayed = await db.findDecayedMemories({
|
|
||||||
retentionThreshold: decayRetentionThreshold,
|
|
||||||
baseHalfLifeDays: decayBaseHalfLifeDays,
|
|
||||||
importanceMultiplier: decayImportanceMultiplier,
|
|
||||||
agentId,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (decayed.length > 0) {
|
|
||||||
const ids = decayed.map((m) => m.id);
|
|
||||||
result.decay.memoriesPruned = await db.pruneMemories(ids);
|
|
||||||
onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
`memory-neo4j: [sleep] Phase 5 complete — ${result.decay.memoriesPruned} memories pruned`,
|
|
||||||
);
|
|
||||||
} catch (err) {
|
|
||||||
logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
|
||||||
// Phase 6: Entity Extraction
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
const EXTRACTION_CONCURRENCY = 3;
|
||||||
if (!abortSignal?.aborted && config.enabled) {
|
if (!abortSignal?.aborted && config.enabled) {
|
||||||
onPhaseStart?.("extraction");
|
onPhaseStart?.("extraction");
|
||||||
logger.info("memory-neo4j: [sleep] Phase 6: Entity Extraction");
|
logger.info("memory-neo4j: [sleep] Phase 5: Entity Extraction");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Get initial count
|
// Get initial count
|
||||||
@@ -677,24 +622,32 @@ export async function runSleepCycle(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const memory of pending) {
|
// Process in parallel chunks of EXTRACTION_CONCURRENCY
|
||||||
if (abortSignal?.aborted) {
|
for (
|
||||||
break;
|
let i = 0;
|
||||||
|
i < pending.length && !abortSignal?.aborted;
|
||||||
|
i += EXTRACTION_CONCURRENCY
|
||||||
|
) {
|
||||||
|
const chunk = pending.slice(i, i + EXTRACTION_CONCURRENCY);
|
||||||
|
const outcomes = await Promise.allSettled(
|
||||||
|
chunk.map((memory) =>
|
||||||
|
runBackgroundExtraction(memory.id, memory.text, db, embeddings, config, logger),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const outcome of outcomes) {
|
||||||
|
result.extraction.processed++;
|
||||||
|
if (outcome.status === "fulfilled") {
|
||||||
|
result.extraction.succeeded++;
|
||||||
|
} else {
|
||||||
|
result.extraction.failed++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
if (
|
||||||
await runBackgroundExtraction(memory.id, memory.text, db, embeddings, config, logger);
|
result.extraction.processed % 10 === 0 ||
|
||||||
result.extraction.succeeded++;
|
i + EXTRACTION_CONCURRENCY >= pending.length
|
||||||
} catch (err) {
|
) {
|
||||||
logger.warn(
|
|
||||||
`memory-neo4j: extraction failed for ${memory.id.slice(0, 8)}: ${String(err)}`,
|
|
||||||
);
|
|
||||||
result.extraction.failed++;
|
|
||||||
}
|
|
||||||
|
|
||||||
result.extraction.processed++;
|
|
||||||
|
|
||||||
if (result.extraction.processed % 10 === 0) {
|
|
||||||
onProgress?.(
|
onProgress?.(
|
||||||
"extraction",
|
"extraction",
|
||||||
`${result.extraction.processed}/${result.extraction.total} processed`,
|
`${result.extraction.processed}/${result.extraction.total} processed`,
|
||||||
@@ -710,13 +663,43 @@ export async function runSleepCycle(
|
|||||||
}
|
}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
`memory-neo4j: [sleep] Phase 6 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
|
`memory-neo4j: [sleep] Phase 5 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
|
||||||
|
);
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
|
||||||
|
}
|
||||||
|
} else if (!config.enabled) {
|
||||||
|
logger.info("memory-neo4j: [sleep] Phase 5 skipped — extraction not enabled");
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// Phase 6: Decay & Pruning (after extraction so freshly extracted memories
|
||||||
|
// aren't pruned before they build entity connections)
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
if (!abortSignal?.aborted) {
|
||||||
|
onPhaseStart?.("decay");
|
||||||
|
logger.info("memory-neo4j: [sleep] Phase 6: Decay & Pruning");
|
||||||
|
|
||||||
|
try {
|
||||||
|
const decayed = await db.findDecayedMemories({
|
||||||
|
retentionThreshold: decayRetentionThreshold,
|
||||||
|
baseHalfLifeDays: decayBaseHalfLifeDays,
|
||||||
|
importanceMultiplier: decayImportanceMultiplier,
|
||||||
|
agentId,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (decayed.length > 0) {
|
||||||
|
const ids = decayed.map((m) => m.id);
|
||||||
|
result.decay.memoriesPruned = await db.pruneMemories(ids);
|
||||||
|
onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
`memory-neo4j: [sleep] Phase 6 complete — ${result.decay.memoriesPruned} memories pruned`,
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
|
logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
|
||||||
}
|
}
|
||||||
} else if (!config.enabled) {
|
|
||||||
logger.info("memory-neo4j: [sleep] Phase 6 skipped — extraction not enabled");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -762,69 +745,6 @@ export async function runSleepCycle(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Auto-Capture Decision
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Evaluate user messages and decide what's worth storing as long-term memory.
|
|
||||||
* Returns a list of memory items to store, or empty if nothing worth keeping.
|
|
||||||
*/
|
|
||||||
export async function evaluateAutoCapture(
|
|
||||||
userMessages: string[],
|
|
||||||
config: ExtractionConfig,
|
|
||||||
): Promise<CaptureItem[]> {
|
|
||||||
if (!config.enabled || userMessages.length === 0) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
const combined = userMessages.join("\n\n");
|
|
||||||
if (combined.length < 10) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
const prompt = AUTO_CAPTURE_PROMPT.replace("{messages}", combined);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const content = await callOpenRouter(config, prompt);
|
|
||||||
if (!content) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
const parsed = JSON.parse(content) as Record<string, unknown>;
|
|
||||||
return validateCaptureDecision(parsed);
|
|
||||||
} catch {
|
|
||||||
// Silently fail — auto-capture is best-effort
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Validate and sanitize the auto-capture LLM output.
|
|
||||||
*/
|
|
||||||
function validateCaptureDecision(raw: Record<string, unknown>): CaptureItem[] {
|
|
||||||
const memories = Array.isArray(raw.memories) ? raw.memories : [];
|
|
||||||
|
|
||||||
const validCategories = new Set<string>(["preference", "fact", "decision", "entity", "other"]);
|
|
||||||
|
|
||||||
return memories
|
|
||||||
.filter(
|
|
||||||
(m: unknown): m is Record<string, unknown> =>
|
|
||||||
m !== null &&
|
|
||||||
typeof m === "object" &&
|
|
||||||
typeof (m as Record<string, unknown>).text === "string" &&
|
|
||||||
(m as Record<string, unknown>).text !== "",
|
|
||||||
)
|
|
||||||
.map((m) => ({
|
|
||||||
text: String(m.text).slice(0, 2000), // cap length
|
|
||||||
category: validCategories.has(String(m.category))
|
|
||||||
? (String(m.category) as MemoryCategory)
|
|
||||||
: "other",
|
|
||||||
importance: typeof m.importance === "number" ? Math.min(1, Math.max(0, m.importance)) : 0.7,
|
|
||||||
}))
|
|
||||||
.slice(0, 5); // Max 5 captures per conversation
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Message Extraction Helper
|
// Message Extraction Helper
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ import {
|
|||||||
vectorDimsForModel,
|
vectorDimsForModel,
|
||||||
} from "./config.js";
|
} from "./config.js";
|
||||||
import { Embeddings } from "./embeddings.js";
|
import { Embeddings } from "./embeddings.js";
|
||||||
import { evaluateAutoCapture, extractUserMessages, runSleepCycle } from "./extractor.js";
|
import { extractUserMessages, runSleepCycle } from "./extractor.js";
|
||||||
import { Neo4jMemoryClient } from "./neo4j-client.js";
|
import { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||||
import { hybridSearch } from "./search.js";
|
import { hybridSearch } from "./search.js";
|
||||||
|
|
||||||
@@ -43,7 +43,7 @@ const memoryNeo4jPlugin = {
|
|||||||
register(api: OpenClawPluginApi) {
|
register(api: OpenClawPluginApi) {
|
||||||
// Parse configuration
|
// Parse configuration
|
||||||
const cfg = memoryNeo4jConfigSchema.parse(api.pluginConfig);
|
const cfg = memoryNeo4jConfigSchema.parse(api.pluginConfig);
|
||||||
const extractionConfig = resolveExtractionConfig();
|
const extractionConfig = resolveExtractionConfig(cfg.extraction);
|
||||||
const vectorDim = vectorDimsForModel(cfg.embedding.model);
|
const vectorDim = vectorDimsForModel(cfg.embedding.model);
|
||||||
|
|
||||||
// Create shared resources
|
// Create shared resources
|
||||||
@@ -494,8 +494,8 @@ const memoryNeo4jPlugin = {
|
|||||||
);
|
);
|
||||||
console.log(" Phase 3: Core Promotion — Regular memories above threshold → core");
|
console.log(" Phase 3: Core Promotion — Regular memories above threshold → core");
|
||||||
console.log(" Phase 4: Core Demotion — Core memories below threshold → regular");
|
console.log(" Phase 4: Core Demotion — Core memories below threshold → regular");
|
||||||
console.log(" Phase 5: Decay & Pruning — Remove stale low-importance memories");
|
console.log(" Phase 5: Extraction — Extract entities and categorize");
|
||||||
console.log(" Phase 6: Extraction — Form entity relationships");
|
console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories");
|
||||||
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n");
|
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -522,8 +522,8 @@ const memoryNeo4jPlugin = {
|
|||||||
pareto: "Phase 2: Pareto Scoring",
|
pareto: "Phase 2: Pareto Scoring",
|
||||||
promotion: "Phase 3: Core Promotion",
|
promotion: "Phase 3: Core Promotion",
|
||||||
demotion: "Phase 4: Core Demotion",
|
demotion: "Phase 4: Core Demotion",
|
||||||
decay: "Phase 5: Decay & Pruning",
|
extraction: "Phase 5: Extraction",
|
||||||
extraction: "Phase 6: Extraction",
|
decay: "Phase 6: Decay & Pruning",
|
||||||
cleanup: "Phase 7: Orphan Cleanup",
|
cleanup: "Phase 7: Orphan Cleanup",
|
||||||
};
|
};
|
||||||
console.log(`\n▶ ${phaseNames[phase]}`);
|
console.log(`\n▶ ${phaseNames[phase]}`);
|
||||||
@@ -818,7 +818,19 @@ const memoryNeo4jPlugin = {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Auto-capture: LLM-based decision on what to store from conversations
|
// Auto-capture: attention-gated memory pipeline modeled on human memory.
|
||||||
|
//
|
||||||
|
// Phase 1 — Attention gating (real-time):
|
||||||
|
// Lightweight heuristic filter rejects obvious noise (greetings, short
|
||||||
|
// acks, system markup, code dumps) without any LLM call.
|
||||||
|
//
|
||||||
|
// Phase 2 — Short-term retention:
|
||||||
|
// Everything that passes the gate is embedded, deduped, and stored as
|
||||||
|
// regular memory with extractionStatus "pending".
|
||||||
|
//
|
||||||
|
// Phase 3 — Sleep consolidation (deferred to `openclaw memory neo4j sleep`):
|
||||||
|
// The sleep cycle handles entity extraction, categorization, Pareto
|
||||||
|
// scoring, promotion/demotion, and decay — mirroring hippocampal replay.
|
||||||
api.logger.debug?.(
|
api.logger.debug?.(
|
||||||
`memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`,
|
`memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`,
|
||||||
);
|
);
|
||||||
@@ -837,71 +849,24 @@ const memoryNeo4jPlugin = {
|
|||||||
const sessionKey = ctx.sessionKey;
|
const sessionKey = ctx.sessionKey;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (extractionConfig.enabled) {
|
const userMessages = extractUserMessages(event.messages);
|
||||||
// LLM-based auto-capture (Decision Q8)
|
if (userMessages.length === 0) {
|
||||||
const userMessages = extractUserMessages(event.messages);
|
return;
|
||||||
if (userMessages.length === 0) {
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const items = await evaluateAutoCapture(userMessages, extractionConfig);
|
// Phase 1: Attention gating — fast heuristic filter
|
||||||
if (items.length === 0) {
|
const retained = userMessages.filter((text) => passesAttentionGate(text));
|
||||||
return;
|
if (retained.length === 0) {
|
||||||
}
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let stored = 0;
|
// Phase 2: Short-term retention — embed, dedup, store
|
||||||
for (const item of items) {
|
let stored = 0;
|
||||||
try {
|
for (const text of retained) {
|
||||||
const vector = await embeddings.embed(item.text);
|
try {
|
||||||
|
|
||||||
// Check for duplicates
|
|
||||||
const existing = await db.findSimilar(vector, 0.95, 1);
|
|
||||||
if (existing.length > 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const memoryId = randomUUID();
|
|
||||||
await db.storeMemory({
|
|
||||||
id: memoryId,
|
|
||||||
text: item.text,
|
|
||||||
embedding: vector,
|
|
||||||
importance: item.importance,
|
|
||||||
category: item.category,
|
|
||||||
source: "auto-capture",
|
|
||||||
extractionStatus: "pending",
|
|
||||||
agentId,
|
|
||||||
sessionKey,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Extraction deferred to sleep cycle (like human memory consolidation)
|
|
||||||
stored++;
|
|
||||||
} catch (err) {
|
|
||||||
api.logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stored > 0) {
|
|
||||||
api.logger.info(`memory-neo4j: auto-captured ${stored} memories (LLM-based)`);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Fallback: rule-based capture (no extraction API key)
|
|
||||||
const userMessages = extractUserMessages(event.messages);
|
|
||||||
if (userMessages.length === 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const toCapture = userMessages.filter(
|
|
||||||
(text) => text.length >= 10 && text.length <= 500 && shouldCaptureRuleBased(text),
|
|
||||||
);
|
|
||||||
if (toCapture.length === 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let stored = 0;
|
|
||||||
for (const text of toCapture.slice(0, 3)) {
|
|
||||||
const category = detectCategory(text);
|
|
||||||
const vector = await embeddings.embed(text);
|
const vector = await embeddings.embed(text);
|
||||||
|
|
||||||
|
// Quick dedup (same content already stored)
|
||||||
const existing = await db.findSimilar(vector, 0.95, 1);
|
const existing = await db.findSimilar(vector, 0.95, 1);
|
||||||
if (existing.length > 0) {
|
if (existing.length > 0) {
|
||||||
continue;
|
continue;
|
||||||
@@ -911,19 +876,21 @@ const memoryNeo4jPlugin = {
|
|||||||
id: randomUUID(),
|
id: randomUUID(),
|
||||||
text,
|
text,
|
||||||
embedding: vector,
|
embedding: vector,
|
||||||
importance: 0.7,
|
importance: 0.5, // neutral — sleep cycle scores via Pareto
|
||||||
category,
|
category: "other", // sleep cycle will categorize
|
||||||
source: "auto-capture",
|
source: "auto-capture",
|
||||||
extractionStatus: "skipped",
|
extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
|
||||||
agentId,
|
agentId,
|
||||||
sessionKey,
|
sessionKey,
|
||||||
});
|
});
|
||||||
stored++;
|
stored++;
|
||||||
|
} catch (err) {
|
||||||
|
api.logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (stored > 0) {
|
if (stored > 0) {
|
||||||
api.logger.info(`memory-neo4j: auto-captured ${stored} memories (rule-based)`);
|
api.logger.info(`memory-neo4j: auto-captured ${stored} memories (attention-gated)`);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
api.logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`);
|
api.logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`);
|
||||||
@@ -960,52 +927,57 @@ const memoryNeo4jPlugin = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Rule-based capture filter (fallback when no extraction API key)
|
// Attention gate — lightweight heuristic filter (phase 1 of memory pipeline)
|
||||||
|
//
|
||||||
|
// Rejects obvious noise without any LLM call, analogous to how the brain's
|
||||||
|
// sensory gating filters out irrelevant stimuli before they enter working
|
||||||
|
// memory. Everything that passes gets stored; the sleep cycle decides what
|
||||||
|
// matters.
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
const MEMORY_TRIGGERS = [
|
const NOISE_PATTERNS = [
|
||||||
/remember|zapamatuj|pamatuj/i,
|
// Greetings / acknowledgments
|
||||||
/prefer|radši|nechci|preferuji/i,
|
/^(hi|hey|hello|yo|sup|ok|okay|sure|thanks|thank you|thx|ty|yep|yup|nope|no|yes|yeah|cool|nice|great|got it|sounds good|perfect|alright|fine|noted|ack|kk|k)\s*[.!?]*$/i,
|
||||||
/decided|rozhodli|budeme používat/i,
|
// Single-word or near-empty
|
||||||
/\+\d{10,}/,
|
/^\S{0,3}$/,
|
||||||
/[\w.-]+@[\w.-]+\.\w+/,
|
// Pure emoji
|
||||||
/my\s+\w+\s+is|is\s+my/i,
|
/^[\p{Emoji}\s]+$/u,
|
||||||
/i (like|prefer|hate|love|want|need)/i,
|
// System/XML markup
|
||||||
/always|never|important/i,
|
/^<[a-z-]+>[\s\S]*<\/[a-z-]+>$/i,
|
||||||
];
|
];
|
||||||
|
|
||||||
function shouldCaptureRuleBased(text: string): boolean {
|
/** Maximum message length — code dumps, logs, etc. are not memories. */
|
||||||
if (text.includes("<relevant-memories>")) {
|
const MAX_CAPTURE_CHARS = 2000;
|
||||||
|
|
||||||
|
/** Minimum message length — too short to be meaningful. */
|
||||||
|
const MIN_CAPTURE_CHARS = 10;
|
||||||
|
|
||||||
|
function passesAttentionGate(text: string): boolean {
|
||||||
|
const trimmed = text.trim();
|
||||||
|
|
||||||
|
// Length bounds
|
||||||
|
if (trimmed.length < MIN_CAPTURE_CHARS || trimmed.length > MAX_CAPTURE_CHARS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (text.startsWith("<") && text.includes("</")) {
|
|
||||||
|
// Injected context from the memory system itself
|
||||||
|
if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (text.includes("**") && text.includes("\n-")) {
|
|
||||||
|
// Noise patterns
|
||||||
|
if (NOISE_PATTERNS.some((r) => r.test(trimmed))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const emojiCount = (text.match(/[\u{1F300}-\u{1F9FF}]/gu) || []).length;
|
|
||||||
|
// Excessive emoji (likely reaction, not substance)
|
||||||
|
const emojiCount = (trimmed.match(/[\u{1F300}-\u{1F9FF}]/gu) || []).length;
|
||||||
if (emojiCount > 3) {
|
if (emojiCount > 3) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return MEMORY_TRIGGERS.some((r) => r.test(text));
|
|
||||||
}
|
|
||||||
|
|
||||||
function detectCategory(text: string): MemoryCategory {
|
// Passes gate — retain for short-term storage
|
||||||
const lower = text.toLowerCase();
|
return true;
|
||||||
if (/prefer|radši|like|love|hate|want/i.test(lower)) {
|
|
||||||
return "preference";
|
|
||||||
}
|
|
||||||
if (/decided|rozhodli|will use|budeme/i.test(lower)) {
|
|
||||||
return "decision";
|
|
||||||
}
|
|
||||||
if (/\+\d{10,}|@[\w.-]+\.\w+|is called|jmenuje se/i.test(lower)) {
|
|
||||||
return "entity";
|
|
||||||
}
|
|
||||||
if (/is|are|has|have|je|má|jsou/i.test(lower)) {
|
|
||||||
return "fact";
|
|
||||||
}
|
|
||||||
return "other";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@@ -811,6 +811,25 @@ export class Neo4jMemoryClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update a memory's category. Only updates if current category is 'other'
|
||||||
|
* (auto-assigned) to avoid overriding user-explicit categorization.
|
||||||
|
*/
|
||||||
|
async updateMemoryCategory(id: string, category: string): Promise<void> {
|
||||||
|
await this.ensureInitialized();
|
||||||
|
const session = this.driver!.session();
|
||||||
|
try {
|
||||||
|
await session.run(
|
||||||
|
`MATCH (m:Memory {id: $id})
|
||||||
|
WHERE m.category = 'other'
|
||||||
|
SET m.category = $category, m.updatedAt = $now`,
|
||||||
|
{ id, category, now: new Date().toISOString() },
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
await session.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the extraction status of a Memory node.
|
* Update the extraction status of a Memory node.
|
||||||
*/
|
*/
|
||||||
@@ -900,10 +919,11 @@ export class Neo4jMemoryClient {
|
|||||||
* Find clusters of near-duplicate memories by vector similarity.
|
* Find clusters of near-duplicate memories by vector similarity.
|
||||||
* Returns groups where each group contains memories that are duplicates of each other.
|
* Returns groups where each group contains memories that are duplicates of each other.
|
||||||
*
|
*
|
||||||
* Algorithm:
|
* Algorithm (O(N log N) via HNSW index, replaces O(N²) Cartesian product):
|
||||||
* 1. For each memory, find others with similarity >= threshold
|
* 1. Fetch all memory IDs and metadata
|
||||||
* 2. Group into clusters (transitive closure)
|
* 2. For each memory, query the vector index for nearest neighbors above threshold
|
||||||
* 3. Return clusters with 2+ members
|
* 3. Build clusters via union-find (transitive closure)
|
||||||
|
* 4. Return clusters with 2+ members
|
||||||
*/
|
*/
|
||||||
async findDuplicateClusters(
|
async findDuplicateClusters(
|
||||||
threshold: number = 0.95,
|
threshold: number = 0.95,
|
||||||
@@ -912,24 +932,29 @@ export class Neo4jMemoryClient {
|
|||||||
await this.ensureInitialized();
|
await this.ensureInitialized();
|
||||||
const session = this.driver!.session();
|
const session = this.driver!.session();
|
||||||
try {
|
try {
|
||||||
// Find all pairs of similar memories
|
// Step 1: Fetch all memory metadata (no embeddings — lightweight)
|
||||||
const agentFilter = agentId ? "WHERE m1.agentId = $agentId AND m2.agentId = $agentId" : "";
|
const agentFilter = agentId ? "WHERE m.agentId = $agentId" : "";
|
||||||
const result = await session.run(
|
const allResult = await session.run(
|
||||||
`MATCH (m1:Memory), (m2:Memory)
|
`MATCH (m:Memory) ${agentFilter}
|
||||||
${agentFilter}
|
RETURN m.id AS id, m.text AS text, m.importance AS importance`,
|
||||||
WHERE m1.id < m2.id
|
agentId ? { agentId } : {},
|
||||||
AND vector.similarity.cosine(m1.embedding, m2.embedding) >= $threshold
|
|
||||||
RETURN m1.id AS id1, m1.text AS text1, m1.importance AS imp1,
|
|
||||||
m2.id AS id2, m2.text AS text2, m2.importance AS imp2,
|
|
||||||
vector.similarity.cosine(m1.embedding, m2.embedding) AS similarity
|
|
||||||
ORDER BY similarity DESC
|
|
||||||
LIMIT 500`,
|
|
||||||
{ threshold, agentId },
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Build clusters using union-find
|
|
||||||
const parent = new Map<string, string>();
|
|
||||||
const memoryData = new Map<string, { text: string; importance: number }>();
|
const memoryData = new Map<string, { text: string; importance: number }>();
|
||||||
|
for (const r of allResult.records) {
|
||||||
|
memoryData.set(r.get("id") as string, {
|
||||||
|
text: r.get("text") as string,
|
||||||
|
importance: r.get("importance") as number,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (memoryData.size < 2) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: For each memory, find near-duplicates via HNSW vector index
|
||||||
|
// Each query is O(log N) vs O(N) for brute-force, total O(N log N)
|
||||||
|
const parent = new Map<string, string>();
|
||||||
|
|
||||||
const find = (x: string): string => {
|
const find = (x: string): string => {
|
||||||
if (!parent.has(x)) {
|
if (!parent.has(x)) {
|
||||||
@@ -949,23 +974,37 @@ export class Neo4jMemoryClient {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const record of result.records) {
|
let pairsFound = 0;
|
||||||
const id1 = record.get("id1") as string;
|
for (const id of memoryData.keys()) {
|
||||||
const id2 = record.get("id2") as string;
|
const similar = await session.run(
|
||||||
union(id1, id2);
|
`MATCH (src:Memory {id: $id})
|
||||||
memoryData.set(id1, {
|
CALL db.index.vector.queryNodes('memory_embedding_index', $k, src.embedding)
|
||||||
text: record.get("text1") as string,
|
YIELD node, score
|
||||||
importance: record.get("imp1") as number,
|
WHERE node.id <> $id AND score >= $threshold
|
||||||
});
|
RETURN node.id AS matchId`,
|
||||||
memoryData.set(id2, {
|
{ id, k: neo4j.int(10), threshold },
|
||||||
text: record.get("text2") as string,
|
);
|
||||||
importance: record.get("imp2") as number,
|
|
||||||
});
|
for (const r of similar.records) {
|
||||||
|
const matchId = r.get("matchId") as string;
|
||||||
|
if (memoryData.has(matchId)) {
|
||||||
|
union(id, matchId);
|
||||||
|
pairsFound++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Early exit if we've found many pairs (safety bound)
|
||||||
|
if (pairsFound > 500) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Group by root
|
// Step 3: Group by root
|
||||||
const clusters = new Map<string, string[]>();
|
const clusters = new Map<string, string[]>();
|
||||||
for (const id of memoryData.keys()) {
|
for (const id of memoryData.keys()) {
|
||||||
|
if (!parent.has(id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const root = find(id);
|
const root = find(id);
|
||||||
if (!clusters.has(root)) {
|
if (!clusters.has(root)) {
|
||||||
clusters.set(root, []);
|
clusters.set(root, []);
|
||||||
@@ -1159,8 +1198,7 @@ export class Neo4jMemoryClient {
|
|||||||
try {
|
try {
|
||||||
const result = await session.run(
|
const result = await session.run(
|
||||||
`MATCH (e:Entity)
|
`MATCH (e:Entity)
|
||||||
WHERE NOT EXISTS { MATCH (:Memory)-[:MENTIONS]->(e) }
|
WHERE coalesce(e.mentionCount, 0) <= 0
|
||||||
OR e.mentionCount <= 0
|
|
||||||
RETURN e.id AS id, e.name AS name, e.type AS type
|
RETURN e.id AS id, e.name AS name, e.type AS type
|
||||||
LIMIT $limit`,
|
LIMIT $limit`,
|
||||||
{ limit: neo4j.int(limit) },
|
{ limit: neo4j.int(limit) },
|
||||||
@@ -1261,23 +1299,34 @@ export class Neo4jMemoryClient {
|
|||||||
* This gives core memories a slight disadvantage (they need strong retrieval
|
* This gives core memories a slight disadvantage (they need strong retrieval
|
||||||
* patterns to stay in top 20%), creating healthy churn.
|
* patterns to stay in top 20%), creating healthy churn.
|
||||||
*/
|
*/
|
||||||
async calculateAllEffectiveScores(
|
async calculateAllEffectiveScores(agentId?: string): Promise<
|
||||||
agentId?: string,
|
Array<{
|
||||||
): Promise<Array<{ id: string; category: string; effectiveScore: number }>> {
|
id: string;
|
||||||
|
text: string;
|
||||||
|
category: string;
|
||||||
|
importance: number;
|
||||||
|
retrievalCount: number;
|
||||||
|
ageDays: number;
|
||||||
|
effectiveScore: number;
|
||||||
|
}>
|
||||||
|
> {
|
||||||
await this.ensureInitialized();
|
await this.ensureInitialized();
|
||||||
const session = this.driver!.session();
|
const session = this.driver!.session();
|
||||||
try {
|
try {
|
||||||
const agentFilter = agentId ? "WHERE m.agentId = $agentId" : "";
|
const agentFilter = agentId
|
||||||
|
? "WHERE m.agentId = $agentId AND m.createdAt IS NOT NULL"
|
||||||
|
: "WHERE m.createdAt IS NOT NULL";
|
||||||
const result = await session.run(
|
const result = await session.run(
|
||||||
`MATCH (m:Memory)
|
`MATCH (m:Memory)
|
||||||
${agentFilter}
|
${agentFilter}
|
||||||
WITH m,
|
WITH m,
|
||||||
coalesce(m.retrievalCount, 0) AS retrievalCount,
|
coalesce(m.retrievalCount, 0) AS retrievalCount,
|
||||||
|
duration.between(datetime(m.createdAt), datetime()).days AS ageDays,
|
||||||
CASE
|
CASE
|
||||||
WHEN m.lastRetrievedAt IS NULL THEN null
|
WHEN m.lastRetrievedAt IS NULL THEN null
|
||||||
ELSE duration.between(datetime(m.lastRetrievedAt), datetime()).days
|
ELSE duration.between(datetime(m.lastRetrievedAt), datetime()).days
|
||||||
END AS daysSinceRetrieval
|
END AS daysSinceRetrieval
|
||||||
WITH m, retrievalCount, daysSinceRetrieval,
|
WITH m, retrievalCount, ageDays, daysSinceRetrieval,
|
||||||
// Effective score: importance × freq_boost × recency
|
// Effective score: importance × freq_boost × recency
|
||||||
// This is used for global ranking (promotion/demotion threshold)
|
// This is used for global ranking (promotion/demotion threshold)
|
||||||
m.importance * (1 + log(1 + retrievalCount) * 0.3) *
|
m.importance * (1 + log(1 + retrievalCount) * 0.3) *
|
||||||
@@ -1285,14 +1334,19 @@ export class Neo4jMemoryClient {
|
|||||||
WHEN daysSinceRetrieval IS NULL THEN 0.1
|
WHEN daysSinceRetrieval IS NULL THEN 0.1
|
||||||
ELSE 2.0 ^ (-1.0 * daysSinceRetrieval / 14.0)
|
ELSE 2.0 ^ (-1.0 * daysSinceRetrieval / 14.0)
|
||||||
END AS effectiveScore
|
END AS effectiveScore
|
||||||
RETURN m.id AS id, m.category AS category, effectiveScore
|
RETURN m.id AS id, m.text AS text, m.category AS category,
|
||||||
|
m.importance AS importance, retrievalCount, ageDays, effectiveScore
|
||||||
ORDER BY effectiveScore DESC`,
|
ORDER BY effectiveScore DESC`,
|
||||||
agentId ? { agentId } : {},
|
agentId ? { agentId } : {},
|
||||||
);
|
);
|
||||||
|
|
||||||
return result.records.map((r) => ({
|
return result.records.map((r) => ({
|
||||||
id: r.get("id") as string,
|
id: r.get("id") as string,
|
||||||
|
text: r.get("text") as string,
|
||||||
category: r.get("category") as string,
|
category: r.get("category") as string,
|
||||||
|
importance: r.get("importance") as number,
|
||||||
|
retrievalCount: r.get("retrievalCount") as number,
|
||||||
|
ageDays: r.get("ageDays") as number,
|
||||||
effectiveScore: r.get("effectiveScore") as number,
|
effectiveScore: r.get("effectiveScore") as number,
|
||||||
}));
|
}));
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
@@ -43,6 +43,22 @@
|
|||||||
"autoRecall": {
|
"autoRecall": {
|
||||||
"label": "Auto-Recall",
|
"label": "Auto-Recall",
|
||||||
"help": "Automatically inject relevant memories into context"
|
"help": "Automatically inject relevant memories into context"
|
||||||
|
},
|
||||||
|
"extraction.apiKey": {
|
||||||
|
"label": "Extraction API Key",
|
||||||
|
"sensitive": true,
|
||||||
|
"placeholder": "sk-or-v1-...",
|
||||||
|
"help": "API key for extraction LLM (not needed for Ollama/local models)"
|
||||||
|
},
|
||||||
|
"extraction.model": {
|
||||||
|
"label": "Extraction Model",
|
||||||
|
"placeholder": "google/gemini-2.0-flash-001",
|
||||||
|
"help": "Model for entity extraction (e.g., google/gemini-2.0-flash-001 for OpenRouter, llama3.1:8b for Ollama)"
|
||||||
|
},
|
||||||
|
"extraction.baseUrl": {
|
||||||
|
"label": "Extraction Base URL",
|
||||||
|
"placeholder": "https://openrouter.ai/api/v1",
|
||||||
|
"help": "Base URL for extraction API (e.g., https://openrouter.ai/api/v1 or http://localhost:11434/v1 for Ollama)"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"configSchema": {
|
"configSchema": {
|
||||||
@@ -92,6 +108,21 @@
|
|||||||
},
|
},
|
||||||
"autoRecall": {
|
"autoRecall": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"extraction": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": false,
|
||||||
|
"properties": {
|
||||||
|
"apiKey": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"baseUrl": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["neo4j"]
|
"required": ["neo4j"]
|
||||||
|
|||||||
@@ -68,25 +68,12 @@ export type ExtractedTag = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export type ExtractionResult = {
|
export type ExtractionResult = {
|
||||||
|
category?: MemoryCategory;
|
||||||
entities: ExtractedEntity[];
|
entities: ExtractedEntity[];
|
||||||
relationships: ExtractedRelationship[];
|
relationships: ExtractedRelationship[];
|
||||||
tags: ExtractedTag[];
|
tags: ExtractedTag[];
|
||||||
};
|
};
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Auto-Capture Types
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
export type CaptureItem = {
|
|
||||||
text: string;
|
|
||||||
category: MemoryCategory;
|
|
||||||
importance: number;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type CaptureDecision = {
|
|
||||||
memories: CaptureItem[];
|
|
||||||
};
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Search Types
|
// Search Types
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user