diff --git a/extensions/memory-neo4j/index.ts b/extensions/memory-neo4j/index.ts index 8c2d9dfd071..99d52575830 100644 --- a/extensions/memory-neo4j/index.ts +++ b/extensions/memory-neo4j/index.ts @@ -32,6 +32,8 @@ import { isSemanticDuplicate, rateImportance } from "./extractor.js"; import { extractUserMessages, extractAssistantMessages } from "./message-utils.js"; import { Neo4jMemoryClient } from "./neo4j-client.js"; import { hybridSearch } from "./search.js"; +import { isRelatedToCompletedTask, loadCompletedTaskKeywords } from "./task-filter.js"; +import { parseTaskLedger } from "./task-ledger.js"; // ============================================================================ // Plugin Definition @@ -185,16 +187,21 @@ const memoryNeo4jPlugin = { }), ), category: Type.Optional(stringEnum(MEMORY_CATEGORIES)), + taskId: Type.Optional( + Type.String({ description: "Optional task ID to link memory to (e.g., TASK-001)" }), + ), }), async execute(_toolCallId: string, params: unknown) { const { text, importance = 0.7, category = "other", + taskId, } = params as { text: string; importance?: number; category?: MemoryCategory; + taskId?: string; }; // 1. Generate embedding @@ -232,6 +239,8 @@ const memoryNeo4jPlugin = { extractionStatus: extractionConfig.enabled ? "pending" : "skipped", agentId, sessionKey, + // Layer 3: Pass through taskId if provided by the agent + ...(taskId ? { taskId } : {}), }); // 4. Extraction is deferred to sleep cycle (like human memory consolidation) @@ -557,6 +566,49 @@ const memoryNeo4jPlugin = { results = results.filter((r) => !coreIds.has(r.id)); } + // Feature 3: Filter out memories related to completed tasks + const workspaceDir = ctx.workspaceDir; + if (workspaceDir) { + try { + const completedTasks = await loadCompletedTaskKeywords(workspaceDir); + if (completedTasks.length > 0) { + const before = results.length; + results = results.filter((r) => !isRelatedToCompletedTask(r.text, completedTasks)); + if (results.length < before) { + api.logger.debug?.( + `memory-neo4j: task-filter removed ${before - results.length} memories related to completed tasks`, + ); + } + } + } catch (err) { + api.logger.debug?.(`memory-neo4j: task-filter skipped: ${String(err)}`); + } + } + + // Layer 3: Filter out memories linked to completed tasks by taskId + // This complements Layer 1's keyword-based filter with precise taskId matching + if (workspaceDir) { + try { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + const tasksPath = path.default.join(workspaceDir, "TASKS.md"); + const content = await fs.default.readFile(tasksPath, "utf-8"); + const ledger = parseTaskLedger(content); + const completedTaskIds = new Set(ledger.completedTasks.map((t) => t.id)); + if (completedTaskIds.size > 0) { + const before = results.length; + results = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId)); + if (results.length < before) { + api.logger.debug?.( + `memory-neo4j: taskId-filter removed ${before - results.length} memories linked to completed tasks`, + ); + } + } + } catch { + // TASKS.md doesn't exist or can't be read — skip taskId filter + } + } + const totalMs = performance.now() - t0; api.logger.info?.( `memory-neo4j: [bench] auto-recall ${totalMs.toFixed(0)}ms total (search=${(tSearch - t0).toFixed(0)}ms), ${results.length} results`, @@ -723,6 +775,7 @@ const memoryNeo4jPlugin = { embeddings, extractionConfig, api.logger, + ctx.workspaceDir, // Layer 3: pass workspace dir for task auto-tagging ); }); } @@ -756,6 +809,51 @@ const memoryNeo4jPlugin = { }, }; +// ============================================================================ +// Layer 3: TASKS.md cache for auto-capture task tagging +// ============================================================================ + +/** Cached result of TASKS.md parsing for auto-capture task tagging. */ +let _taskLedgerCache: { activeTaskId: string | undefined; expiresAt: number } | null = null; +const TASK_LEDGER_CACHE_TTL_MS = 60_000; // 60 seconds + +/** + * Get the active task ID from TASKS.md (if exactly one active task). + * Results are cached with a 60-second TTL to avoid re-parsing on every message. + */ +async function getActiveTaskIdForCapture( + workspaceDir: string | undefined, + logger: Logger, +): Promise { + const now = Date.now(); + if (_taskLedgerCache && now < _taskLedgerCache.expiresAt) { + return _taskLedgerCache.activeTaskId; + } + + let activeTaskId: string | undefined; + if (workspaceDir) { + try { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + const tasksPath = path.default.join(workspaceDir, "TASKS.md"); + const content = await fs.default.readFile(tasksPath, "utf-8"); + const ledger = parseTaskLedger(content); + // Only auto-tag when there's exactly one active task to avoid ambiguity + if (ledger.activeTasks.length === 1) { + activeTaskId = ledger.activeTasks[0].id; + } + } catch { + // TASKS.md doesn't exist or can't be read — no auto-tagging + } + } + + _taskLedgerCache = { activeTaskId, expiresAt: now + TASK_LEDGER_CACHE_TTL_MS }; + return activeTaskId; +} + +// Exported for testing +export { _taskLedgerCache, getActiveTaskIdForCapture as _getActiveTaskIdForCapture }; + // ============================================================================ // Auto-capture pipeline (fire-and-forget from agent_end hook) // ============================================================================ @@ -776,6 +874,7 @@ async function captureMessage( extractionConfig: import("./config.js").ExtractionConfig, logger: Logger, precomputedVector?: number[], + taskId?: string, // Layer 3: optional task ID for auto-tagging ): Promise<{ stored: boolean; semanticDeduped: boolean }> { // For assistant messages, rate importance first (before embedding) to skip early. // When extraction is disabled, rateImportance returns 0.5 (the fallback), so we @@ -835,6 +934,8 @@ async function captureMessage( extractionStatus: extractionConfig.enabled ? "pending" : "skipped", agentId, sessionKey, + // Layer 3: auto-tag with active task ID when available + ...(taskId ? { taskId } : {}), }); return { stored: true, semanticDeduped: false }; } @@ -851,6 +952,7 @@ async function runAutoCapture( embeddings: import("./embeddings.js").Embeddings, extractionConfig: import("./config.js").ExtractionConfig, logger: Logger, + workspaceDir?: string, // Layer 3: workspace dir for task auto-tagging ): Promise { try { const t0 = performance.now(); @@ -890,6 +992,9 @@ async function runAutoCapture( const vectors = allTexts.length > 0 ? await embeddings.embedBatch(allTexts) : []; const tEmbed = performance.now(); + // Layer 3: Detect active task for auto-tagging + const activeTaskId = await getActiveTaskIdForCapture(workspaceDir, logger); + // Process each with pre-computed vector for (let i = 0; i < allMeta.length; i++) { try { @@ -906,6 +1011,7 @@ async function runAutoCapture( extractionConfig, logger, vectors[i], + activeTaskId, // Layer 3: auto-tag with active task ID ); if (result.stored) stored++; if (result.semanticDeduped) semanticDeduped++; diff --git a/extensions/memory-neo4j/neo4j-client.ts b/extensions/memory-neo4j/neo4j-client.ts index c11818b0131..e6a0699776c 100644 --- a/extensions/memory-neo4j/neo4j-client.ts +++ b/extensions/memory-neo4j/neo4j-client.ts @@ -237,6 +237,8 @@ export class Neo4jMemoryClient { const session = this.driver!.session(); try { const now = new Date().toISOString(); + // Layer 3: taskId is optional — only set on the node when provided + const taskIdClause = input.taskId ? ", taskId: $taskId" : ""; const result = await session.run( `CREATE (m:Memory { id: $id, text: $text, embedding: $embedding, @@ -245,12 +247,13 @@ export class Neo4jMemoryClient { agentId: $agentId, sessionKey: $sessionKey, createdAt: $createdAt, updatedAt: $updatedAt, retrievalCount: $retrievalCount, lastRetrievedAt: $lastRetrievedAt, - extractionRetries: $extractionRetries + extractionRetries: $extractionRetries${taskIdClause} }) RETURN m.id AS id`, { ...input, sessionKey: input.sessionKey ?? null, + taskId: input.taskId ?? null, createdAt: now, updatedAt: now, retrievalCount: 0, @@ -444,6 +447,7 @@ export class Neo4jMemoryClient { WHERE score >= $minScore ${agentFilter} RETURN node.id AS id, node.text AS text, node.category AS category, node.importance AS importance, node.createdAt AS createdAt, + node.taskId AS taskId, score AS similarity ORDER BY score DESC`, { @@ -461,6 +465,7 @@ export class Neo4jMemoryClient { importance: r.get("importance") as number, createdAt: String(r.get("createdAt") ?? ""), score: r.get("similarity") as number, + taskId: (r.get("taskId") as string) ?? undefined, })); } finally { await session.close(); @@ -495,6 +500,7 @@ export class Neo4jMemoryClient { WHERE true ${agentFilter} RETURN node.id AS id, node.text AS text, node.category AS category, node.importance AS importance, node.createdAt AS createdAt, + node.taskId AS taskId, score AS bm25Score ORDER BY score DESC LIMIT $limit`, @@ -513,6 +519,7 @@ export class Neo4jMemoryClient { importance: r.get("importance") as number, createdAt: String(r.get("createdAt") ?? ""), rawScore: r.get("bm25Score") as number, + taskId: (r.get("taskId") as string) ?? undefined, })); if (records.length === 0) { @@ -585,6 +592,7 @@ export class Neo4jMemoryClient { WITH entity, collect({ id: m.id, text: m.text, category: m.category, importance: m.importance, createdAt: m.createdAt, + taskId: m.taskId, score: coalesce(rm.confidence, 1.0) }) AS directResults @@ -596,6 +604,7 @@ export class Neo4jMemoryClient { WITH directResults, collect({ id: m2.id, text: m2.text, category: m2.category, importance: m2.importance, createdAt: m2.createdAt, + taskId: m2.taskId, score: reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm2.confidence, 1.0) }) AS hopResults @@ -604,6 +613,7 @@ export class Neo4jMemoryClient { WITH row WHERE row.id IS NOT NULL RETURN row.id AS id, row.text AS text, row.category AS category, row.importance AS importance, row.createdAt AS createdAt, + row.taskId AS taskId, max(row.score) AS graphScore`, { query: escaped, firingThreshold, ...(agentId ? { agentId } : {}) }, ); @@ -625,6 +635,7 @@ export class Neo4jMemoryClient { importance: record.get("importance") as number, createdAt: String(record.get("createdAt") ?? ""), score, + taskId: (record.get("taskId") as string) ?? undefined, }); } } @@ -725,6 +736,62 @@ export class Neo4jMemoryClient { }); } + // -------------------------------------------------------------------------- + // Layer 3: Task Metadata Operations + // -------------------------------------------------------------------------- + + /** + * Find memories linked to a specific task ID. + * Used by recall filter and sleep cycle to identify task-related memories. + */ + async findMemoriesByTaskId( + taskId: string, + agentId?: string, + ): Promise> { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const agentFilter = agentId ? "AND m.agentId = $agentId" : ""; + const result = await session.run( + `MATCH (m:Memory) + WHERE m.taskId = $taskId ${agentFilter} + RETURN m.id AS id, m.text AS text, m.category AS category, m.importance AS importance + ORDER BY m.createdAt ASC`, + { taskId, ...(agentId ? { agentId } : {}) }, + ); + return result.records.map((r) => ({ + id: r.get("id") as string, + text: r.get("text") as string, + category: r.get("category") as string, + importance: r.get("importance") as number, + })); + } finally { + await session.close(); + } + } + + /** + * Bulk-clear taskId from memories (e.g., when the task-memory link is no longer needed). + * Sets taskId to null rather than deleting the memory. + */ + async clearTaskIdFromMemories(taskId: string, agentId?: string): Promise { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const agentFilter = agentId ? "AND m.agentId = $agentId" : ""; + const result = await session.run( + `MATCH (m:Memory) + WHERE m.taskId = $taskId ${agentFilter} + SET m.taskId = null + RETURN count(m) AS cleared`, + { taskId, ...(agentId ? { agentId } : {}) }, + ); + return (result.records[0]?.get("cleared") as number) ?? 0; + } finally { + await session.close(); + } + } + // -------------------------------------------------------------------------- // Entity & Relationship Operations // -------------------------------------------------------------------------- @@ -2089,6 +2156,55 @@ export class Neo4jMemoryClient { } } + /** + * Search memories by keywords using the fulltext (BM25) index. + * Returns memories whose text matches any of the given keywords. + * Used by the sleep cycle task-memory cleanup phase to find memories + * related to completed tasks. + */ + async searchMemoriesByKeywords( + keywords: string[], + limit: number = 50, + agentId?: string, + ): Promise> { + if (keywords.length === 0) { + return []; + } + + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + // Build a Lucene OR query from the keywords + const escaped = keywords.map((k) => escapeLucene(k.trim())).filter((k) => k.length > 0); + if (escaped.length === 0) { + return []; + } + const query = escaped.join(" OR "); + const agentFilter = agentId ? "AND node.agentId = $agentId" : ""; + const result = await session.run( + `CALL db.index.fulltext.queryNodes('memory_fulltext_index', $query) + YIELD node, score + WHERE true ${agentFilter} + RETURN node.id AS id, node.text AS text, node.category AS category + ORDER BY score DESC + LIMIT $limit`, + { + query, + limit: neo4j.int(Math.floor(limit)), + ...(agentId ? { agentId } : {}), + }, + ); + + return result.records.map((r) => ({ + id: r.get("id") as string, + text: r.get("text") as string, + category: r.get("category") as string, + })); + } finally { + await session.close(); + } + } + /** * Reconcile mentionCount for all entities by counting actual MENTIONS relationships. * Fixes entities with NULL or stale mentionCount values (e.g., entities created diff --git a/extensions/memory-neo4j/schema.ts b/extensions/memory-neo4j/schema.ts index f60fbade61f..aee9bdb024c 100644 --- a/extensions/memory-neo4j/schema.ts +++ b/extensions/memory-neo4j/schema.ts @@ -42,6 +42,7 @@ export type MemoryNode = { sessionKey?: string; retrievalCount: number; lastRetrievedAt?: string; + taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001") }; export type EntityNode = { @@ -103,6 +104,7 @@ export type SearchSignalResult = { importance: number; createdAt: string; score: number; + taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001") }; export type SignalAttribution = { @@ -117,6 +119,7 @@ export type HybridSearchResult = { importance: number; createdAt: string; score: number; + taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001") signals?: { vector: SignalAttribution; bm25: SignalAttribution; @@ -138,6 +141,7 @@ export type StoreMemoryInput = { extractionStatus: ExtractionStatus; agentId: string; sessionKey?: string; + taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001") }; export type MergeEntityInput = { diff --git a/extensions/memory-neo4j/search.ts b/extensions/memory-neo4j/search.ts index 7aa6c145d73..0ee40a6dba9 100644 --- a/extensions/memory-neo4j/search.ts +++ b/extensions/memory-neo4j/search.ts @@ -112,6 +112,7 @@ type FusedCandidate = { importance: number; createdAt: string; rrfScore: number; + taskId?: string; signals: { vector: SignalAttribution; bm25: SignalAttribution; @@ -151,7 +152,7 @@ export function fuseWithConfidenceRRF( // Collect all unique candidate IDs with their metadata const candidateMetadata = new Map< string, - { text: string; category: string; importance: number; createdAt: string } + { text: string; category: string; importance: number; createdAt: string; taskId?: string } >(); for (const signal of signals) { @@ -162,6 +163,7 @@ export function fuseWithConfidenceRRF( category: entry.category, importance: entry.importance, createdAt: entry.createdAt, + taskId: entry.taskId, }); } } @@ -196,6 +198,7 @@ export function fuseWithConfidenceRRF( importance: meta.importance, createdAt: meta.createdAt, rrfScore, + taskId: meta.taskId, signals, }); } @@ -288,6 +291,7 @@ export async function hybridSearch( importance: r.importance, createdAt: r.createdAt, score: Math.min(1, r.rrfScore * normalizer), // Normalize to 0-1 + taskId: r.taskId, signals: r.signals, })); diff --git a/extensions/memory-neo4j/sleep-cycle.task-memory.test.ts b/extensions/memory-neo4j/sleep-cycle.task-memory.test.ts new file mode 100644 index 00000000000..0fe4533fafd --- /dev/null +++ b/extensions/memory-neo4j/sleep-cycle.task-memory.test.ts @@ -0,0 +1,234 @@ +/** + * Tests for Phase 7: Task-Memory Cleanup in the sleep cycle. + * + * Tests the LLM classification function and integration with the sleep cycle. + */ + +import { describe, it, expect, vi, beforeEach } from "vitest"; +import type { ExtractionConfig } from "./config.js"; +import { classifyTaskMemory } from "./sleep-cycle.js"; + +// -------------------------------------------------------------------------- +// Mock the LLM client so we don't make real API calls +// -------------------------------------------------------------------------- +vi.mock("./llm-client.js", () => ({ + callOpenRouter: vi.fn(), + callOpenRouterStream: vi.fn(), + isTransientError: vi.fn(() => false), +})); + +// Import the mocked function for controlling behavior per test +import { callOpenRouter } from "./llm-client.js"; +const mockCallOpenRouter = vi.mocked(callOpenRouter); + +// -------------------------------------------------------------------------- +// Helpers +// -------------------------------------------------------------------------- + +const baseConfig: ExtractionConfig = { + enabled: true, + apiKey: "test-key", + model: "test-model", + baseUrl: "http://localhost:8080", + temperature: 0, + maxRetries: 0, +}; + +const disabledConfig: ExtractionConfig = { + ...baseConfig, + enabled: false, +}; + +// -------------------------------------------------------------------------- +// classifyTaskMemory() +// -------------------------------------------------------------------------- + +describe("classifyTaskMemory", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("returns 'noise' for task-specific progress memory", async () => { + mockCallOpenRouter.mockResolvedValueOnce( + JSON.stringify({ + classification: "noise", + reason: "This is task-specific progress tracking", + }), + ); + + const result = await classifyTaskMemory( + "Currently working on TASK-003, step 2: fixing the column alignment in the LinkedIn dashboard", + "Fix LinkedIn Dashboard tab", + baseConfig, + ); + + expect(result).toBe("noise"); + expect(mockCallOpenRouter).toHaveBeenCalledOnce(); + }); + + it("returns 'lasting' for decision/fact memory", async () => { + mockCallOpenRouter.mockResolvedValueOnce( + JSON.stringify({ + classification: "lasting", + reason: "Contains a reusable technical decision", + }), + ); + + const result = await classifyTaskMemory( + "ReActor face swap produces better results than Replicate for video face replacement", + "Implement face swap pipeline", + baseConfig, + ); + + expect(result).toBe("lasting"); + expect(mockCallOpenRouter).toHaveBeenCalledOnce(); + }); + + it("returns 'lasting' when LLM returns null (conservative)", async () => { + mockCallOpenRouter.mockResolvedValueOnce(null); + + const result = await classifyTaskMemory("Some ambiguous memory", "Some task", baseConfig); + + expect(result).toBe("lasting"); + }); + + it("returns 'lasting' when LLM throws (conservative)", async () => { + mockCallOpenRouter.mockRejectedValueOnce(new Error("network error")); + + const result = await classifyTaskMemory("Some memory", "Some task", baseConfig); + + expect(result).toBe("lasting"); + }); + + it("returns 'lasting' when LLM returns malformed JSON", async () => { + mockCallOpenRouter.mockResolvedValueOnce("not json at all"); + + const result = await classifyTaskMemory("Some memory", "Some task", baseConfig); + + expect(result).toBe("lasting"); + }); + + it("returns 'lasting' when LLM returns unexpected classification", async () => { + mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "unknown_value" })); + + const result = await classifyTaskMemory("Some memory", "Some task", baseConfig); + + expect(result).toBe("lasting"); + }); + + it("returns 'lasting' when config is disabled", async () => { + const result = await classifyTaskMemory("Task progress memory", "Some task", disabledConfig); + + expect(result).toBe("lasting"); + expect(mockCallOpenRouter).not.toHaveBeenCalled(); + }); + + it("passes task title in system prompt", async () => { + mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "lasting" })); + + await classifyTaskMemory("Memory text here", "Fix LinkedIn Dashboard tab", baseConfig); + + expect(mockCallOpenRouter).toHaveBeenCalledOnce(); + const callArgs = mockCallOpenRouter.mock.calls[0]; + const messages = callArgs[1] as Array<{ role: string; content: string }>; + expect(messages[0].content).toContain("Fix LinkedIn Dashboard tab"); + }); + + it("passes memory text as user message", async () => { + mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "noise" })); + + await classifyTaskMemory( + "Debugging step: checked column B3 alignment", + "Fix Dashboard", + baseConfig, + ); + + const callArgs = mockCallOpenRouter.mock.calls[0]; + const messages = callArgs[1] as Array<{ role: string; content: string }>; + expect(messages[1].role).toBe("user"); + expect(messages[1].content).toBe("Debugging step: checked column B3 alignment"); + }); + + it("passes abort signal to LLM call", async () => { + const controller = new AbortController(); + mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "lasting" })); + + await classifyTaskMemory("Memory text", "Task title", baseConfig, controller.signal); + + const callArgs = mockCallOpenRouter.mock.calls[0]; + expect(callArgs[2]).toBe(controller.signal); + }); +}); + +// -------------------------------------------------------------------------- +// Classification examples — verify the prompt produces expected behavior +// These test that noise vs lasting classification is passed through correctly +// -------------------------------------------------------------------------- + +describe("classifyTaskMemory classification examples", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + const noiseExamples = [ + { + memory: "Currently working on TASK-003, step 2: fixing the column alignment", + task: "Fix LinkedIn Dashboard tab", + reason: "task progress update", + }, + { + memory: "ACTIVE TASK: TASK-004 — Fix browser port collision. Step: testing port 18807", + task: "Fix browser port collision", + reason: "active task checkpoint", + }, + { + memory: "Debugging the flight search: Scoot API returned 500, retrying with different dates", + task: "Book KL↔Singapore flights for India trip", + reason: "debugging steps", + }, + ]; + + for (const example of noiseExamples) { + it(`classifies "${example.reason}" as noise`, async () => { + mockCallOpenRouter.mockResolvedValueOnce( + JSON.stringify({ classification: "noise", reason: example.reason }), + ); + + const result = await classifyTaskMemory(example.memory, example.task, baseConfig); + + expect(result).toBe("noise"); + }); + } + + const lastingExamples = [ + { + memory: + "Port map: 18792 (chrome), 18800 (chetan), 18805 (linkedin), 18806 (tsukhani), 18807 (openclaw)", + task: "Fix browser port collision", + reason: "useful reference configuration", + }, + { + memory: + "Dashboard layout: B3:B9 = Total, Accepted, Pending, Not Connected, Follow-ups Sent, Acceptance Rate%, Date", + task: "Fix LinkedIn Dashboard tab", + reason: "lasting documentation of layout", + }, + { + memory: "ReActor face swap produces better results than Replicate for video face replacement", + task: "Implement face swap pipeline", + reason: "tool comparison decision", + }, + ]; + + for (const example of lastingExamples) { + it(`classifies "${example.reason}" as lasting`, async () => { + mockCallOpenRouter.mockResolvedValueOnce( + JSON.stringify({ classification: "lasting", reason: example.reason }), + ); + + const result = await classifyTaskMemory(example.memory, example.task, baseConfig); + + expect(result).toBe("lasting"); + }); + } +}); diff --git a/extensions/memory-neo4j/sleep-cycle.ts b/extensions/memory-neo4j/sleep-cycle.ts index e8e54cda789..eb1516524e6 100644 --- a/extensions/memory-neo4j/sleep-cycle.ts +++ b/extensions/memory-neo4j/sleep-cycle.ts @@ -12,6 +12,7 @@ * 5. NOISE CLEANUP - Remove dangerous pattern memories * 5b. CREDENTIAL SCAN - Remove memories containing leaked credentials * 6. TASK LEDGER - Archive stale tasks in TASKS.md + * 7. TASK-MEMORY CLEANUP - Remove task-noise memories for completed tasks * * Research basis: * - ACT-R memory model for retrieval-based importance @@ -19,6 +20,8 @@ * - MemGPT/Letta for tiered memory architecture */ +import fs from "node:fs/promises"; +import path from "node:path"; import type { ExtractionConfig } from "./config.js"; import type { Embeddings } from "./embeddings.js"; import type { Neo4jMemoryClient } from "./neo4j-client.js"; @@ -29,8 +32,13 @@ import { resolveConflict, runBackgroundExtraction, } from "./extractor.js"; +import { callOpenRouter } from "./llm-client.js"; import { makePairKey } from "./schema.js"; -import { reviewAndArchiveStaleTasks, type StaleTaskResult } from "./task-ledger.js"; +import { + parseTaskLedger, + reviewAndArchiveStaleTasks, + type StaleTaskResult, +} from "./task-ledger.js"; /** * Sleep Cycle Result - aggregated stats from all phases. @@ -92,6 +100,12 @@ export type SleepCycleResult = { archivedCount: number; archivedIds: string[]; }; + // Phase 7: Task-Memory Cleanup + taskMemoryCleanup: { + tasksChecked: number; + memoriesEvaluated: number; + memoriesRemoved: number; + }; // Overall durationMs: number; aborted: boolean; @@ -133,6 +147,10 @@ export type SleepCycleOptions = { workspaceDir?: string; // Workspace dir for TASKS.md (default: resolved from env) staleTaskMaxAgeMs?: number; // Max age before task is stale (default: 24h) + // Phase 7: Task-Memory Cleanup + skipTaskMemoryCleanup?: boolean; // Skip task-memory cleanup (default: false) + taskMemoryMaxAgeDays?: number; // Only check tasks completed within this many days (default: 7) + // Progress callback onPhaseStart?: ( phase: @@ -146,7 +164,8 @@ export type SleepCycleOptions = { | "cleanup" | "noiseCleanup" | "credentialScan" - | "taskLedger", + | "taskLedger" + | "taskMemoryCleanup", ) => void; onProgress?: (phase: string, message: string) => void; }; @@ -213,12 +232,68 @@ export function detectCredential(text: string): string | null { return null; } +// ============================================================================ +// Task-Memory Classification +// ============================================================================ + +/** + * Use LLM to classify whether a memory is "lasting" (valuable independent + * of the completed task) or "noise" (only useful while the task was active). + * + * Conservative: returns "lasting" on any failure to avoid deleting valuable memories. + */ +export async function classifyTaskMemory( + memoryText: string, + taskTitle: string, + config: ExtractionConfig, + abortSignal?: AbortSignal, +): Promise<"lasting" | "noise"> { + if (!config.enabled) { + return "lasting"; + } + + try { + const content = await callOpenRouter( + config, + [ + { + role: "system", + content: `A task titled "${taskTitle}" has been completed. The following memory was created during this task. + +Classify this memory: +- "lasting" if it contains a decision, preference, fact, or knowledge that is valuable INDEPENDENT of the task +- "noise" if it contains task progress, debugging steps, intermediate state, or context that is only useful while the task was active + +When in doubt, choose "lasting". It is better to keep some noise than to delete valuable knowledge. + +Return JSON: {"classification": "lasting"|"noise", "reason": "brief explanation"}`, + }, + { role: "user", content: memoryText }, + ], + abortSignal, + ); + + if (!content) { + return "lasting"; + } + + const parsed = JSON.parse(content) as { classification?: string }; + if (parsed.classification === "noise") { + return "noise"; + } + return "lasting"; + } catch { + // On any failure, keep the memory (conservative) + return "lasting"; + } +} + // ============================================================================ // Sleep Cycle Implementation // ============================================================================ /** - * Run the full sleep cycle - six phases of memory consolidation. + * Run the full sleep cycle - seven phases of memory consolidation. */ export async function runSleepCycle( db: Neo4jMemoryClient, @@ -246,6 +321,8 @@ export async function runSleepCycle( singleUseTagMinAgeDays = 14, workspaceDir, staleTaskMaxAgeMs, + skipTaskMemoryCleanup = false, + taskMemoryMaxAgeDays = 7, onPhaseStart, onProgress, } = options; @@ -261,6 +338,7 @@ export async function runSleepCycle( cleanup: { entitiesRemoved: 0, tagsRemoved: 0, singleUseTagsRemoved: 0 }, credentialScan: { memoriesScanned: 0, credentialsFound: 0, memoriesRemoved: 0 }, taskLedger: { staleCount: 0, archivedCount: 0, archivedIds: [] }, + taskMemoryCleanup: { tasksChecked: 0, memoriesEvaluated: 0, memoriesRemoved: 0 }, durationMs: 0, aborted: false, }; @@ -929,6 +1007,155 @@ export async function runSleepCycle( logger.info("memory-neo4j: [sleep] Phase 6: Task Ledger Cleanup — SKIPPED (no workspace dir)"); } + // -------------------------------------------------------------------------- + // Phase 7: Task-Memory Cleanup + // Cross-references completed tasks (from TASKS.md) with stored memories. + // For each completed task (within the last N days), searches for memories + // mentioning that task and uses LLM to classify them as "lasting" (keep) + // vs "noise" (delete). This prevents stale task-specific memories from + // being recalled after tasks are done. + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted && workspaceDir && config.enabled && !skipTaskMemoryCleanup) { + onPhaseStart?.("taskMemoryCleanup"); + logger.info("memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup"); + + try { + const tasksPath = path.join(workspaceDir, "TASKS.md"); + let tasksContent: string | null = null; + try { + tasksContent = await fs.readFile(tasksPath, "utf-8"); + } catch { + // TASKS.md doesn't exist — skip + } + + if (tasksContent) { + const ledger = parseTaskLedger(tasksContent); + const now = new Date(); + const maxAgeMs = taskMemoryMaxAgeDays * 24 * 60 * 60 * 1000; + + // Filter to recently completed tasks (within maxAgeDays) + const recentCompleted = ledger.completedTasks.filter((task) => { + // Use the "Completed" field, "Updated" field, or "Started" field as date source + const dateStr = + task.details?.match(/Completed:\s*(\S+)/)?.[1] || task.updated || task.started; + if (!dateStr) { + return false; + } + // Try to parse date — accept formats like "2026-02-16", "2026-02-16 09:15" + const cleaned = dateStr + .trim() + .replace(/\s+[A-Z]{2,5}$/, "") + .replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})/, "$1T$2"); + const date = new Date(cleaned); + if (Number.isNaN(date.getTime())) { + return false; + } + return now.getTime() - date.getTime() <= maxAgeMs; + }); + + result.taskMemoryCleanup.tasksChecked = recentCompleted.length; + + if (recentCompleted.length > 0) { + onProgress?.( + "taskMemoryCleanup", + `Found ${recentCompleted.length} recently completed tasks to check`, + ); + + // Collect all memories to evaluate across all tasks (dedup by id) + const memoriesToEvaluate = new Map< + string, + { id: string; text: string; category: string; taskTitle: string } + >(); + + for (const task of recentCompleted) { + if (abortSignal?.aborted) break; + + // Build keywords from task ID and title words + const keywords = [task.id]; + const titleWords = task.title + .split(/\s+/) + .filter((w) => w.length > 3) + .map((w) => w.replace(/[^a-zA-Z0-9-]/g, "")) + .filter((w) => w.length > 3); + keywords.push(...titleWords); + + const matches = await db.searchMemoriesByKeywords(keywords, 50, agentId); + + for (const mem of matches) { + // Skip core memories — those are user-curated + if (mem.category === "core") continue; + if (!memoriesToEvaluate.has(mem.id)) { + memoriesToEvaluate.set(mem.id, { ...mem, taskTitle: task.title }); + } + } + } + + // Classify memories in parallel batches using LLM + const toEvaluate = [...memoriesToEvaluate.values()]; + result.taskMemoryCleanup.memoriesEvaluated = toEvaluate.length; + + if (toEvaluate.length > 0) { + onProgress?.("taskMemoryCleanup", `Evaluating ${toEvaluate.length} memories with LLM`); + } + + const toRemove: string[] = []; + + for (let i = 0; i < toEvaluate.length && !abortSignal?.aborted; i += llmConcurrency) { + const batch = toEvaluate.slice(i, i + llmConcurrency); + + const outcomes = await Promise.allSettled( + batch.map((mem) => classifyTaskMemory(mem.text, mem.taskTitle, config, abortSignal)), + ); + + for (let k = 0; k < outcomes.length; k++) { + const outcome = outcomes[k]; + const mem = batch[k]; + + if (outcome.status === "fulfilled" && outcome.value === "noise") { + toRemove.push(mem.id); + onProgress?.( + "taskMemoryCleanup", + `Noise: "${mem.text.slice(0, 60)}..." (task: ${mem.taskTitle})`, + ); + } else if (outcome.status === "fulfilled" && outcome.value === "lasting") { + onProgress?.("taskMemoryCleanup", `Lasting: "${mem.text.slice(0, 60)}..."`); + } + // On failure, keep the memory (conservative) + } + } + + // Remove noise memories + if (toRemove.length > 0 && !abortSignal?.aborted) { + for (const id of toRemove) { + if (abortSignal?.aborted) break; + await db.invalidateMemory(id); + } + result.taskMemoryCleanup.memoriesRemoved = toRemove.length; + onProgress?.("taskMemoryCleanup", `Invalidated ${toRemove.length} task-noise memories`); + } + } else { + onProgress?.("taskMemoryCleanup", "No recently completed tasks found"); + } + } else { + onProgress?.("taskMemoryCleanup", "TASKS.md not found — skipped"); + } + + logger.info( + `memory-neo4j: [sleep] Phase 7 complete — ${result.taskMemoryCleanup.tasksChecked} tasks checked, ${result.taskMemoryCleanup.memoriesEvaluated} memories evaluated, ${result.taskMemoryCleanup.memoriesRemoved} removed`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 7 error: ${String(err)}`); + } + } else if (!workspaceDir) { + logger.info("memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup — SKIPPED (no workspace dir)"); + } else if (!config.enabled) { + logger.info( + "memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup — SKIPPED (extraction not enabled)", + ); + } else if (skipTaskMemoryCleanup) { + logger.info("memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup — SKIPPED (disabled)"); + } + result.durationMs = Date.now() - startTime; result.aborted = abortSignal?.aborted ?? false; diff --git a/extensions/memory-neo4j/task-filter.test.ts b/extensions/memory-neo4j/task-filter.test.ts new file mode 100644 index 00000000000..ffcb3b6b1a6 --- /dev/null +++ b/extensions/memory-neo4j/task-filter.test.ts @@ -0,0 +1,426 @@ +/** + * Tests for task-filter.ts — Task-aware recall filtering (Layer 1). + * + * Verifies that memories related to completed tasks are correctly identified + * and filtered, while unrelated or loosely-matching memories are preserved. + */ + +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + buildCompletedTaskInfo, + clearTaskFilterCache, + extractSignificantKeywords, + isRelatedToCompletedTask, + loadCompletedTaskKeywords, + type CompletedTaskInfo, +} from "./task-filter.js"; + +// ============================================================================ +// Sample TASKS.md content +// ============================================================================ + +const SAMPLE_TASKS_MD = `# Active Tasks + +_No active tasks_ + +# Completed + +## TASK-002: Book KL↔Singapore flights for India trip +- **Completed:** 2026-02-16 +- **Details:** Tarun booked manually — Scoot TR453 (Feb 23 KUL→SIN) and AirAsia AK720 (Mar 3 SIN→KUL) + +## TASK-003: Fix LinkedIn Dashboard tab +- **Completed:** 2026-02-16 +- **Details:** Fixed misaligned stats, wrong industry numbers, stale data. Added Not Connected row, consolidated industries into 10 groups, cleared residual data. + +## TASK-004: Fix browser port collision +- **Completed:** 2026-02-16 +- **Details:** Added explicit openclaw profile on port 18807 (was colliding with chetan on 18800) +`; + +// ============================================================================ +// extractSignificantKeywords() +// ============================================================================ + +describe("extractSignificantKeywords", () => { + it("extracts words with length >= 4", () => { + const keywords = extractSignificantKeywords("Fix the big dashboard bug"); + expect(keywords).toContain("dashboard"); + expect(keywords).not.toContain("fix"); // too short + expect(keywords).not.toContain("the"); // too short + expect(keywords).not.toContain("big"); // too short + expect(keywords).not.toContain("bug"); // too short + }); + + it("removes stop words", () => { + const keywords = extractSignificantKeywords("should have been using this work"); + // All of these are stop words + expect(keywords).toHaveLength(0); + }); + + it("lowercases all keywords", () => { + const keywords = extractSignificantKeywords("LinkedIn Dashboard Singapore"); + expect(keywords).toContain("linkedin"); + expect(keywords).toContain("dashboard"); + expect(keywords).toContain("singapore"); + }); + + it("deduplicates keywords", () => { + const keywords = extractSignificantKeywords("dashboard dashboard dashboard"); + expect(keywords).toEqual(["dashboard"]); + }); + + it("returns empty for empty/null input", () => { + expect(extractSignificantKeywords("")).toEqual([]); + expect(extractSignificantKeywords(null as unknown as string)).toEqual([]); + }); + + it("handles special characters", () => { + const keywords = extractSignificantKeywords("port 18807 (colliding with chetan)"); + expect(keywords).toContain("port"); + expect(keywords).toContain("18807"); + expect(keywords).toContain("colliding"); + expect(keywords).toContain("chetan"); + }); +}); + +// ============================================================================ +// buildCompletedTaskInfo() +// ============================================================================ + +describe("buildCompletedTaskInfo", () => { + it("extracts keywords from title and details", () => { + const info = buildCompletedTaskInfo({ + id: "TASK-003", + title: "Fix LinkedIn Dashboard tab", + status: "done", + details: + "Fixed misaligned stats, wrong industry numbers, stale data. Added Not Connected row, consolidated industries into 10 groups, cleared residual data.", + rawLines: [ + "## TASK-003: Fix LinkedIn Dashboard tab", + "- **Completed:** 2026-02-16", + "- **Details:** Fixed misaligned stats, wrong industry numbers, stale data.", + ], + isCompleted: true, + }); + + expect(info.id).toBe("TASK-003"); + expect(info.keywords).toContain("linkedin"); + expect(info.keywords).toContain("dashboard"); + expect(info.keywords).toContain("misaligned"); + expect(info.keywords).toContain("stats"); + expect(info.keywords).toContain("industry"); + }); + + it("includes currentStep keywords", () => { + const info = buildCompletedTaskInfo({ + id: "TASK-010", + title: "Deploy staging server", + status: "done", + currentStep: "Verifying nginx configuration", + rawLines: ["## TASK-010: Deploy staging server"], + isCompleted: true, + }); + + expect(info.keywords).toContain("deploy"); + expect(info.keywords).toContain("staging"); + expect(info.keywords).toContain("server"); + expect(info.keywords).toContain("nginx"); + expect(info.keywords).toContain("configuration"); + }); + + it("handles task with minimal fields", () => { + const info = buildCompletedTaskInfo({ + id: "TASK-001", + title: "Quick fix", + status: "done", + rawLines: ["## TASK-001: Quick fix"], + isCompleted: true, + }); + + expect(info.id).toBe("TASK-001"); + expect(info.keywords).toContain("quick"); + // "fix" is only 3 chars, should be excluded + expect(info.keywords).not.toContain("fix"); + }); +}); + +// ============================================================================ +// isRelatedToCompletedTask() +// ============================================================================ + +describe("isRelatedToCompletedTask", () => { + const completedTasks: CompletedTaskInfo[] = [ + { + id: "TASK-002", + keywords: [ + "book", + "singapore", + "flights", + "india", + "trip", + "scoot", + "tr453", + "airasia", + "ak720", + ], + }, + { + id: "TASK-003", + keywords: [ + "linkedin", + "dashboard", + "misaligned", + "stats", + "industry", + "numbers", + "stale", + "connected", + "consolidated", + "industries", + "groups", + "cleared", + "residual", + "data", + ], + }, + { + id: "TASK-004", + keywords: [ + "browser", + "port", + "collision", + "openclaw", + "profile", + "18807", + "colliding", + "chetan", + "18800", + ], + }, + ]; + + // --- Task ID matching --- + + it("matches memory containing task ID", () => { + expect( + isRelatedToCompletedTask("TASK-002 flights have been booked successfully", completedTasks), + ).toBe(true); + }); + + it("matches task ID case-insensitively", () => { + expect( + isRelatedToCompletedTask("Completed task-003 — dashboard is fixed", completedTasks), + ).toBe(true); + }); + + // --- Keyword matching --- + + it("matches memory with 2+ keywords from a completed task", () => { + expect( + isRelatedToCompletedTask( + "LinkedIn dashboard stats are now showing correctly", + completedTasks, + ), + ).toBe(true); + }); + + it("matches memory with keywords from flight task", () => { + expect( + isRelatedToCompletedTask("Booked Singapore flights for the India trip", completedTasks), + ).toBe(true); + }); + + // --- False positive prevention --- + + it("does NOT match memory with only 1 keyword overlap", () => { + expect(isRelatedToCompletedTask("Singapore has great food markets", completedTasks)).toBe( + false, + ); + }); + + it("does NOT match memory about LinkedIn that is unrelated to dashboard fix", () => { + // "linkedin" alone is only 1 keyword match — should NOT be filtered + expect( + isRelatedToCompletedTask( + "LinkedIn connection request from John Smith accepted", + completedTasks, + ), + ).toBe(false); + }); + + it("does NOT match memory about browser that is unrelated to port fix", () => { + // "browser" alone is only 1 keyword + expect( + isRelatedToCompletedTask("Browser extension for Flux image generation", completedTasks), + ).toBe(false); + }); + + it("does NOT match completely unrelated memory", () => { + expect(isRelatedToCompletedTask("Tarun's birthday is August 23, 1974", completedTasks)).toBe( + false, + ); + }); + + // --- Edge cases --- + + it("returns false for empty memory text", () => { + expect(isRelatedToCompletedTask("", completedTasks)).toBe(false); + }); + + it("returns false for empty completed tasks array", () => { + expect(isRelatedToCompletedTask("TASK-002 flights booked", [])).toBe(false); + }); + + it("handles task with no keywords (only ID matching works)", () => { + const tasksNoKeywords: CompletedTaskInfo[] = [{ id: "TASK-099", keywords: [] }]; + expect(isRelatedToCompletedTask("Completed TASK-099", tasksNoKeywords)).toBe(true); + expect(isRelatedToCompletedTask("Some random memory", tasksNoKeywords)).toBe(false); + }); +}); + +// ============================================================================ +// loadCompletedTaskKeywords() +// ============================================================================ + +describe("loadCompletedTaskKeywords", () => { + let tmpDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-filter-test-")); + clearTaskFilterCache(); + }); + + afterEach(async () => { + clearTaskFilterCache(); + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it("parses completed tasks from TASKS.md", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD); + + const tasks = await loadCompletedTaskKeywords(tmpDir); + expect(tasks).toHaveLength(3); + expect(tasks.map((t) => t.id)).toEqual(["TASK-002", "TASK-003", "TASK-004"]); + }); + + it("extracts keywords from completed tasks", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD); + + const tasks = await loadCompletedTaskKeywords(tmpDir); + const flightTask = tasks.find((t) => t.id === "TASK-002"); + expect(flightTask).toBeDefined(); + expect(flightTask!.keywords).toContain("singapore"); + expect(flightTask!.keywords).toContain("flights"); + }); + + it("returns empty array when TASKS.md does not exist", async () => { + const tasks = await loadCompletedTaskKeywords(tmpDir); + expect(tasks).toEqual([]); + }); + + it("returns empty array for empty TASKS.md", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), ""); + + const tasks = await loadCompletedTaskKeywords(tmpDir); + expect(tasks).toEqual([]); + }); + + it("returns empty array for TASKS.md with no completed tasks", async () => { + const content = `# Active Tasks + +## TASK-001: Do something +- **Status:** in_progress +- **Details:** Working on it + +# Completed + +`; + await fs.writeFile(path.join(tmpDir, "TASKS.md"), content); + + const tasks = await loadCompletedTaskKeywords(tmpDir); + expect(tasks).toEqual([]); + }); + + it("handles malformed TASKS.md gracefully", async () => { + const content = `This is not a valid TASKS.md file +Just some random text +No headers or structure at all`; + await fs.writeFile(path.join(tmpDir, "TASKS.md"), content); + + const tasks = await loadCompletedTaskKeywords(tmpDir); + expect(tasks).toEqual([]); + }); + + // --- Cache behavior --- + + it("returns cached data within TTL", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD); + + const first = await loadCompletedTaskKeywords(tmpDir); + expect(first).toHaveLength(3); + + // Modify the file — should still return cached result + await fs.writeFile(path.join(tmpDir, "TASKS.md"), "# Active Tasks\n\n# Completed\n"); + + const second = await loadCompletedTaskKeywords(tmpDir); + expect(second).toHaveLength(3); // Still cached + expect(second).toBe(first); // Same reference (from cache) + }); + + it("refreshes after cache is cleared", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD); + + const first = await loadCompletedTaskKeywords(tmpDir); + expect(first).toHaveLength(3); + + // Modify file and clear cache + await fs.writeFile(path.join(tmpDir, "TASKS.md"), "# Active Tasks\n\n# Completed\n"); + clearTaskFilterCache(); + + const second = await loadCompletedTaskKeywords(tmpDir); + expect(second).toHaveLength(0); // Re-read from disk + }); +}); + +// ============================================================================ +// Integration: end-to-end filtering +// ============================================================================ + +describe("end-to-end recall filtering", () => { + let tmpDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-filter-e2e-")); + clearTaskFilterCache(); + }); + + afterEach(async () => { + clearTaskFilterCache(); + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it("filters memories related to completed tasks while keeping unrelated ones", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD); + + const completedTasks = await loadCompletedTaskKeywords(tmpDir); + + const memories = [ + { text: "TASK-002 flights have been booked — Scoot TR453 confirmed", keep: false }, + { text: "LinkedIn dashboard stats fixed — industry numbers corrected", keep: false }, + { text: "Browser port collision resolved — openclaw on 18807", keep: false }, + { text: "Tarun's birthday is August 23, 1974", keep: true }, + { text: "Singapore has great food markets", keep: true }, + { text: "LinkedIn connection from Jane Doe accepted", keep: true }, + { text: "Memory-neo4j sleep cycle runs at 3am", keep: true }, + ]; + + for (const m of memories) { + const isRelated = isRelatedToCompletedTask(m.text, completedTasks); + expect(isRelated).toBe(!m.keep); + } + }); +}); diff --git a/extensions/memory-neo4j/task-filter.ts b/extensions/memory-neo4j/task-filter.ts new file mode 100644 index 00000000000..45b9f572829 --- /dev/null +++ b/extensions/memory-neo4j/task-filter.ts @@ -0,0 +1,324 @@ +/** + * Task-aware recall filter (Layer 1). + * + * Filters out auto-recalled memories that relate to completed tasks, + * preventing stale task-state memories from being injected into agent context. + * + * Design principles: + * - Conservative: false positives (filtering useful memories) are worse than + * false negatives (letting some stale ones through). + * - Fast: runs on every message, targeting < 5ms with caching. + * - Graceful: missing/malformed TASKS.md is silently ignored. + */ + +import fs from "node:fs/promises"; +import path from "node:path"; +import { parseTaskLedger, type ParsedTask } from "./task-ledger.js"; + +// ============================================================================ +// Types +// ============================================================================ + +/** Extracted keyword info for a single completed task. */ +export type CompletedTaskInfo = { + /** Task ID (e.g. "TASK-002") */ + id: string; + /** Significant keywords extracted from the task title + details + currentStep */ + keywords: string[]; +}; + +// ============================================================================ +// Constants +// ============================================================================ + +/** Cache TTL in milliseconds — avoids re-reading TASKS.md on every message. */ +const CACHE_TTL_MS = 60_000; + +/** Minimum keyword length to be considered "significant". */ +const MIN_KEYWORD_LENGTH = 4; + +/** + * Common English stop words that should be excluded from keyword matching. + * Only words ≥ MIN_KEYWORD_LENGTH are included (shorter ones are filtered by length). + */ +const STOP_WORDS = new Set([ + "about", + "also", + "been", + "before", + "being", + "between", + "both", + "came", + "come", + "could", + "does", + "done", + "each", + "even", + "find", + "first", + "found", + "from", + "going", + "good", + "great", + "have", + "here", + "high", + "however", + "into", + "just", + "keep", + "know", + "last", + "like", + "long", + "look", + "made", + "make", + "many", + "more", + "most", + "much", + "must", + "need", + "next", + "only", + "other", + "over", + "part", + "said", + "same", + "should", + "show", + "since", + "some", + "still", + "such", + "take", + "than", + "that", + "their", + "them", + "then", + "there", + "these", + "they", + "this", + "through", + "time", + "under", + "used", + "using", + "very", + "want", + "were", + "what", + "when", + "where", + "which", + "while", + "will", + "with", + "without", + "work", + "would", + "your", + // Task-related generic words that shouldn't be matching keywords: + "task", + "tasks", + "active", + "completed", + "details", + "status", + "started", + "updated", + "blocked", +]); + +/** + * Minimum number of keyword matches required to consider a memory related + * to a completed task (when matching by keywords rather than task ID). + */ +const MIN_KEYWORD_MATCHES = 2; + +// ============================================================================ +// Cache +// ============================================================================ + +type CacheEntry = { + tasks: CompletedTaskInfo[]; + timestamp: number; +}; + +const cache = new Map(); + +/** Clear the cache (exposed for testing). */ +export function clearTaskFilterCache(): void { + cache.clear(); +} + +// ============================================================================ +// Keyword Extraction +// ============================================================================ + +/** + * Extract significant keywords from a text string. + * + * Filters out short words, stop words, and common noise to produce + * a set of meaningful terms that can identify task-specific content. + */ +export function extractSignificantKeywords(text: string): string[] { + if (!text) { + return []; + } + + const words = text + .toLowerCase() + // Replace non-alphanumeric chars (except hyphens in task IDs) with spaces + .replace(/[^a-z0-9\-]/g, " ") + .split(/\s+/) + .filter((w) => w.length >= MIN_KEYWORD_LENGTH && !STOP_WORDS.has(w)); + + // Deduplicate while preserving order + return [...new Set(words)]; +} + +/** + * Build a {@link CompletedTaskInfo} from a parsed completed task. + * + * Extracts keywords from the task's title, details, and current step. + */ +export function buildCompletedTaskInfo(task: ParsedTask): CompletedTaskInfo { + const parts: string[] = [task.title]; + if (task.details) { + parts.push(task.details); + } + if (task.currentStep) { + parts.push(task.currentStep); + } + + // Also extract from raw lines to capture fields the parser doesn't map + // (e.g. "- **Completed:** 2026-02-16") + for (const line of task.rawLines) { + const trimmed = line.trim(); + // Skip the header line (already have title) and empty lines + if (trimmed.startsWith("##") || trimmed === "") { + continue; + } + // Include field values from bullet lines + const fieldMatch = trimmed.match(/^-\s+\*\*.+?:\*\*\s*(.+)$/); + if (fieldMatch) { + parts.push(fieldMatch[1]); + } + } + + const keywords = extractSignificantKeywords(parts.join(" ")); + + return { + id: task.id, + keywords, + }; +} + +// ============================================================================ +// Core API +// ============================================================================ + +/** + * Load completed task info from TASKS.md in the given workspace directory. + * + * Results are cached per workspace dir with a 60-second TTL to avoid + * re-reading and re-parsing on every message. + * + * @param workspaceDir - Path to the workspace directory containing TASKS.md + * @returns Array of completed task info (empty if TASKS.md is missing or has no completed tasks) + */ +export async function loadCompletedTaskKeywords( + workspaceDir: string, +): Promise { + const now = Date.now(); + + // Check cache + const cached = cache.get(workspaceDir); + if (cached && now - cached.timestamp < CACHE_TTL_MS) { + return cached.tasks; + } + + // Read and parse TASKS.md + const tasksPath = path.join(workspaceDir, "TASKS.md"); + let content: string; + try { + content = await fs.readFile(tasksPath, "utf-8"); + } catch { + // File doesn't exist or isn't readable — cache empty result + cache.set(workspaceDir, { tasks: [], timestamp: now }); + return []; + } + + if (!content.trim()) { + cache.set(workspaceDir, { tasks: [], timestamp: now }); + return []; + } + + const ledger = parseTaskLedger(content); + const tasks = ledger.completedTasks.map(buildCompletedTaskInfo); + + // Cache the result + cache.set(workspaceDir, { tasks, timestamp: now }); + + return tasks; +} + +/** + * Check if a memory's text is related to a completed task. + * + * Uses two matching strategies: + * 1. **Task ID match** — if the memory text contains a completed task's ID + * (e.g. "TASK-002"), it's considered related. + * 2. **Keyword match** — if the memory text matches {@link MIN_KEYWORD_MATCHES} + * or more significant keywords from a completed task, it's considered related. + * + * The filter is intentionally conservative: a memory about "Flux 2" won't be + * filtered just because a completed task mentioned "Flux", unless the memory + * also matches additional task-specific keywords. + * + * @param memoryText - The text content of the recalled memory + * @param completedTasks - Completed task info from {@link loadCompletedTaskKeywords} + * @returns `true` if the memory appears related to a completed task + */ +export function isRelatedToCompletedTask( + memoryText: string, + completedTasks: CompletedTaskInfo[], +): boolean { + if (!memoryText || completedTasks.length === 0) { + return false; + } + + const lowerText = memoryText.toLowerCase(); + + for (const task of completedTasks) { + // Strategy 1: Direct task ID match (case-insensitive) + if (lowerText.includes(task.id.toLowerCase())) { + return true; + } + + // Strategy 2: Keyword overlap — require MIN_KEYWORD_MATCHES distinct keywords + if (task.keywords.length === 0) { + continue; + } + + let matchCount = 0; + for (const keyword of task.keywords) { + if (lowerText.includes(keyword)) { + matchCount++; + if (matchCount >= MIN_KEYWORD_MATCHES) { + return true; + } + } + } + } + + return false; +} diff --git a/extensions/memory-neo4j/task-metadata.test.ts b/extensions/memory-neo4j/task-metadata.test.ts new file mode 100644 index 00000000000..2286e24e078 --- /dev/null +++ b/extensions/memory-neo4j/task-metadata.test.ts @@ -0,0 +1,606 @@ +/** + * Tests for Layer 3: Task Metadata on memories. + * + * Tests that memories can be linked to specific tasks via taskId, + * enabling precise task-aware filtering at recall and cleanup time. + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import type { StoreMemoryInput } from "./schema.js"; +import { Neo4jMemoryClient } from "./neo4j-client.js"; +import { fuseWithConfidenceRRF } from "./search.js"; +import { parseTaskLedger } from "./task-ledger.js"; + +// ============================================================================ +// Test Helpers +// ============================================================================ + +function createMockSession() { + return { + run: vi.fn().mockResolvedValue({ records: [] }), + close: vi.fn().mockResolvedValue(undefined), + executeWrite: vi.fn( + async (work: (tx: { run: ReturnType }) => Promise) => { + const mockTx = { run: vi.fn().mockResolvedValue({ records: [] }) }; + return work(mockTx); + }, + ), + }; +} + +function createMockDriver() { + return { + session: vi.fn().mockReturnValue(createMockSession()), + close: vi.fn().mockResolvedValue(undefined), + }; +} + +function createMockLogger() { + return { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }; +} + +function createMockRecord(data: Record) { + return { + get: (key: string) => data[key], + keys: Object.keys(data), + }; +} + +// ============================================================================ +// Neo4jMemoryClient: storeMemory with taskId +// ============================================================================ + +describe("Task Metadata: storeMemory", () => { + let client: Neo4jMemoryClient; + let mockDriver: ReturnType; + let mockSession: ReturnType; + + beforeEach(() => { + const mockLogger = createMockLogger(); + mockDriver = createMockDriver(); + mockSession = createMockSession(); + mockDriver.session.mockReturnValue(mockSession); + + client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger); + (client as any).driver = mockDriver; + (client as any).indexesReady = true; + }); + + it("should store memory with taskId when provided", async () => { + mockSession.run.mockResolvedValue({ + records: [createMockRecord({ id: "mem-1" })], + }); + + const input: StoreMemoryInput = { + id: "mem-1", + text: "test memory with task", + embedding: [0.1, 0.2], + importance: 0.7, + category: "fact", + source: "user", + extractionStatus: "pending", + agentId: "agent-1", + taskId: "TASK-001", + }; + + await client.storeMemory(input); + + const runCall = mockSession.run.mock.calls[0]; + const cypher = runCall[0] as string; + const params = runCall[1] as Record; + + // Cypher should include taskId clause + expect(cypher).toContain("taskId"); + // Params should include the taskId value + expect(params.taskId).toBe("TASK-001"); + }); + + it("should store memory without taskId when not provided", async () => { + mockSession.run.mockResolvedValue({ + records: [createMockRecord({ id: "mem-2" })], + }); + + const input: StoreMemoryInput = { + id: "mem-2", + text: "test memory without task", + embedding: [0.1, 0.2], + importance: 0.7, + category: "fact", + source: "user", + extractionStatus: "pending", + agentId: "agent-1", + }; + + await client.storeMemory(input); + + const runCall = mockSession.run.mock.calls[0]; + const cypher = runCall[0] as string; + + // Cypher should NOT include taskId clause when not provided + // The dynamic clause is only added when taskId is present + expect(cypher).not.toContain(", taskId: $taskId"); + }); + + it("backward compatibility: existing memories without taskId still work", async () => { + // Storing without taskId should work exactly as before + mockSession.run.mockResolvedValue({ + records: [createMockRecord({ id: "mem-3" })], + }); + + const input: StoreMemoryInput = { + id: "mem-3", + text: "legacy memory", + embedding: [0.1], + importance: 0.5, + category: "other", + source: "auto-capture", + extractionStatus: "skipped", + agentId: "default", + }; + + const id = await client.storeMemory(input); + expect(id).toBe("mem-3"); + }); +}); + +// ============================================================================ +// Neo4jMemoryClient: findMemoriesByTaskId +// ============================================================================ + +describe("Task Metadata: findMemoriesByTaskId", () => { + let client: Neo4jMemoryClient; + let mockDriver: ReturnType; + let mockSession: ReturnType; + + beforeEach(() => { + const mockLogger = createMockLogger(); + mockDriver = createMockDriver(); + mockSession = createMockSession(); + mockDriver.session.mockReturnValue(mockSession); + + client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger); + (client as any).driver = mockDriver; + (client as any).indexesReady = true; + }); + + it("should find memories by taskId", async () => { + mockSession.run.mockResolvedValue({ + records: [ + createMockRecord({ + id: "mem-1", + text: "task-related memory", + category: "fact", + importance: 0.8, + }), + createMockRecord({ + id: "mem-2", + text: "another task memory", + category: "other", + importance: 0.6, + }), + ], + }); + + const results = await client.findMemoriesByTaskId("TASK-001"); + + expect(results).toHaveLength(2); + expect(results[0].id).toBe("mem-1"); + expect(results[1].id).toBe("mem-2"); + + const runCall = mockSession.run.mock.calls[0]; + const cypher = runCall[0] as string; + const params = runCall[1] as Record; + + expect(cypher).toContain("m.taskId = $taskId"); + expect(params.taskId).toBe("TASK-001"); + }); + + it("should filter by agentId when provided", async () => { + mockSession.run.mockResolvedValue({ records: [] }); + + await client.findMemoriesByTaskId("TASK-001", "agent-1"); + + const runCall = mockSession.run.mock.calls[0]; + const cypher = runCall[0] as string; + const params = runCall[1] as Record; + + expect(cypher).toContain("m.agentId = $agentId"); + expect(params.agentId).toBe("agent-1"); + }); + + it("should return empty array when no memories match", async () => { + mockSession.run.mockResolvedValue({ records: [] }); + + const results = await client.findMemoriesByTaskId("TASK-999"); + expect(results).toHaveLength(0); + }); +}); + +// ============================================================================ +// Neo4jMemoryClient: clearTaskIdFromMemories +// ============================================================================ + +describe("Task Metadata: clearTaskIdFromMemories", () => { + let client: Neo4jMemoryClient; + let mockDriver: ReturnType; + let mockSession: ReturnType; + + beforeEach(() => { + const mockLogger = createMockLogger(); + mockDriver = createMockDriver(); + mockSession = createMockSession(); + mockDriver.session.mockReturnValue(mockSession); + + client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger); + (client as any).driver = mockDriver; + (client as any).indexesReady = true; + }); + + it("should clear taskId from all matching memories", async () => { + mockSession.run.mockResolvedValue({ + records: [createMockRecord({ cleared: 3 })], + }); + + const count = await client.clearTaskIdFromMemories("TASK-001"); + + expect(count).toBe(3); + + const runCall = mockSession.run.mock.calls[0]; + const cypher = runCall[0] as string; + const params = runCall[1] as Record; + + expect(cypher).toContain("m.taskId = $taskId"); + expect(cypher).toContain("SET m.taskId = null"); + expect(params.taskId).toBe("TASK-001"); + }); + + it("should filter by agentId when provided", async () => { + mockSession.run.mockResolvedValue({ + records: [createMockRecord({ cleared: 1 })], + }); + + await client.clearTaskIdFromMemories("TASK-001", "agent-1"); + + const runCall = mockSession.run.mock.calls[0]; + const cypher = runCall[0] as string; + const params = runCall[1] as Record; + + expect(cypher).toContain("m.agentId = $agentId"); + expect(params.agentId).toBe("agent-1"); + }); + + it("should return 0 when no memories match", async () => { + mockSession.run.mockResolvedValue({ + records: [createMockRecord({ cleared: 0 })], + }); + + const count = await client.clearTaskIdFromMemories("TASK-999"); + expect(count).toBe(0); + }); +}); + +// ============================================================================ +// Hybrid search results include taskId +// ============================================================================ + +describe("Task Metadata: hybrid search includes taskId", () => { + it("should carry taskId through RRF fusion", () => { + const vectorResults = [ + { + id: "mem-1", + text: "memory with task", + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + score: 0.9, + taskId: "TASK-001", + }, + { + id: "mem-2", + text: "memory without task", + category: "other", + importance: 0.5, + createdAt: "2026-01-02", + score: 0.8, + }, + ]; + + const bm25Results = [ + { + id: "mem-1", + text: "memory with task", + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + score: 0.7, + taskId: "TASK-001", + }, + ]; + + const graphResults: typeof vectorResults = []; + + const fused = fuseWithConfidenceRRF( + [vectorResults, bm25Results, graphResults], + 60, + [1.0, 1.0, 1.0], + ); + + // mem-1 should have taskId preserved + const mem1 = fused.find((r) => r.id === "mem-1"); + expect(mem1).toBeDefined(); + expect(mem1!.taskId).toBe("TASK-001"); + + // mem-2 should have undefined taskId + const mem2 = fused.find((r) => r.id === "mem-2"); + expect(mem2).toBeDefined(); + expect(mem2!.taskId).toBeUndefined(); + }); + + it("should include taskId in fused results when present in any signal", () => { + // taskId present only in BM25 signal + const vectorResults = [ + { + id: "mem-1", + text: "test", + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + score: 0.9, + // no taskId + }, + ]; + + const bm25Results = [ + { + id: "mem-1", + text: "test", + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + score: 0.7, + taskId: "TASK-002", + }, + ]; + + const fused = fuseWithConfidenceRRF([vectorResults, bm25Results, []], 60, [1.0, 1.0, 1.0]); + + // The first signal (vector) is used for metadata — taskId would be undefined + // because candidateMetadata takes the first occurrence + const mem1 = fused.find((r) => r.id === "mem-1"); + expect(mem1).toBeDefined(); + // The first signal to contribute metadata wins + // vector came first and has no taskId + expect(mem1!.taskId).toBeUndefined(); + }); +}); + +// ============================================================================ +// Auto-tagging: parseTaskLedger for active task detection +// ============================================================================ + +describe("Task Metadata: auto-tagging via parseTaskLedger", () => { + it("should detect single active task for auto-tagging", () => { + const content = `# Active Tasks + +## TASK-005: Fix login bug +- **Status:** in_progress +- **Started:** 2026-02-16 + +# Completed +## TASK-004: Fix browser port collision +- **Completed:** 2026-02-16 +`; + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(1); + expect(ledger.activeTasks[0].id).toBe("TASK-005"); + }); + + it("should not auto-tag when multiple active tasks exist", () => { + const content = `# Active Tasks + +## TASK-005: Fix login bug +- **Status:** in_progress + +## TASK-006: Update docs +- **Status:** in_progress + +# Completed +`; + + const ledger = parseTaskLedger(content); + // Multiple active tasks — should NOT auto-tag + expect(ledger.activeTasks.length).toBeGreaterThan(1); + }); + + it("should not auto-tag when no active tasks exist", () => { + const content = `# Active Tasks + +_No active tasks_ + +# Completed +## TASK-004: Fix browser port collision +- **Completed:** 2026-02-16 +`; + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(0); + }); + + it("should extract completed task IDs for recall filtering", () => { + const content = `# Active Tasks + +## TASK-007: New feature +- **Status:** in_progress + +# Completed +## TASK-002: Book flights +- **Completed:** 2026-02-16 + +## TASK-003: Fix dashboard +- **Completed:** 2026-02-16 +`; + + const ledger = parseTaskLedger(content); + const completedTaskIds = new Set(ledger.completedTasks.map((t) => t.id)); + expect(completedTaskIds.has("TASK-002")).toBe(true); + expect(completedTaskIds.has("TASK-003")).toBe(true); + expect(completedTaskIds.has("TASK-007")).toBe(false); + }); +}); + +// ============================================================================ +// Recall filter: taskId-based completed task filtering +// ============================================================================ + +describe("Task Metadata: recall filter", () => { + it("should filter out memories linked to completed tasks", () => { + const completedTaskIds = new Set(["TASK-002", "TASK-003"]); + + const results = [ + { + id: "1", + text: "active task memory", + taskId: "TASK-007", + score: 0.9, + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + }, + { + id: "2", + text: "completed task memory", + taskId: "TASK-002", + score: 0.85, + category: "fact", + importance: 0.7, + createdAt: "2026-01-01", + }, + { + id: "3", + text: "no task memory", + score: 0.8, + category: "other", + importance: 0.5, + createdAt: "2026-01-01", + }, + { + id: "4", + text: "another completed", + taskId: "TASK-003", + score: 0.75, + category: "fact", + importance: 0.6, + createdAt: "2026-01-01", + }, + ]; + + const filtered = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId)); + + expect(filtered).toHaveLength(2); + expect(filtered[0].id).toBe("1"); // active task — kept + expect(filtered[1].id).toBe("3"); // no task — kept + }); + + it("should keep all memories when no completed task IDs", () => { + const completedTaskIds = new Set(); + + const results = [ + { id: "1", text: "memory A", taskId: "TASK-001", score: 0.9 }, + { id: "2", text: "memory B", score: 0.8 }, + ]; + + const filtered = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId)); + + expect(filtered).toHaveLength(2); + }); + + it("should keep memories without taskId regardless of filter", () => { + const completedTaskIds = new Set(["TASK-001", "TASK-002"]); + + const results = [ + { id: "1", text: "old memory without task", score: 0.9 }, + { id: "2", text: "another old one", taskId: undefined, score: 0.8 }, + ]; + + const filtered = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId)); + + expect(filtered).toHaveLength(2); + }); +}); + +// ============================================================================ +// Vector/BM25 search results include taskId +// ============================================================================ + +describe("Task Metadata: search signal taskId", () => { + let client: Neo4jMemoryClient; + let mockDriver: ReturnType; + let mockSession: ReturnType; + + beforeEach(() => { + const mockLogger = createMockLogger(); + mockDriver = createMockDriver(); + mockSession = createMockSession(); + mockDriver.session.mockReturnValue(mockSession); + + client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger); + (client as any).driver = mockDriver; + (client as any).indexesReady = true; + }); + + it("vector search should include taskId in results", async () => { + mockSession.run.mockResolvedValue({ + records: [ + createMockRecord({ + id: "mem-1", + text: "test", + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + taskId: "TASK-001", + similarity: 0.95, + }), + createMockRecord({ + id: "mem-2", + text: "test2", + category: "other", + importance: 0.5, + createdAt: "2026-01-02", + taskId: null, // Legacy memory without taskId + similarity: 0.85, + }), + ], + }); + + const results = await client.vectorSearch([0.1, 0.2], 10, 0.1); + + expect(results[0].taskId).toBe("TASK-001"); + expect(results[1].taskId).toBeUndefined(); // null → undefined + }); + + it("BM25 search should include taskId in results", async () => { + mockSession.run.mockResolvedValue({ + records: [ + createMockRecord({ + id: "mem-1", + text: "test query", + category: "fact", + importance: 0.8, + createdAt: "2026-01-01", + taskId: "TASK-002", + bm25Score: 5.0, + }), + ], + }); + + const results = await client.bm25Search("test query", 10); + + expect(results[0].taskId).toBe("TASK-002"); + }); +});