diff --git a/extensions/memory-neo4j/cli.ts b/extensions/memory-neo4j/cli.ts index 4ff6e60415c..8cdab6556a7 100644 --- a/extensions/memory-neo4j/cli.ts +++ b/extensions/memory-neo4j/cli.ts @@ -292,6 +292,7 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void { "--skip-semantic", "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)", ) + .option("--workspace ", "Workspace directory for TASKS.md cleanup") .action( async (opts: { agent?: string; @@ -305,23 +306,31 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void { maxSemanticPairs?: string; concurrency?: string; skipSemantic?: boolean; + workspace?: string; }) => { console.log("\n๐ŸŒ™ Memory Sleep Cycle"); console.log("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - console.log("Seven-phase memory consolidation (Pareto-based):\n"); - console.log(" Phase 1: Deduplication โ€” Merge near-duplicate memories"); + console.log("Multi-phase memory consolidation (Pareto-based):\n"); + console.log(" Phase 1: Deduplication โ€” Merge near-duplicate memories"); console.log( - " Phase 1b: Semantic Dedup โ€” LLM-based paraphrase detection (0.75โ€“0.95 band)", + " Phase 1b: Semantic Dedup โ€” LLM-based paraphrase detection (0.75โ€“0.95 band)", ); - console.log(" Phase 1c: Conflict Detection โ€” Resolve contradictory memories"); + console.log(" Phase 1c: Conflict Detection โ€” Resolve contradictory memories"); + console.log(" Phase 1d: Entity Dedup โ€” Merge duplicate entity nodes"); console.log( - " Phase 2: Pareto Scoring โ€” Calculate effective scores for all memories", + " Phase 2: Pareto Scoring โ€” Calculate effective scores for all memories", ); - console.log(" Phase 3: Core Promotion โ€” Regular memories above threshold โ†’ core"); - console.log(" Phase 4: Core Demotion โ€” Core memories below threshold โ†’ regular"); - console.log(" Phase 5: Extraction โ€” Extract entities and categorize"); - console.log(" Phase 6: Decay & Pruning โ€” Remove stale low-importance memories"); - console.log(" Phase 7: Orphan Cleanup โ€” Remove disconnected nodes\n"); + console.log( + " Phase 3: Core Promotion โ€” Regular memories above threshold โ†’ core", + ); + console.log( + " Phase 4: Core Demotion โ€” Core memories below threshold โ†’ regular", + ); + console.log(" Phase 5: Extraction โ€” Extract entities and categorize"); + console.log(" Phase 6: Decay & Pruning โ€” Remove stale low-importance memories"); + console.log(" Phase 7: Orphan Cleanup โ€” Remove disconnected nodes"); + console.log(" Phase 7b: Credential Scan โ€” Remove memories with leaked secrets"); + console.log(" Phase 8: Task Ledger Cleanup โ€” Archive stale tasks in TASKS.md\n"); try { // Validate sleep cycle CLI parameters before running @@ -396,6 +405,9 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void { await db.ensureInitialized(); + // Resolve workspace dir for task ledger cleanup + const resolvedWorkspace = opts.workspace?.trim() || undefined; + const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, { agentId: opts.agent, dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined, @@ -409,18 +421,23 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void { decayCurves: Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined, extractionBatchSize: batchSize, extractionDelayMs: delay, + workspaceDir: resolvedWorkspace, onPhaseStart: (phase) => { const phaseNames: Record = { dedup: "Phase 1: Deduplication", semanticDedup: "Phase 1b: Semantic Deduplication", conflict: "Phase 1c: Conflict Detection", + entityDedup: "Phase 1d: Entity Deduplication", pareto: "Phase 2: Pareto Scoring", promotion: "Phase 3: Core Promotion", extraction: "Phase 4: Extraction", decay: "Phase 5: Decay & Pruning", cleanup: "Phase 6: Orphan Cleanup", + noiseCleanup: "Phase 7: Noise Cleanup", + credentialScan: "Phase 7b: Credential Scan", + taskLedger: "Phase 8: Task Ledger Cleanup", }; - console.log(`\nโ–ถ ${phaseNames[phase]}`); + console.log(`\nโ–ถ ${phaseNames[phase] ?? phase}`); console.log("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); }, onProgress: (_phase, message) => { @@ -457,6 +474,12 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void { console.log( ` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`, ); + console.log( + ` Task Ledger: ${result.taskLedger.archivedCount} stale tasks archived` + + (result.taskLedger.archivedIds.length > 0 + ? ` (${result.taskLedger.archivedIds.join(", ")})` + : ""), + ); if (result.aborted) { console.log("\nโš ๏ธ Sleep cycle was aborted before completion."); } diff --git a/extensions/memory-neo4j/neo4j-client.entity-dedup.test.ts b/extensions/memory-neo4j/neo4j-client.entity-dedup.test.ts new file mode 100644 index 00000000000..549790ca84d --- /dev/null +++ b/extensions/memory-neo4j/neo4j-client.entity-dedup.test.ts @@ -0,0 +1,327 @@ +/** + * Tests for entity deduplication in neo4j-client.ts. + * + * Tests findDuplicateEntityPairs() and mergeEntityPair() using mocked Neo4j driver. + * Verifies substring-matching logic, mention-count based decisions, and merge behavior. + */ + +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { Neo4jMemoryClient } from "./neo4j-client.js"; + +// ============================================================================ +// Test Helpers +// ============================================================================ + +function createMockSession() { + return { + run: vi.fn().mockResolvedValue({ records: [] }), + close: vi.fn().mockResolvedValue(undefined), + executeWrite: vi.fn( + async (work: (tx: { run: ReturnType }) => Promise) => { + const mockTx = { run: vi.fn().mockResolvedValue({ records: [] }) }; + return work(mockTx); + }, + ), + }; +} + +function createMockDriver() { + return { + session: vi.fn().mockReturnValue(createMockSession()), + close: vi.fn().mockResolvedValue(undefined), + }; +} + +function createMockLogger() { + return { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }; +} + +function mockRecord(data: Record) { + return { + get: (key: string) => data[key], + }; +} + +// ============================================================================ +// Entity Deduplication Tests +// ============================================================================ + +describe("Entity Deduplication", () => { + let client: Neo4jMemoryClient; + let mockDriver: ReturnType; + let mockSession: ReturnType; + let mockLogger: ReturnType; + + beforeEach(() => { + mockLogger = createMockLogger(); + mockDriver = createMockDriver(); + mockSession = createMockSession(); + mockDriver.session.mockReturnValue(mockSession); + + client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger); + (client as any).driver = mockDriver; + (client as any).indexesReady = true; + }); + + // -------------------------------------------------------------------------- + // findDuplicateEntityPairs() + // -------------------------------------------------------------------------- + + describe("findDuplicateEntityPairs", () => { + it("finds substring matches: 'tarun' + 'tarun sukhani' (same type)", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [ + mockRecord({ + id1: "e1", + name1: "tarun", + mc1: 5, + id2: "e2", + name2: "tarun sukhani", + mc2: 3, + }), + ], + }); + + const pairs = await client.findDuplicateEntityPairs(); + + expect(pairs).toHaveLength(1); + // "tarun" has more mentions (5 > 3), so it should be kept + expect(pairs[0].keepId).toBe("e1"); + expect(pairs[0].keepName).toBe("tarun"); + expect(pairs[0].removeId).toBe("e2"); + expect(pairs[0].removeName).toBe("tarun sukhani"); + }); + + it("keeps entity with more mentions regardless of name length", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [ + mockRecord({ + id1: "e1", + name1: "fish speech", + mc1: 2, + id2: "e2", + name2: "fish speech s1 mini", + mc2: 10, + }), + ], + }); + + const pairs = await client.findDuplicateEntityPairs(); + + expect(pairs).toHaveLength(1); + // "fish speech s1 mini" has more mentions (10 > 2), so it should be kept + expect(pairs[0].keepId).toBe("e2"); + expect(pairs[0].keepName).toBe("fish speech s1 mini"); + expect(pairs[0].removeId).toBe("e1"); + expect(pairs[0].removeName).toBe("fish speech"); + }); + + it("keeps shorter name when mentions are equal", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [ + mockRecord({ + id1: "e1", + name1: "aaditya", + mc1: 5, + id2: "e2", + name2: "aaditya sukhani", + mc2: 5, + }), + ], + }); + + const pairs = await client.findDuplicateEntityPairs(); + + expect(pairs).toHaveLength(1); + // Equal mentions, so keep the shorter name ("aaditya") + expect(pairs[0].keepId).toBe("e1"); + expect(pairs[0].keepName).toBe("aaditya"); + expect(pairs[0].removeId).toBe("e2"); + expect(pairs[0].removeName).toBe("aaditya sukhani"); + }); + + it("returns empty array when no duplicates exist", async () => { + mockSession.run.mockResolvedValueOnce({ records: [] }); + + const pairs = await client.findDuplicateEntityPairs(); + + expect(pairs).toHaveLength(0); + }); + + it("handles multiple duplicate pairs", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [ + mockRecord({ + id1: "e1", + name1: "tarun", + mc1: 5, + id2: "e2", + name2: "tarun sukhani", + mc2: 3, + }), + mockRecord({ + id1: "e3", + name1: "fish speech", + mc1: 2, + id2: "e4", + name2: "fish speech s1 mini", + mc2: 8, + }), + ], + }); + + const pairs = await client.findDuplicateEntityPairs(); + + expect(pairs).toHaveLength(2); + }); + + it("handles NULL mention counts (treats as 0)", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [ + mockRecord({ + id1: "e1", + name1: "neo4j", + mc1: null, + id2: "e2", + name2: "neo4j database", + mc2: null, + }), + ], + }); + + const pairs = await client.findDuplicateEntityPairs(); + + expect(pairs).toHaveLength(1); + // Both NULL (treated as 0), so keep the shorter name + expect(pairs[0].keepId).toBe("e1"); + expect(pairs[0].keepName).toBe("neo4j"); + }); + + it("passes the Cypher query with substring matching and type constraint", async () => { + mockSession.run.mockResolvedValueOnce({ records: [] }); + + await client.findDuplicateEntityPairs(); + + const query = mockSession.run.mock.calls[0][0] as string; + // Verify the query checks same type + expect(query).toContain("e1.type = e2.type"); + // Verify the query checks CONTAINS in both directions + expect(query).toContain("e1.name CONTAINS e2.name"); + expect(query).toContain("e2.name CONTAINS e1.name"); + // Verify minimum name length filter + expect(query).toContain("size(e1.name) > 2"); + }); + }); + + // -------------------------------------------------------------------------- + // mergeEntityPair() + // -------------------------------------------------------------------------- + + describe("mergeEntityPair", () => { + it("transfers MENTIONS and deletes source entity", async () => { + // mergeEntityPair uses executeWrite, so we need to set up the mock transaction + const mockTx = { + run: vi + .fn() + .mockResolvedValueOnce({ + // Transfer MENTIONS + records: [mockRecord({ transferred: 3 })], + }) + .mockResolvedValueOnce({ + // Update mentionCount + records: [], + }) + .mockResolvedValueOnce({ + // Delete removed entity + records: [], + }), + }; + + mockSession.executeWrite.mockImplementationOnce(async (work: any) => work(mockTx)); + + const result = await client.mergeEntityPair("keep-id", "remove-id"); + + expect(result).toBe(true); + // Should have been called 3 times: transfer, update count, delete + expect(mockTx.run).toHaveBeenCalledTimes(3); + + // Verify transfer query + const transferQuery = mockTx.run.mock.calls[0][0] as string; + expect(transferQuery).toContain("MERGE (m)-[:MENTIONS]->(keep)"); + expect(transferQuery).toContain("DELETE r"); + + // Verify update mentionCount + const updateQuery = mockTx.run.mock.calls[1][0] as string; + expect(updateQuery).toContain("mentionCount"); + + // Verify delete query + const deleteQuery = mockTx.run.mock.calls[2][0] as string; + expect(deleteQuery).toContain("DETACH DELETE e"); + }); + + it("skips mentionCount update when no relationships to transfer", async () => { + const mockTx = { + run: vi + .fn() + .mockResolvedValueOnce({ + // Transfer MENTIONS โ€” 0 transferred + records: [mockRecord({ transferred: 0 })], + }) + .mockResolvedValueOnce({ + // Delete removed entity (mentionCount update is skipped) + records: [], + }), + }; + + mockSession.executeWrite.mockImplementationOnce(async (work: any) => work(mockTx)); + + const result = await client.mergeEntityPair("keep-id", "remove-id"); + + expect(result).toBe(true); + // Only 2 calls: transfer (0 results) and delete (skip update) + expect(mockTx.run).toHaveBeenCalledTimes(2); + }); + + it("returns false on error", async () => { + mockSession.executeWrite.mockRejectedValueOnce(new Error("Neo4j connection lost")); + + const result = await client.mergeEntityPair("keep-id", "remove-id"); + + expect(result).toBe(false); + }); + }); + + // -------------------------------------------------------------------------- + // reconcileEntityMentionCounts() + // -------------------------------------------------------------------------- + + describe("reconcileEntityMentionCounts", () => { + it("updates entities with NULL mentionCount", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [mockRecord({ updated: 42 })], + }); + + const updated = await client.reconcileEntityMentionCounts(); + + expect(updated).toBe(42); + const query = mockSession.run.mock.calls[0][0] as string; + expect(query).toContain("mentionCount IS NULL"); + expect(query).toContain("SET e.mentionCount = actual"); + }); + + it("returns 0 when all entities have mentionCount set", async () => { + mockSession.run.mockResolvedValueOnce({ + records: [mockRecord({ updated: 0 })], + }); + + const updated = await client.reconcileEntityMentionCounts(); + + expect(updated).toBe(0); + }); + }); +}); diff --git a/extensions/memory-neo4j/neo4j-client.ts b/extensions/memory-neo4j/neo4j-client.ts index 390a76bb8db..0445eb175be 100644 --- a/extensions/memory-neo4j/neo4j-client.ts +++ b/extensions/memory-neo4j/neo4j-client.ts @@ -2022,4 +2022,108 @@ export class Neo4jMemoryClient { } }); } + + /** + * Delete non-core, non-pinned memories matching a regex pattern. + * Used by the sleep cycle noise pattern cleanup. + * + * @returns Number of memories deleted + */ + async deleteMemoriesByPattern(pattern: string, agentId?: string, limit = 100): Promise { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const agentFilter = agentId ? "AND m.agentId = $agentId" : ""; + const result = await session.run( + `MATCH (m:Memory) + WHERE m.text =~ $pattern + AND coalesce(m.userPinned, false) = false + AND m.category <> 'core' + ${agentFilter} + WITH m LIMIT ${limit} + DETACH DELETE m + RETURN count(*) AS removed`, + { pattern, agentId }, + ); + return (result.records[0]?.get("removed") as number) ?? 0; + } finally { + await session.close(); + } + } + + /** + * Fetch all non-core memories (id + text) for a given agent, or all agents. + * Used by the sleep cycle credential scanner. + */ + async fetchNonCoreMemories(agentId?: string): Promise> { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const agentFilter = agentId ? "AND m.agentId = $agentId" : ""; + const result = await session.run( + `MATCH (m:Memory) + WHERE m.category <> 'core' + ${agentFilter} + RETURN m.id AS id, m.text AS text`, + agentId ? { agentId } : {}, + ); + return result.records.map((r) => ({ + id: r.get("id") as string, + text: r.get("text") as string, + })); + } finally { + await session.close(); + } + } + + /** + * Delete memories by IDs (DETACH DELETE). + * Used by the sleep cycle credential scanner. + * + * @returns Number of memories deleted + */ + async deleteMemoriesByIds(ids: string[]): Promise { + if (ids.length === 0) { + return 0; + } + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const result = await session.run( + `UNWIND $ids AS id + MATCH (m:Memory {id: id}) + DETACH DELETE m + RETURN count(*) AS removed`, + { ids }, + ); + return (result.records[0]?.get("removed") as number) ?? 0; + } finally { + await session.close(); + } + } + + /** + * Reconcile mentionCount for all entities by counting actual MENTIONS relationships. + * Fixes entities with NULL or stale mentionCount values (e.g., entities created + * before mentionCount tracking was added). + * + * @returns Number of entities updated + */ + async reconcileEntityMentionCounts(): Promise { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const result = await session.run( + `MATCH (e:Entity) + WHERE e.mentionCount IS NULL + OPTIONAL MATCH (m:Memory)-[:MENTIONS]->(e) + WITH e, count(m) AS actual + SET e.mentionCount = actual + RETURN count(e) AS updated`, + ); + return (result.records[0]?.get("updated") as number) ?? 0; + } finally { + await session.close(); + } + } } diff --git a/extensions/memory-neo4j/sleep-cycle.credential-scan.test.ts b/extensions/memory-neo4j/sleep-cycle.credential-scan.test.ts new file mode 100644 index 00000000000..fb0a2fcf5e0 --- /dev/null +++ b/extensions/memory-neo4j/sleep-cycle.credential-scan.test.ts @@ -0,0 +1,165 @@ +/** + * Tests for credential scanning in the sleep cycle. + * + * Verifies that CREDENTIAL_PATTERNS and detectCredential() correctly + * identify credential-like content in memory text while not flagging + * clean text. + */ + +import { describe, it, expect } from "vitest"; +import { CREDENTIAL_PATTERNS, detectCredential } from "./sleep-cycle.js"; + +describe("Credential Detection", () => { + // -------------------------------------------------------------------------- + // detectCredential() โ€” should flag dangerous content + // -------------------------------------------------------------------------- + + describe("should detect credentials", () => { + it("detects API keys (sk-...)", () => { + const result = detectCredential("Use the key sk-abc123def456ghi789jkl012mno345"); + expect(result).toBe("API key"); + }); + + it("detects api_key patterns", () => { + const result = detectCredential("Set api_key_live_abcdef1234567890abcdef"); + expect(result).toBe("API key"); + }); + + it("detects Bearer tokens", () => { + const result = detectCredential( + "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature", + ); + // Could match either Bearer token or JWT โ€” both are valid detections + expect(result).not.toBeNull(); + }); + + it("detects password assignments (password: X)", () => { + const result = detectCredential("The database password: myS3cretP@ss!"); + expect(result).toBe("Password assignment"); + }); + + it("detects password assignments (password=X)", () => { + const result = detectCredential("config has password=hunter2 in it"); + expect(result).toBe("Password assignment"); + }); + + it("detects the missed pattern: login with X creds user/pass", () => { + const result = detectCredential("login with radarr creds hullah/fuckbar"); + expect(result).toBe("Credentials (user/pass)"); + }); + + it("detects creds user/pass without login prefix", () => { + const result = detectCredential("use creds admin/password123 for the server"); + expect(result).toBe("Credentials (user/pass)"); + }); + + it("detects URL-embedded credentials", () => { + const result = detectCredential("Connect to https://admin:secretpass@db.example.com/mydb"); + expect(result).toBe("URL credentials"); + }); + + it("detects URL credentials with http://", () => { + const result = detectCredential("http://user:pass@192.168.1.1:8080/api"); + expect(result).toBe("URL credentials"); + }); + + it("detects private keys", () => { + const result = detectCredential("-----BEGIN RSA PRIVATE KEY-----\nMIIEow..."); + expect(result).toBe("Private key"); + }); + + it("detects AWS access keys", () => { + const result = detectCredential("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE"); + expect(result).toBe("AWS key"); + }); + + it("detects GitHub personal access tokens", () => { + const result = detectCredential("Set GITHUB_TOKEN=ghp_ABCDEFabcdef1234567890"); + expect(result).toBe("GitHub/GitLab token"); + }); + + it("detects GitLab tokens", () => { + const result = detectCredential("Use glpat-xxxxxxxxxxxxxxxxxxxx for auth"); + expect(result).toBe("GitHub/GitLab token"); + }); + + it("detects JWT tokens", () => { + const result = detectCredential( + "Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c", + ); + expect(result).toBe("JWT"); + }); + + it("detects token=value patterns", () => { + const result = detectCredential( + "Set token=abcdef1234567890abcdef1234567890ab for authentication", + ); + expect(result).toBe("Token/secret"); + }); + + it("detects secret: value patterns", () => { + const result = detectCredential( + "The client secret: abcdef1234567890abcdef1234567890abcdef12", + ); + expect(result).toBe("Token/secret"); + }); + }); + + // -------------------------------------------------------------------------- + // detectCredential() โ€” should NOT flag clean text + // -------------------------------------------------------------------------- + + describe("should not flag clean text", () => { + it("does not flag normal text", () => { + expect(detectCredential("Remember to buy groceries tomorrow")).toBeNull(); + }); + + it("does not flag password advice (without actual password)", () => { + expect( + detectCredential("Make sure the password is at least 8 characters long for security"), + ).toBeNull(); + }); + + it("does not flag discussion about tokens", () => { + expect(detectCredential("We should use JWT tokens for authentication")).toBeNull(); + }); + + it("does not flag short key-like words", () => { + expect(detectCredential("The key to success is persistence")).toBeNull(); + }); + + it("does not flag URLs without credentials", () => { + expect(detectCredential("Visit https://example.com/api/v1 for docs")).toBeNull(); + }); + + it("does not flag discussion about API key rotation", () => { + expect(detectCredential("Rotate your API keys every 90 days as a best practice")).toBeNull(); + }); + + it("does not flag file paths", () => { + expect(detectCredential("Credentials are stored in /home/user/.secrets/api.json")).toBeNull(); + }); + + it("does not flag casual use of slash in text", () => { + expect(detectCredential("Use the read/write mode for better performance")).toBeNull(); + }); + }); + + // -------------------------------------------------------------------------- + // CREDENTIAL_PATTERNS โ€” structural checks + // -------------------------------------------------------------------------- + + describe("CREDENTIAL_PATTERNS structure", () => { + it("has at least 8 patterns", () => { + expect(CREDENTIAL_PATTERNS.length).toBeGreaterThanOrEqual(8); + }); + + it("each pattern has a label and valid RegExp", () => { + for (const { pattern, label } of CREDENTIAL_PATTERNS) { + expect(pattern).toBeInstanceOf(RegExp); + expect(label).toBeTruthy(); + expect(typeof label).toBe("string"); + } + }); + }); +}); diff --git a/extensions/memory-neo4j/sleep-cycle.ts b/extensions/memory-neo4j/sleep-cycle.ts index cbf0a973be0..b4485fa0261 100644 --- a/extensions/memory-neo4j/sleep-cycle.ts +++ b/extensions/memory-neo4j/sleep-cycle.ts @@ -1,18 +1,22 @@ /** - * Eight-phase sleep cycle for memory consolidation. + * Multi-phase sleep cycle for memory consolidation. * * Implements a Pareto-based memory ecosystem where core memory * is bounded to the top 20% of memories by effective score. * * Phases: - * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy) + * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy) + * 1b. SEMANTIC DEDUP - LLM-based paraphrase detection + * 1c. CONFLICT DETECTION - Resolve contradictory memories * 1d. ENTITY DEDUP - Merge near-duplicate entities (reduce entity bloat) - * 2. PARETO SCORING - Calculate effective scores for all memories - * 3. CORE PROMOTION - Regular memories above threshold -> core - * 4. EXTRACTION - Form entity relationships (strengthen connections) - * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve) - * 6. CLEANUP - Remove orphaned entities/tags (garbage collection) - * 7. NOISE CLEANUP - Remove dangerous pattern memories + * 2. PARETO SCORING - Calculate effective scores for all memories + * 3. CORE PROMOTION - Regular memories above threshold -> core + * 4. EXTRACTION - Form entity relationships (strengthen connections) + * 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve) + * 6. CLEANUP - Remove orphaned entities/tags (garbage collection) + * 7. NOISE CLEANUP - Remove dangerous pattern memories + * 7b. CREDENTIAL SCAN - Remove memories containing leaked credentials + * 8. TASK LEDGER - Archive stale tasks in TASKS.md * * Research basis: * - Pareto principle (20/80 rule) for memory tiering @@ -27,6 +31,7 @@ import type { Neo4jMemoryClient } from "./neo4j-client.js"; import type { Logger } from "./schema.js"; import { isSemanticDuplicate, resolveConflict, runBackgroundExtraction } from "./extractor.js"; import { makePairKey } from "./schema.js"; +import { reviewAndArchiveStaleTasks, type StaleTaskResult } from "./task-ledger.js"; /** * Sleep Cycle Result - aggregated stats from all phases. @@ -82,6 +87,18 @@ export type SleepCycleResult = { tagsRemoved: number; singleUseTagsRemoved: number; }; + // Phase 7b: Credential Scanning + credentialScan: { + memoriesScanned: number; + credentialsFound: number; + memoriesRemoved: number; + }; + // Phase 8: Task Ledger Cleanup + taskLedger: { + staleCount: number; + archivedCount: number; + archivedIds: string[]; + }; // Overall durationMs: number; aborted: boolean; @@ -120,6 +137,10 @@ export type SleepCycleOptions = { decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2) decayCurves?: Record; // Per-category decay curve overrides + // Phase 8: Task Ledger + workspaceDir?: string; // Workspace dir for TASKS.md (default: resolved from env) + staleTaskMaxAgeMs?: number; // Max age before task is stale (default: 24h) + // Progress callback onPhaseStart?: ( phase: @@ -131,11 +152,76 @@ export type SleepCycleOptions = { | "promotion" | "decay" | "extraction" - | "cleanup", + | "cleanup" + | "noiseCleanup" + | "credentialScan" + | "taskLedger", ) => void; onProgress?: (phase: string, message: string) => void; }; +// ============================================================================ +// Credential Detection Patterns +// ============================================================================ + +/** + * Regex patterns that match credential-like content in memory text. + * Used by the credential scanning phase to find and remove memories + * that accidentally stored secrets, passwords, API keys, or tokens. + * + * These are JavaScript RegExp patterns (case-insensitive). + */ +export const CREDENTIAL_PATTERNS: Array<{ pattern: RegExp; label: string }> = [ + // API keys: sk-..., api_key_..., api_key_live_..., apikey-..., etc. + { pattern: /\b(?:sk|api[_-]?key(?:[_-]\w+)?)[_-][a-z0-9]{16,}/i, label: "API key" }, + + // Bearer tokens + { pattern: /bearer\s+[a-z0-9_\-.]{20,}/i, label: "Bearer token" }, + + // JWT tokens (three base64 segments separated by dots) โ€” check before generic token pattern + { pattern: /\beyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}/i, label: "JWT" }, + + // Generic long tokens/secrets (hex or base64, 32+ chars) + { + pattern: /\b(?:token|secret|key)\s*[:=]\s*["']?[a-z0-9+/=_\-]{32,}["']?/i, + label: "Token/secret", + }, + + // Password patterns: password: X, password=X, password X, passwd=X, pwd=X + { + pattern: /\b(?:password|passwd|pwd)\s*[:=]\s*["']?\S{4,}["']?/i, + label: "Password assignment", + }, + + // Credentials in "creds user/pass" format: "login with X creds user/pass" + { pattern: /\bcreds?\s+\S+[/\\]\S+/i, label: "Credentials (user/pass)" }, + + // URL-embedded credentials: https://user:pass@host + { pattern: /\/\/[^/\s:]+:[^/\s@]+@/i, label: "URL credentials" }, + + // Private keys + { pattern: /-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----/i, label: "Private key" }, + + // AWS-style keys + { pattern: /\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/, label: "AWS key" }, + + // GitHub/GitLab tokens + { pattern: /\b(?:ghp|gho|ghu|ghs|ghr|glpat)[_-][a-zA-Z0-9]{16,}/i, label: "GitHub/GitLab token" }, +]; + +/** + * Check if a text contains credential-like content. + * Returns the first matching pattern label, or null if clean. + */ +export function detectCredential(text: string): string | null { + for (const { pattern, label } of CREDENTIAL_PATTERNS) { + if (pattern.test(text)) { + return label; + } + } + return null; +} + // ============================================================================ // Sleep Cycle Implementation // ============================================================================ @@ -180,6 +266,8 @@ export async function runSleepCycle( extractionBatchSize = 50, extractionDelayMs = 1000, singleUseTagMinAgeDays = 14, + workspaceDir, + staleTaskMaxAgeMs, onPhaseStart, onProgress, } = options; @@ -199,6 +287,8 @@ export async function runSleepCycle( decay: { memoriesPruned: 0 }, extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 }, cleanup: { entitiesRemoved: 0, tagsRemoved: 0, singleUseTagsRemoved: 0 }, + credentialScan: { memoriesScanned: 0, credentialsFound: 0, memoriesRemoved: 0 }, + taskLedger: { staleCount: 0, archivedCount: 0, archivedIds: [] }, durationMs: 0, aborted: false, }; @@ -443,6 +533,15 @@ export async function runSleepCycle( logger.info("memory-neo4j: [sleep] Phase 1d: Entity Deduplication"); try { + // Reconcile NULL mentionCounts before dedup so decisions are based on accurate counts + const reconciled = await db.reconcileEntityMentionCounts(); + if (reconciled > 0) { + logger.info( + `memory-neo4j: [sleep] Phase 1d: Reconciled mentionCount for ${reconciled} entities`, + ); + onProgress?.("entityDedup", `Reconciled ${reconciled} entity mention counts`); + } + const pairs = await db.findDuplicateEntityPairs(agentId); result.entityDedup.pairsFound = pairs.length; @@ -749,6 +848,7 @@ export async function runSleepCycle( // stored (open proposals, action items that trigger rogue sessions). // -------------------------------------------------------------------------- if (!abortSignal?.aborted) { + onPhaseStart?.("noiseCleanup"); logger.info("memory-neo4j: [sleep] Phase 7: Noise Pattern Cleanup"); try { @@ -763,29 +863,11 @@ export async function runSleepCycle( ]; let noiseRemoved = 0; - const noiseSession = (db as any).driver!.session(); - try { - for (const pattern of noisePatterns) { - if (abortSignal?.aborted) { - break; - } - - const agentFilter = agentId ? "AND m.agentId = $agentId" : ""; - const result = await noiseSession.run( - `MATCH (m:Memory) - WHERE m.text =~ $pattern - AND coalesce(m.userPinned, false) = false - AND m.category <> 'core' - ${agentFilter} - WITH m LIMIT 100 - DETACH DELETE m - RETURN count(*) AS removed`, - { pattern: `.*${pattern}.*`, agentId }, - ); - noiseRemoved += (result.records[0]?.get("removed") as number) ?? 0; + for (const pattern of noisePatterns) { + if (abortSignal?.aborted) { + break; } - } finally { - await noiseSession.close(); + noiseRemoved += await db.deleteMemoriesByPattern(`.*${pattern}.*`, agentId); } if (noiseRemoved > 0) { @@ -800,6 +882,90 @@ export async function runSleepCycle( } } + // -------------------------------------------------------------------------- + // Phase 7b: Credential Scanning + // Scans all memories for accidentally stored credentials (API keys, + // passwords, tokens) and removes them. This is a security measure + // to prevent credential leaks in the memory store. + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted) { + onPhaseStart?.("credentialScan"); + logger.info("memory-neo4j: [sleep] Phase 7b: Credential Scanning"); + + try { + const allMemories = await db.fetchNonCoreMemories(agentId); + result.credentialScan.memoriesScanned = allMemories.length; + + const toRemove: string[] = []; + for (const { id, text } of allMemories) { + if (abortSignal?.aborted) { + break; + } + const matched = detectCredential(text); + if (matched) { + toRemove.push(id); + result.credentialScan.credentialsFound++; + onProgress?.( + "credentialScan", + `Found ${matched} in memory ${id.slice(0, 8)}...: "${text.slice(0, 40)}..."`, + ); + logger.warn( + `memory-neo4j: [sleep] Credential detected (${matched}) in memory ${id} โ€” removing`, + ); + } + } + + if (toRemove.length > 0) { + result.credentialScan.memoriesRemoved = await db.deleteMemoriesByIds(toRemove); + } + + logger.info( + `memory-neo4j: [sleep] Phase 7b complete โ€” ${result.credentialScan.memoriesScanned} scanned, ${result.credentialScan.credentialsFound} credentials found, ${result.credentialScan.memoriesRemoved} removed`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 7b error: ${String(err)}`); + } + } + + // -------------------------------------------------------------------------- + // Phase 8: Task Ledger Cleanup + // Reviews TASKS.md for stale tasks (>24h with no activity) and archives them. + // Requires workspaceDir to be provided (otherwise skipped). + // -------------------------------------------------------------------------- + if (!abortSignal?.aborted && workspaceDir) { + onPhaseStart?.("taskLedger"); + logger.info("memory-neo4j: [sleep] Phase 8: Task Ledger Cleanup"); + + try { + const staleResult = await reviewAndArchiveStaleTasks(workspaceDir, staleTaskMaxAgeMs); + + if (staleResult) { + result.taskLedger.staleCount = staleResult.staleCount; + result.taskLedger.archivedCount = staleResult.archivedCount; + result.taskLedger.archivedIds = staleResult.archivedIds; + + if (staleResult.archivedCount > 0) { + onProgress?.( + "taskLedger", + `Archived ${staleResult.archivedCount} stale tasks: ${staleResult.archivedIds.join(", ")}`, + ); + } else { + onProgress?.("taskLedger", "No stale tasks found"); + } + } else { + onProgress?.("taskLedger", "TASKS.md not found โ€” skipped"); + } + + logger.info( + `memory-neo4j: [sleep] Phase 8 complete โ€” ${result.taskLedger.archivedCount} stale tasks archived`, + ); + } catch (err) { + logger.warn(`memory-neo4j: [sleep] Phase 8 error: ${String(err)}`); + } + } else if (!workspaceDir) { + logger.info("memory-neo4j: [sleep] Phase 8: Task Ledger Cleanup โ€” SKIPPED (no workspace dir)"); + } + result.durationMs = Date.now() - startTime; result.aborted = abortSignal?.aborted ?? false; diff --git a/extensions/memory-neo4j/task-ledger.test.ts b/extensions/memory-neo4j/task-ledger.test.ts new file mode 100644 index 00000000000..6c340214540 --- /dev/null +++ b/extensions/memory-neo4j/task-ledger.test.ts @@ -0,0 +1,466 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + findStaleTasks, + parseTaskDate, + parseTaskLedger, + reviewAndArchiveStaleTasks, + serializeTask, + serializeTaskLedger, +} from "./task-ledger.js"; + +// ============================================================================ +// parseTaskDate +// ============================================================================ + +describe("parseTaskDate", () => { + it("parses YYYY-MM-DD HH:MM format", () => { + const date = parseTaskDate("2026-02-14 09:15"); + expect(date).not.toBeNull(); + expect(date!.getFullYear()).toBe(2026); + expect(date!.getMonth()).toBe(1); // February is month 1 + expect(date!.getDate()).toBe(14); + }); + + it("parses YYYY-MM-DD HH:MM with timezone abbreviation", () => { + const date = parseTaskDate("2026-02-14 09:15 MYT"); + expect(date).not.toBeNull(); + expect(date!.getFullYear()).toBe(2026); + }); + + it("parses ISO format", () => { + const date = parseTaskDate("2026-02-14T09:15:00"); + expect(date).not.toBeNull(); + expect(date!.getFullYear()).toBe(2026); + }); + + it("returns null for empty string", () => { + expect(parseTaskDate("")).toBeNull(); + }); + + it("returns null for invalid date", () => { + expect(parseTaskDate("not-a-date")).toBeNull(); + }); +}); + +// ============================================================================ +// parseTaskLedger +// ============================================================================ + +describe("parseTaskLedger", () => { + it("parses a simple task ledger", () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Restaurant Booking", + "- **Status:** in_progress", + "- **Started:** 2026-02-14 09:15", + "- **Updated:** 2026-02-14 09:30", + "- **Details:** Graze, 4 pax, 19:30", + "- **Current Step:** Form filled, awaiting confirmation", + "", + "# Completed", + "", + ].join("\n"); + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(1); + expect(ledger.completedTasks).toHaveLength(0); + + const task = ledger.activeTasks[0]; + expect(task.id).toBe("TASK-001"); + expect(task.title).toBe("Restaurant Booking"); + expect(task.status).toBe("in_progress"); + expect(task.started).toBe("2026-02-14 09:15"); + expect(task.updated).toBe("2026-02-14 09:30"); + expect(task.details).toBe("Graze, 4 pax, 19:30"); + expect(task.currentStep).toBe("Form filled, awaiting confirmation"); + expect(task.isCompleted).toBe(false); + }); + + it("parses multiple active tasks", () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Task One", + "- **Status:** in_progress", + "- **Started:** 2026-02-14 09:00", + "", + "## TASK-002: Task Two", + "- **Status:** awaiting_input", + "- **Started:** 2026-02-14 10:00", + "", + "# Completed", + ].join("\n"); + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(2); + expect(ledger.activeTasks[0].id).toBe("TASK-001"); + expect(ledger.activeTasks[1].id).toBe("TASK-002"); + }); + + it("parses completed tasks", () => { + const content = [ + "# Active Tasks", + "", + "# Completed", + "", + "## ~~TASK-001: Old Task~~", + "- **Status:** done", + "- **Started:** 2026-02-13 09:00", + "- **Updated:** 2026-02-13 15:00", + ].join("\n"); + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(0); + expect(ledger.completedTasks).toHaveLength(1); + expect(ledger.completedTasks[0].id).toBe("TASK-001"); + expect(ledger.completedTasks[0].isCompleted).toBe(true); + }); + + it("parses blocked tasks", () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Blocked Task", + "- **Status:** blocked", + "- **Started:** 2026-02-14 09:00", + "- **Blocked On:** Waiting for API key", + "", + "# Completed", + ].join("\n"); + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(1); + expect(ledger.activeTasks[0].blockedOn).toBe("Waiting for API key"); + }); + + it("handles empty task ledger", () => { + const content = [ + "# Active Tasks", + "", + "# Completed", + "", + ].join("\n"); + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks).toHaveLength(0); + expect(ledger.completedTasks).toHaveLength(0); + }); + + it("handles Last Updated field variant", () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Some Task", + "- **Status:** in_progress", + "- **Last Updated:** 2026-02-14 10:00", + "", + "# Completed", + ].join("\n"); + + const ledger = parseTaskLedger(content); + expect(ledger.activeTasks[0].updated).toBe("2026-02-14 10:00"); + }); +}); + +// ============================================================================ +// findStaleTasks +// ============================================================================ + +describe("findStaleTasks", () => { + const now = new Date("2026-02-15T10:00:00"); + const twentyFourHoursMs = 24 * 60 * 60 * 1000; + + it("identifies tasks older than 24h as stale", () => { + const tasks = [ + { + id: "TASK-001", + title: "Old Task", + status: "in_progress" as const, + updated: "2026-02-14 08:00", + rawLines: [], + isCompleted: false, + }, + ]; + + const stale = findStaleTasks(tasks, now, twentyFourHoursMs); + expect(stale).toHaveLength(1); + expect(stale[0].id).toBe("TASK-001"); + }); + + it("does not mark recent tasks as stale", () => { + const tasks = [ + { + id: "TASK-001", + title: "Recent Task", + status: "in_progress" as const, + updated: "2026-02-15 09:00", + rawLines: [], + isCompleted: false, + }, + ]; + + const stale = findStaleTasks(tasks, now, twentyFourHoursMs); + expect(stale).toHaveLength(0); + }); + + it("skips done tasks", () => { + const tasks = [ + { + id: "TASK-001", + title: "Done Task", + status: "done" as const, + updated: "2026-02-13 08:00", + rawLines: [], + isCompleted: false, + }, + ]; + + const stale = findStaleTasks(tasks, now, twentyFourHoursMs); + expect(stale).toHaveLength(0); + }); + + it("skips already-stale tasks", () => { + const tasks = [ + { + id: "TASK-001", + title: "Already Stale", + status: "stale" as const, + updated: "2026-02-13 08:00", + rawLines: [], + isCompleted: false, + }, + ]; + + const stale = findStaleTasks(tasks, now, twentyFourHoursMs); + expect(stale).toHaveLength(0); + }); + + it("uses started date when updated is missing", () => { + const tasks = [ + { + id: "TASK-001", + title: "No Update Date", + status: "in_progress" as const, + started: "2026-02-14 08:00", + rawLines: [], + isCompleted: false, + }, + ]; + + const stale = findStaleTasks(tasks, now, twentyFourHoursMs); + expect(stale).toHaveLength(1); + }); + + it("marks tasks with no dates as stale", () => { + const tasks = [ + { + id: "TASK-001", + title: "No Dates", + status: "in_progress" as const, + rawLines: [], + isCompleted: false, + }, + ]; + + const stale = findStaleTasks(tasks, now, twentyFourHoursMs); + expect(stale).toHaveLength(1); + }); +}); + +// ============================================================================ +// serializeTask / serializeTaskLedger +// ============================================================================ + +describe("serializeTask", () => { + it("serializes an active task", () => { + const task = { + id: "TASK-001", + title: "My Task", + status: "in_progress" as const, + started: "2026-02-14 09:00", + updated: "2026-02-14 10:00", + details: "Some details", + currentStep: "Step 1", + rawLines: [], + isCompleted: false, + }; + + const lines = serializeTask(task); + expect(lines[0]).toBe("## TASK-001: My Task"); + expect(lines).toContain("- **Status:** in_progress"); + expect(lines).toContain("- **Started:** 2026-02-14 09:00"); + expect(lines).toContain("- **Updated:** 2026-02-14 10:00"); + expect(lines).toContain("- **Details:** Some details"); + expect(lines).toContain("- **Current Step:** Step 1"); + }); + + it("serializes a completed task with strikethrough", () => { + const task = { + id: "TASK-001", + title: "Done Task", + status: "done" as const, + started: "2026-02-14 09:00", + rawLines: [], + isCompleted: true, + }; + + const lines = serializeTask(task); + expect(lines[0]).toBe("## ~~TASK-001: Done Task~~"); + }); +}); + +describe("serializeTaskLedger", () => { + it("round-trips a task ledger", () => { + const ledger = { + activeTasks: [ + { + id: "TASK-001", + title: "Active Task", + status: "in_progress" as const, + started: "2026-02-14 09:00", + updated: "2026-02-14 10:00", + details: "Details here", + rawLines: [], + isCompleted: false, + }, + ], + completedTasks: [ + { + id: "TASK-000", + title: "Old Task", + status: "done" as const, + started: "2026-02-13 09:00", + rawLines: [], + isCompleted: true, + }, + ], + preamble: [], + sectionSeparator: [], + postamble: [], + }; + + const serialized = serializeTaskLedger(ledger); + expect(serialized).toContain("# Active Tasks"); + expect(serialized).toContain("## TASK-001: Active Task"); + expect(serialized).toContain("# Completed"); + expect(serialized).toContain("## ~~TASK-000: Old Task~~"); + }); +}); + +// ============================================================================ +// reviewAndArchiveStaleTasks (integration with filesystem) +// ============================================================================ + +describe("reviewAndArchiveStaleTasks", () => { + let tmpDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-ledger-test-")); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it("returns null when TASKS.md does not exist", async () => { + const result = await reviewAndArchiveStaleTasks(tmpDir); + expect(result).toBeNull(); + }); + + it("returns null for empty TASKS.md", async () => { + await fs.writeFile(path.join(tmpDir, "TASKS.md"), "", "utf-8"); + const result = await reviewAndArchiveStaleTasks(tmpDir); + expect(result).toBeNull(); + }); + + it("archives stale tasks", async () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Stale Task", + "- **Status:** in_progress", + "- **Started:** 2026-02-13 08:00", + "- **Updated:** 2026-02-13 09:00", + "", + "## TASK-002: Fresh Task", + "- **Status:** in_progress", + "- **Started:** 2026-02-14 09:00", + "- **Updated:** 2026-02-14 23:00", + "", + "# Completed", + "", + ].join("\n"); + + await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8"); + + // "now" is Feb 15, 10:00 โ€” TASK-001 updated Feb 13, 09:00 (>24h ago), TASK-002 updated Feb 14, 23:00 (<24h ago) + const now = new Date("2026-02-15T10:00:00"); + const result = await reviewAndArchiveStaleTasks(tmpDir, undefined, now); + + expect(result).not.toBeNull(); + expect(result!.staleCount).toBe(1); + expect(result!.archivedCount).toBe(1); + expect(result!.archivedIds).toEqual(["TASK-001"]); + + // Verify the file was updated + const updated = await fs.readFile(path.join(tmpDir, "TASKS.md"), "utf-8"); + expect(updated).toContain("## TASK-002: Fresh Task"); + expect(updated).toContain("## ~~TASK-001: Stale Task~~"); + + // Re-parse to verify structure + const ledger = parseTaskLedger(updated); + expect(ledger.activeTasks).toHaveLength(1); + expect(ledger.activeTasks[0].id).toBe("TASK-002"); + expect(ledger.completedTasks).toHaveLength(1); + expect(ledger.completedTasks[0].id).toBe("TASK-001"); + expect(ledger.completedTasks[0].status).toBe("stale"); + }); + + it("does nothing when no tasks are stale", async () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Fresh Task", + "- **Status:** in_progress", + "- **Started:** 2026-02-15 09:00", + "- **Updated:** 2026-02-15 09:30", + "", + "# Completed", + ].join("\n"); + + await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8"); + + const now = new Date("2026-02-15T10:00:00"); + const result = await reviewAndArchiveStaleTasks(tmpDir, undefined, now); + + expect(result).not.toBeNull(); + expect(result!.staleCount).toBe(0); + expect(result!.archivedCount).toBe(0); + }); + + it("supports custom maxAgeMs", async () => { + const content = [ + "# Active Tasks", + "", + "## TASK-001: Semi-Fresh Task", + "- **Status:** in_progress", + "- **Started:** 2026-02-15 06:00", + "- **Updated:** 2026-02-15 06:00", + "", + "# Completed", + ].join("\n"); + + await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8"); + + const now = new Date("2026-02-15T10:00:00"); + const oneHourMs = 60 * 60 * 1000; + + // With 1-hour threshold, task is stale (4 hours old) + const result = await reviewAndArchiveStaleTasks(tmpDir, oneHourMs, now); + expect(result!.archivedCount).toBe(1); + }); +}); diff --git a/extensions/memory-neo4j/task-ledger.ts b/extensions/memory-neo4j/task-ledger.ts new file mode 100644 index 00000000000..1d7fee05fc6 --- /dev/null +++ b/extensions/memory-neo4j/task-ledger.ts @@ -0,0 +1,416 @@ +/** + * Task Ledger (TASKS.md) maintenance utilities. + * + * Parses and updates the structured task ledger file used by agents + * to track active work across compaction events. The sleep cycle uses + * these utilities to archive stale tasks (>24h with no activity). + */ + +import fs from "node:fs/promises"; +import path from "node:path"; + +// ============================================================================ +// Types +// ============================================================================ + +export type TaskStatus = "in_progress" | "awaiting_input" | "blocked" | "done" | "stale" | string; + +export type ParsedTask = { + /** Task ID (e.g. "TASK-001") */ + id: string; + /** Short title */ + title: string; + /** Current status */ + status: TaskStatus; + /** When the task was started (ISO-ish string) */ + started?: string; + /** When the task was last updated (ISO-ish string) */ + updated?: string; + /** Task details/description */ + details?: string; + /** Current step being worked on */ + currentStep?: string; + /** What's blocking progress */ + blockedOn?: string; + /** Raw markdown lines for this task section (for round-tripping) */ + rawLines: string[]; + /** Whether this task is in the completed section */ + isCompleted: boolean; +}; + +export type TaskLedger = { + activeTasks: ParsedTask[]; + completedTasks: ParsedTask[]; + /** Lines before the first task section (header, etc.) */ + preamble: string[]; + /** Lines between active and completed sections */ + sectionSeparator: string[]; + /** Lines after the completed section */ + postamble: string[]; +}; + +export type StaleTaskResult = { + /** Number of tasks found that are stale */ + staleCount: number; + /** Number of tasks archived (moved to completed) */ + archivedCount: number; + /** Task IDs that were archived */ + archivedIds: string[]; +}; + +// ============================================================================ +// Parsing +// ============================================================================ + +/** + * Parse a TASKS.md file content into structured task data. + */ +export function parseTaskLedger(content: string): TaskLedger { + const lines = content.split("\n"); + const activeTasks: ParsedTask[] = []; + const completedTasks: ParsedTask[] = []; + const preamble: string[] = []; + const sectionSeparator: string[] = []; + const postamble: string[] = []; + + let currentSection: "preamble" | "active" | "completed" | "postamble" = "preamble"; + let currentTask: ParsedTask | null = null; + + for (const line of lines) { + const trimmed = line.trim(); + + // Detect section headers + if (/^#\s+Active\s+Tasks/i.test(trimmed)) { + if (currentTask) { + pushTask(currentTask, activeTasks, completedTasks); + currentTask = null; + } + currentSection = "active"; + preamble.push(line); + continue; + } + + if (/^#\s+Completed/i.test(trimmed)) { + if (currentTask) { + pushTask(currentTask, activeTasks, completedTasks); + currentTask = null; + } + currentSection = "completed"; + sectionSeparator.push(line); + continue; + } + + // Detect task headers (## TASK-NNN: Title or ## ~~TASK-NNN: Title~~) + const taskMatch = trimmed.match(/^##\s+(?:~~)?(TASK-\d+):\s*(.+?)(?:~~)?$/); + if (taskMatch) { + if (currentTask) { + pushTask(currentTask, activeTasks, completedTasks); + } + const isStrikethrough = trimmed.includes("~~"); + currentTask = { + id: taskMatch[1], + title: taskMatch[2].replace(/~~/g, "").trim(), + status: isStrikethrough ? "done" : "in_progress", + rawLines: [line], + isCompleted: currentSection === "completed" || isStrikethrough, + }; + continue; + } + + // Parse task fields (- **Field:** Value) + if (currentTask) { + const fieldMatch = trimmed.match(/^-\s+\*\*(.+?):\*\*\s*(.*)$/); + if (fieldMatch) { + const fieldName = fieldMatch[1].toLowerCase(); + const value = fieldMatch[2].trim(); + switch (fieldName) { + case "status": + currentTask.status = value; + break; + case "started": + currentTask.started = value; + break; + case "updated": + case "last updated": + currentTask.updated = value; + break; + case "details": + currentTask.details = value; + break; + case "current step": + currentTask.currentStep = value; + break; + case "blocked on": + currentTask.blockedOn = value; + break; + } + currentTask.rawLines.push(line); + continue; + } + + // Non-field lines within a task + if (trimmed !== "" && !trimmed.startsWith("#")) { + currentTask.rawLines.push(line); + continue; + } + + // Empty line within a task โ€” include it + if (trimmed === "") { + currentTask.rawLines.push(line); + continue; + } + } + + // Lines not part of a task + switch (currentSection) { + case "preamble": + case "active": + preamble.push(line); + break; + case "completed": + sectionSeparator.push(line); + break; + case "postamble": + postamble.push(line); + break; + } + } + + // Push the last task + if (currentTask) { + pushTask(currentTask, activeTasks, completedTasks); + } + + return { activeTasks, completedTasks, preamble, sectionSeparator, postamble }; +} + +function pushTask(task: ParsedTask, active: ParsedTask[], completed: ParsedTask[]) { + if (task.isCompleted || task.status === "done") { + completed.push(task); + } else { + active.push(task); + } +} + +// ============================================================================ +// Staleness Detection +// ============================================================================ + +/** + * Parse a date string from the task ledger. + * Accepts formats like "2026-02-14 09:15", "2026-02-14 09:15 MYT", + * "2026-02-14T09:15:00", etc. + */ +export function parseTaskDate(dateStr: string): Date | null { + if (!dateStr) { + return null; + } + const cleaned = dateStr + .trim() + // Remove timezone abbreviations like MYT, UTC, PST + .replace(/\s+[A-Z]{2,5}$/, "") + // Normalize space-separated date time to ISO + .replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})/, "$1T$2"); + + const date = new Date(cleaned); + if (Number.isNaN(date.getTime())) { + return null; + } + return date; +} + +/** + * Find tasks that are stale (no update in more than `maxAgeMs` milliseconds). + * Default: 24 hours. + */ +export function findStaleTasks( + tasks: ParsedTask[], + now: Date = new Date(), + maxAgeMs: number = 24 * 60 * 60 * 1000, +): ParsedTask[] { + return tasks.filter((task) => { + // Only check active tasks (not already done/stale) + if (task.status === "done" || task.status === "stale") { + return false; + } + + const lastUpdate = task.updated || task.started; + if (!lastUpdate) { + // No date info โ€” consider stale if we can't determine age + return true; + } + + const date = parseTaskDate(lastUpdate); + if (!date) { + return false; // Can't parse date โ€” don't mark as stale + } + + const ageMs = now.getTime() - date.getTime(); + return ageMs > maxAgeMs; + }); +} + +// ============================================================================ +// Task Ledger Serialization +// ============================================================================ + +/** + * Serialize a task back to markdown lines. + * If the task has rawLines from parsing, regenerate only the header and status + * (which may have changed) while preserving other raw content. + * For new/modified tasks without rawLines, generate from parsed fields. + */ +export function serializeTask(task: ParsedTask): string[] { + const titlePrefix = task.isCompleted + ? `## ~~${task.id}: ${task.title}~~` + : `## ${task.id}: ${task.title}`; + + // If we have rawLines and the task was only modified (status/updated changed + // by archival), rebuild from rawLines with updated field values. + if (task.rawLines.length > 0) { + const lines: string[] = [titlePrefix]; + for (const line of task.rawLines.slice(1)) { + const trimmed = line.trim(); + // Replace Status field with current value + if (/^-\s+\*\*Status:\*\*/.test(trimmed)) { + lines.push(`- **Status:** ${task.status}`); + } else if (/^-\s+\*\*(?:Updated|Last Updated):\*\*/.test(trimmed)) { + lines.push(`- **Updated:** ${task.updated ?? ""}`); + } else { + lines.push(line); + } + } + return lines; + } + + // Fallback: generate from parsed fields (for newly created tasks) + const lines: string[] = [titlePrefix]; + lines.push(`- **Status:** ${task.status}`); + if (task.started) { + lines.push(`- **Started:** ${task.started}`); + } + if (task.updated) { + lines.push(`- **Updated:** ${task.updated}`); + } + if (task.details) { + lines.push(`- **Details:** ${task.details}`); + } + if (task.currentStep) { + lines.push(`- **Current Step:** ${task.currentStep}`); + } + if (task.blockedOn) { + lines.push(`- **Blocked On:** ${task.blockedOn}`); + } + return lines; +} + +/** + * Serialize the full task ledger back to markdown. + * Preserves preamble, section separators, and postamble from the original parse. + */ +export function serializeTaskLedger(ledger: TaskLedger): string { + const lines: string[] = []; + + // Use original preamble if available, otherwise generate header + if (ledger.preamble.length > 0) { + lines.push(...ledger.preamble); + } else { + lines.push("# Active Tasks"); + lines.push(""); + } + + // Active tasks + for (const task of ledger.activeTasks) { + lines.push(...serializeTask(task)); + lines.push(""); + } + + // Use original section separator if available, otherwise generate + if (ledger.sectionSeparator.length > 0) { + lines.push(...ledger.sectionSeparator); + } else { + lines.push("# Completed"); + lines.push(""); + } + lines.push(""); + + // Completed tasks + for (const task of ledger.completedTasks) { + lines.push(...serializeTask(task)); + lines.push(""); + } + + // Preserve postamble + if (ledger.postamble.length > 0) { + lines.push(...ledger.postamble); + } + + return lines.join("\n").trimEnd() + "\n"; +} + +// ============================================================================ +// Sleep Cycle Integration +// ============================================================================ + +/** + * Review TASKS.md for stale tasks and archive them. + * This is called during the sleep cycle. + * + * @param workspaceDir - Path to the workspace directory + * @param maxAgeMs - Maximum age before a task is considered stale (default: 24h) + * @param now - Current time (for testing) + * @returns Result of the stale task review, or null if TASKS.md doesn't exist + */ +export async function reviewAndArchiveStaleTasks( + workspaceDir: string, + maxAgeMs: number = 24 * 60 * 60 * 1000, + now: Date = new Date(), +): Promise { + const tasksPath = path.join(workspaceDir, "TASKS.md"); + + let content: string; + try { + content = await fs.readFile(tasksPath, "utf-8"); + } catch { + // TASKS.md doesn't exist โ€” nothing to do + return null; + } + + if (!content.trim()) { + return null; + } + + const ledger = parseTaskLedger(content); + const staleTasks = findStaleTasks(ledger.activeTasks, now, maxAgeMs); + + if (staleTasks.length === 0) { + return { staleCount: 0, archivedCount: 0, archivedIds: [] }; + } + + const archivedIds: string[] = []; + const nowStr = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}-${String(now.getDate()).padStart(2, "0")} ${String(now.getHours()).padStart(2, "0")}:${String(now.getMinutes()).padStart(2, "0")}`; + + for (const task of staleTasks) { + task.status = "stale"; + task.updated = nowStr; + task.isCompleted = true; + + // Move from active to completed + const idx = ledger.activeTasks.indexOf(task); + if (idx !== -1) { + ledger.activeTasks.splice(idx, 1); + } + ledger.completedTasks.push(task); + archivedIds.push(task.id); + } + + // Write back + const updated = serializeTaskLedger(ledger); + await fs.writeFile(tasksPath, updated, "utf-8"); + + return { + staleCount: staleTasks.length, + archivedCount: archivedIds.length, + archivedIds, + }; +} diff --git a/src/agents/pi-embedded-runner/compact.default-instructions.test.ts b/src/agents/pi-embedded-runner/compact.default-instructions.test.ts new file mode 100644 index 00000000000..b6ba7c57908 --- /dev/null +++ b/src/agents/pi-embedded-runner/compact.default-instructions.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from "vitest"; +import { DEFAULT_COMPACTION_INSTRUCTIONS } from "./compact.js"; + +describe("DEFAULT_COMPACTION_INSTRUCTIONS", () => { + it("contains priority ordering with numbered items", () => { + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("1."); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("2."); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("3."); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("4."); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("5."); + }); + + it("prioritizes active tasks first", () => { + const taskLine = DEFAULT_COMPACTION_INSTRUCTIONS.indexOf("active or in-progress tasks"); + const decisionsLine = DEFAULT_COMPACTION_INSTRUCTIONS.indexOf("Key decisions"); + expect(taskLine).toBeLessThan(decisionsLine); + expect(taskLine).toBeGreaterThan(-1); + }); + + it("mentions TASKS.md for task ledger continuity", () => { + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("TASKS.md"); + }); + + it("includes de-prioritization guidance", () => { + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("De-prioritize"); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("casual conversation"); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("completed tasks"); + }); + + it("mentions exact values needed to resume work", () => { + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("file paths"); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("URLs"); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("IDs"); + }); + + it("includes tool state preservation", () => { + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("Tool state"); + expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("browser sessions"); + }); +}); + +describe("compaction instructions merging", () => { + it("custom instructions are appended to defaults", () => { + const customInstructions = "Also remember to include user preferences."; + const merged = `${DEFAULT_COMPACTION_INSTRUCTIONS}\n\n${customInstructions}`; + + // Defaults come first + expect(merged.indexOf("When summarizing")).toBeLessThan(merged.indexOf(customInstructions)); + // Custom instructions are present + expect(merged).toContain(customInstructions); + // Defaults are not lost + expect(merged).toContain("active or in-progress tasks"); + }); + + it("when no custom instructions, defaults are used alone", () => { + // Simulate the compaction path where customInstructions is undefined + const resolve = (custom?: string) => + custom ? `${DEFAULT_COMPACTION_INSTRUCTIONS}\n\n${custom}` : DEFAULT_COMPACTION_INSTRUCTIONS; + + const result = resolve(undefined); + expect(result).toBe(DEFAULT_COMPACTION_INSTRUCTIONS); + expect(result).not.toContain("\n\nundefined"); + }); +}); diff --git a/src/agents/pi-embedded-runner/system-prompt.post-compaction.test.ts b/src/agents/pi-embedded-runner/system-prompt.post-compaction.test.ts new file mode 100644 index 00000000000..5f33bec1fea --- /dev/null +++ b/src/agents/pi-embedded-runner/system-prompt.post-compaction.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from "vitest"; +import { buildAgentSystemPrompt } from "../system-prompt.js"; + +describe("Task Ledger section", () => { + it("includes the Task Ledger section in full prompt mode", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + }); + + expect(prompt).toContain("## Task Ledger (TASKS.md)"); + }); + + it("describes the task format with required fields", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + }); + + expect(prompt).toContain("**Status:**"); + expect(prompt).toContain("**Started:**"); + expect(prompt).toContain("**Updated:**"); + expect(prompt).toContain("**Current Step:**"); + }); + + it("mentions stale task archival by sleep cycle", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + }); + + expect(prompt).toContain("sleep cycle"); + expect(prompt).toContain(">24h"); + }); + + it("omits the section in minimal (subagent) prompt mode", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + promptMode: "minimal", + }); + + expect(prompt).not.toContain("## Task Ledger (TASKS.md)"); + }); + + it("omits the section in none prompt mode", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + promptMode: "none", + }); + + expect(prompt).not.toContain("## Task Ledger (TASKS.md)"); + }); +}); + +describe("Post-Compaction Recovery", () => { + it("does NOT include a static recovery section (handled by framework injection)", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + }); + + // Recovery instructions are injected dynamically via post-compaction-recovery.ts, + // not baked into the system prompt (avoids wasting tokens on every turn). + expect(prompt).not.toContain("## Post-Compaction Recovery"); + }); +}); diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index c0bb6d55f85..8f934346bde 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -625,6 +625,38 @@ export function buildAgentSystemPrompt(params: { ); } + // Task Ledger instructions (skip for subagent/none modes) + if (!isMinimal) { + lines.push( + "## Task Ledger (TASKS.md)", + "Maintain a TASKS.md file in the workspace root to track active work across compaction events.", + "Update it whenever you start, progress, or complete a task. Format:", + "", + "```markdown", + "# Active Tasks", + "", + "## TASK-001: ", + "- **Status:** in_progress | awaiting_input | blocked | done", + "- **Started:** YYYY-MM-DD HH:MM", + "- **Updated:** YYYY-MM-DD HH:MM", + "- **Details:** What this task is about", + "- **Current Step:** What you're doing right now", + "- **Blocked On:** (if applicable) What's preventing progress", + "", + "# Completed", + "", + "```", + "", + "Rules:", + "- Create TASKS.md on first task if it doesn't exist.", + "- Update **Updated** timestamp and **Current Step** as you make progress.", + "- Move tasks to Completed when done; include completion date.", + "- Keep IDs sequential (TASK-001, TASK-002, etc.).", + "- Stale tasks (>24h with no update) may be auto-archived by the sleep cycle.", + "", + ); + } + // Skip heartbeats for subagent/none modes if (!isMinimal) { lines.push( diff --git a/src/agents/workspace.tasks-bootstrap.test.ts b/src/agents/workspace.tasks-bootstrap.test.ts new file mode 100644 index 00000000000..bca789bbf10 --- /dev/null +++ b/src/agents/workspace.tasks-bootstrap.test.ts @@ -0,0 +1,79 @@ +import { describe, expect, it } from "vitest"; +import { makeTempWorkspace, writeWorkspaceFile } from "../test-helpers/workspace.js"; +import { + DEFAULT_TASKS_FILENAME, + filterBootstrapFilesForSession, + loadWorkspaceBootstrapFiles, +} from "./workspace.js"; + +describe("TASKS.md bootstrap", () => { + it("DEFAULT_TASKS_FILENAME equals TASKS.md", () => { + expect(DEFAULT_TASKS_FILENAME).toBe("TASKS.md"); + }); + + it("loadWorkspaceBootstrapFiles includes TASKS.md entry", async () => { + const tempDir = await makeTempWorkspace("openclaw-tasks-"); + + const files = await loadWorkspaceBootstrapFiles(tempDir); + const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME); + + expect(tasksEntry).toBeDefined(); + }); + + it("loads TASKS.md content when the file exists", async () => { + const tempDir = await makeTempWorkspace("openclaw-tasks-"); + await writeWorkspaceFile({ dir: tempDir, name: "TASKS.md", content: "- [ ] finish tests" }); + + const files = await loadWorkspaceBootstrapFiles(tempDir); + const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME); + + expect(tasksEntry).toBeDefined(); + expect(tasksEntry!.missing).toBe(false); + expect(tasksEntry!.content).toBe("- [ ] finish tests"); + }); + + it("marks TASKS.md as missing (not error) when the file does not exist", async () => { + const tempDir = await makeTempWorkspace("openclaw-tasks-"); + + const files = await loadWorkspaceBootstrapFiles(tempDir); + const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME); + + expect(tasksEntry).toBeDefined(); + expect(tasksEntry!.missing).toBe(true); + expect(tasksEntry!.content).toBeUndefined(); + }); + + it("TASKS.md is in SUBAGENT_BOOTSTRAP_ALLOWLIST (kept for subagent sessions)", () => { + const files = [ + { + name: DEFAULT_TASKS_FILENAME as const, + path: "/tmp/TASKS.md", + missing: false, + content: "tasks", + }, + { name: "SOUL.md" as const, path: "/tmp/SOUL.md", missing: false, content: "soul" }, + ]; + + const filtered = filterBootstrapFilesForSession(files, "agent:main:subagent:test-123"); + + const tasksKept = filtered.find((f) => f.name === DEFAULT_TASKS_FILENAME); + expect(tasksKept).toBeDefined(); + }); + + it("filterBootstrapFilesForSession drops non-allowlisted files for subagent sessions", () => { + const files = [ + { + name: DEFAULT_TASKS_FILENAME as const, + path: "/tmp/TASKS.md", + missing: false, + content: "tasks", + }, + { name: "SOUL.md" as const, path: "/tmp/SOUL.md", missing: false, content: "soul" }, + ]; + + const filtered = filterBootstrapFilesForSession(files, "agent:main:subagent:test-123"); + + const soulKept = filtered.find((f) => f.name === "SOUL.md"); + expect(soulKept).toBeUndefined(); + }); +}); diff --git a/src/agents/workspace.ts b/src/agents/workspace.ts index 9e1c081c7ec..7c10de2e3b9 100644 --- a/src/agents/workspace.ts +++ b/src/agents/workspace.ts @@ -28,6 +28,7 @@ export const DEFAULT_USER_FILENAME = "USER.md"; export const DEFAULT_HEARTBEAT_FILENAME = "HEARTBEAT.md"; export const DEFAULT_BOOTSTRAP_FILENAME = "BOOTSTRAP.md"; export const DEFAULT_MEMORY_FILENAME = "MEMORY.md"; +export const DEFAULT_TASKS_FILENAME = "TASKS.md"; export const DEFAULT_MEMORY_ALT_FILENAME = "memory.md"; const WORKSPACE_STATE_DIRNAME = ".openclaw"; const WORKSPACE_STATE_FILENAME = "workspace-state.json"; @@ -87,6 +88,7 @@ export type WorkspaceBootstrapFileName = | typeof DEFAULT_HEARTBEAT_FILENAME | typeof DEFAULT_BOOTSTRAP_FILENAME | typeof DEFAULT_MEMORY_FILENAME + | typeof DEFAULT_TASKS_FILENAME | typeof DEFAULT_MEMORY_ALT_FILENAME; export type WorkspaceBootstrapFile = { @@ -444,6 +446,10 @@ export async function loadWorkspaceBootstrapFiles(dir: string): Promise MINIMAL_BOOTSTRAP_ALLOWLIST.has(file.name)); + return files.filter((file) => SUBAGENT_BOOTSTRAP_ALLOWLIST.has(file.name)); } export async function loadExtraBootstrapFiles( diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts index de81ffec664..9a5fe5bfb04 100644 --- a/src/auto-reply/reply/agent-runner-memory.ts +++ b/src/auto-reply/reply/agent-runner-memory.ts @@ -23,6 +23,7 @@ import { resolveMemoryFlushSettings, shouldRunMemoryFlush, } from "./memory-flush.js"; +import { markNeedsPostCompactionRecovery } from "./post-compaction-recovery.js"; import { incrementCompactionCount } from "./session-updates.js"; export async function runMemoryFlushIfNeeded(params: { @@ -179,6 +180,16 @@ export async function runMemoryFlushIfNeeded(params: { if (typeof nextCount === "number") { memoryFlushCompactionCount = nextCount; } + // P3: Mark session for post-compaction recovery on the next turn. + // This path handles flush-triggered compaction (memory flush forces a compact). + // The main path in agent-runner.ts handles SDK auto-compaction. + // These are mutually exclusive; setting true is idempotent. + await markNeedsPostCompactionRecovery({ + sessionEntry: activeSessionEntry, + sessionStore: activeSessionStore, + sessionKey: params.sessionKey, + storePath: params.storePath, + }); } if (params.storePath && params.sessionKey) { try { diff --git a/src/auto-reply/reply/agent-runner.memory-flush.task-checkpoint.test.ts b/src/auto-reply/reply/agent-runner.memory-flush.task-checkpoint.test.ts new file mode 100644 index 00000000000..c0347ae5315 --- /dev/null +++ b/src/auto-reply/reply/agent-runner.memory-flush.task-checkpoint.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from "vitest"; +import { + DEFAULT_MEMORY_FLUSH_PROMPT, + DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT, + resolveMemoryFlushSettings, +} from "./memory-flush.js"; + +describe("memory flush task checkpoint", () => { + describe("DEFAULT_MEMORY_FLUSH_PROMPT", () => { + it("includes task state extraction language", () => { + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("active task"); + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("task name"); + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("current step"); + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("pending actions"); + }); + + it("instructs to use memory_store with core category and importance 1.0", () => { + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("memory_store"); + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("category 'core'"); + expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("importance 1.0"); + }); + }); + + describe("DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT", () => { + it("includes CRITICAL instruction about active tasks", () => { + expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("CRITICAL"); + expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("active task"); + }); + + it("instructs to save task state with core category", () => { + expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("memory_store"); + expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("category='core'"); + expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("importance=1.0"); + }); + + it("mentions task continuity after compaction", () => { + expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("task continuity after compaction"); + }); + }); + + describe("resolveMemoryFlushSettings", () => { + it("returns prompts containing task-related keywords by default", () => { + const settings = resolveMemoryFlushSettings(); + expect(settings).not.toBeNull(); + expect(settings?.prompt).toContain("active task"); + expect(settings?.prompt).toContain("memory_store"); + expect(settings?.systemPrompt).toContain("CRITICAL"); + expect(settings?.systemPrompt).toContain("task continuity"); + }); + + it("preserves task checkpoint language alongside existing content", () => { + const settings = resolveMemoryFlushSettings(); + expect(settings).not.toBeNull(); + // Original content still present + expect(settings?.prompt).toContain("Pre-compaction memory flush"); + expect(settings?.prompt).toContain("durable memories"); + // New task checkpoint content also present + expect(settings?.prompt).toContain("current step"); + expect(settings?.prompt).toContain("pending actions"); + }); + }); +}); diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index c8f8eba129a..826b063fc44 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -36,6 +36,7 @@ import { appendUsageLine, formatResponseUsageLine } from "./agent-runner-utils.j import { createAudioAsVoiceBuffer, createBlockReplyPipeline } from "./block-reply-pipeline.js"; import { resolveBlockStreamingCoalescing } from "./block-streaming.js"; import { createFollowupRunner } from "./followup-runner.js"; +import { markNeedsPostCompactionRecovery } from "./post-compaction-recovery.js"; import { enqueueFollowupRun, type FollowupRun, type QueueSettings } from "./queue.js"; import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-threading.js"; import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js"; @@ -499,6 +500,16 @@ export async function runReplyAgent(params: { lastCallUsage: runResult.meta.agentMeta?.lastCallUsage, contextTokensUsed, }); + // P3: Mark session for post-compaction recovery on the next turn. + // This path handles SDK auto-compaction (during the agent run itself). + // The memory-flush path in agent-runner-memory.ts handles flush-triggered compaction. + // These are mutually exclusive for a given compaction event; setting true is idempotent. + await markNeedsPostCompactionRecovery({ + sessionEntry: activeSessionEntry, + sessionStore: activeSessionStore, + sessionKey, + storePath, + }); if (verboseEnabled) { const suffix = typeof count === "number" ? ` (count ${count})` : ""; finalPayloads = [{ text: `๐Ÿงน Auto-compaction complete${suffix}.` }, ...finalPayloads]; diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 66d64f5be72..04cc2f8ab24 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -41,6 +41,10 @@ import { runReplyAgent } from "./agent-runner.js"; import { applySessionHints } from "./body.js"; import { buildGroupChatContext, buildGroupIntro } from "./groups.js"; import { buildInboundMetaSystemPrompt, buildInboundUserContextPrefix } from "./inbound-meta.js"; +import { + clearPostCompactionRecovery, + prependPostCompactionRecovery, +} from "./post-compaction-recovery.js"; import { resolveQueueSettings } from "./queue.js"; import { routeReply } from "./route-reply.js"; import { BARE_SESSION_RESET_PROMPT } from "./session-reset-prompt.js"; @@ -256,6 +260,18 @@ export async function runPreparedReply( isNewSession, prefixedBodyBase, }); + // P3: Prepend post-compaction recovery instructions if the previous turn + // triggered auto-compaction. This ensures the agent recalls task state from + // memory before responding to the user's next message. + prefixedBodyBase = prependPostCompactionRecovery(prefixedBodyBase, sessionEntry); + if (sessionEntry?.needsPostCompactionRecovery) { + await clearPostCompactionRecovery({ + sessionEntry, + sessionStore, + sessionKey, + storePath, + }); + } prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext); const threadStarterBody = ctx.ThreadStarterBody?.trim(); const threadHistoryBody = ctx.ThreadHistoryBody?.trim(); diff --git a/src/auto-reply/reply/memory-flush.ts b/src/auto-reply/reply/memory-flush.ts index a3b50ae3444..35968b7a6ce 100644 --- a/src/auto-reply/reply/memory-flush.ts +++ b/src/auto-reply/reply/memory-flush.ts @@ -12,12 +12,14 @@ export const DEFAULT_MEMORY_FLUSH_PROMPT = [ "Pre-compaction memory flush.", "Store durable memories now (use memory/YYYY-MM-DD.md; create memory/ if needed).", "IMPORTANT: If the file already exists, APPEND new content only and do not overwrite existing entries.", + "If there is an active task in progress, save its state: task name, current step, pending actions, and any critical variables. Use memory_store with category 'core' and importance 1.0 for active task state.", `If nothing to store, reply with ${SILENT_REPLY_TOKEN}.`, ].join(" "); export const DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT = [ "Pre-compaction memory flush turn.", "The session is near auto-compaction; capture durable memories to disk.", + "CRITICAL: If there is an active task being worked on, you MUST save its current state (task name, step, pending actions, key variables) to memory_store with category='core' and importance=1.0. This ensures task continuity after compaction.", `You may reply, but usually ${SILENT_REPLY_TOKEN} is correct.`, ].join(" "); diff --git a/src/auto-reply/reply/post-compaction-recovery.test.ts b/src/auto-reply/reply/post-compaction-recovery.test.ts new file mode 100644 index 00000000000..5c5d00fc3fc --- /dev/null +++ b/src/auto-reply/reply/post-compaction-recovery.test.ts @@ -0,0 +1,102 @@ +import { describe, expect, it } from "vitest"; +import { + getPostCompactionRecoveryPrompt, + POST_COMPACTION_RECOVERY_PROMPT, + prependPostCompactionRecovery, +} from "./post-compaction-recovery.js"; + +describe("post-compaction-recovery", () => { + describe("POST_COMPACTION_RECOVERY_PROMPT", () => { + it("is defined and non-empty", () => { + expect(POST_COMPACTION_RECOVERY_PROMPT).toBeTruthy(); + expect(POST_COMPACTION_RECOVERY_PROMPT.length).toBeGreaterThan(0); + }); + + it("stays under 200 tokens (rough estimate: <800 chars)", () => { + // A rough heuristic: 1 token โ‰ˆ 4 chars. 200 tokens โ‰ˆ 800 chars. + expect(POST_COMPACTION_RECOVERY_PROMPT.length).toBeLessThan(800); + }); + + it("includes memory_recall instruction", () => { + expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("memory_recall"); + }); + + it("includes TASKS.md instruction", () => { + expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("TASKS.md"); + }); + + it("includes Context Reset notification template", () => { + expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("Context Reset"); + }); + }); + + describe("getPostCompactionRecoveryPrompt", () => { + it("returns null when entry is undefined", () => { + expect(getPostCompactionRecoveryPrompt(undefined)).toBeNull(); + }); + + it("returns null when needsPostCompactionRecovery is false", () => { + const entry = { + sessionId: "test", + updatedAt: Date.now(), + needsPostCompactionRecovery: false, + }; + expect(getPostCompactionRecoveryPrompt(entry)).toBeNull(); + }); + + it("returns null when needsPostCompactionRecovery is not set", () => { + const entry = { sessionId: "test", updatedAt: Date.now() }; + expect(getPostCompactionRecoveryPrompt(entry)).toBeNull(); + }); + + it("returns the recovery prompt when needsPostCompactionRecovery is true", () => { + const entry = { + sessionId: "test", + updatedAt: Date.now(), + needsPostCompactionRecovery: true, + }; + expect(getPostCompactionRecoveryPrompt(entry)).toBe(POST_COMPACTION_RECOVERY_PROMPT); + }); + }); + + describe("prependPostCompactionRecovery", () => { + it("returns original body when no recovery needed", () => { + const body = "Hello, how are you?"; + expect(prependPostCompactionRecovery(body, undefined)).toBe(body); + }); + + it("returns original body when flag is false", () => { + const body = "Hello, how are you?"; + const entry = { + sessionId: "test", + updatedAt: Date.now(), + needsPostCompactionRecovery: false, + }; + expect(prependPostCompactionRecovery(body, entry)).toBe(body); + }); + + it("prepends recovery prompt when flag is true", () => { + const body = "Hello, how are you?"; + const entry = { + sessionId: "test", + updatedAt: Date.now(), + needsPostCompactionRecovery: true, + }; + const result = prependPostCompactionRecovery(body, entry); + expect(result).toContain(POST_COMPACTION_RECOVERY_PROMPT); + expect(result).toContain(body); + expect(result.indexOf(POST_COMPACTION_RECOVERY_PROMPT)).toBeLessThan(result.indexOf(body)); + }); + + it("separates recovery prompt from body with double newline", () => { + const body = "test message"; + const entry = { + sessionId: "test", + updatedAt: Date.now(), + needsPostCompactionRecovery: true, + }; + const result = prependPostCompactionRecovery(body, entry); + expect(result).toBe(`${POST_COMPACTION_RECOVERY_PROMPT}\n\n${body}`); + }); + }); +}); diff --git a/src/auto-reply/reply/post-compaction-recovery.ts b/src/auto-reply/reply/post-compaction-recovery.ts new file mode 100644 index 00000000000..3896e0e81c1 --- /dev/null +++ b/src/auto-reply/reply/post-compaction-recovery.ts @@ -0,0 +1,103 @@ +import type { SessionEntry } from "../../config/sessions.js"; +import { updateSessionStore } from "../../config/sessions.js"; + +/** + * Post-compaction recovery prompt injected into the next user message after + * auto-compaction completes. Instructs the agent to recall task state from + * memory and notify the user about the context reset. + * + * Kept under 200 tokens to minimize context overhead. + */ +export const POST_COMPACTION_RECOVERY_PROMPT = [ + "[Post-compaction recovery โ€” mandatory steps]", + "Context was just compacted. Before responding, you MUST:", + '1. Run memory_recall("active task") to check for saved task state.', + "2. Read TASKS.md if it exists in your workspace.", + "3. Compare recovered state against the compaction summary above.", + '4. Notify the user: "๐Ÿ”„ Context Reset โ€” last task: [X], resuming from step [Y]" (or summarize what you recall).', + "Do NOT skip these steps. Proceed with the user's message after recovery.", +].join("\n"); + +/** + * Check whether the session needs post-compaction recovery and return the + * recovery prompt if so. Returns `null` when no recovery is needed. + */ +export function getPostCompactionRecoveryPrompt(entry?: SessionEntry): string | null { + if (!entry?.needsPostCompactionRecovery) { + return null; + } + return POST_COMPACTION_RECOVERY_PROMPT; +} + +/** + * Prepend the post-compaction recovery prompt to the user's message body. + * Returns the original body unchanged if no recovery is needed. + */ +export function prependPostCompactionRecovery(body: string, entry?: SessionEntry): string { + const prompt = getPostCompactionRecoveryPrompt(entry); + if (!prompt) { + return body; + } + return `${prompt}\n\n${body}`; +} + +/** + * Set or clear the post-compaction recovery flag on a session. + */ +async function setPostCompactionRecovery( + value: boolean, + params: { + sessionEntry?: SessionEntry; + sessionStore?: Record; + sessionKey?: string; + storePath?: string; + }, +): Promise { + const { sessionEntry, sessionStore, sessionKey, storePath } = params; + if (!sessionStore || !sessionKey) { + return; + } + const entry = sessionStore[sessionKey] ?? sessionEntry; + if (!entry) { + return; + } + sessionStore[sessionKey] = { + ...entry, + needsPostCompactionRecovery: value, + }; + if (storePath) { + await updateSessionStore(storePath, (store) => { + if (store[sessionKey]) { + store[sessionKey] = { + ...store[sessionKey], + needsPostCompactionRecovery: value, + }; + } + }); + } +} + +/** + * Mark a session as needing post-compaction recovery on the next turn. + */ +export async function markNeedsPostCompactionRecovery(params: { + sessionEntry?: SessionEntry; + sessionStore?: Record; + sessionKey?: string; + storePath?: string; +}): Promise { + return setPostCompactionRecovery(true, params); +} + +/** + * Clear the post-compaction recovery flag after recovery instructions have + * been injected into the prompt. + */ +export async function clearPostCompactionRecovery(params: { + sessionEntry?: SessionEntry; + sessionStore?: Record; + sessionKey?: string; + storePath?: string; +}): Promise { + return setPostCompactionRecovery(false, params); +} diff --git a/src/config/sessions/types.ts b/src/config/sessions/types.ts index 012d59f728d..e7a5c917ef5 100644 --- a/src/config/sessions/types.ts +++ b/src/config/sessions/types.ts @@ -82,6 +82,8 @@ export type SessionEntry = { model?: string; contextTokens?: number; compactionCount?: number; + /** Set after auto-compaction; cleared after the next turn injects recovery instructions. */ + needsPostCompactionRecovery?: boolean; memoryFlushAt?: number; memoryFlushCompactionCount?: number; cliSessionIds?: Record;