task continuity: TASKS.md ledger, post-compaction recovery, entity dedup, credential scanning

Add task ledger (TASKS.md) parsing and stale-task archival for maintaining
agent task state across context compactions. Post-compaction recovery injects
memory_recall + TASKS.md read steps after auto-compaction. Sleep cycle gains
entity dedup (Phase 1d) and credential scanning. Memory flush now extracts
active task checkpoints. Compaction instructions prioritize active tasks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Tarun Sukhani
2026-02-14 11:12:22 +08:00
parent 4d54736b98
commit a170e25494
20 changed files with 2267 additions and 44 deletions

View File

@@ -292,6 +292,7 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
"--skip-semantic",
"Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
)
.option("--workspace <dir>", "Workspace directory for TASKS.md cleanup")
.action(
async (opts: {
agent?: string;
@@ -305,23 +306,31 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
maxSemanticPairs?: string;
concurrency?: string;
skipSemantic?: boolean;
workspace?: string;
}) => {
console.log("\n🌙 Memory Sleep Cycle");
console.log("═════════════════════════════════════════════════════════════");
console.log("Seven-phase memory consolidation (Pareto-based):\n");
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
console.log("Multi-phase memory consolidation (Pareto-based):\n");
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
console.log(
" Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.750.95 band)",
" Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.750.95 band)",
);
console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories");
console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories");
console.log(" Phase 1d: Entity Dedup — Merge duplicate entity nodes");
console.log(
" Phase 2: Pareto Scoring — Calculate effective scores for all memories",
" Phase 2: Pareto Scoring — Calculate effective scores for all memories",
);
console.log(" Phase 3: Core Promotion — Regular memories above threshold → core");
console.log(" Phase 4: Core Demotion — Core memories below threshold → regular");
console.log(" Phase 5: Extraction — Extract entities and categorize");
console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories");
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n");
console.log(
" Phase 3: Core Promotion — Regular memories above threshold → core",
);
console.log(
" Phase 4: Core Demotion — Core memories below threshold → regular",
);
console.log(" Phase 5: Extraction — Extract entities and categorize");
console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories");
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes");
console.log(" Phase 7b: Credential Scan — Remove memories with leaked secrets");
console.log(" Phase 8: Task Ledger Cleanup — Archive stale tasks in TASKS.md\n");
try {
// Validate sleep cycle CLI parameters before running
@@ -396,6 +405,9 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
await db.ensureInitialized();
// Resolve workspace dir for task ledger cleanup
const resolvedWorkspace = opts.workspace?.trim() || undefined;
const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
agentId: opts.agent,
dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
@@ -409,18 +421,23 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
decayCurves: Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined,
extractionBatchSize: batchSize,
extractionDelayMs: delay,
workspaceDir: resolvedWorkspace,
onPhaseStart: (phase) => {
const phaseNames: Record<string, string> = {
dedup: "Phase 1: Deduplication",
semanticDedup: "Phase 1b: Semantic Deduplication",
conflict: "Phase 1c: Conflict Detection",
entityDedup: "Phase 1d: Entity Deduplication",
pareto: "Phase 2: Pareto Scoring",
promotion: "Phase 3: Core Promotion",
extraction: "Phase 4: Extraction",
decay: "Phase 5: Decay & Pruning",
cleanup: "Phase 6: Orphan Cleanup",
noiseCleanup: "Phase 7: Noise Cleanup",
credentialScan: "Phase 7b: Credential Scan",
taskLedger: "Phase 8: Task Ledger Cleanup",
};
console.log(`\n▶ ${phaseNames[phase]}`);
console.log(`\n▶ ${phaseNames[phase] ?? phase}`);
console.log("─────────────────────────────────────────────────────────────");
},
onProgress: (_phase, message) => {
@@ -457,6 +474,12 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
console.log(
` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
);
console.log(
` Task Ledger: ${result.taskLedger.archivedCount} stale tasks archived` +
(result.taskLedger.archivedIds.length > 0
? ` (${result.taskLedger.archivedIds.join(", ")})`
: ""),
);
if (result.aborted) {
console.log("\n⚠ Sleep cycle was aborted before completion.");
}

View File

@@ -0,0 +1,327 @@
/**
* Tests for entity deduplication in neo4j-client.ts.
*
* Tests findDuplicateEntityPairs() and mergeEntityPair() using mocked Neo4j driver.
* Verifies substring-matching logic, mention-count based decisions, and merge behavior.
*/
import { describe, it, expect, vi, beforeEach } from "vitest";
import { Neo4jMemoryClient } from "./neo4j-client.js";
// ============================================================================
// Test Helpers
// ============================================================================
function createMockSession() {
return {
run: vi.fn().mockResolvedValue({ records: [] }),
close: vi.fn().mockResolvedValue(undefined),
executeWrite: vi.fn(
async (work: (tx: { run: ReturnType<typeof vi.fn> }) => Promise<unknown>) => {
const mockTx = { run: vi.fn().mockResolvedValue({ records: [] }) };
return work(mockTx);
},
),
};
}
function createMockDriver() {
return {
session: vi.fn().mockReturnValue(createMockSession()),
close: vi.fn().mockResolvedValue(undefined),
};
}
function createMockLogger() {
return {
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
debug: vi.fn(),
};
}
function mockRecord(data: Record<string, unknown>) {
return {
get: (key: string) => data[key],
};
}
// ============================================================================
// Entity Deduplication Tests
// ============================================================================
describe("Entity Deduplication", () => {
let client: Neo4jMemoryClient;
let mockDriver: ReturnType<typeof createMockDriver>;
let mockSession: ReturnType<typeof createMockSession>;
let mockLogger: ReturnType<typeof createMockLogger>;
beforeEach(() => {
mockLogger = createMockLogger();
mockDriver = createMockDriver();
mockSession = createMockSession();
mockDriver.session.mockReturnValue(mockSession);
client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger);
(client as any).driver = mockDriver;
(client as any).indexesReady = true;
});
// --------------------------------------------------------------------------
// findDuplicateEntityPairs()
// --------------------------------------------------------------------------
describe("findDuplicateEntityPairs", () => {
it("finds substring matches: 'tarun' + 'tarun sukhani' (same type)", async () => {
mockSession.run.mockResolvedValueOnce({
records: [
mockRecord({
id1: "e1",
name1: "tarun",
mc1: 5,
id2: "e2",
name2: "tarun sukhani",
mc2: 3,
}),
],
});
const pairs = await client.findDuplicateEntityPairs();
expect(pairs).toHaveLength(1);
// "tarun" has more mentions (5 > 3), so it should be kept
expect(pairs[0].keepId).toBe("e1");
expect(pairs[0].keepName).toBe("tarun");
expect(pairs[0].removeId).toBe("e2");
expect(pairs[0].removeName).toBe("tarun sukhani");
});
it("keeps entity with more mentions regardless of name length", async () => {
mockSession.run.mockResolvedValueOnce({
records: [
mockRecord({
id1: "e1",
name1: "fish speech",
mc1: 2,
id2: "e2",
name2: "fish speech s1 mini",
mc2: 10,
}),
],
});
const pairs = await client.findDuplicateEntityPairs();
expect(pairs).toHaveLength(1);
// "fish speech s1 mini" has more mentions (10 > 2), so it should be kept
expect(pairs[0].keepId).toBe("e2");
expect(pairs[0].keepName).toBe("fish speech s1 mini");
expect(pairs[0].removeId).toBe("e1");
expect(pairs[0].removeName).toBe("fish speech");
});
it("keeps shorter name when mentions are equal", async () => {
mockSession.run.mockResolvedValueOnce({
records: [
mockRecord({
id1: "e1",
name1: "aaditya",
mc1: 5,
id2: "e2",
name2: "aaditya sukhani",
mc2: 5,
}),
],
});
const pairs = await client.findDuplicateEntityPairs();
expect(pairs).toHaveLength(1);
// Equal mentions, so keep the shorter name ("aaditya")
expect(pairs[0].keepId).toBe("e1");
expect(pairs[0].keepName).toBe("aaditya");
expect(pairs[0].removeId).toBe("e2");
expect(pairs[0].removeName).toBe("aaditya sukhani");
});
it("returns empty array when no duplicates exist", async () => {
mockSession.run.mockResolvedValueOnce({ records: [] });
const pairs = await client.findDuplicateEntityPairs();
expect(pairs).toHaveLength(0);
});
it("handles multiple duplicate pairs", async () => {
mockSession.run.mockResolvedValueOnce({
records: [
mockRecord({
id1: "e1",
name1: "tarun",
mc1: 5,
id2: "e2",
name2: "tarun sukhani",
mc2: 3,
}),
mockRecord({
id1: "e3",
name1: "fish speech",
mc1: 2,
id2: "e4",
name2: "fish speech s1 mini",
mc2: 8,
}),
],
});
const pairs = await client.findDuplicateEntityPairs();
expect(pairs).toHaveLength(2);
});
it("handles NULL mention counts (treats as 0)", async () => {
mockSession.run.mockResolvedValueOnce({
records: [
mockRecord({
id1: "e1",
name1: "neo4j",
mc1: null,
id2: "e2",
name2: "neo4j database",
mc2: null,
}),
],
});
const pairs = await client.findDuplicateEntityPairs();
expect(pairs).toHaveLength(1);
// Both NULL (treated as 0), so keep the shorter name
expect(pairs[0].keepId).toBe("e1");
expect(pairs[0].keepName).toBe("neo4j");
});
it("passes the Cypher query with substring matching and type constraint", async () => {
mockSession.run.mockResolvedValueOnce({ records: [] });
await client.findDuplicateEntityPairs();
const query = mockSession.run.mock.calls[0][0] as string;
// Verify the query checks same type
expect(query).toContain("e1.type = e2.type");
// Verify the query checks CONTAINS in both directions
expect(query).toContain("e1.name CONTAINS e2.name");
expect(query).toContain("e2.name CONTAINS e1.name");
// Verify minimum name length filter
expect(query).toContain("size(e1.name) > 2");
});
});
// --------------------------------------------------------------------------
// mergeEntityPair()
// --------------------------------------------------------------------------
describe("mergeEntityPair", () => {
it("transfers MENTIONS and deletes source entity", async () => {
// mergeEntityPair uses executeWrite, so we need to set up the mock transaction
const mockTx = {
run: vi
.fn()
.mockResolvedValueOnce({
// Transfer MENTIONS
records: [mockRecord({ transferred: 3 })],
})
.mockResolvedValueOnce({
// Update mentionCount
records: [],
})
.mockResolvedValueOnce({
// Delete removed entity
records: [],
}),
};
mockSession.executeWrite.mockImplementationOnce(async (work: any) => work(mockTx));
const result = await client.mergeEntityPair("keep-id", "remove-id");
expect(result).toBe(true);
// Should have been called 3 times: transfer, update count, delete
expect(mockTx.run).toHaveBeenCalledTimes(3);
// Verify transfer query
const transferQuery = mockTx.run.mock.calls[0][0] as string;
expect(transferQuery).toContain("MERGE (m)-[:MENTIONS]->(keep)");
expect(transferQuery).toContain("DELETE r");
// Verify update mentionCount
const updateQuery = mockTx.run.mock.calls[1][0] as string;
expect(updateQuery).toContain("mentionCount");
// Verify delete query
const deleteQuery = mockTx.run.mock.calls[2][0] as string;
expect(deleteQuery).toContain("DETACH DELETE e");
});
it("skips mentionCount update when no relationships to transfer", async () => {
const mockTx = {
run: vi
.fn()
.mockResolvedValueOnce({
// Transfer MENTIONS — 0 transferred
records: [mockRecord({ transferred: 0 })],
})
.mockResolvedValueOnce({
// Delete removed entity (mentionCount update is skipped)
records: [],
}),
};
mockSession.executeWrite.mockImplementationOnce(async (work: any) => work(mockTx));
const result = await client.mergeEntityPair("keep-id", "remove-id");
expect(result).toBe(true);
// Only 2 calls: transfer (0 results) and delete (skip update)
expect(mockTx.run).toHaveBeenCalledTimes(2);
});
it("returns false on error", async () => {
mockSession.executeWrite.mockRejectedValueOnce(new Error("Neo4j connection lost"));
const result = await client.mergeEntityPair("keep-id", "remove-id");
expect(result).toBe(false);
});
});
// --------------------------------------------------------------------------
// reconcileEntityMentionCounts()
// --------------------------------------------------------------------------
describe("reconcileEntityMentionCounts", () => {
it("updates entities with NULL mentionCount", async () => {
mockSession.run.mockResolvedValueOnce({
records: [mockRecord({ updated: 42 })],
});
const updated = await client.reconcileEntityMentionCounts();
expect(updated).toBe(42);
const query = mockSession.run.mock.calls[0][0] as string;
expect(query).toContain("mentionCount IS NULL");
expect(query).toContain("SET e.mentionCount = actual");
});
it("returns 0 when all entities have mentionCount set", async () => {
mockSession.run.mockResolvedValueOnce({
records: [mockRecord({ updated: 0 })],
});
const updated = await client.reconcileEntityMentionCounts();
expect(updated).toBe(0);
});
});
});

View File

@@ -2022,4 +2022,108 @@ export class Neo4jMemoryClient {
}
});
}
/**
* Delete non-core, non-pinned memories matching a regex pattern.
* Used by the sleep cycle noise pattern cleanup.
*
* @returns Number of memories deleted
*/
async deleteMemoriesByPattern(pattern: string, agentId?: string, limit = 100): Promise<number> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
const result = await session.run(
`MATCH (m:Memory)
WHERE m.text =~ $pattern
AND coalesce(m.userPinned, false) = false
AND m.category <> 'core'
${agentFilter}
WITH m LIMIT ${limit}
DETACH DELETE m
RETURN count(*) AS removed`,
{ pattern, agentId },
);
return (result.records[0]?.get("removed") as number) ?? 0;
} finally {
await session.close();
}
}
/**
* Fetch all non-core memories (id + text) for a given agent, or all agents.
* Used by the sleep cycle credential scanner.
*/
async fetchNonCoreMemories(agentId?: string): Promise<Array<{ id: string; text: string }>> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
const result = await session.run(
`MATCH (m:Memory)
WHERE m.category <> 'core'
${agentFilter}
RETURN m.id AS id, m.text AS text`,
agentId ? { agentId } : {},
);
return result.records.map((r) => ({
id: r.get("id") as string,
text: r.get("text") as string,
}));
} finally {
await session.close();
}
}
/**
* Delete memories by IDs (DETACH DELETE).
* Used by the sleep cycle credential scanner.
*
* @returns Number of memories deleted
*/
async deleteMemoriesByIds(ids: string[]): Promise<number> {
if (ids.length === 0) {
return 0;
}
await this.ensureInitialized();
const session = this.driver!.session();
try {
const result = await session.run(
`UNWIND $ids AS id
MATCH (m:Memory {id: id})
DETACH DELETE m
RETURN count(*) AS removed`,
{ ids },
);
return (result.records[0]?.get("removed") as number) ?? 0;
} finally {
await session.close();
}
}
/**
* Reconcile mentionCount for all entities by counting actual MENTIONS relationships.
* Fixes entities with NULL or stale mentionCount values (e.g., entities created
* before mentionCount tracking was added).
*
* @returns Number of entities updated
*/
async reconcileEntityMentionCounts(): Promise<number> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
const result = await session.run(
`MATCH (e:Entity)
WHERE e.mentionCount IS NULL
OPTIONAL MATCH (m:Memory)-[:MENTIONS]->(e)
WITH e, count(m) AS actual
SET e.mentionCount = actual
RETURN count(e) AS updated`,
);
return (result.records[0]?.get("updated") as number) ?? 0;
} finally {
await session.close();
}
}
}

View File

@@ -0,0 +1,165 @@
/**
* Tests for credential scanning in the sleep cycle.
*
* Verifies that CREDENTIAL_PATTERNS and detectCredential() correctly
* identify credential-like content in memory text while not flagging
* clean text.
*/
import { describe, it, expect } from "vitest";
import { CREDENTIAL_PATTERNS, detectCredential } from "./sleep-cycle.js";
describe("Credential Detection", () => {
// --------------------------------------------------------------------------
// detectCredential() — should flag dangerous content
// --------------------------------------------------------------------------
describe("should detect credentials", () => {
it("detects API keys (sk-...)", () => {
const result = detectCredential("Use the key sk-abc123def456ghi789jkl012mno345");
expect(result).toBe("API key");
});
it("detects api_key patterns", () => {
const result = detectCredential("Set api_key_live_abcdef1234567890abcdef");
expect(result).toBe("API key");
});
it("detects Bearer tokens", () => {
const result = detectCredential(
"Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
);
// Could match either Bearer token or JWT — both are valid detections
expect(result).not.toBeNull();
});
it("detects password assignments (password: X)", () => {
const result = detectCredential("The database password: myS3cretP@ss!");
expect(result).toBe("Password assignment");
});
it("detects password assignments (password=X)", () => {
const result = detectCredential("config has password=hunter2 in it");
expect(result).toBe("Password assignment");
});
it("detects the missed pattern: login with X creds user/pass", () => {
const result = detectCredential("login with radarr creds hullah/fuckbar");
expect(result).toBe("Credentials (user/pass)");
});
it("detects creds user/pass without login prefix", () => {
const result = detectCredential("use creds admin/password123 for the server");
expect(result).toBe("Credentials (user/pass)");
});
it("detects URL-embedded credentials", () => {
const result = detectCredential("Connect to https://admin:secretpass@db.example.com/mydb");
expect(result).toBe("URL credentials");
});
it("detects URL credentials with http://", () => {
const result = detectCredential("http://user:pass@192.168.1.1:8080/api");
expect(result).toBe("URL credentials");
});
it("detects private keys", () => {
const result = detectCredential("-----BEGIN RSA PRIVATE KEY-----\nMIIEow...");
expect(result).toBe("Private key");
});
it("detects AWS access keys", () => {
const result = detectCredential("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");
expect(result).toBe("AWS key");
});
it("detects GitHub personal access tokens", () => {
const result = detectCredential("Set GITHUB_TOKEN=ghp_ABCDEFabcdef1234567890");
expect(result).toBe("GitHub/GitLab token");
});
it("detects GitLab tokens", () => {
const result = detectCredential("Use glpat-xxxxxxxxxxxxxxxxxxxx for auth");
expect(result).toBe("GitHub/GitLab token");
});
it("detects JWT tokens", () => {
const result = detectCredential(
"Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
);
expect(result).toBe("JWT");
});
it("detects token=value patterns", () => {
const result = detectCredential(
"Set token=abcdef1234567890abcdef1234567890ab for authentication",
);
expect(result).toBe("Token/secret");
});
it("detects secret: value patterns", () => {
const result = detectCredential(
"The client secret: abcdef1234567890abcdef1234567890abcdef12",
);
expect(result).toBe("Token/secret");
});
});
// --------------------------------------------------------------------------
// detectCredential() — should NOT flag clean text
// --------------------------------------------------------------------------
describe("should not flag clean text", () => {
it("does not flag normal text", () => {
expect(detectCredential("Remember to buy groceries tomorrow")).toBeNull();
});
it("does not flag password advice (without actual password)", () => {
expect(
detectCredential("Make sure the password is at least 8 characters long for security"),
).toBeNull();
});
it("does not flag discussion about tokens", () => {
expect(detectCredential("We should use JWT tokens for authentication")).toBeNull();
});
it("does not flag short key-like words", () => {
expect(detectCredential("The key to success is persistence")).toBeNull();
});
it("does not flag URLs without credentials", () => {
expect(detectCredential("Visit https://example.com/api/v1 for docs")).toBeNull();
});
it("does not flag discussion about API key rotation", () => {
expect(detectCredential("Rotate your API keys every 90 days as a best practice")).toBeNull();
});
it("does not flag file paths", () => {
expect(detectCredential("Credentials are stored in /home/user/.secrets/api.json")).toBeNull();
});
it("does not flag casual use of slash in text", () => {
expect(detectCredential("Use the read/write mode for better performance")).toBeNull();
});
});
// --------------------------------------------------------------------------
// CREDENTIAL_PATTERNS — structural checks
// --------------------------------------------------------------------------
describe("CREDENTIAL_PATTERNS structure", () => {
it("has at least 8 patterns", () => {
expect(CREDENTIAL_PATTERNS.length).toBeGreaterThanOrEqual(8);
});
it("each pattern has a label and valid RegExp", () => {
for (const { pattern, label } of CREDENTIAL_PATTERNS) {
expect(pattern).toBeInstanceOf(RegExp);
expect(label).toBeTruthy();
expect(typeof label).toBe("string");
}
});
});
});

View File

@@ -1,18 +1,22 @@
/**
* Eight-phase sleep cycle for memory consolidation.
* Multi-phase sleep cycle for memory consolidation.
*
* Implements a Pareto-based memory ecosystem where core memory
* is bounded to the top 20% of memories by effective score.
*
* Phases:
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
* 1b. SEMANTIC DEDUP - LLM-based paraphrase detection
* 1c. CONFLICT DETECTION - Resolve contradictory memories
* 1d. ENTITY DEDUP - Merge near-duplicate entities (reduce entity bloat)
* 2. PARETO SCORING - Calculate effective scores for all memories
* 3. CORE PROMOTION - Regular memories above threshold -> core
* 4. EXTRACTION - Form entity relationships (strengthen connections)
* 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
* 6. CLEANUP - Remove orphaned entities/tags (garbage collection)
* 7. NOISE CLEANUP - Remove dangerous pattern memories
* 2. PARETO SCORING - Calculate effective scores for all memories
* 3. CORE PROMOTION - Regular memories above threshold -> core
* 4. EXTRACTION - Form entity relationships (strengthen connections)
* 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
* 6. CLEANUP - Remove orphaned entities/tags (garbage collection)
* 7. NOISE CLEANUP - Remove dangerous pattern memories
* 7b. CREDENTIAL SCAN - Remove memories containing leaked credentials
* 8. TASK LEDGER - Archive stale tasks in TASKS.md
*
* Research basis:
* - Pareto principle (20/80 rule) for memory tiering
@@ -27,6 +31,7 @@ import type { Neo4jMemoryClient } from "./neo4j-client.js";
import type { Logger } from "./schema.js";
import { isSemanticDuplicate, resolveConflict, runBackgroundExtraction } from "./extractor.js";
import { makePairKey } from "./schema.js";
import { reviewAndArchiveStaleTasks, type StaleTaskResult } from "./task-ledger.js";
/**
* Sleep Cycle Result - aggregated stats from all phases.
@@ -82,6 +87,18 @@ export type SleepCycleResult = {
tagsRemoved: number;
singleUseTagsRemoved: number;
};
// Phase 7b: Credential Scanning
credentialScan: {
memoriesScanned: number;
credentialsFound: number;
memoriesRemoved: number;
};
// Phase 8: Task Ledger Cleanup
taskLedger: {
staleCount: number;
archivedCount: number;
archivedIds: string[];
};
// Overall
durationMs: number;
aborted: boolean;
@@ -120,6 +137,10 @@ export type SleepCycleOptions = {
decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
decayCurves?: Record<string, { halfLifeDays: number }>; // Per-category decay curve overrides
// Phase 8: Task Ledger
workspaceDir?: string; // Workspace dir for TASKS.md (default: resolved from env)
staleTaskMaxAgeMs?: number; // Max age before task is stale (default: 24h)
// Progress callback
onPhaseStart?: (
phase:
@@ -131,11 +152,76 @@ export type SleepCycleOptions = {
| "promotion"
| "decay"
| "extraction"
| "cleanup",
| "cleanup"
| "noiseCleanup"
| "credentialScan"
| "taskLedger",
) => void;
onProgress?: (phase: string, message: string) => void;
};
// ============================================================================
// Credential Detection Patterns
// ============================================================================
/**
* Regex patterns that match credential-like content in memory text.
* Used by the credential scanning phase to find and remove memories
* that accidentally stored secrets, passwords, API keys, or tokens.
*
* These are JavaScript RegExp patterns (case-insensitive).
*/
export const CREDENTIAL_PATTERNS: Array<{ pattern: RegExp; label: string }> = [
// API keys: sk-..., api_key_..., api_key_live_..., apikey-..., etc.
{ pattern: /\b(?:sk|api[_-]?key(?:[_-]\w+)?)[_-][a-z0-9]{16,}/i, label: "API key" },
// Bearer tokens
{ pattern: /bearer\s+[a-z0-9_\-.]{20,}/i, label: "Bearer token" },
// JWT tokens (three base64 segments separated by dots) — check before generic token pattern
{ pattern: /\beyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}/i, label: "JWT" },
// Generic long tokens/secrets (hex or base64, 32+ chars)
{
pattern: /\b(?:token|secret|key)\s*[:=]\s*["']?[a-z0-9+/=_\-]{32,}["']?/i,
label: "Token/secret",
},
// Password patterns: password: X, password=X, password X, passwd=X, pwd=X
{
pattern: /\b(?:password|passwd|pwd)\s*[:=]\s*["']?\S{4,}["']?/i,
label: "Password assignment",
},
// Credentials in "creds user/pass" format: "login with X creds user/pass"
{ pattern: /\bcreds?\s+\S+[/\\]\S+/i, label: "Credentials (user/pass)" },
// URL-embedded credentials: https://user:pass@host
{ pattern: /\/\/[^/\s:]+:[^/\s@]+@/i, label: "URL credentials" },
// Private keys
{ pattern: /-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----/i, label: "Private key" },
// AWS-style keys
{ pattern: /\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/, label: "AWS key" },
// GitHub/GitLab tokens
{ pattern: /\b(?:ghp|gho|ghu|ghs|ghr|glpat)[_-][a-zA-Z0-9]{16,}/i, label: "GitHub/GitLab token" },
];
/**
* Check if a text contains credential-like content.
* Returns the first matching pattern label, or null if clean.
*/
export function detectCredential(text: string): string | null {
for (const { pattern, label } of CREDENTIAL_PATTERNS) {
if (pattern.test(text)) {
return label;
}
}
return null;
}
// ============================================================================
// Sleep Cycle Implementation
// ============================================================================
@@ -180,6 +266,8 @@ export async function runSleepCycle(
extractionBatchSize = 50,
extractionDelayMs = 1000,
singleUseTagMinAgeDays = 14,
workspaceDir,
staleTaskMaxAgeMs,
onPhaseStart,
onProgress,
} = options;
@@ -199,6 +287,8 @@ export async function runSleepCycle(
decay: { memoriesPruned: 0 },
extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
cleanup: { entitiesRemoved: 0, tagsRemoved: 0, singleUseTagsRemoved: 0 },
credentialScan: { memoriesScanned: 0, credentialsFound: 0, memoriesRemoved: 0 },
taskLedger: { staleCount: 0, archivedCount: 0, archivedIds: [] },
durationMs: 0,
aborted: false,
};
@@ -443,6 +533,15 @@ export async function runSleepCycle(
logger.info("memory-neo4j: [sleep] Phase 1d: Entity Deduplication");
try {
// Reconcile NULL mentionCounts before dedup so decisions are based on accurate counts
const reconciled = await db.reconcileEntityMentionCounts();
if (reconciled > 0) {
logger.info(
`memory-neo4j: [sleep] Phase 1d: Reconciled mentionCount for ${reconciled} entities`,
);
onProgress?.("entityDedup", `Reconciled ${reconciled} entity mention counts`);
}
const pairs = await db.findDuplicateEntityPairs(agentId);
result.entityDedup.pairsFound = pairs.length;
@@ -749,6 +848,7 @@ export async function runSleepCycle(
// stored (open proposals, action items that trigger rogue sessions).
// --------------------------------------------------------------------------
if (!abortSignal?.aborted) {
onPhaseStart?.("noiseCleanup");
logger.info("memory-neo4j: [sleep] Phase 7: Noise Pattern Cleanup");
try {
@@ -763,29 +863,11 @@ export async function runSleepCycle(
];
let noiseRemoved = 0;
const noiseSession = (db as any).driver!.session();
try {
for (const pattern of noisePatterns) {
if (abortSignal?.aborted) {
break;
}
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
const result = await noiseSession.run(
`MATCH (m:Memory)
WHERE m.text =~ $pattern
AND coalesce(m.userPinned, false) = false
AND m.category <> 'core'
${agentFilter}
WITH m LIMIT 100
DETACH DELETE m
RETURN count(*) AS removed`,
{ pattern: `.*${pattern}.*`, agentId },
);
noiseRemoved += (result.records[0]?.get("removed") as number) ?? 0;
for (const pattern of noisePatterns) {
if (abortSignal?.aborted) {
break;
}
} finally {
await noiseSession.close();
noiseRemoved += await db.deleteMemoriesByPattern(`.*${pattern}.*`, agentId);
}
if (noiseRemoved > 0) {
@@ -800,6 +882,90 @@ export async function runSleepCycle(
}
}
// --------------------------------------------------------------------------
// Phase 7b: Credential Scanning
// Scans all memories for accidentally stored credentials (API keys,
// passwords, tokens) and removes them. This is a security measure
// to prevent credential leaks in the memory store.
// --------------------------------------------------------------------------
if (!abortSignal?.aborted) {
onPhaseStart?.("credentialScan");
logger.info("memory-neo4j: [sleep] Phase 7b: Credential Scanning");
try {
const allMemories = await db.fetchNonCoreMemories(agentId);
result.credentialScan.memoriesScanned = allMemories.length;
const toRemove: string[] = [];
for (const { id, text } of allMemories) {
if (abortSignal?.aborted) {
break;
}
const matched = detectCredential(text);
if (matched) {
toRemove.push(id);
result.credentialScan.credentialsFound++;
onProgress?.(
"credentialScan",
`Found ${matched} in memory ${id.slice(0, 8)}...: "${text.slice(0, 40)}..."`,
);
logger.warn(
`memory-neo4j: [sleep] Credential detected (${matched}) in memory ${id} — removing`,
);
}
}
if (toRemove.length > 0) {
result.credentialScan.memoriesRemoved = await db.deleteMemoriesByIds(toRemove);
}
logger.info(
`memory-neo4j: [sleep] Phase 7b complete — ${result.credentialScan.memoriesScanned} scanned, ${result.credentialScan.credentialsFound} credentials found, ${result.credentialScan.memoriesRemoved} removed`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 7b error: ${String(err)}`);
}
}
// --------------------------------------------------------------------------
// Phase 8: Task Ledger Cleanup
// Reviews TASKS.md for stale tasks (>24h with no activity) and archives them.
// Requires workspaceDir to be provided (otherwise skipped).
// --------------------------------------------------------------------------
if (!abortSignal?.aborted && workspaceDir) {
onPhaseStart?.("taskLedger");
logger.info("memory-neo4j: [sleep] Phase 8: Task Ledger Cleanup");
try {
const staleResult = await reviewAndArchiveStaleTasks(workspaceDir, staleTaskMaxAgeMs);
if (staleResult) {
result.taskLedger.staleCount = staleResult.staleCount;
result.taskLedger.archivedCount = staleResult.archivedCount;
result.taskLedger.archivedIds = staleResult.archivedIds;
if (staleResult.archivedCount > 0) {
onProgress?.(
"taskLedger",
`Archived ${staleResult.archivedCount} stale tasks: ${staleResult.archivedIds.join(", ")}`,
);
} else {
onProgress?.("taskLedger", "No stale tasks found");
}
} else {
onProgress?.("taskLedger", "TASKS.md not found — skipped");
}
logger.info(
`memory-neo4j: [sleep] Phase 8 complete — ${result.taskLedger.archivedCount} stale tasks archived`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 8 error: ${String(err)}`);
}
} else if (!workspaceDir) {
logger.info("memory-neo4j: [sleep] Phase 8: Task Ledger Cleanup — SKIPPED (no workspace dir)");
}
result.durationMs = Date.now() - startTime;
result.aborted = abortSignal?.aborted ?? false;

View File

@@ -0,0 +1,466 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import {
findStaleTasks,
parseTaskDate,
parseTaskLedger,
reviewAndArchiveStaleTasks,
serializeTask,
serializeTaskLedger,
} from "./task-ledger.js";
// ============================================================================
// parseTaskDate
// ============================================================================
describe("parseTaskDate", () => {
it("parses YYYY-MM-DD HH:MM format", () => {
const date = parseTaskDate("2026-02-14 09:15");
expect(date).not.toBeNull();
expect(date!.getFullYear()).toBe(2026);
expect(date!.getMonth()).toBe(1); // February is month 1
expect(date!.getDate()).toBe(14);
});
it("parses YYYY-MM-DD HH:MM with timezone abbreviation", () => {
const date = parseTaskDate("2026-02-14 09:15 MYT");
expect(date).not.toBeNull();
expect(date!.getFullYear()).toBe(2026);
});
it("parses ISO format", () => {
const date = parseTaskDate("2026-02-14T09:15:00");
expect(date).not.toBeNull();
expect(date!.getFullYear()).toBe(2026);
});
it("returns null for empty string", () => {
expect(parseTaskDate("")).toBeNull();
});
it("returns null for invalid date", () => {
expect(parseTaskDate("not-a-date")).toBeNull();
});
});
// ============================================================================
// parseTaskLedger
// ============================================================================
describe("parseTaskLedger", () => {
it("parses a simple task ledger", () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Restaurant Booking",
"- **Status:** in_progress",
"- **Started:** 2026-02-14 09:15",
"- **Updated:** 2026-02-14 09:30",
"- **Details:** Graze, 4 pax, 19:30",
"- **Current Step:** Form filled, awaiting confirmation",
"",
"# Completed",
"<!-- Move done tasks here with completion date -->",
].join("\n");
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(1);
expect(ledger.completedTasks).toHaveLength(0);
const task = ledger.activeTasks[0];
expect(task.id).toBe("TASK-001");
expect(task.title).toBe("Restaurant Booking");
expect(task.status).toBe("in_progress");
expect(task.started).toBe("2026-02-14 09:15");
expect(task.updated).toBe("2026-02-14 09:30");
expect(task.details).toBe("Graze, 4 pax, 19:30");
expect(task.currentStep).toBe("Form filled, awaiting confirmation");
expect(task.isCompleted).toBe(false);
});
it("parses multiple active tasks", () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Task One",
"- **Status:** in_progress",
"- **Started:** 2026-02-14 09:00",
"",
"## TASK-002: Task Two",
"- **Status:** awaiting_input",
"- **Started:** 2026-02-14 10:00",
"",
"# Completed",
].join("\n");
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(2);
expect(ledger.activeTasks[0].id).toBe("TASK-001");
expect(ledger.activeTasks[1].id).toBe("TASK-002");
});
it("parses completed tasks", () => {
const content = [
"# Active Tasks",
"",
"# Completed",
"",
"## ~~TASK-001: Old Task~~",
"- **Status:** done",
"- **Started:** 2026-02-13 09:00",
"- **Updated:** 2026-02-13 15:00",
].join("\n");
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(0);
expect(ledger.completedTasks).toHaveLength(1);
expect(ledger.completedTasks[0].id).toBe("TASK-001");
expect(ledger.completedTasks[0].isCompleted).toBe(true);
});
it("parses blocked tasks", () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Blocked Task",
"- **Status:** blocked",
"- **Started:** 2026-02-14 09:00",
"- **Blocked On:** Waiting for API key",
"",
"# Completed",
].join("\n");
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(1);
expect(ledger.activeTasks[0].blockedOn).toBe("Waiting for API key");
});
it("handles empty task ledger", () => {
const content = [
"# Active Tasks",
"",
"# Completed",
"<!-- Move done tasks here with completion date -->",
].join("\n");
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(0);
expect(ledger.completedTasks).toHaveLength(0);
});
it("handles Last Updated field variant", () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Some Task",
"- **Status:** in_progress",
"- **Last Updated:** 2026-02-14 10:00",
"",
"# Completed",
].join("\n");
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks[0].updated).toBe("2026-02-14 10:00");
});
});
// ============================================================================
// findStaleTasks
// ============================================================================
describe("findStaleTasks", () => {
const now = new Date("2026-02-15T10:00:00");
const twentyFourHoursMs = 24 * 60 * 60 * 1000;
it("identifies tasks older than 24h as stale", () => {
const tasks = [
{
id: "TASK-001",
title: "Old Task",
status: "in_progress" as const,
updated: "2026-02-14 08:00",
rawLines: [],
isCompleted: false,
},
];
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
expect(stale).toHaveLength(1);
expect(stale[0].id).toBe("TASK-001");
});
it("does not mark recent tasks as stale", () => {
const tasks = [
{
id: "TASK-001",
title: "Recent Task",
status: "in_progress" as const,
updated: "2026-02-15 09:00",
rawLines: [],
isCompleted: false,
},
];
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
expect(stale).toHaveLength(0);
});
it("skips done tasks", () => {
const tasks = [
{
id: "TASK-001",
title: "Done Task",
status: "done" as const,
updated: "2026-02-13 08:00",
rawLines: [],
isCompleted: false,
},
];
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
expect(stale).toHaveLength(0);
});
it("skips already-stale tasks", () => {
const tasks = [
{
id: "TASK-001",
title: "Already Stale",
status: "stale" as const,
updated: "2026-02-13 08:00",
rawLines: [],
isCompleted: false,
},
];
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
expect(stale).toHaveLength(0);
});
it("uses started date when updated is missing", () => {
const tasks = [
{
id: "TASK-001",
title: "No Update Date",
status: "in_progress" as const,
started: "2026-02-14 08:00",
rawLines: [],
isCompleted: false,
},
];
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
expect(stale).toHaveLength(1);
});
it("marks tasks with no dates as stale", () => {
const tasks = [
{
id: "TASK-001",
title: "No Dates",
status: "in_progress" as const,
rawLines: [],
isCompleted: false,
},
];
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
expect(stale).toHaveLength(1);
});
});
// ============================================================================
// serializeTask / serializeTaskLedger
// ============================================================================
describe("serializeTask", () => {
it("serializes an active task", () => {
const task = {
id: "TASK-001",
title: "My Task",
status: "in_progress" as const,
started: "2026-02-14 09:00",
updated: "2026-02-14 10:00",
details: "Some details",
currentStep: "Step 1",
rawLines: [],
isCompleted: false,
};
const lines = serializeTask(task);
expect(lines[0]).toBe("## TASK-001: My Task");
expect(lines).toContain("- **Status:** in_progress");
expect(lines).toContain("- **Started:** 2026-02-14 09:00");
expect(lines).toContain("- **Updated:** 2026-02-14 10:00");
expect(lines).toContain("- **Details:** Some details");
expect(lines).toContain("- **Current Step:** Step 1");
});
it("serializes a completed task with strikethrough", () => {
const task = {
id: "TASK-001",
title: "Done Task",
status: "done" as const,
started: "2026-02-14 09:00",
rawLines: [],
isCompleted: true,
};
const lines = serializeTask(task);
expect(lines[0]).toBe("## ~~TASK-001: Done Task~~");
});
});
describe("serializeTaskLedger", () => {
it("round-trips a task ledger", () => {
const ledger = {
activeTasks: [
{
id: "TASK-001",
title: "Active Task",
status: "in_progress" as const,
started: "2026-02-14 09:00",
updated: "2026-02-14 10:00",
details: "Details here",
rawLines: [],
isCompleted: false,
},
],
completedTasks: [
{
id: "TASK-000",
title: "Old Task",
status: "done" as const,
started: "2026-02-13 09:00",
rawLines: [],
isCompleted: true,
},
],
preamble: [],
sectionSeparator: [],
postamble: [],
};
const serialized = serializeTaskLedger(ledger);
expect(serialized).toContain("# Active Tasks");
expect(serialized).toContain("## TASK-001: Active Task");
expect(serialized).toContain("# Completed");
expect(serialized).toContain("## ~~TASK-000: Old Task~~");
});
});
// ============================================================================
// reviewAndArchiveStaleTasks (integration with filesystem)
// ============================================================================
describe("reviewAndArchiveStaleTasks", () => {
let tmpDir: string;
beforeEach(async () => {
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-ledger-test-"));
});
afterEach(async () => {
await fs.rm(tmpDir, { recursive: true, force: true });
});
it("returns null when TASKS.md does not exist", async () => {
const result = await reviewAndArchiveStaleTasks(tmpDir);
expect(result).toBeNull();
});
it("returns null for empty TASKS.md", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), "", "utf-8");
const result = await reviewAndArchiveStaleTasks(tmpDir);
expect(result).toBeNull();
});
it("archives stale tasks", async () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Stale Task",
"- **Status:** in_progress",
"- **Started:** 2026-02-13 08:00",
"- **Updated:** 2026-02-13 09:00",
"",
"## TASK-002: Fresh Task",
"- **Status:** in_progress",
"- **Started:** 2026-02-14 09:00",
"- **Updated:** 2026-02-14 23:00",
"",
"# Completed",
"<!-- Move done tasks here with completion date -->",
].join("\n");
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8");
// "now" is Feb 15, 10:00 — TASK-001 updated Feb 13, 09:00 (>24h ago), TASK-002 updated Feb 14, 23:00 (<24h ago)
const now = new Date("2026-02-15T10:00:00");
const result = await reviewAndArchiveStaleTasks(tmpDir, undefined, now);
expect(result).not.toBeNull();
expect(result!.staleCount).toBe(1);
expect(result!.archivedCount).toBe(1);
expect(result!.archivedIds).toEqual(["TASK-001"]);
// Verify the file was updated
const updated = await fs.readFile(path.join(tmpDir, "TASKS.md"), "utf-8");
expect(updated).toContain("## TASK-002: Fresh Task");
expect(updated).toContain("## ~~TASK-001: Stale Task~~");
// Re-parse to verify structure
const ledger = parseTaskLedger(updated);
expect(ledger.activeTasks).toHaveLength(1);
expect(ledger.activeTasks[0].id).toBe("TASK-002");
expect(ledger.completedTasks).toHaveLength(1);
expect(ledger.completedTasks[0].id).toBe("TASK-001");
expect(ledger.completedTasks[0].status).toBe("stale");
});
it("does nothing when no tasks are stale", async () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Fresh Task",
"- **Status:** in_progress",
"- **Started:** 2026-02-15 09:00",
"- **Updated:** 2026-02-15 09:30",
"",
"# Completed",
].join("\n");
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8");
const now = new Date("2026-02-15T10:00:00");
const result = await reviewAndArchiveStaleTasks(tmpDir, undefined, now);
expect(result).not.toBeNull();
expect(result!.staleCount).toBe(0);
expect(result!.archivedCount).toBe(0);
});
it("supports custom maxAgeMs", async () => {
const content = [
"# Active Tasks",
"",
"## TASK-001: Semi-Fresh Task",
"- **Status:** in_progress",
"- **Started:** 2026-02-15 06:00",
"- **Updated:** 2026-02-15 06:00",
"",
"# Completed",
].join("\n");
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8");
const now = new Date("2026-02-15T10:00:00");
const oneHourMs = 60 * 60 * 1000;
// With 1-hour threshold, task is stale (4 hours old)
const result = await reviewAndArchiveStaleTasks(tmpDir, oneHourMs, now);
expect(result!.archivedCount).toBe(1);
});
});

View File

@@ -0,0 +1,416 @@
/**
* Task Ledger (TASKS.md) maintenance utilities.
*
* Parses and updates the structured task ledger file used by agents
* to track active work across compaction events. The sleep cycle uses
* these utilities to archive stale tasks (>24h with no activity).
*/
import fs from "node:fs/promises";
import path from "node:path";
// ============================================================================
// Types
// ============================================================================
export type TaskStatus = "in_progress" | "awaiting_input" | "blocked" | "done" | "stale" | string;
export type ParsedTask = {
/** Task ID (e.g. "TASK-001") */
id: string;
/** Short title */
title: string;
/** Current status */
status: TaskStatus;
/** When the task was started (ISO-ish string) */
started?: string;
/** When the task was last updated (ISO-ish string) */
updated?: string;
/** Task details/description */
details?: string;
/** Current step being worked on */
currentStep?: string;
/** What's blocking progress */
blockedOn?: string;
/** Raw markdown lines for this task section (for round-tripping) */
rawLines: string[];
/** Whether this task is in the completed section */
isCompleted: boolean;
};
export type TaskLedger = {
activeTasks: ParsedTask[];
completedTasks: ParsedTask[];
/** Lines before the first task section (header, etc.) */
preamble: string[];
/** Lines between active and completed sections */
sectionSeparator: string[];
/** Lines after the completed section */
postamble: string[];
};
export type StaleTaskResult = {
/** Number of tasks found that are stale */
staleCount: number;
/** Number of tasks archived (moved to completed) */
archivedCount: number;
/** Task IDs that were archived */
archivedIds: string[];
};
// ============================================================================
// Parsing
// ============================================================================
/**
* Parse a TASKS.md file content into structured task data.
*/
export function parseTaskLedger(content: string): TaskLedger {
const lines = content.split("\n");
const activeTasks: ParsedTask[] = [];
const completedTasks: ParsedTask[] = [];
const preamble: string[] = [];
const sectionSeparator: string[] = [];
const postamble: string[] = [];
let currentSection: "preamble" | "active" | "completed" | "postamble" = "preamble";
let currentTask: ParsedTask | null = null;
for (const line of lines) {
const trimmed = line.trim();
// Detect section headers
if (/^#\s+Active\s+Tasks/i.test(trimmed)) {
if (currentTask) {
pushTask(currentTask, activeTasks, completedTasks);
currentTask = null;
}
currentSection = "active";
preamble.push(line);
continue;
}
if (/^#\s+Completed/i.test(trimmed)) {
if (currentTask) {
pushTask(currentTask, activeTasks, completedTasks);
currentTask = null;
}
currentSection = "completed";
sectionSeparator.push(line);
continue;
}
// Detect task headers (## TASK-NNN: Title or ## ~~TASK-NNN: Title~~)
const taskMatch = trimmed.match(/^##\s+(?:~~)?(TASK-\d+):\s*(.+?)(?:~~)?$/);
if (taskMatch) {
if (currentTask) {
pushTask(currentTask, activeTasks, completedTasks);
}
const isStrikethrough = trimmed.includes("~~");
currentTask = {
id: taskMatch[1],
title: taskMatch[2].replace(/~~/g, "").trim(),
status: isStrikethrough ? "done" : "in_progress",
rawLines: [line],
isCompleted: currentSection === "completed" || isStrikethrough,
};
continue;
}
// Parse task fields (- **Field:** Value)
if (currentTask) {
const fieldMatch = trimmed.match(/^-\s+\*\*(.+?):\*\*\s*(.*)$/);
if (fieldMatch) {
const fieldName = fieldMatch[1].toLowerCase();
const value = fieldMatch[2].trim();
switch (fieldName) {
case "status":
currentTask.status = value;
break;
case "started":
currentTask.started = value;
break;
case "updated":
case "last updated":
currentTask.updated = value;
break;
case "details":
currentTask.details = value;
break;
case "current step":
currentTask.currentStep = value;
break;
case "blocked on":
currentTask.blockedOn = value;
break;
}
currentTask.rawLines.push(line);
continue;
}
// Non-field lines within a task
if (trimmed !== "" && !trimmed.startsWith("#")) {
currentTask.rawLines.push(line);
continue;
}
// Empty line within a task — include it
if (trimmed === "") {
currentTask.rawLines.push(line);
continue;
}
}
// Lines not part of a task
switch (currentSection) {
case "preamble":
case "active":
preamble.push(line);
break;
case "completed":
sectionSeparator.push(line);
break;
case "postamble":
postamble.push(line);
break;
}
}
// Push the last task
if (currentTask) {
pushTask(currentTask, activeTasks, completedTasks);
}
return { activeTasks, completedTasks, preamble, sectionSeparator, postamble };
}
function pushTask(task: ParsedTask, active: ParsedTask[], completed: ParsedTask[]) {
if (task.isCompleted || task.status === "done") {
completed.push(task);
} else {
active.push(task);
}
}
// ============================================================================
// Staleness Detection
// ============================================================================
/**
* Parse a date string from the task ledger.
* Accepts formats like "2026-02-14 09:15", "2026-02-14 09:15 MYT",
* "2026-02-14T09:15:00", etc.
*/
export function parseTaskDate(dateStr: string): Date | null {
if (!dateStr) {
return null;
}
const cleaned = dateStr
.trim()
// Remove timezone abbreviations like MYT, UTC, PST
.replace(/\s+[A-Z]{2,5}$/, "")
// Normalize space-separated date time to ISO
.replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})/, "$1T$2");
const date = new Date(cleaned);
if (Number.isNaN(date.getTime())) {
return null;
}
return date;
}
/**
* Find tasks that are stale (no update in more than `maxAgeMs` milliseconds).
* Default: 24 hours.
*/
export function findStaleTasks(
tasks: ParsedTask[],
now: Date = new Date(),
maxAgeMs: number = 24 * 60 * 60 * 1000,
): ParsedTask[] {
return tasks.filter((task) => {
// Only check active tasks (not already done/stale)
if (task.status === "done" || task.status === "stale") {
return false;
}
const lastUpdate = task.updated || task.started;
if (!lastUpdate) {
// No date info — consider stale if we can't determine age
return true;
}
const date = parseTaskDate(lastUpdate);
if (!date) {
return false; // Can't parse date — don't mark as stale
}
const ageMs = now.getTime() - date.getTime();
return ageMs > maxAgeMs;
});
}
// ============================================================================
// Task Ledger Serialization
// ============================================================================
/**
* Serialize a task back to markdown lines.
* If the task has rawLines from parsing, regenerate only the header and status
* (which may have changed) while preserving other raw content.
* For new/modified tasks without rawLines, generate from parsed fields.
*/
export function serializeTask(task: ParsedTask): string[] {
const titlePrefix = task.isCompleted
? `## ~~${task.id}: ${task.title}~~`
: `## ${task.id}: ${task.title}`;
// If we have rawLines and the task was only modified (status/updated changed
// by archival), rebuild from rawLines with updated field values.
if (task.rawLines.length > 0) {
const lines: string[] = [titlePrefix];
for (const line of task.rawLines.slice(1)) {
const trimmed = line.trim();
// Replace Status field with current value
if (/^-\s+\*\*Status:\*\*/.test(trimmed)) {
lines.push(`- **Status:** ${task.status}`);
} else if (/^-\s+\*\*(?:Updated|Last Updated):\*\*/.test(trimmed)) {
lines.push(`- **Updated:** ${task.updated ?? ""}`);
} else {
lines.push(line);
}
}
return lines;
}
// Fallback: generate from parsed fields (for newly created tasks)
const lines: string[] = [titlePrefix];
lines.push(`- **Status:** ${task.status}`);
if (task.started) {
lines.push(`- **Started:** ${task.started}`);
}
if (task.updated) {
lines.push(`- **Updated:** ${task.updated}`);
}
if (task.details) {
lines.push(`- **Details:** ${task.details}`);
}
if (task.currentStep) {
lines.push(`- **Current Step:** ${task.currentStep}`);
}
if (task.blockedOn) {
lines.push(`- **Blocked On:** ${task.blockedOn}`);
}
return lines;
}
/**
* Serialize the full task ledger back to markdown.
* Preserves preamble, section separators, and postamble from the original parse.
*/
export function serializeTaskLedger(ledger: TaskLedger): string {
const lines: string[] = [];
// Use original preamble if available, otherwise generate header
if (ledger.preamble.length > 0) {
lines.push(...ledger.preamble);
} else {
lines.push("# Active Tasks");
lines.push("");
}
// Active tasks
for (const task of ledger.activeTasks) {
lines.push(...serializeTask(task));
lines.push("");
}
// Use original section separator if available, otherwise generate
if (ledger.sectionSeparator.length > 0) {
lines.push(...ledger.sectionSeparator);
} else {
lines.push("# Completed");
lines.push("<!-- Move done tasks here with completion date -->");
}
lines.push("");
// Completed tasks
for (const task of ledger.completedTasks) {
lines.push(...serializeTask(task));
lines.push("");
}
// Preserve postamble
if (ledger.postamble.length > 0) {
lines.push(...ledger.postamble);
}
return lines.join("\n").trimEnd() + "\n";
}
// ============================================================================
// Sleep Cycle Integration
// ============================================================================
/**
* Review TASKS.md for stale tasks and archive them.
* This is called during the sleep cycle.
*
* @param workspaceDir - Path to the workspace directory
* @param maxAgeMs - Maximum age before a task is considered stale (default: 24h)
* @param now - Current time (for testing)
* @returns Result of the stale task review, or null if TASKS.md doesn't exist
*/
export async function reviewAndArchiveStaleTasks(
workspaceDir: string,
maxAgeMs: number = 24 * 60 * 60 * 1000,
now: Date = new Date(),
): Promise<StaleTaskResult | null> {
const tasksPath = path.join(workspaceDir, "TASKS.md");
let content: string;
try {
content = await fs.readFile(tasksPath, "utf-8");
} catch {
// TASKS.md doesn't exist — nothing to do
return null;
}
if (!content.trim()) {
return null;
}
const ledger = parseTaskLedger(content);
const staleTasks = findStaleTasks(ledger.activeTasks, now, maxAgeMs);
if (staleTasks.length === 0) {
return { staleCount: 0, archivedCount: 0, archivedIds: [] };
}
const archivedIds: string[] = [];
const nowStr = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}-${String(now.getDate()).padStart(2, "0")} ${String(now.getHours()).padStart(2, "0")}:${String(now.getMinutes()).padStart(2, "0")}`;
for (const task of staleTasks) {
task.status = "stale";
task.updated = nowStr;
task.isCompleted = true;
// Move from active to completed
const idx = ledger.activeTasks.indexOf(task);
if (idx !== -1) {
ledger.activeTasks.splice(idx, 1);
}
ledger.completedTasks.push(task);
archivedIds.push(task.id);
}
// Write back
const updated = serializeTaskLedger(ledger);
await fs.writeFile(tasksPath, updated, "utf-8");
return {
staleCount: staleTasks.length,
archivedCount: archivedIds.length,
archivedIds,
};
}

View File

@@ -0,0 +1,64 @@
import { describe, expect, it } from "vitest";
import { DEFAULT_COMPACTION_INSTRUCTIONS } from "./compact.js";
describe("DEFAULT_COMPACTION_INSTRUCTIONS", () => {
it("contains priority ordering with numbered items", () => {
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("1.");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("2.");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("3.");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("4.");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("5.");
});
it("prioritizes active tasks first", () => {
const taskLine = DEFAULT_COMPACTION_INSTRUCTIONS.indexOf("active or in-progress tasks");
const decisionsLine = DEFAULT_COMPACTION_INSTRUCTIONS.indexOf("Key decisions");
expect(taskLine).toBeLessThan(decisionsLine);
expect(taskLine).toBeGreaterThan(-1);
});
it("mentions TASKS.md for task ledger continuity", () => {
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("TASKS.md");
});
it("includes de-prioritization guidance", () => {
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("De-prioritize");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("casual conversation");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("completed tasks");
});
it("mentions exact values needed to resume work", () => {
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("file paths");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("URLs");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("IDs");
});
it("includes tool state preservation", () => {
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("Tool state");
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("browser sessions");
});
});
describe("compaction instructions merging", () => {
it("custom instructions are appended to defaults", () => {
const customInstructions = "Also remember to include user preferences.";
const merged = `${DEFAULT_COMPACTION_INSTRUCTIONS}\n\n${customInstructions}`;
// Defaults come first
expect(merged.indexOf("When summarizing")).toBeLessThan(merged.indexOf(customInstructions));
// Custom instructions are present
expect(merged).toContain(customInstructions);
// Defaults are not lost
expect(merged).toContain("active or in-progress tasks");
});
it("when no custom instructions, defaults are used alone", () => {
// Simulate the compaction path where customInstructions is undefined
const resolve = (custom?: string) =>
custom ? `${DEFAULT_COMPACTION_INSTRUCTIONS}\n\n${custom}` : DEFAULT_COMPACTION_INSTRUCTIONS;
const result = resolve(undefined);
expect(result).toBe(DEFAULT_COMPACTION_INSTRUCTIONS);
expect(result).not.toContain("\n\nundefined");
});
});

View File

@@ -0,0 +1,62 @@
import { describe, expect, it } from "vitest";
import { buildAgentSystemPrompt } from "../system-prompt.js";
describe("Task Ledger section", () => {
it("includes the Task Ledger section in full prompt mode", () => {
const prompt = buildAgentSystemPrompt({
workspaceDir: "/tmp/openclaw",
});
expect(prompt).toContain("## Task Ledger (TASKS.md)");
});
it("describes the task format with required fields", () => {
const prompt = buildAgentSystemPrompt({
workspaceDir: "/tmp/openclaw",
});
expect(prompt).toContain("**Status:**");
expect(prompt).toContain("**Started:**");
expect(prompt).toContain("**Updated:**");
expect(prompt).toContain("**Current Step:**");
});
it("mentions stale task archival by sleep cycle", () => {
const prompt = buildAgentSystemPrompt({
workspaceDir: "/tmp/openclaw",
});
expect(prompt).toContain("sleep cycle");
expect(prompt).toContain(">24h");
});
it("omits the section in minimal (subagent) prompt mode", () => {
const prompt = buildAgentSystemPrompt({
workspaceDir: "/tmp/openclaw",
promptMode: "minimal",
});
expect(prompt).not.toContain("## Task Ledger (TASKS.md)");
});
it("omits the section in none prompt mode", () => {
const prompt = buildAgentSystemPrompt({
workspaceDir: "/tmp/openclaw",
promptMode: "none",
});
expect(prompt).not.toContain("## Task Ledger (TASKS.md)");
});
});
describe("Post-Compaction Recovery", () => {
it("does NOT include a static recovery section (handled by framework injection)", () => {
const prompt = buildAgentSystemPrompt({
workspaceDir: "/tmp/openclaw",
});
// Recovery instructions are injected dynamically via post-compaction-recovery.ts,
// not baked into the system prompt (avoids wasting tokens on every turn).
expect(prompt).not.toContain("## Post-Compaction Recovery");
});
});

View File

@@ -625,6 +625,38 @@ export function buildAgentSystemPrompt(params: {
);
}
// Task Ledger instructions (skip for subagent/none modes)
if (!isMinimal) {
lines.push(
"## Task Ledger (TASKS.md)",
"Maintain a TASKS.md file in the workspace root to track active work across compaction events.",
"Update it whenever you start, progress, or complete a task. Format:",
"",
"```markdown",
"# Active Tasks",
"",
"## TASK-001: <short title>",
"- **Status:** in_progress | awaiting_input | blocked | done",
"- **Started:** YYYY-MM-DD HH:MM",
"- **Updated:** YYYY-MM-DD HH:MM",
"- **Details:** What this task is about",
"- **Current Step:** What you're doing right now",
"- **Blocked On:** (if applicable) What's preventing progress",
"",
"# Completed",
"<!-- Move done tasks here with completion date -->",
"```",
"",
"Rules:",
"- Create TASKS.md on first task if it doesn't exist.",
"- Update **Updated** timestamp and **Current Step** as you make progress.",
"- Move tasks to Completed when done; include completion date.",
"- Keep IDs sequential (TASK-001, TASK-002, etc.).",
"- Stale tasks (>24h with no update) may be auto-archived by the sleep cycle.",
"",
);
}
// Skip heartbeats for subagent/none modes
if (!isMinimal) {
lines.push(

View File

@@ -0,0 +1,79 @@
import { describe, expect, it } from "vitest";
import { makeTempWorkspace, writeWorkspaceFile } from "../test-helpers/workspace.js";
import {
DEFAULT_TASKS_FILENAME,
filterBootstrapFilesForSession,
loadWorkspaceBootstrapFiles,
} from "./workspace.js";
describe("TASKS.md bootstrap", () => {
it("DEFAULT_TASKS_FILENAME equals TASKS.md", () => {
expect(DEFAULT_TASKS_FILENAME).toBe("TASKS.md");
});
it("loadWorkspaceBootstrapFiles includes TASKS.md entry", async () => {
const tempDir = await makeTempWorkspace("openclaw-tasks-");
const files = await loadWorkspaceBootstrapFiles(tempDir);
const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME);
expect(tasksEntry).toBeDefined();
});
it("loads TASKS.md content when the file exists", async () => {
const tempDir = await makeTempWorkspace("openclaw-tasks-");
await writeWorkspaceFile({ dir: tempDir, name: "TASKS.md", content: "- [ ] finish tests" });
const files = await loadWorkspaceBootstrapFiles(tempDir);
const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME);
expect(tasksEntry).toBeDefined();
expect(tasksEntry!.missing).toBe(false);
expect(tasksEntry!.content).toBe("- [ ] finish tests");
});
it("marks TASKS.md as missing (not error) when the file does not exist", async () => {
const tempDir = await makeTempWorkspace("openclaw-tasks-");
const files = await loadWorkspaceBootstrapFiles(tempDir);
const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME);
expect(tasksEntry).toBeDefined();
expect(tasksEntry!.missing).toBe(true);
expect(tasksEntry!.content).toBeUndefined();
});
it("TASKS.md is in SUBAGENT_BOOTSTRAP_ALLOWLIST (kept for subagent sessions)", () => {
const files = [
{
name: DEFAULT_TASKS_FILENAME as const,
path: "/tmp/TASKS.md",
missing: false,
content: "tasks",
},
{ name: "SOUL.md" as const, path: "/tmp/SOUL.md", missing: false, content: "soul" },
];
const filtered = filterBootstrapFilesForSession(files, "agent:main:subagent:test-123");
const tasksKept = filtered.find((f) => f.name === DEFAULT_TASKS_FILENAME);
expect(tasksKept).toBeDefined();
});
it("filterBootstrapFilesForSession drops non-allowlisted files for subagent sessions", () => {
const files = [
{
name: DEFAULT_TASKS_FILENAME as const,
path: "/tmp/TASKS.md",
missing: false,
content: "tasks",
},
{ name: "SOUL.md" as const, path: "/tmp/SOUL.md", missing: false, content: "soul" },
];
const filtered = filterBootstrapFilesForSession(files, "agent:main:subagent:test-123");
const soulKept = filtered.find((f) => f.name === "SOUL.md");
expect(soulKept).toBeUndefined();
});
});

View File

@@ -28,6 +28,7 @@ export const DEFAULT_USER_FILENAME = "USER.md";
export const DEFAULT_HEARTBEAT_FILENAME = "HEARTBEAT.md";
export const DEFAULT_BOOTSTRAP_FILENAME = "BOOTSTRAP.md";
export const DEFAULT_MEMORY_FILENAME = "MEMORY.md";
export const DEFAULT_TASKS_FILENAME = "TASKS.md";
export const DEFAULT_MEMORY_ALT_FILENAME = "memory.md";
const WORKSPACE_STATE_DIRNAME = ".openclaw";
const WORKSPACE_STATE_FILENAME = "workspace-state.json";
@@ -87,6 +88,7 @@ export type WorkspaceBootstrapFileName =
| typeof DEFAULT_HEARTBEAT_FILENAME
| typeof DEFAULT_BOOTSTRAP_FILENAME
| typeof DEFAULT_MEMORY_FILENAME
| typeof DEFAULT_TASKS_FILENAME
| typeof DEFAULT_MEMORY_ALT_FILENAME;
export type WorkspaceBootstrapFile = {
@@ -444,6 +446,10 @@ export async function loadWorkspaceBootstrapFiles(dir: string): Promise<Workspac
name: DEFAULT_BOOTSTRAP_FILENAME,
filePath: path.join(resolvedDir, DEFAULT_BOOTSTRAP_FILENAME),
},
{
name: DEFAULT_TASKS_FILENAME,
filePath: path.join(resolvedDir, DEFAULT_TASKS_FILENAME),
},
];
entries.push(...(await resolveMemoryBootstrapEntries(resolvedDir)));
@@ -465,7 +471,11 @@ export async function loadWorkspaceBootstrapFiles(dir: string): Promise<Workspac
return result;
}
const MINIMAL_BOOTSTRAP_ALLOWLIST = new Set([DEFAULT_AGENTS_FILENAME, DEFAULT_TOOLS_FILENAME]);
const SUBAGENT_BOOTSTRAP_ALLOWLIST = new Set([
DEFAULT_AGENTS_FILENAME,
DEFAULT_TOOLS_FILENAME,
DEFAULT_TASKS_FILENAME,
]);
export function filterBootstrapFilesForSession(
files: WorkspaceBootstrapFile[],
@@ -474,7 +484,7 @@ export function filterBootstrapFilesForSession(
if (!sessionKey || (!isSubagentSessionKey(sessionKey) && !isCronSessionKey(sessionKey))) {
return files;
}
return files.filter((file) => MINIMAL_BOOTSTRAP_ALLOWLIST.has(file.name));
return files.filter((file) => SUBAGENT_BOOTSTRAP_ALLOWLIST.has(file.name));
}
export async function loadExtraBootstrapFiles(

View File

@@ -23,6 +23,7 @@ import {
resolveMemoryFlushSettings,
shouldRunMemoryFlush,
} from "./memory-flush.js";
import { markNeedsPostCompactionRecovery } from "./post-compaction-recovery.js";
import { incrementCompactionCount } from "./session-updates.js";
export async function runMemoryFlushIfNeeded(params: {
@@ -179,6 +180,16 @@ export async function runMemoryFlushIfNeeded(params: {
if (typeof nextCount === "number") {
memoryFlushCompactionCount = nextCount;
}
// P3: Mark session for post-compaction recovery on the next turn.
// This path handles flush-triggered compaction (memory flush forces a compact).
// The main path in agent-runner.ts handles SDK auto-compaction.
// These are mutually exclusive; setting true is idempotent.
await markNeedsPostCompactionRecovery({
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey: params.sessionKey,
storePath: params.storePath,
});
}
if (params.storePath && params.sessionKey) {
try {

View File

@@ -0,0 +1,62 @@
import { describe, expect, it } from "vitest";
import {
DEFAULT_MEMORY_FLUSH_PROMPT,
DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT,
resolveMemoryFlushSettings,
} from "./memory-flush.js";
describe("memory flush task checkpoint", () => {
describe("DEFAULT_MEMORY_FLUSH_PROMPT", () => {
it("includes task state extraction language", () => {
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("active task");
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("task name");
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("current step");
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("pending actions");
});
it("instructs to use memory_store with core category and importance 1.0", () => {
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("memory_store");
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("category 'core'");
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("importance 1.0");
});
});
describe("DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT", () => {
it("includes CRITICAL instruction about active tasks", () => {
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("CRITICAL");
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("active task");
});
it("instructs to save task state with core category", () => {
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("memory_store");
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("category='core'");
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("importance=1.0");
});
it("mentions task continuity after compaction", () => {
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("task continuity after compaction");
});
});
describe("resolveMemoryFlushSettings", () => {
it("returns prompts containing task-related keywords by default", () => {
const settings = resolveMemoryFlushSettings();
expect(settings).not.toBeNull();
expect(settings?.prompt).toContain("active task");
expect(settings?.prompt).toContain("memory_store");
expect(settings?.systemPrompt).toContain("CRITICAL");
expect(settings?.systemPrompt).toContain("task continuity");
});
it("preserves task checkpoint language alongside existing content", () => {
const settings = resolveMemoryFlushSettings();
expect(settings).not.toBeNull();
// Original content still present
expect(settings?.prompt).toContain("Pre-compaction memory flush");
expect(settings?.prompt).toContain("durable memories");
// New task checkpoint content also present
expect(settings?.prompt).toContain("current step");
expect(settings?.prompt).toContain("pending actions");
});
});
});

View File

@@ -36,6 +36,7 @@ import { appendUsageLine, formatResponseUsageLine } from "./agent-runner-utils.j
import { createAudioAsVoiceBuffer, createBlockReplyPipeline } from "./block-reply-pipeline.js";
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
import { createFollowupRunner } from "./followup-runner.js";
import { markNeedsPostCompactionRecovery } from "./post-compaction-recovery.js";
import { enqueueFollowupRun, type FollowupRun, type QueueSettings } from "./queue.js";
import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-threading.js";
import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
@@ -499,6 +500,16 @@ export async function runReplyAgent(params: {
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
contextTokensUsed,
});
// P3: Mark session for post-compaction recovery on the next turn.
// This path handles SDK auto-compaction (during the agent run itself).
// The memory-flush path in agent-runner-memory.ts handles flush-triggered compaction.
// These are mutually exclusive for a given compaction event; setting true is idempotent.
await markNeedsPostCompactionRecovery({
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey,
storePath,
});
if (verboseEnabled) {
const suffix = typeof count === "number" ? ` (count ${count})` : "";
finalPayloads = [{ text: `🧹 Auto-compaction complete${suffix}.` }, ...finalPayloads];

View File

@@ -41,6 +41,10 @@ import { runReplyAgent } from "./agent-runner.js";
import { applySessionHints } from "./body.js";
import { buildGroupChatContext, buildGroupIntro } from "./groups.js";
import { buildInboundMetaSystemPrompt, buildInboundUserContextPrefix } from "./inbound-meta.js";
import {
clearPostCompactionRecovery,
prependPostCompactionRecovery,
} from "./post-compaction-recovery.js";
import { resolveQueueSettings } from "./queue.js";
import { routeReply } from "./route-reply.js";
import { BARE_SESSION_RESET_PROMPT } from "./session-reset-prompt.js";
@@ -256,6 +260,18 @@ export async function runPreparedReply(
isNewSession,
prefixedBodyBase,
});
// P3: Prepend post-compaction recovery instructions if the previous turn
// triggered auto-compaction. This ensures the agent recalls task state from
// memory before responding to the user's next message.
prefixedBodyBase = prependPostCompactionRecovery(prefixedBodyBase, sessionEntry);
if (sessionEntry?.needsPostCompactionRecovery) {
await clearPostCompactionRecovery({
sessionEntry,
sessionStore,
sessionKey,
storePath,
});
}
prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext);
const threadStarterBody = ctx.ThreadStarterBody?.trim();
const threadHistoryBody = ctx.ThreadHistoryBody?.trim();

View File

@@ -12,12 +12,14 @@ export const DEFAULT_MEMORY_FLUSH_PROMPT = [
"Pre-compaction memory flush.",
"Store durable memories now (use memory/YYYY-MM-DD.md; create memory/ if needed).",
"IMPORTANT: If the file already exists, APPEND new content only and do not overwrite existing entries.",
"If there is an active task in progress, save its state: task name, current step, pending actions, and any critical variables. Use memory_store with category 'core' and importance 1.0 for active task state.",
`If nothing to store, reply with ${SILENT_REPLY_TOKEN}.`,
].join(" ");
export const DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT = [
"Pre-compaction memory flush turn.",
"The session is near auto-compaction; capture durable memories to disk.",
"CRITICAL: If there is an active task being worked on, you MUST save its current state (task name, step, pending actions, key variables) to memory_store with category='core' and importance=1.0. This ensures task continuity after compaction.",
`You may reply, but usually ${SILENT_REPLY_TOKEN} is correct.`,
].join(" ");

View File

@@ -0,0 +1,102 @@
import { describe, expect, it } from "vitest";
import {
getPostCompactionRecoveryPrompt,
POST_COMPACTION_RECOVERY_PROMPT,
prependPostCompactionRecovery,
} from "./post-compaction-recovery.js";
describe("post-compaction-recovery", () => {
describe("POST_COMPACTION_RECOVERY_PROMPT", () => {
it("is defined and non-empty", () => {
expect(POST_COMPACTION_RECOVERY_PROMPT).toBeTruthy();
expect(POST_COMPACTION_RECOVERY_PROMPT.length).toBeGreaterThan(0);
});
it("stays under 200 tokens (rough estimate: <800 chars)", () => {
// A rough heuristic: 1 token ≈ 4 chars. 200 tokens ≈ 800 chars.
expect(POST_COMPACTION_RECOVERY_PROMPT.length).toBeLessThan(800);
});
it("includes memory_recall instruction", () => {
expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("memory_recall");
});
it("includes TASKS.md instruction", () => {
expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("TASKS.md");
});
it("includes Context Reset notification template", () => {
expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("Context Reset");
});
});
describe("getPostCompactionRecoveryPrompt", () => {
it("returns null when entry is undefined", () => {
expect(getPostCompactionRecoveryPrompt(undefined)).toBeNull();
});
it("returns null when needsPostCompactionRecovery is false", () => {
const entry = {
sessionId: "test",
updatedAt: Date.now(),
needsPostCompactionRecovery: false,
};
expect(getPostCompactionRecoveryPrompt(entry)).toBeNull();
});
it("returns null when needsPostCompactionRecovery is not set", () => {
const entry = { sessionId: "test", updatedAt: Date.now() };
expect(getPostCompactionRecoveryPrompt(entry)).toBeNull();
});
it("returns the recovery prompt when needsPostCompactionRecovery is true", () => {
const entry = {
sessionId: "test",
updatedAt: Date.now(),
needsPostCompactionRecovery: true,
};
expect(getPostCompactionRecoveryPrompt(entry)).toBe(POST_COMPACTION_RECOVERY_PROMPT);
});
});
describe("prependPostCompactionRecovery", () => {
it("returns original body when no recovery needed", () => {
const body = "Hello, how are you?";
expect(prependPostCompactionRecovery(body, undefined)).toBe(body);
});
it("returns original body when flag is false", () => {
const body = "Hello, how are you?";
const entry = {
sessionId: "test",
updatedAt: Date.now(),
needsPostCompactionRecovery: false,
};
expect(prependPostCompactionRecovery(body, entry)).toBe(body);
});
it("prepends recovery prompt when flag is true", () => {
const body = "Hello, how are you?";
const entry = {
sessionId: "test",
updatedAt: Date.now(),
needsPostCompactionRecovery: true,
};
const result = prependPostCompactionRecovery(body, entry);
expect(result).toContain(POST_COMPACTION_RECOVERY_PROMPT);
expect(result).toContain(body);
expect(result.indexOf(POST_COMPACTION_RECOVERY_PROMPT)).toBeLessThan(result.indexOf(body));
});
it("separates recovery prompt from body with double newline", () => {
const body = "test message";
const entry = {
sessionId: "test",
updatedAt: Date.now(),
needsPostCompactionRecovery: true,
};
const result = prependPostCompactionRecovery(body, entry);
expect(result).toBe(`${POST_COMPACTION_RECOVERY_PROMPT}\n\n${body}`);
});
});
});

View File

@@ -0,0 +1,103 @@
import type { SessionEntry } from "../../config/sessions.js";
import { updateSessionStore } from "../../config/sessions.js";
/**
* Post-compaction recovery prompt injected into the next user message after
* auto-compaction completes. Instructs the agent to recall task state from
* memory and notify the user about the context reset.
*
* Kept under 200 tokens to minimize context overhead.
*/
export const POST_COMPACTION_RECOVERY_PROMPT = [
"[Post-compaction recovery — mandatory steps]",
"Context was just compacted. Before responding, you MUST:",
'1. Run memory_recall("active task") to check for saved task state.',
"2. Read TASKS.md if it exists in your workspace.",
"3. Compare recovered state against the compaction summary above.",
'4. Notify the user: "🔄 Context Reset — last task: [X], resuming from step [Y]" (or summarize what you recall).',
"Do NOT skip these steps. Proceed with the user's message after recovery.",
].join("\n");
/**
* Check whether the session needs post-compaction recovery and return the
* recovery prompt if so. Returns `null` when no recovery is needed.
*/
export function getPostCompactionRecoveryPrompt(entry?: SessionEntry): string | null {
if (!entry?.needsPostCompactionRecovery) {
return null;
}
return POST_COMPACTION_RECOVERY_PROMPT;
}
/**
* Prepend the post-compaction recovery prompt to the user's message body.
* Returns the original body unchanged if no recovery is needed.
*/
export function prependPostCompactionRecovery(body: string, entry?: SessionEntry): string {
const prompt = getPostCompactionRecoveryPrompt(entry);
if (!prompt) {
return body;
}
return `${prompt}\n\n${body}`;
}
/**
* Set or clear the post-compaction recovery flag on a session.
*/
async function setPostCompactionRecovery(
value: boolean,
params: {
sessionEntry?: SessionEntry;
sessionStore?: Record<string, SessionEntry>;
sessionKey?: string;
storePath?: string;
},
): Promise<void> {
const { sessionEntry, sessionStore, sessionKey, storePath } = params;
if (!sessionStore || !sessionKey) {
return;
}
const entry = sessionStore[sessionKey] ?? sessionEntry;
if (!entry) {
return;
}
sessionStore[sessionKey] = {
...entry,
needsPostCompactionRecovery: value,
};
if (storePath) {
await updateSessionStore(storePath, (store) => {
if (store[sessionKey]) {
store[sessionKey] = {
...store[sessionKey],
needsPostCompactionRecovery: value,
};
}
});
}
}
/**
* Mark a session as needing post-compaction recovery on the next turn.
*/
export async function markNeedsPostCompactionRecovery(params: {
sessionEntry?: SessionEntry;
sessionStore?: Record<string, SessionEntry>;
sessionKey?: string;
storePath?: string;
}): Promise<void> {
return setPostCompactionRecovery(true, params);
}
/**
* Clear the post-compaction recovery flag after recovery instructions have
* been injected into the prompt.
*/
export async function clearPostCompactionRecovery(params: {
sessionEntry?: SessionEntry;
sessionStore?: Record<string, SessionEntry>;
sessionKey?: string;
storePath?: string;
}): Promise<void> {
return setPostCompactionRecovery(false, params);
}

View File

@@ -82,6 +82,8 @@ export type SessionEntry = {
model?: string;
contextTokens?: number;
compactionCount?: number;
/** Set after auto-compaction; cleared after the next turn injects recovery instructions. */
needsPostCompactionRecovery?: boolean;
memoryFlushAt?: number;
memoryFlushCompactionCount?: number;
cliSessionIds?: Record<string, string>;